<a href="https://colab.research.google.com/github/kdats/regulation_of_post_virality/blob/main/collab_filter_for_AI_recommentations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
import random

# Load the raw engagement export
file_path = '/content/engagement_data_set.csv'
interaction_df = pd.read_csv(file_path)

# Discard the unnamed columns (presumably index columns left behind by earlier
# CSV round-trips). errors='ignore' keeps this cell runnable on an export that
# lacks some of them instead of raising KeyError; the non-inplace form avoids
# mutating a frame that another name might still reference.
interaction_df = interaction_df.drop(
    columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0'],
    errors='ignore',
)

# Preprocessing: treat missing like/comment counts as zero
interaction_df['numbr_likes'] = interaction_df['numbr_likes'].fillna(0)
interaction_df['number_comments'] = interaction_df['number_comments'].fillna(0)

# Engagement score = likes + comments
interaction_df['engagement_score'] = interaction_df['numbr_likes'] + interaction_df['number_comments']

# Inspection only: compare the shape with duplicate post_ids removed against the
# full shape. The de-duplicated frame is NOT assigned, so no rows are dropped here.
interaction_df.drop_duplicates(["post_id"] ).shape, interaction_df.shape



((5714, 30), (5731, 30))

In [22]:

# Collapse the data to one row per (profile_id, post_id) pair, summing the like
# and comment counts of any repeated rows.
# NOTE(review): the previous cell's output shows more rows than unique post_ids;
# if those repeats are re-exports of the same post, summing double-counts them —
# confirm whether they should be de-duplicated first.
interaction_df = interaction_df.groupby(['profile_id', 'post_id'], as_index=False).agg({
    'numbr_likes': 'sum',
    'number_comments': 'sum'
})

# Engagement score = likes + comments (recomputed on the aggregated counts)
interaction_df['engagement_score'] = interaction_df['numbr_likes'] + interaction_df['number_comments']
# Keep only pairs with some engagement
interaction_df = interaction_df[interaction_df['engagement_score'] > 0]
# User-item matrix: rows are profile_ids, columns are post_ids, cells are
# engagement scores; pairs that never occur become 0
user_item_matrix = interaction_df.pivot(index='profile_id', columns='post_id', values='engagement_score').fillna(0)

# Matrix density (fraction of non-zero cells). An empty matrix (no row survived
# the filter above) is reported as 0 instead of raising ZeroDivisionError.
n_cells = user_item_matrix.size
density = np.count_nonzero(user_item_matrix.to_numpy()) / n_cells if n_cells else 0.0
print(f"Matrix Density: {density * 100:.2f}%")

# Reduce sparsity: always drop users, then posts, whose total engagement does not
# strictly exceed the threshold (this runs regardless of the density printed above)
min_engagement_threshold = 2  # totals must be > this value to be kept
user_item_matrix = user_item_matrix.loc[user_item_matrix.sum(axis=1) > min_engagement_threshold]
user_item_matrix = user_item_matrix.loc[:, user_item_matrix.sum(axis=0) > min_engagement_threshold]

# Ensure matrix is of type float
user_item_matrix = user_item_matrix.astype(float)

# Perform Singular Value Decomposition (SVD), re-checking density after filtering
MIN_DENSITY = 0.01      # minimum fraction of non-zero cells required to attempt SVD
N_LATENT_FACTORS = 5    # upper bound on the number of singular values requested

# Density of the filtered matrix; an empty matrix counts as 0 rather than
# raising ZeroDivisionError.
n_cells = user_item_matrix.size
filtered_density = np.count_nonzero(user_item_matrix.to_numpy()) / n_cells if n_cells else 0.0
# svds (default solver) requires 1 <= k <= min(shape) - 1, so cap the rank for
# small matrices instead of letting svds raise.
n_factors = min(N_LATENT_FACTORS, min(user_item_matrix.shape) - 1)

if filtered_density > MIN_DENSITY and n_factors >= 1:
    # Pass a plain ndarray: svds is documented for arrays, sparse matrices and
    # LinearOperators, not DataFrames.
    U, sigma, Vt = svds(user_item_matrix.to_numpy(), k=n_factors)
    sigma = np.diag(sigma)

    # Low-rank reconstruction = predicted engagement score for every user/post pair
    predicted_scores = np.dot(np.dot(U, sigma), Vt)
    predicted_df = pd.DataFrame(predicted_scores, columns=user_item_matrix.columns, index=user_item_matrix.index)

    def recommend_posts(user_id, num_recommendations=5):
        """Return the post IDs with the highest predicted engagement for a user.

        Args:
            user_id: a label from ``predicted_df.index`` (a profile_id).
            num_recommendations: maximum number of post IDs to return.

        Returns:
            A list of post IDs ordered by descending predicted score, or the
            string "User not found in data." when ``user_id`` is not in the
            prediction matrix.
        """
        if user_id not in predicted_df.index:
            return "User not found in data."
        user_row = predicted_df.loc[user_id].sort_values(ascending=False)
        return user_row.index[:num_recommendations].tolist()

    def regulated_recommend_posts(user_id, num_recommendations=5, seed=None):
        """Return recommendations that mix top-scored posts with random posts.

        Collaborative-filtering picks and randomly sampled posts are interleaved
        (best pick, random post, second-best pick, ...), de-duplicated, and cut
        to ``num_recommendations`` so that the random posts actually appear in
        the result instead of being truncated away.

        Args:
            user_id: a label from ``predicted_df.index`` (a profile_id).
            num_recommendations: maximum number of post IDs to return.
            seed: optional seed for the random sample; pass a value to make the
                result reproducible. ``None`` uses the global ``random`` state.

        Returns:
            A list of unique post IDs, or the "User not found in data." string
            from ``recommend_posts`` when the user is unknown.
        """
        recommended_posts = recommend_posts(user_id, num_recommendations)
        # recommend_posts signals an unknown user with a message string; pass it
        # through rather than slicing and concatenating it with a list.
        if isinstance(recommended_posts, str):
            return recommended_posts

        # Add diversity: sample additional content uniformly from all posts
        all_posts = user_item_matrix.columns.tolist()
        rng = random.Random(seed) if seed is not None else random
        additional_posts = rng.sample(all_posts, min(num_recommendations, len(all_posts)))

        # Interleave the two sources, skipping duplicates, until the limit is hit.
        # No timestamp survives the aggregation step, so ordering is by
        # interleaved rank rather than by recency.
        seen = set()
        unique_recommendations = []
        for cf_post, random_post in zip(recommended_posts, additional_posts):
            for post in (cf_post, random_post):
                if post not in seen and len(unique_recommendations) < num_recommendations:
                    seen.add(post)
                    unique_recommendations.append(post)

        return unique_recommendations

    # Example usage: take a user that survived the filtering, so it is
    # guaranteed to be present in predicted_df
    user_id_example = user_item_matrix.index[0]
    print("Basic Recommendations:", recommend_posts(user_id=user_id_example))
    print("Regulated Recommendations:", regulated_recommend_posts(user_id=user_id_example))
elif filtered_density > MIN_DENSITY:
    print("Matrix too small for SVD: need at least 2 users and 2 posts.")
else:
    print("Matrix density too low for collaborative filtering.")

Matrix Density: 0.02%
Matrix density too low for collaborative filtering.
