 Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

 Load Datasets

In [None]:
# Read the three source tables (users, posts, and the user/post engagement log).
users, posts, engagements = map(
    pd.read_csv,
    ("/content/Users.csv", "/content/Posts.csv", "/content/Engagements.csv"),
)

# Quick sanity check on the (rows, columns) of each table.
print("Users:", users.shape)
print("Posts:", posts.shape)
print("Engagements:", engagements.shape)


Users: (50, 5)
Posts: (100, 4)
Engagements: (1000, 3)


In [None]:
# Preview the first rows of every table, in load order.
display(users.head(), posts.head(), engagements.head())

Unnamed: 0,user_id,age,gender,top_3_interests,past_engagement_score
0,U1,24,F,"sports, art, gaming",0.61
1,U2,32,F,"travel, food, fashion",0.93
2,U3,28,Other,"sports, travel, fashion",0.4
3,U4,25,M,"fashion, music, tech",0.53
4,U5,24,M,"fashion, food, fitness",0.8


Unnamed: 0,post_id,creator_id,content_type,tags
0,P1,U44,video,"sports, food"
1,P2,U26,video,"music, travel"
2,P3,U32,text,"sports, travel"
3,P4,U6,image,"music, gaming"
4,P5,U32,image,"food, fashion"


Unnamed: 0,user_id,post_id,engagement
0,U1,P52,1
1,U1,P44,0
2,U1,P1,1
3,U1,P4,1
4,U1,P65,0


Preprocessing (handle missing values, build the user–post engagement matrix)

In [None]:
# Missing interests / tags become empty strings so they can be vectorized as text.
users['top_3_interests'] = users['top_3_interests'].fillna(value="")
posts['tags'] = posts['tags'].fillna(value="")

# User x post engagement matrix: the maximum recorded engagement per
# (user, post) pair, with 0 for pairs that never appear in the log.
user_post_matrix = pd.pivot_table(
    engagements,
    values="engagement",
    index="user_id",
    columns="post_id",
    aggfunc="max",
).fillna(0)

 Content-Based Filtering (Users ↔ Posts)

In [None]:
# Step 1: TF-IDF encode each user's declared interests (one row per user,
# in the same order as the `users` table).
user_vectorizer = TfidfVectorizer(
    stop_words="english",
)
user_tfidf = user_vectorizer.fit_transform(users.loc[:, 'top_3_interests'])

In [None]:
# Step 2: Vectorize post tags in the SAME feature space as the user interests.
# Cosine similarity compares vectors column by column, so both matrices must
# share one vocabulary (and one set of IDF weights). Fitting a second,
# independent vectorizer on the tags gives columns that are only aligned by
# coincidence (or a shape error when the vocabularies differ in size).
# Re-using the fitted user vectorizer guarantees alignment; tags that no user
# lists as an interest are ignored, which is harmless because they could not
# match any user anyway.
post_vectorizer = user_vectorizer  # kept under the old name for later cells
post_tfidf = post_vectorizer.transform(posts['tags'])

In [None]:
# Step 3: Pairwise cosine similarity between every user and every post.
# Rows follow the order of `users`, columns follow the order of `posts`.
# NOTE(review): the scores are only meaningful when both TF-IDF matrices
# use the same feature columns.
content_similarity = cosine_similarity(X=user_tfidf, Y=post_tfidf)

 Collaborative Filtering (Engagement Patterns)

In [None]:
# Factorise the user x post engagement matrix into 20 latent dimensions
# (fixed random_state so reruns produce the same factors).
svd = TruncatedSVD(n_components=20, random_state=42)
# One row per user, in the order of user_post_matrix.index.
latent_user_matrix = svd.fit_transform(user_post_matrix)
# components_ is only available after fitting. Its columns follow
# user_post_matrix.columns, which need not match the row order of `posts`.
latent_post_matrix = svd.components_

In [None]:
# Predicted engagement: multiply the latent user factors by the latent post
# factors to rebuild a dense user x post score matrix. Rows / columns follow
# user_post_matrix.index / user_post_matrix.columns.
collab_scores = latent_user_matrix @ latent_post_matrix

 Hybrid Recommendation

In [None]:
def recommend_top_3(user_id, alpha=0.6, k=3):
    """
    Recommend the top posts for a user by blending content and engagement scores.

    The hybrid score of a post is
    ``alpha * content_similarity + (1 - alpha) * collaborative_score``.
    Posts the user has already engaged with (``engagement == 1``) are never
    recommended.

    Parameters
    ----------
    user_id : str
        Identifier from ``users['user_id']``.
    alpha : float, default 0.6
        Weight of the content-based score; ``1 - alpha`` is the weight of the
        collaborative score.
    k : int, default 3
        Maximum number of posts to return.

    Returns
    -------
    pandas.DataFrame or list
        Up to ``k`` rows of ``posts`` (columns ``post_id``, ``content_type``,
        ``tags``) ordered from best to worst score. Fewer rows are returned
        when fewer than ``k`` un-engaged posts exist. An empty list is
        returned when ``user_id`` is not present in ``users``.
    """
    user_positions = np.flatnonzero((users['user_id'] == user_id).to_numpy())
    if user_positions.size == 0:
        return []

    # Rows of content_similarity are positional (same order as `users`), so
    # use the row position rather than the DataFrame index label.
    content_scores = np.asarray(content_similarity[user_positions[0]], dtype=float)

    post_ids = posts['post_id']

    # Collaborative scores are laid out in the column order of the pivoted
    # engagement matrix (sorted post ids, only posts that have engagements).
    # Re-align them to the row order of `posts` before blending; posts with
    # no engagement data get a neutral score of 0.
    if user_id in user_post_matrix.index:
        collab_index = user_post_matrix.index.get_loc(user_id)
        collab_user_scores = (
            pd.Series(collab_scores[collab_index], index=user_post_matrix.columns)
            .reindex(post_ids.to_numpy())
            .fillna(0.0)
            .to_numpy()
        )
    else:
        collab_user_scores = np.zeros(len(posts))

    # Hybrid score (a fresh array, so the shared score matrices stay untouched)
    hybrid_scores = alpha * content_scores + (1 - alpha) * collab_user_scores

    # Exclude already engaged posts
    engaged_rows = (engagements['user_id'] == user_id) & (engagements['engagement'] == 1)
    engaged_posts = engagements.loc[engaged_rows, 'post_id']
    hybrid_scores[post_ids.isin(engaged_posts).to_numpy()] = -np.inf

    # Best first; drop excluded (-inf) or undefined (NaN) scores so they can
    # never be returned when few candidates remain.
    ranked = np.argsort(hybrid_scores)[::-1]
    ranked = ranked[np.isfinite(hybrid_scores[ranked])][:max(int(k), 0)]
    return posts.iloc[ranked][['post_id', 'content_type', 'tags']]

Test Recommendations

In [None]:
# Smoke test: recommendations for the first user in the table.
sample_user = users['user_id'].iat[0]
print(f"Top 3 recommendations for user {sample_user}:")
display(recommend_top_3(sample_user))


Top 3 recommendations for user U1:


Unnamed: 0,post_id,content_type,tags
32,P33,image,gaming
74,P75,image,gaming
57,P58,video,"food, sports"


 Evaluation (Precision@3 Example)

In [None]:
def precision_at_k(user_id, k=3):
    """
    Fraction of the first ``k`` recommended posts that the user engaged with.

    Parameters
    ----------
    user_id : str
        User to evaluate.
    k : int, default 3
        Cut-off rank. ``recommend_top_3`` is called with its defaults, so at
        most that many recommendations are available regardless of ``k``.

    Returns
    -------
    float or None
        ``len(top-k recommendations ∩ engaged posts) / k``, or ``None`` when
        the user has no positive engagement to compare against.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.

    Notes
    -----
    NOTE(review): the ground truth here is the set of posts with
    ``engagement == 1`` in ``engagements``, which is the same set that
    ``recommend_top_3`` removes from its candidates. As written the overlap
    is therefore always empty and the metric is 0.0 by construction. A
    meaningful evaluation needs held-out engagements that the recommender
    was not built from.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    engaged_rows = (engagements['user_id'] == user_id) & (engagements['engagement'] == 1)
    actual = set(engagements.loc[engaged_rows, 'post_id'])
    if not actual:
        # Nothing to compare against; skip the (comparatively costly) recommender call.
        return None

    recommendations = recommend_top_3(user_id)
    # recommend_top_3 returns a plain empty list for users it does not know.
    predicted = recommendations['post_id'].tolist() if len(recommendations) else []
    return len(set(predicted[:k]) & actual) / float(k)

In [None]:
# Example evaluation: Precision@3 for the second user in the table.
test_user = users['user_id'].iat[1]
print("Precision@3 for user", test_user, ":", precision_at_k(test_user))

Precision@3 for user U2 : 0.0
