In [1]:
import ast

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


In [2]:
# Simulated users.csv -- written row by row (one tuple per user), then
# pivoted into the column-oriented dict that the DataFrame is built from.
user_columns = (
    "user_id",
    "preferred_categories",
    "historical_avg_sentiment",
    "avg_engagement_time",
)
user_rows = [
    (1, ["tutorial", "opinion"], 0.8, 210),
    (2, ["case-study"], 0.4, 180),
    (3, ["opinion", "tutorial"], 0.6, 240),
]
users_data = {
    column: list(values)
    for column, values in zip(user_columns, zip(*user_rows))
}
users_df = pd.DataFrame(users_data)
users_df.to_csv("users.csv", index=False)
users_df


Unnamed: 0,user_id,preferred_categories,historical_avg_sentiment,avg_engagement_time
0,1,"[tutorial, opinion]",0.8,210
1,2,[case-study],0.4,180
2,3,"[opinion, tutorial]",0.6,240


In [3]:
# Simulated final_sentiment_engagement_data.csv -- one tuple per post,
# pivoted into the column-oriented dict that the DataFrame is built from.
post_columns = ("post_id", "category", "relevance_score", "avg_read_time_seconds")
post_rows = [
    (101, "tutorial", 1.8, 240),
    (102, "opinion", 1.2, 150),
    (103, "case-study", 0.9, 180),
    (104, "tutorial", 1.6, 200),
    (105, "opinion", 0.8, 220),
    (106, "case-study", 1.1, 160),
]
posts_data = {
    column: list(values)
    for column, values in zip(post_columns, zip(*post_rows))
}
posts_df = pd.DataFrame(posts_data)
posts_df.to_csv("final_sentiment_engagement_data.csv", index=False)
posts_df


Unnamed: 0,post_id,category,relevance_score,avg_read_time_seconds
0,101,tutorial,1.8,240
1,102,opinion,1.2,150
2,103,case-study,0.9,180
3,104,tutorial,1.6,200
4,105,opinion,0.8,220
5,106,case-study,1.1,160


In [4]:
def _parse_preferred_categories(raw):
    """Return a user's preferred categories in a form ``Series.isin`` accepts.

    Non-string values are returned unchanged. A string that is the repr of a
    list/tuple/set (which is how a list cell looks after being written to and
    read back from CSV) is parsed into a list; any other non-empty string is
    treated as a single category label; a blank string yields an empty list.
    """
    if not isinstance(raw, str):
        return raw

    text = raw.strip()
    if not text:
        return []

    if text[0] in "[({" and text[-1] in "])}":
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, TypeError, SyntaxError):
            parsed = None
        if isinstance(parsed, (list, tuple, set)):
            return list(parsed)

    # Not a parseable collection literal: use the whole string as one label.
    return [text]


def recommend_for_user(user, posts_df, time_scale=300, per_category=3, top_n=5):
    """Rank posts from a user's preferred categories by personalization score.

    The score of a post is
    ``|relevance_score - historical_avg_sentiment|
    + |avg_read_time_seconds - avg_engagement_time| / time_scale``;
    lower is better.

    Parameters
    ----------
    user : mapping
        Must provide ``'preferred_categories'``, ``'historical_avg_sentiment'``
        and ``'avg_engagement_time'``. ``'preferred_categories'`` may be a
        list-like of labels, a single label string, or the string repr of a
        list (as produced by a CSV round-trip).
    posts_df : pandas.DataFrame
        Must contain the columns ``'post_id'``, ``'category'``,
        ``'relevance_score'`` and ``'avg_read_time_seconds'``. It is not
        modified; the function works on a filtered copy.
    time_scale : float, default 300
        Divisor applied to the read-time difference before it is added to the
        score difference. Must be positive.
    per_category : int, default 3
        Maximum number of posts kept from each category.
    top_n : int, default 5
        Maximum number of rows returned overall.

    Returns
    -------
    pandas.DataFrame
        Columns ``['post_id', 'category', 'personalization_score']``, ordered
        from best (lowest score) to worst, keeping the index labels of
        ``posts_df``.

    Raises
    ------
    ValueError
        If ``time_scale`` is not positive.
    """
    if time_scale <= 0:
        raise ValueError("time_scale must be positive")

    categories = _parse_preferred_categories(user['preferred_categories'])

    # Copy so the helper columns added below never touch the caller's frame.
    preferred_posts = posts_df[posts_df['category'].isin(categories)].copy()

    preferred_posts['score_diff'] = (
        preferred_posts['relevance_score'] - user['historical_avg_sentiment']
    ).abs()
    preferred_posts['time_diff'] = (
        preferred_posts['avg_read_time_seconds'] - user['avg_engagement_time']
    ).abs()

    # Final personalization score (lower is better)
    preferred_posts['personalization_score'] = (
        preferred_posts['score_diff'] + preferred_posts['time_diff'] / time_scale
    )

    # Sort first so that head() keeps the best posts of each category.
    preferred_posts = preferred_posts.sort_values('personalization_score')
    top_posts = preferred_posts.groupby('category').head(per_category)
    top_recommendations = top_posts.nsmallest(top_n, 'personalization_score')

    return top_recommendations[['post_id', 'category', 'personalization_score']]


In [5]:
# Build one recommendation table per user, keyed by user_id, and echo each
# table as it is produced.
recommendations = {}

for idx, user_row in users_df.iterrows():
    # Copy only the fields recommend_for_user needs out of the row.
    user = {
        field: user_row[field]
        for field in (
            "user_id",
            "preferred_categories",
            "historical_avg_sentiment",
            "avg_engagement_time",
        )
    }
    top_recs = recommend_for_user(user, posts_df)
    recommendations[user["user_id"]] = top_recs
    print(f"\nUser {user['user_id']} Recommendations:\n", top_recs)



User 1 Recommendations:
    post_id  category  personalization_score
4      105   opinion               0.033333
1      102   opinion               0.600000
3      104  tutorial               0.833333
0      101  tutorial               1.100000

User 2 Recommendations:
    post_id    category  personalization_score
2      103  case-study               0.500000
5      106  case-study               0.766667

User 3 Recommendations:
    post_id  category  personalization_score
4      105   opinion               0.266667
1      102   opinion               0.900000
3      104  tutorial               1.133333
0      101  tutorial               1.200000


In [6]:
# Write each user's recommendation table to its own CSV file (no index column).
for user_id, recs in recommendations.items():
    output_path = f"user_{user_id}_recommendations.csv"
    recs.to_csv(output_path, index=False)
