<a href="https://colab.research.google.com/github/manola1109/Recommender-system-with-Python/blob/main/Article_Recommendation_Non_Personalized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Share of all distinct users whose rating count an article must reach for Q1.
MIN_USER_SHARE = 0.05
# Q2 keeps only articles whose average rating is strictly above this value.
MIN_AVERAGE_RATING = 1.5
# Number of rows reported for every question.
TOP_N = 10


def summarize_ratings(ratings):
    """Return one row per article with its rating count and average rating.

    Args:
        ratings: DataFrame with at least ``article_id`` and ``rating`` columns.

    Returns:
        DataFrame with columns ``article_id``, ``rating_count`` (number of
        non-null ratings) and ``average_rating`` (mean of those ratings).
    """
    return ratings.groupby('article_id').agg(
        rating_count=('rating', 'count'),
        average_rating=('rating', 'mean'),
    ).reset_index()


def weighted_rating(article_stats, prior_mean, prior_weight):
    """Return each article's average rating shrunk towards ``prior_mean``.

    The result is ``(average * count + prior_mean * prior_weight) /
    (count + prior_weight)``: articles with few ratings are pulled towards the
    prior, articles with many ratings keep a value close to their own average.

    Args:
        article_stats: DataFrame with ``average_rating`` and ``rating_count``.
        prior_mean: Rating value the score is pulled towards.
        prior_weight: How many ratings' worth of weight the prior receives.

    Returns:
        Series aligned with ``article_stats`` holding the weighted rating.
    """
    counts = article_stats['rating_count']
    rating_sum = article_stats['average_rating'] * counts
    return (rating_sum + prior_mean * prior_weight) / (counts + prior_weight)


def top_articles(candidates, by, n=TOP_N):
    """Return the ``n`` rows of ``candidates`` with the largest ``by`` values.

    Args:
        candidates: DataFrame of already-filtered article statistics.
        by: Name of the column to rank on (descending).
        n: Maximum number of rows to return.

    Returns:
        DataFrame sorted by ``by`` in descending order, truncated to ``n`` rows.
    """
    return candidates.sort_values(by=by, ascending=False).head(n)


# When this file is run as a notebook cell or a script, ``__name__`` is
# "__main__", so every name assigned below is still a module-level global
# exactly as before. Importing the file only defines the helpers above.
if __name__ == "__main__":
    # Load the dataset
    df = pd.read_csv("train.csv")

    # An article qualifies for Q1 when its rating count reaches 5% of the
    # number of distinct users.
    total_users = df['user_id'].nunique()
    threshold = total_users * MIN_USER_SHARE

    article_stats = summarize_ratings(df)

    ### Q1: Top 10 Articles by Average Rating (rated by at least 5% of users)
    q1_df = article_stats[article_stats['rating_count'] >= threshold]
    q1_top_10 = top_articles(q1_df, by='average_rating')
    print("Q1: Top 10 articles based on average rating (with >= 5% user ratings):")
    print(q1_top_10)

    ### Q2: Most Read 10 Articles (average rating > 1.5)
    # The number of ratings is used as the measure of how much an article
    # was read.
    q2_df = article_stats[article_stats['average_rating'] > MIN_AVERAGE_RATING]
    q2_top_10 = top_articles(q2_df, by='rating_count')
    print("\nQ2: Top 10 most read articles with average rating > 1.5:")
    print(q2_top_10)

    ### Q3: Top 10 Articles by Weighted Rating
    # C is the mean over every individual rating row (each rating counts
    # once); it is not the mean of the per-article averages.
    C = df['rating'].mean()
    m = 2  # Minimum number of ratings required; also the weight given to C

    # The column is added only now, so the Q1/Q2 tables printed above do not
    # contain it.
    article_stats['weighted_rating'] = weighted_rating(article_stats, C, m)

    q3_df = article_stats[article_stats['rating_count'] >= m]
    q3_top_10 = top_articles(q3_df, by='weighted_rating')
    print("\nQ3: Top 10 articles based on weighted rating:")
    print(q3_top_10)


Q1: Top 10 articles based on average rating (with >= 5% user ratings):
      article_id  rating_count  average_rating
488          580            48        1.833333
1067        1249            58        1.706897
2364        2781            60        1.700000
1220        1433            61        1.639344
820          967           122        1.442623
182          221            64        1.421875
521          618            50        1.420000
1496        1755            54        1.407407
382          456            69        1.391304
2036        2388            52        1.384615

Q2: Top 10 most read articles with average rating > 1.5:
      article_id  rating_count  average_rating
1220        1433            61        1.639344
2364        2781            60        1.700000
1067        1249            58        1.706897
488          580            48        1.833333
2012        2361            42        1.523810
1163        1366            41        1.731707
1858        2178         