<a href="https://colab.research.google.com/github/manola1109/Recommender-system-with-Python/blob/main/article_recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 📚 Article Recommender System
This notebook implements three collaborative-filtering methods — user-based, item-based, and matrix factorization (SVD) — and averages their outputs to predict user–article ratings.

In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from scipy.sparse.linalg import svds
import warnings
warnings.filterwarnings('ignore')

# -----------------------------
# Step 1: Load Data
# -----------------------------
# Read the three input tables, in this order, from the working directory.
train, articles, test = (
    pd.read_csv(csv_path)
    for csv_path in ('train.csv', 'article_info.csv', 'test.csv')
)

# Reshape the long ratings table into a user x article grid. Pairs that do not
# occur in `train` come out as NaN; a zero-filled copy is kept separately so
# the similarity computations receive a fully numeric matrix.
ratings_matrix = train.pivot(
    index='user_id',
    columns='article_id',
    values='rating',
)
ratings_matrix_filled = ratings_matrix.fillna(0)

# -----------------------------
# Step 2: User-Based CF
# -----------------------------
# Pairwise cosine similarity between the rows (users) of the zero-filled grid,
# then labelled on both axes with the user ids for lookup by label.
user_similarity = cosine_similarity(ratings_matrix_filled)
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=ratings_matrix.index,
    columns=ratings_matrix.index,
)

def predict_user_based(user_id, article_id, default=3.0):
    """Predict a rating as the similarity-weighted mean of other users' ratings.

    Reads the module-level ``ratings_matrix`` (NaN where a user has not rated
    an article) and ``user_similarity_df`` (user-by-user similarity scores).

    Args:
        user_id: Label of the user to predict for.
        article_id: Label of the article to predict for.
        default: Value returned when no prediction can be formed.

    Returns:
        float: The mean of the ratings given to ``article_id``, each weighted
        by the rater's similarity to ``user_id``. ``default`` is returned when
        the article or the user is unknown, when nobody rated the article, or
        when the similarity weights do not yield a finite result.

    Note:
        If ``user_id`` already has a rating for ``article_id`` in
        ``ratings_matrix``, that rating takes part in the average, weighted by
        the user's similarity to themselves.
    """
    if article_id not in ratings_matrix.columns:
        return default
    # A user that is absent from the similarity table (e.g. one that only
    # appears at prediction time) would otherwise raise KeyError below.
    if user_id not in user_similarity_df.columns:
        return default

    ratings = ratings_matrix[article_id].dropna()
    if ratings.empty:
        return default

    # Similarities between the target user and exactly the users who rated it.
    weights = user_similarity_df.loc[ratings.index, user_id]
    weight_total = weights.sum()
    # With no overlap between the target user and any rater the weights sum to
    # zero and the ratio would be 0/0 (NaN); fall back instead.
    if not np.isfinite(weight_total) or weight_total == 0:
        return default

    pred = np.dot(weights.to_numpy(), ratings.to_numpy()) / weight_total
    return float(pred) if np.isfinite(pred) else default

# -----------------------------
# Step 3: Item-Based CF
# -----------------------------
# Transposing the zero-filled grid puts articles on the rows, so the pairwise
# cosine similarities computed here are article-to-article.
item_similarity = cosine_similarity(ratings_matrix_filled.T)
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=ratings_matrix.columns,
    columns=ratings_matrix.columns,
)

def predict_item_based(user_id, article_id, default=3.0):
    """Predict a rating as the similarity-weighted mean of the user's own ratings.

    Reads the module-level ``ratings_matrix`` (NaN where a user has not rated
    an article) and ``item_similarity_df`` (article-by-article similarities).

    Args:
        user_id: Label of the user to predict for.
        article_id: Label of the article to predict for.
        default: Value returned when no prediction can be formed.

    Returns:
        float: The mean of the ratings ``user_id`` has given, each weighted by
        the similarity between the rated article and ``article_id``.
        ``default`` is returned when the article or the user is unknown, when
        the user has no ratings, or when the similarity weights do not yield a
        finite result.

    Note:
        If ``user_id`` already has a rating for ``article_id`` in
        ``ratings_matrix``, that rating takes part in the average, weighted by
        the article's similarity to itself.
    """
    if article_id not in ratings_matrix.columns:
        return default
    # A user with no row in the ratings table would otherwise raise KeyError
    # on the .loc lookup below.
    if user_id not in ratings_matrix.index:
        return default

    user_ratings = ratings_matrix.loc[user_id].dropna()
    if user_ratings.empty:
        return default

    # Similarities between the target article and exactly the articles the
    # user has rated.
    weights = item_similarity_df.loc[user_ratings.index, article_id]
    weight_total = weights.sum()
    # If none of the user's articles is similar to the target, the weights sum
    # to zero and the ratio would be 0/0 (NaN); fall back instead.
    if not np.isfinite(weight_total) or weight_total == 0:
        return default

    pred = np.dot(weights.to_numpy(), user_ratings.to_numpy()) / weight_total
    return float(pred) if np.isfinite(pred) else default

# -----------------------------
# Step 4: Matrix Factorization (SVD)
# -----------------------------
# Unrated cells are replaced by 0 before factorizing.
# NOTE(review): the factorization therefore treats "not rated" as a rating of
# 0, which pulls reconstructed values for unseen pairs toward 0. Consider
# mean-centering each user's ratings first — confirm the intended behaviour.
ratings_filled = ratings_matrix.fillna(0)

# svds needs a floating-point matrix and a rank strictly smaller than the
# smallest matrix dimension, so cap the 50 requested factors for small inputs.
n_factors = min(50, min(ratings_filled.shape) - 1)
U, sigma, Vt = svds(ratings_filled.to_numpy(dtype=float), k=n_factors)

# Rebuild the low-rank approximation U * diag(sigma) * Vt and label it with
# the same user / article ids as the ratings matrix.
sigma = np.diag(sigma)
pred_ratings = np.dot(np.dot(U, sigma), Vt)
svd_preds = pd.DataFrame(pred_ratings, index=ratings_matrix.index, columns=ratings_matrix.columns)

def predict_svd(user_id, article_id, default=3.0):
    """Look up the SVD-reconstructed rating for a user/article pair.

    Reads the module-level ``svd_preds`` table.

    Args:
        user_id: Row label to look up.
        article_id: Column label to look up.
        default: Value returned when the pair cannot be looked up.

    Returns:
        float: The value stored in ``svd_preds`` for the pair, or ``default``
        when either label is missing from the table.
    """
    try:
        return float(svd_preds.loc[user_id, article_id])
    # Only lookup failures mean "unknown pair". A bare ``except`` would also
    # swallow KeyboardInterrupt / SystemExit and hide genuine bugs.
    except (KeyError, TypeError):
        return default

# -----------------------------
# Step 5: RMSE Evaluation on Validation Set
# -----------------------------
def _ensemble_rating(user_id, article_id, default=3.0):
    """Average the user-based, item-based and SVD predictions for one pair.

    Non-finite components (NaN / inf) are left out of the average so that one
    failed predictor cannot poison the result; ``default`` is returned when no
    component is usable.
    """
    components = np.array(
        [
            predict_user_based(user_id, article_id),
            predict_item_based(user_id, article_id),
            predict_svd(user_id, article_id),
        ],
        dtype=float,
    )
    usable = components[np.isfinite(components)]
    return float(usable.mean()) if usable.size else default


# NOTE(review): the predictors read matrices built from the full `train`
# frame, which contains every row of `val_set`, and `train_set` is not used to
# refit them. The RMSE below is therefore measured on ratings the models have
# already seen and is optimistic. Rebuild the matrices from `train_set` only
# to get an honest estimate.
train_set, val_set = train_test_split(train, test_size=0.2, random_state=42)

# Walk the id columns directly: unlike iterrows(), this keeps each column's
# own dtype instead of coercing every row to one common dtype.
val_preds = [
    _ensemble_rating(uid, aid)
    for uid, aid in zip(val_set['user_id'], val_set['article_id'])
]
val_truth = val_set['rating'].tolist()

# RMSE is the square root of scikit-learn's mean squared error.
rmse_score = np.sqrt(mean_squared_error(val_truth, val_preds))
print(f"Validation RMSE Score: {rmse_score:.4f}")

# -----------------------------
# Step 6: Make Predictions on Test Set
# -----------------------------
# One [user_id, article_id, rating] record per test row, in test order.
predictions = [
    [uid, aid, _ensemble_rating(uid, aid)]
    for uid, aid in zip(test['user_id'], test['article_id'])
]

# -----------------------------
# Step 7: Save Predictions
# -----------------------------
submission = pd.DataFrame(predictions, columns=['user_id', 'article_id', 'rating'])
submission.to_csv('submission.csv', index=False)
print("Submission file 'submission.csv' generated.")


Validation RMSE Score: 0.6047
Submission file 'submission.csv' generated.
