<a href="https://colab.research.google.com/github/josh130588/MLAIMAR2024/blob/main/Assignment04_RecommendationSystems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
from scipy.sparse.linalg import svds
import numpy as np
from sklearn.metrics import mean_squared_error

# 1. Loading the Dataset
file_path = 'jokes-data.csv'
df = pd.read_csv(file_path)
df.head()

# 2. Data Preprocessing
df.fillna(0, inplace=True)
user_item_matrix = df.pivot_table(index='user_id', columns='joke_id', values='Rating')
user_item_matrix_norm = user_item_matrix.sub(user_item_matrix.mean(axis=1), axis=0)
user_item_matrix_norm.head()

# 3. Building the Recommendation System
# Convert the DataFrame to a NumPy array before passing it to svds
U, sigma, Vt = svds(user_item_matrix_norm.fillna(0).to_numpy(), k=50)
sigma = np.diag(sigma)
predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_item_matrix.mean(axis=1).values.reshape(-1, 1)
predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=user_item_matrix.columns, index=user_item_matrix.index)
predicted_ratings_df.head()

# 4. Evaluating the Recommendation System
actual_ratings = user_item_matrix.values.flatten()
predicted_ratings_flat = predicted_ratings_df.values.flatten()

# Handle potential NaNs in BOTH actual and predicted ratings
actual_ratings = np.nan_to_num(actual_ratings)  # Replace NaNs with zeros in actual ratings as well
predicted_ratings_flat = np.nan_to_num(predicted_ratings_flat)  # Replace NaNs with zeros

rmse = np.sqrt(mean_squared_error(actual_ratings, predicted_ratings_flat))
print(f'RMSE: {rmse}')

# 5. Making Recommendations
def recommend_jokes(user_id, num_recommendations):
    user_predicted_ratings = predicted_ratings_df.loc[user_id]
    sorted_jokes = user_predicted_ratings.sort_values(ascending=False)
    recommendations = sorted_jokes.head(num_recommendations)
    return recommendations

recommendations = recommend_jokes(user_id=1, num_recommendations=5)
print(recommendations)

RMSE: 3.158454999198309
joke_id
79    9.883790
26    9.556643
62    9.478234
66    9.219894
25    9.134533
Name: 1, dtype: float64
