In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

# Step 2: Load Dataset
def load_data(
    ratings_url='https://files.grouplens.org/datasets/movielens/ml-100k/u.data',
    movies_url='https://files.grouplens.org/datasets/movielens/ml-100k/u.item',
):
    """Load ratings and movie titles and join them on ``item_id``.

    Both locations are passed straight to ``pandas.read_csv``, so they may be
    URLs or local file paths (useful for offline runs or a cached copy).

    Args:
        ratings_url: Tab-separated, header-less ratings file whose columns
            are read as ``user_id``, ``item_id``, ``rating``, ``timestamp``.
        movies_url: Pipe-separated, latin-1 encoded movie file. Only its
            first two columns are read, as ``item_id`` and ``title``.

    Returns:
        A DataFrame with one row per rating and the matching ``title``
        attached. ``pd.merge`` defaults to an inner join, so ratings whose
        ``item_id`` has no movie entry are not included.
    """
    ratings = pd.read_csv(
        ratings_url,
        sep='\t',
        names=['user_id', 'item_id', 'rating', 'timestamp'],
    )
    # The movie file carries many extra columns; keep only the id and title.
    movies = pd.read_csv(
        movies_url,
        sep='|',
        encoding='latin-1',
        usecols=[0, 1],
        names=['item_id', 'title'],
    )

    return pd.merge(ratings, movies, on='item_id')

# Read both dataset files once and keep the merged ratings/titles frame
# for the steps below.
df = load_data()

# Step 3: Create User-Item Matrix
def create_user_item_matrix(df):
    """Pivot long-format ratings into a user-by-title rating table.

    Rows are indexed by ``user_id`` and columns by ``title``; each cell holds
    the ``rating`` for that pair (``pivot_table`` aggregates repeated pairs
    with its default, the mean). Pairs without any rating are filled with 0.
    """
    ratings_wide = pd.pivot_table(
        df,
        values='rating',
        index='user_id',
        columns='title',
    )
    return ratings_wide.fillna(0)

# Build the user-by-title rating table once; the similarity and
# recommendation steps below both read from it.
user_item_matrix = create_user_item_matrix(df)

# Step 4: Compute Cosine Similarity
def calculate_user_similarity(user_item_matrix):
    """Return a square user-by-user table of cosine similarities.

    Each row of ``user_item_matrix`` is treated as one user's rating vector.
    The result is labelled on both axes with the input's index, so
    ``result[a][b]`` is the similarity between users ``a`` and ``b``.
    """
    user_ids = user_item_matrix.index
    pairwise_scores = cosine_similarity(user_item_matrix)
    return pd.DataFrame(pairwise_scores, index=user_ids, columns=user_ids)

# Precompute the user-to-user cosine similarities from the rating table.
user_similarity_df = calculate_user_similarity(user_item_matrix)

# Step 5: Get Top-N Similar Users
def get_top_similar_users(user_id, similarity_df, top_n=5):
    """Return the ``top_n`` users most similar to ``user_id``, best first.

    Reads the ``user_id`` column of ``similarity_df``, orders it from highest
    to lowest similarity, and removes the user's own entry so they are never
    listed as their own neighbour. The result is a Series mapping neighbour
    id to similarity score.
    """
    ranked = similarity_df[user_id].sort_values(ascending=False)
    # Drop the self-match after sorting; it would otherwise top the list.
    neighbours = ranked.drop(labels=user_id)
    return neighbours.iloc[:top_n]

# Step 6: Recommend Movies
def recommend_movies(user_id, user_item_matrix, similarity_df, top_n=5, num_recommendations=5):
    """Score titles the user has not rated using their nearest neighbours.

    The ``top_n`` most similar users are looked up via
    ``get_top_similar_users``. Each neighbour's rating row is multiplied by
    that neighbour's similarity and the products are summed per title.
    Titles the target user has already rated (value > 0) are removed, and the
    ``num_recommendations`` highest-scoring remaining titles are returned as
    a Series of title -> score, highest first.
    """
    neighbours = get_top_similar_users(user_id, similarity_df, top_n)

    # Every title starts at 0.0 so titles no neighbour rated still appear.
    scores = pd.Series(0, index=user_item_matrix.columns, dtype='float64')
    for neighbour_id, weight in neighbours.items():
        contribution = user_item_matrix.loc[neighbour_id] * weight
        scores = scores.add(contribution, fill_value=0)

    # A positive entry in the user's own row means they already rated it.
    own_ratings = user_item_matrix.loc[user_id]
    already_rated = own_ratings[own_ratings > 0].index
    unseen_scores = scores.drop(already_rated, errors='ignore')

    ranked = unseen_scores.sort_values(ascending=False)
    return ranked.head(num_recommendations)

# Step 7: Run the Recommendation System
# The list length is named once so the printed header cannot drift from the
# number of recommendations actually requested.
test_user = 100
num_recommendations = 5
recommendations = recommend_movies(
    test_user,
    user_item_matrix,
    user_similarity_df,
    top_n=5,
    num_recommendations=num_recommendations,
)

print(f"\nTop {num_recommendations} Recommendations for User {test_user}:\n")
print(recommendations)



Top 5 Recommendations for User 100:

title
Cop Land (1997)                 12.494812
Devil's Advocate, The (1997)    11.330446
Edge, The (1997)                10.140343
Alien: Resurrection (1997)       8.367077
Murder at 1600 (1997)            8.176302
dtype: float64
