In [2]:
'''
Build a simple user-based recommender by clustering users with K-Means 
(an unsupervised algorithm) and then suggesting items popular in a user’s cluster.
'''
#1. Setup & Data Loading
#Install dependencies (if not already):

#pip install pandas numpy scikit-learn
#Download the MovieLens 100K dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip

#Extract and place u.data and u.item in your working directory.
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

# Load ratings: user_id, item_id, rating, timestamp
ratings = pd.read_csv(
    'D:\\Downloads\\recommendationenginedataset\\ml-100k\\u.data', sep='\t',
    names=['user_id','item_id','rating','timestamp']
)

# Load movie titles
movies = pd.read_csv(
    'D:\\Downloads\\recommendationenginedataset\\ml-100k\\u.item', sep='|', encoding='latin-1',
    names=[
      'item_id','title','release_date','video_release_date','IMDb_URL',
      'unknown','Action','Adventure','Animation','Children','Comedy','Crime',
      'Documentary','Drama','Fantasy','Film-Noir','Horror','Musical',
      'Mystery','Romance','Sci-Fi','Thriller','War','Western'
    ],
    usecols=['item_id','title']
)

In [4]:
#2. Build & Normalize User–Item Matrix
#Create a matrix where rows are users, columns are movies, and cells are ratings (0 if missing).
# Pivot to user–item matrix
user_item = ratings.pivot_table(
    index='user_id', columns='item_id', values='rating'
).fillna(0)

# Optional: normalize each user vector (e.g., subtract mean) to center ratings
user_means = user_item.replace(0, np.NaN).mean(axis=1)
user_item_norm = user_item.sub(user_means, axis=0).fillna(0)


In [5]:
#3. Cluster Users with K-Means
#Group similar users into k clusters. Here we choose k=10.
k = 10
kmeans = KMeans(n_clusters=k, random_state=42)
# Fit on normalized ratings
clusters = kmeans.fit_predict(user_item_norm)

# Attach cluster labels to each user
user_clusters = pd.DataFrame({
    'user_id': user_item_norm.index,
    'cluster': clusters
}).set_index('user_id')




In [6]:
'''
Generate Recommendations for a User
For a given target_user_id:

Find their cluster.

Compute average ratings for each movie across all users in that cluster.

Exclude movies the target user has already rated.

Recommend top-N movies by cluster average.

'''
def recommend_for_user(target_user_id, n_recommendations=5):
    # Cluster of the target user
    cluster_id = user_clusters.loc[target_user_id, 'cluster']
    # All users in same cluster
    members = user_clusters[
        user_clusters['cluster']==cluster_id
    ].index
    
    # Compute mean rating per movie in cluster
    cluster_ratings = user_item.loc[members].mean(axis=0)
    
    # Movies the user has already rated
    watched = ratings[ratings.user_id==target_user_id].item_id.tolist()
    
    # Filter out watched and sort
    recommendations = (
        cluster_ratings.drop(labels=watched)
                       .sort_values(ascending=False)
                       .head(n_recommendations)
                       .index
    )
    # Map item_ids back to titles
    return movies.set_index('item_id').loc[recommendations].title.tolist()

In [7]:
# Example: recommend 5 movies for user #50
print("Top 5 recommendations for User 50:")
for title in recommend_for_user(50):
    print("-", title)

Top 5 recommendations for User 50:
- Contact (1997)
- Titanic (1997)
- Full Monty, The (1997)
- Air Force One (1997)
- L.A. Confidential (1997)
