In [2]:
# ✅ Step 1: Import Libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import os, zipfile, urllib.request

# ✅ Step 2: Download and extract MovieLens dataset automatically
dataset_url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
zip_path = "ml-latest-small.zip"
extract_folder = "ml-latest-small"

# Download the zip if not present
if not os.path.exists(zip_path):
    print("Downloading dataset...")
    urllib.request.urlretrieve(dataset_url, zip_path)

# Extract if not already extracted
if not os.path.exists(extract_folder):
    print("Extracting dataset...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(".")

# ✅ Step 3: Load ratings and movies data
ratings = pd.read_csv(f"{extract_folder}/ratings.csv")
movies = pd.read_csv(f"{extract_folder}/movies.csv")

# ✅ Step 4: Merge to get movie titles with ratings
data = pd.merge(ratings, movies, on='movieId')

# ✅ Step 5: Create user-item rating matrix
user_movie_matrix = data.pivot_table(index='userId', columns='title', values='rating').fillna(0)

# ✅ Step 6: Compute cosine similarity between users
user_similarity = cosine_similarity(user_movie_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)

# ✅ Step 7: Recommendation Function
def recommend_movies(user_id, top_n_users=5, top_n_movies=5):
    if user_id not in user_movie_matrix.index:
        return "User ID not found in dataset."

    # Step 1: Find similar users
    similar_users = user_similarity_df[user_id].sort_values(ascending=False)[1:top_n_users+1].index

    # Step 2: Movies already rated by target user
    user_rated = set(user_movie_matrix.columns[user_movie_matrix.loc[user_id] > 0])

    # Step 3: Get ratings from similar users
    similar_users_ratings = user_movie_matrix.loc[similar_users]

    # Step 4: Average and recommend unrated movies
    avg_ratings = similar_users_ratings.mean().sort_values(ascending=False)
    recommendations = avg_ratings[~avg_ratings.index.isin(user_rated)]

    return recommendations.head(top_n_movies)

# ✅ Step 8: Try with a user (e.g., user 1)
print("🎯 Top movie recommendations for User 1:")
print(recommend_movies(user_id=1))



Downloading dataset...
Extracting dataset...
🎯 Top movie recommendations for User 1:
title
Aliens (1986)                       4.8
Hunt for Red October, The (1990)    4.3
Blade Runner (1982)                 4.0
Godfather, The (1972)               4.0
Die Hard (1988)                     4.0
dtype: float64
