In [3]:
import os
import requests
import zipfile

# URL of the MovieLens dataset
url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
zip_file = 'ml-latest-small.zip'

# Download the dataset
response = requests.get(url)
with open(zip_file, 'wb') as file:
    file.write(response.content)

# Extract the dataset
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall()

# Remove the zip file
os.remove(zip_file)


In [4]:
import pandas as pd

# Load the dataset
ratings_file = 'ml-latest-small/ratings.csv'
movies_file = 'ml-latest-small/movies.csv'

ratings = pd.read_csv(ratings_file)
movies = pd.read_csv(movies_file)

# Merge ratings and movies dataframes
data = pd.merge(ratings, movies, on='movieId')


In [5]:
user_movie_matrix = data.pivot_table(index='userId', columns='title', values='rating')


In [6]:
from sklearn.metrics.pairwise import cosine_similarity

# User-based collaborative filtering
user_similarity = cosine_similarity(user_movie_matrix.fillna(0))
user_similarity_df = pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)


In [7]:
def recommend_movies(user_id, user_movie_matrix, user_similarity_df, num_recommendations=5):
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:]  # Exclude the user itself
    similar_users_ratings = user_movie_matrix.loc[similar_users]

    # Calculate weighted average of ratings from similar users
    weighted_ratings = similar_users_ratings.T.dot(user_similarity_df[user_id].sort_values(ascending=False)[1:])

    # Exclude movies already rated by the user
    user_rated_movies = user_movie_matrix.loc[user_id].dropna().index
    recommendations = weighted_ratings.drop(user_rated_movies).sort_values(ascending=False).head(num_recommendations)
    return recommendations

# Example usage
user_id = 1
recommendations = recommend_movies(user_id, user_movie_matrix, user_similarity_df)
print(f"Recommendations for User {user_id}:")
print(recommendations)


Recommendations for User 1:
title
'71 (2014)                                NaN
'Hellboy': The Seeds of Creation (2004)   NaN
'Round Midnight (1986)                    NaN
'Salem's Lot (2004)                       NaN
'Til There Was You (1997)                 NaN
dtype: float64
