In [13]:
import os 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD


In [2]:
%cd /content/drive/MyDrive/SINA

/content/drive/MyDrive/SINA


In [3]:
!unzip /content/drive/MyDrive/SINA/ml-latest-small.zip

Archive:  /content/drive/MyDrive/SINA/ml-latest-small.zip
   creating: ml-latest-small/
  inflating: ml-latest-small/links.csv  
  inflating: ml-latest-small/tags.csv  
  inflating: ml-latest-small/ratings.csv  
  inflating: ml-latest-small/README.txt  
  inflating: ml-latest-small/movies.csv  


In [20]:
links = pd.read_csv('/content/drive/MyDrive/SINA/ml-latest-small/links.csv')
movies = pd.read_csv('/content/drive/MyDrive/SINA/ml-latest-small/movies.csv')
ratings = pd.read_csv('/content/drive/MyDrive/SINA/ml-latest-small/ratings.csv')
tags =pd.read_csv('/content/drive/MyDrive/SINA/ml-latest-small/tags.csv')

In [21]:
# Merge movies and ratings dataframes
data = pd.merge(ratings, movies, on='movieId')

# Create a user-item matrix
matrix = data.pivot(index='userId', columns='movieId', values='rating').fillna(0)
matrix_sparse = csr_matrix(matrix.values)


In [22]:
# Perform matrix factorization
model = TruncatedSVD(n_components=100, random_state=42)
matrix_reduced = model.fit_transform(matrix_sparse)


In [23]:
movie_id_title_map = dict(zip(movies['movieId'], movies['title']))


In [32]:
def recommend_movies(user_id, top_n=3):
    # Get the user's predicted ratings
    user_ratings = matrix_reduced[user_id - 1]

    # Get the indices of the top-rated movies
    top_movie_indices = np.argsort(-user_ratings)[:top_n]

    # Get the movie IDs, titles, and predicted ratings of the top-rated movies
    top_movie_ids = []
    top_movie_titles = []
    top_movie_ratings = []
    for index in top_movie_indices:
        movie_id = index + 1
        if movie_id in movie_id_title_map:
            top_movie_ids.append(movie_id)
            top_movie_titles.append(movie_id_title_map[movie_id])
            rating = min(user_ratings[index], 5.0)  # Cap the rating at 5.0 if it exceeds
            top_movie_ratings.append(rating)

    return pd.DataFrame({'userID': [user_id] * len(top_movie_ids), 'title': top_movie_titles, 'movieId': top_movie_ids, 'predicted_rating': top_movie_ratings})


In [33]:
user_id = 1  # Example user ID
top_movies = recommend_movies(user_id)


In [34]:
top_movies

Unnamed: 0,userID,title,movieId,predicted_rating
0,1,Toy Story (1995),1,5.0
1,1,Beautiful Girls (1996),94,5.0
2,1,Screamers (1995),76,5.0
