In [None]:
pip install pandas numpy scikit-learn surprise


Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357248 sha256=f2ca0cc16cd6042483a7a0bab1bf0be4c13fd2a0b5cc3f872f9dfe00ce9980ae
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully inst

In [None]:
pip install ipywidgets


Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi
Successfully installed jedi-0.19.1


In [None]:
import io
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# Build one CSV-only, single-file upload control per dataset
_upload_options = dict(accept='.csv', multiple=False)
upload_movies = widgets.FileUpload(**_upload_options)
upload_ratings = widgets.FileUpload(**_upload_options)

# Render both controls: movies first, then ratings
display(upload_movies, upload_ratings)

# Function to convert the uploaded file content to a DataFrame
def load_uploaded_file(uploaded):
    """Parse the first uploaded file of a ``FileUpload`` value into a DataFrame.

    Args:
        uploaded: The widget's ``value``. Two layouts are accepted: a mapping
            of filename -> file-info dict (ipywidgets 7.x), or a sequence of
            file-info dicts (ipywidgets 8.x). Each file-info dict must carry
            the raw CSV bytes under the ``'content'`` key.

    Returns:
        pandas.DataFrame read from the first file's CSV content.

    Raises:
        ValueError: If ``uploaded`` contains no files.
    """
    # Mapping-style values keep the file-info dicts in .values();
    # sequence-style values are already an iterable of file-info dicts.
    entries = uploaded.values() if hasattr(uploaded, 'values') else uploaded
    first = next(iter(entries), None)
    if first is None:
        raise ValueError('No file has been uploaded yet.')
    # bytes() normalises memoryview content before it is wrapped in BytesIO
    return pd.read_csv(io.BytesIO(bytes(first['content'])))

# Load the datasets once the files are uploaded
def load_datasets(change):
    """Observer callback: once both uploads hold a file, merge and preview them.

    Args:
        change: Change notification supplied by the widget's observe
            mechanism; it is not inspected here.
    """
    # Nothing to do until each widget holds an upload
    if not (upload_movies.value and upload_ratings.value):
        return

    movies_df = load_uploaded_file(upload_movies.value)
    ratings_df = load_uploaded_file(upload_ratings.value)

    # Join every rating to its movie row through the shared 'movieId'
    # column, then show the first few rows of the result
    display(pd.merge(ratings_df, movies_df, on='movieId').head())

# Re-run the loader whenever either upload widget's value changes
for _uploader in (upload_movies, upload_ratings):
    _uploader.observe(load_datasets, names='value')


FileUpload(value={}, accept='.csv', description='Upload')

FileUpload(value={}, accept='.csv', description='Upload')

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,16,4.0,1217897793,Casino (1995),Crime|Drama
1,9,16,4.0,842686699,Casino (1995),Crime|Drama
2,12,16,1.5,1144396284,Casino (1995),Crime|Drama
3,24,16,4.0,963468757,Casino (1995),Crime|Drama
4,29,16,3.0,836820223,Casino (1995),Crime|Drama


In [None]:
import pandas as pd
from google.colab import files

# Upload files. `files.upload()` returns a dict of the files picked in the
# dialog (file name -> raw bytes).
uploaded = files.upload()


def _read_uploaded_csv(uploaded_files, stem):
    """Load the CSV whose name starts with `stem` from this cell's upload.

    Colab does not overwrite an existing file: a repeated upload of
    'movies.csv' is saved under a new name such as 'movies (2).csv', so
    reading the fixed path 'movies.csv' would silently return an older
    upload. Parsing the bytes returned by `files.upload()` always uses the
    file that was just selected.

    Falls back to '<stem>.csv' in the working directory when the upload
    contains no matching file (for example, the dialog was cancelled).
    """
    import io  # local import: this cell's import lines do not bring in io

    for filename, content in uploaded_files.items():
        if filename.startswith(stem):
            return pd.read_csv(io.BytesIO(content))
    return pd.read_csv(f'{stem}.csv')


# Expect one upload named like 'movies*.csv' and one like 'ratings*.csv'
movies = _read_uploaded_csv(uploaded, 'movies')
ratings = _read_uploaded_csv(uploaded, 'ratings')

# Merge datasets
data = pd.merge(ratings, movies, on='movieId')


Saving movies.csv to movies (2).csv
Saving ratings.csv to ratings (1).csv


In [None]:
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

from surprise.model_selection import KFold

# Fixed seed so the cross-validation splits and the SVD initialisation are
# the same on every run of this cell
SEED = 42

# Load data into Surprise; ratings are declared to lie on a 0.5-5.0 scale
reader = Reader(rating_scale=(0.5, 5.0))
data_surprise = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)

# Evaluate the SVD algorithm with seeded 5-fold cross-validation and keep
# the per-fold scores for later inspection
algo = SVD(random_state=SEED)
cv_results = cross_validate(
    algo,
    data_surprise,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=SEED, shuffle=True),
    verbose=True,
)

# Train on the full dataset
trainset = data_surprise.build_full_trainset()
algo.fit(trainset)


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8750  0.8673  0.8729  0.8674  0.8639  0.8693  0.0041  
MAE (testset)     0.6743  0.6686  0.6718  0.6696  0.6634  0.6696  0.0036  
Fit time          1.58    1.65    1.58    1.59    2.32    1.75    0.29    
Test time         0.24    0.13    0.23    0.14    0.15    0.18    0.05    


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7b5372c946a0>

In [None]:
def get_top_n_recommendations(user_id, n=10, exclude_rated=True):
    """Return the titles of the movies with the highest predicted rating for a user.

    Args:
        user_id: Raw user id, matched against the 'userId' column of `data`.
        n: Maximum number of titles to return.
        exclude_rated: When True (default), movies the user has already
            rated in `data` are left out, so only unseen titles are
            recommended. Pass False to rank every movie in `data`.

    Returns:
        List of title strings ordered from highest to lowest predicted rating.
    """
    # Candidate movies: every movie id present in the ratings data ...
    movie_ids = data['movieId'].unique()
    if exclude_rated:
        # ... minus the ones this user has already rated
        seen = set(data.loc[data['userId'] == user_id, 'movieId'])
        movie_ids = [movie_id for movie_id in movie_ids if movie_id not in seen]

    # Predict a rating for each candidate and keep the n best estimates
    predictions = [algo.predict(user_id, movie_id) for movie_id in movie_ids]
    top_n = sorted(predictions, key=lambda pred: pred.est, reverse=True)[:n]

    # Build the id -> title lookup once instead of scanning `movies` for
    # every result; on duplicate ids the first row wins
    titles = movies.drop_duplicates('movieId').set_index('movieId')['title']
    return [titles.loc[int(pred.iid)] for pred in top_n]

# Show the ten best-predicted titles for the user whose ID is 1
top_titles = get_top_n_recommendations(user_id=1, n=10)
print(top_titles)


['Monty Python and the Holy Grail (1975)', 'Godfather, The (1972)', 'Princess Mononoke (Mononoke-hime) (1997)', 'Spirited Away (Sen to Chihiro no kamikakushi) (2001)', "Schindler's List (1993)", 'Godfather: Part II, The (1974)', 'Pi (1998)', 'Silence of the Lambs, The (1991)', 'Departed, The (2006)', 'Shawshank Redemption, The (1994)']
