In [2]:
import pandas as pd

ratings = pd.read_csv('../../data/movielens/ratings.csv')
movies = pd.read_csv('../../data/movielens/movies.csv')


In [3]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

In [6]:
user_item_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


User-based CF

In [7]:
from sklearn.metrics.pairwise import cosine_similarity

user_similarity = cosine_similarity(user_item_matrix)

def predict_user_rating(user_item_matrix, user_similarity, user_id, movie_id):
    similar_users = user_similarity[user_id]
    rated_movies = user_item_matrix.loc[user_item_matrix.index != user_id, movie_id]
    similarity_sum = similar_users.sum()
    if similarity_sum == 0:
        return 0
    return (similar_users.dot(rated_movies) / similarity_sum)


Item-based CF

In [9]:
item_similarity = cosine_similarity(user_item_matrix.T)

def predict_item_rating(user_item_matrix, item_similarity, user_id, movie_id):
    rated_movies = user_item_matrix.loc[user_id]
    similarity_scores = item_similarity[movie_id]
    similarity_sum = similarity_scores.sum()
    if similarity_sum == 0:
        return 0
    return (similarity_scores.dot(rated_movies) / similarity_sum)


Prediction

In [10]:
def recommend_movies(user_item_matrix, user_similarity, user_id, n_recommendations=10):
    unrated_movies = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] == 0].index
    ratings = [(movie_id, predict_user_rating(user_item_matrix, user_similarity, user_id, movie_id)) for movie_id in unrated_movies]
    ratings.sort(key=lambda x: x[1], reverse=True)
    recommended_movies = ratings[:n_recommendations]
    return recommended_movies


# SVD

In [3]:
!pip3 install scikit-surprise

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25ldone
[?25h  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-macosx_12_0_arm64.whl size=525388 sha256=622be39a1f1e4ce1a49b5ec3e63286ba220696d724255dd7572c9cce2e47901a
  Stored in directory: /Users/metildachee/Library/Caches/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0

In [1]:
import pandas as pd

ratings = pd.read_csv('../../data/movielens/ratings.csv')
movies = pd.read_csv('../../data/movielens/movies.csv')


In [6]:
from surprise import Dataset, Reader
from surprise import SVD
import pandas as pd

def predict_top_movies(user_past_clicked_items, ratings_file_path, movies_file_path):
    # Load the ratings dataset
    reader = Reader(line_format='user item rating timestamp', sep=',', rating_scale=(1, 5), skip_lines=1)
    ratings = Dataset.load_from_file(ratings_file_path, reader=reader)

    # Load the movies dataset
    movies = pd.read_csv(movies_file_path)

    # Create a user-item matrix
    user_item_df = pd.DataFrame(user_past_clicked_items, columns=['userId', 'movieId', 'rating'])
    user_item_matrix = user_item_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)

    # Use the SVD algorithm
    algo = SVD()

    # Train the model on the full dataset
    trainset = ratings.build_full_trainset()
    algo.fit(trainset)

    # Predict ratings for all movies
    all_movies = movies['movieId'].unique()
    user_id = user_past_clicked_items[0]['userId']
    predictions = [(movie_id, algo.predict(user_id, movie_id).est) for movie_id in all_movies]

    # Sort the predictions by rating in descending order
    predictions.sort(key=lambda x: x[1], reverse=True)

    # Get the top recommended movies
    top_movies = predictions[:10]  # Get top 10 movies
    top_movie_ids = [movie_id for movie_id, _ in top_movies]

    # Get movie titles for the top recommended movies
    top_movie_titles = movies[movies['movieId'].isin(top_movie_ids)]['title']

    return top_movie_titles.tolist()

# Example usage
user_past_clicked_items = [{'userId': 1, 'movieId': 1, 'rating': 4},
                            {'userId': 1, 'movieId': 2, 'rating': 5},
                            {'userId': 1, 'movieId': 3, 'rating': 3}]
ratings_file_path = '../../data/movielens/ratings.csv'
movies_file_path = '../../data/movielens/movies.csv'

recommended_movies = predict_top_movies(user_past_clicked_items, ratings_file_path, movies_file_path)
print(recommended_movies)


['Toy Story (1995)', 'Jumanji (1995)', 'Grumpier Old Men (1995)', 'Waiting to Exhale (1995)', 'Father of the Bride Part II (1995)', 'Heat (1995)', 'Sabrina (1995)', 'Tom and Huck (1995)', 'Sudden Death (1995)', 'GoldenEye (1995)']


Content Based

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import pandas as pd

ratings_file_path = '../../data/movielens/ratings.csv'
movies_file_path = '../../data/movielens/movies.csv'
links_file_path = '../../data/movielens/links.csv'

ratings_df = pd.read_csv(ratings_file_path)
movies_df = pd.read_csv(movies_file_path)

# Merge ratings and movies data
movies_with_ratings = pd.merge(ratings_df, movies_df, on='movieId')

# Create a TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['genres'])


In [13]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

def get_recommendations(title, cosine_sim=cosine_sim):
    idx = movies_df.loc[movies_df['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Get the top 10 most similar movies
    movie_indices = [i[0] for i in sim_scores]
    return movies_df['title'].iloc[movie_indices].tolist()

In [14]:
print(get_recommendations('Toy Story (1995)'))

['Antz (1998)', 'Toy Story 2 (1999)', 'Adventures of Rocky and Bullwinkle, The (2000)', "Emperor's New Groove, The (2000)", 'Monsters, Inc. (2001)', 'Wild, The (2006)', 'Shrek the Third (2007)', 'Tale of Despereaux, The (2008)', 'Asterix and the Vikings (Astérix et les Vikings) (2006)', 'Turbo (2013)']
