In [43]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from math import sqrt

# User ratings
user_ratings = [
    {'title': 'Breakfast Club, The', 'rating': 4},
    {'title': 'Toy Story', 'rating': 2.5},
    {'title': 'Jumanji', 'rating': 3},
    {'title': "Pulp Fiction", 'rating': 4.5},
    {'title': 'Akira', 'rating': 5}
]

# Convert user ratings to a DataFrame
input_movie = pd.DataFrame(user_ratings)
# Load movie and rating data
movie = pd.read_csv('../data/movies.csv')
rating = pd.read_csv('../data/ratings.csv')
title_year = movie['title'].str.extract(r'^(.*?)\s*\((\d{4})\)\s*$')
movie['title'] = title_year[0].str.strip()
movie['year'] = title_year[1]
# Add movieId to user ratings
input_movie = input_movie.merge(movie[['title', 'movieId']], on='title', how='inner')
print(input_movie)


# Filter users who have watched movies that the input has watched
users = rating[rating['movieId'].isin(input_movie['movieId'].tolist())]

# Group by userId
user_subset_group = users.groupby(['userId'])

# Calculate Pearson correlation
pearson_cor_dict = {}
for name, group in user_subset_group:
    group = group.sort_values(by='movieId')
    input_movie = input_movie.sort_values(by='movieId')

    n = len(group)
    temp = input_movie[input_movie['movieId'].isin(group['movieId'].tolist())]
    temp_rating_list = temp['rating'].tolist()
    temp_group_list = group['rating'].tolist()

    sxx = sum([i**2 for i in temp_rating_list]) - pow(sum(temp_rating_list), 2) / float(n)
    syy = sum([i**2 for i in temp_group_list]) - pow(sum(temp_group_list), 2) / float(n)
    sxy = sum(i * j for i, j in zip(temp_rating_list, temp_group_list)) - sum(temp_rating_list) * sum(temp_group_list) / float(n)

    if sxx != 0 and syy != 0:
        pearson_cor_dict[name] = sxy / sqrt(sxx * syy)
    else:
        pearson_cor_dict[name] = 0

# Create a DataFrame from the Pearson correlation dictionary
pearson_df = pd.DataFrame.from_dict(pearson_cor_dict, orient='index')
pearson_df.columns = ['similarityIndex']
pearson_df['userId'] = pearson_df.index
pearson_df.index = range(len(pearson_df))


# Select top users
top_users = pearson_df.sort_values(by='similarityIndex', ascending=False)[:50]


# Merge with rating data
top_users['userId'] = top_users['userId'].astype(str)

top_users['userId'] = top_users['userId'].str.extract(r'\((\d+),\)').astype(int).astype(str)

print(top_users.head())
rating['userId'] = rating['userId'].astype(str)
top_users_rating = top_users.merge(rating, left_on='userId', right_on='userId', how='inner')

# Calculate weighted rating
top_users_rating['weightedRating'] = top_users_rating['similarityIndex'] * top_users_rating['rating']

# Group by movieId and calculate sums
temp_top_users_rating = top_users_rating.groupby('movieId').sum()[['similarityIndex', 'weightedRating']]
temp_top_users_rating.columns = ['sum_similarityIndex', 'sum_weightedRating']

# Create an empty dataframe
recommendation_df = pd.DataFrame()

# Calculate the weighted average recommendation score
recommendation_df['weighted_average_recommendation_score'] = temp_top_users_rating['sum_weightedRating'] / temp_top_users_rating['sum_similarityIndex']
recommendation_df['movieId'] = temp_top_users_rating.index

# Sort recommendations
recommendation_df = recommendation_df.sort_values(by='weighted_average_recommendation_score', ascending=False)

# Matching movie information
recommendations = movie.loc[movie['movieId'].isin(recommendation_df.head(10)['movieId'].tolist())]

# Display the final recommendations
print(recommendations[['title', 'genres']])



                 title  rating  movieId
0  Breakfast Club, The     4.0     1968
1            Toy Story     2.5        1
2              Jumanji     3.0        2
3         Pulp Fiction     4.5      296
4                Akira     5.0     1274
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
     similarityIndex  userId
256              1.0  (370,)
393              1.0  (580,)
312              1.0  (446,)
123              1.0  (178,)
175              1.0  (254,)
     similarityIndex userId
256              1.0    370
393              1.0    580
312              1.0    446
123              1.0    178
175              1.0    254
                                     title                   genres
259                                 Priest                    Drama
1529                            Roger & Me            

In [2]:
input_movie

Unnamed: 0,title,rating
0,"Breakfast Club, The",4.0
1,Toy Story,2.5
2,Jumanji,3.0
3,Pulp Fiction,4.5
4,Akira,5.0


In [8]:
movie

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji,Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men,Comedy|Romance,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II,Comedy,1995
...,...,...,...,...
9737,193581,Black Butler: Book of the Atlantic,Action|Animation|Comedy|Fantasy,2017
9738,193583,No Game No Life: Zero,Animation|Comedy|Fantasy,2017
9739,193585,Flint,Drama,2017
9740,193587,Bungo Stray Dogs: Dead Apple,Action|Animation,2018


Unnamed: 0,title,rating,movieId
0,"Breakfast Club, The",4.0,1968
1,Toy Story,2.5,1
2,Jumanji,3.0,2
3,Pulp Fiction,4.5,296
4,Akira,5.0,1274


Unnamed: 0,similarityIndex
"(1,)",-1.000000
"(4,)",-1.000000
"(5,)",1.000000
"(6,)",-1.000000
"(7,)",0.000000
...,...
"(606,)",0.823329
"(607,)",-0.240192
"(608,)",0.836718
"(609,)",1.000000


                                                title  \
556         Song of the Little Road (Pather Panchali)   
557                   World of Apu, The (Apur Sansar)   
581                                          Dead Man   
585   Wallace & Gromit: The Best of Aardman Animation   
1472                                    Exorcist, The   
3758                             Seems Like Old Times   
5908           Sisterhood of the Traveling Pants, The   
5942                                 Calcium Kid, The   
6023                   Goal! The Dream Begins (Goal!)   
8212                 Craig Ferguson: I'm Here To Help   

                          genres  
556                        Drama  
557                        Drama  
581        Drama|Mystery|Western  
585   Adventure|Animation|Comedy  
1472              Horror|Mystery  
3758              Comedy|Romance  
5908      Adventure|Comedy|Drama  
5942                      Comedy  
6023                       Drama  
8212          Comedy|D