In [None]:
#Importing the required packages

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
#Reading the data sets

df1 = pd.read_csv(r'/content/movies.csv')
df2 = pd.read_csv(r'/content/sample_ratings.csv')

In [None]:
df1.head()

Unnamed: 0,itemId,title
0,101,The Matrix
1,102,Inception
2,103,Titanic
3,104,The Godfather
4,105,Avengers: Endgame


In [None]:
df2.head()

Unnamed: 0,userId,itemId,rating
0,1,101,4
1,1,102,5
2,1,103,3
3,2,101,2
4,2,104,4


In [33]:
df = df2.merge(df1, left_on='itemId', right_on='itemId', how='left')
df

Unnamed: 0,userId,itemId,rating,title
0,1,101,4,The Matrix
1,1,102,5,Inception
2,1,103,3,Titanic
3,2,101,2,The Matrix
4,2,104,4,The Godfather
5,3,102,5,Inception
6,3,105,4,Avengers: Endgame
7,4,101,3,The Matrix
8,4,105,5,Avengers: Endgame
9,5,104,4,The Godfather


In [34]:
df.head()

Unnamed: 0,userId,itemId,rating,title
0,1,101,4,The Matrix
1,1,102,5,Inception
2,1,103,3,Titanic
3,2,101,2,The Matrix
4,2,104,4,The Godfather


In [35]:
user_movie_matrix = pd.pivot_table(df, values = 'rating', index='itemId', columns = 'userId')
user_movie_matrix

userId,1,2,3,4,5
itemId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
101,4.0,2.0,,3.0,
102,5.0,,5.0,,
103,3.0,,,,
104,,4.0,,,4.0
105,,,4.0,5.0,


In [36]:
user_movie_matrix = user_movie_matrix.fillna(0)
user_movie_matrix.head()

userId,1,2,3,4,5
itemId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
101,4.0,2.0,0.0,3.0,0.0
102,5.0,0.0,5.0,0.0,0.0
103,3.0,0.0,0.0,0.0,0.0
104,0.0,4.0,0.0,0.0,4.0
105,0.0,0.0,4.0,5.0,0.0


In [37]:
#user-based collaborative filtering

user_user_matrix = user_movie_matrix.corr(method='pearson')
user_user_matrix

userId,1,2,3,4,5
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1.0,-0.388514,0.148281,-0.339623,-0.582772
2,-0.388514,1.0,-0.606168,-0.218539,0.875
3,0.148281,-0.606168,1.0,0.244227,-0.404112
4,-0.339623,-0.218539,0.244227,1.0,-0.388514
5,-0.582772,0.875,-0.404112,-0.388514,1.0


In [38]:
#Extarcing top 10 similar users for User2 by sorting them in descending order based on their similarties

user_user_matrix.loc[2].sort_values(ascending=False).head(10)

Unnamed: 0_level_0,2
userId,Unnamed: 1_level_1
2,1.0
5,0.875
4,-0.218539
1,-0.388514
3,-0.606168


In [39]:
#Converting the above data into a DF and removing the user2 itself (A user will be always similar to iteself with a rating of 1)

df_2 = pd.DataFrame(user_user_matrix.loc[2].sort_values(ascending=False).head(10))
df_2 = df_2.reset_index()
df_2.columns = ['userId', 'similarity']

In [40]:
df_2 = df_2.drop((df_2[df_2['userId'] ==2]).index)
df_2

Unnamed: 0,userId,similarity
1,5,0.875
2,4,-0.218539
3,1,-0.388514
4,3,-0.606168


In [41]:
#Now we are creating a new DF which has all the similar users and their rated movies

final_df = df_2.merge(df, left_on='userId', right_on='userId', how='left')
final_df

Unnamed: 0,userId,similarity,itemId,rating,title
0,5,0.875,104,4,The Godfather
1,4,-0.218539,101,3,The Matrix
2,4,-0.218539,105,5,Avengers: Endgame
3,1,-0.388514,101,4,The Matrix
4,1,-0.388514,102,5,Inception
5,1,-0.388514,103,3,Titanic
6,3,-0.606168,102,5,Inception
7,3,-0.606168,105,4,Avengers: Endgame


In [42]:
final_df['score'] = final_df['similarity']*final_df['rating']
final_df

Unnamed: 0,userId,similarity,itemId,rating,title,score
0,5,0.875,104,4,The Godfather,3.5
1,4,-0.218539,101,3,The Matrix,-0.655618
2,4,-0.218539,105,5,Avengers: Endgame,-1.092697
3,1,-0.388514,101,4,The Matrix,-1.554057
4,1,-0.388514,102,5,Inception,-1.942572
5,1,-0.388514,103,3,Titanic,-1.165543
6,3,-0.606168,102,5,Inception,-3.030839
7,3,-0.606168,105,4,Avengers: Endgame,-2.424672


In [43]:
#Creating a df for all the movies which are already watched by our target user2

watched_df = df[df['userId'] == 2]
watched_df

Unnamed: 0,userId,itemId,rating,title
3,2,101,2,The Matrix
4,2,104,4,The Godfather


We will exclude movies the user has already watched from the recommendation list — since it doesn't make sense to recommend the same movie again!

In [44]:
cond = final_df['itemId'].isin(watched_df['itemId'])
final_df.drop(final_df[cond].index, inplace = True)

In [45]:
recommended_df = final_df.sort_values(by = 'score', ascending = False)['title'].head(10)
recommended_df = recommended_df.reset_index()
del recommended_df['index']

Here is the list of top 10 recommended movies for user

In [46]:
recommended_df

Unnamed: 0,title
0,Avengers: Endgame
1,Titanic
2,Inception
3,Avengers: Endgame
4,Inception
