# Recommender System

## 1. Importing Library and Data Merging

In [52]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie metadata and the user ratings
movies_df = pd.read_csv("movies.csv")
ratings_df = pd.read_csv("ratings.csv")

# Attach each rating to its movie's metadata via the shared 'movieId' column
df = ratings_df.merge(movies_df, on="movieId")

In [53]:
df

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
...,...,...,...,...,...,...
100831,610,160341,2.5,1479545749,Bloodmoon (1997),Action|Thriller
100832,610,160527,4.5,1479544998,Sympathy for the Underdog (1971),Action|Crime|Drama
100833,610,160836,3.0,1493844794,Hazard (2005),Action|Drama|Thriller
100834,610,163937,3.5,1493848789,Blair Witch (2016),Horror|Thriller


## 2. Dropping the timestamp column (not used)

In [54]:
# Rebind instead of `del` so this cell can be re-run safely: once the column
# is gone, `del df['timestamp']` would raise KeyError on a second execution.
df = df.drop(columns="timestamp", errors="ignore")

In [55]:
df.head()

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


## 3. Transforming the merged DataFrame into a movie × user matrix of ratings

In [56]:
# One row per movieId, one column per userId; each cell is that user's rating
# for that movie (NaN where the user has not rated it).
user_movie_matrix = df.pivot_table(values='rating', index='movieId', columns='userId')
user_movie_matrix

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,,,4.0,,4.5,,,,...,4.0,,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,,,,,,4.0,,4.0,,,...,,4.0,,5.0,3.5,,,2.0,,
3,4.0,,,,,5.0,,,,,...,,,,,,,,2.0,,
4,,,,,,3.0,,,,,...,,,,,,,,,,
5,,,,,,5.0,,,,,...,,,,3.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,,,,,,,,,,,...,,,,,,,,,,
193583,,,,,,,,,,,...,,,,,,,,,,
193585,,,,,,,,,,,...,,,,,,,,,,
193587,,,,,,,,,,,...,,,,,,,,,,


## 4. Imputation for the NULL values

In [57]:
# Replace the NaN left by the pivot (movie not rated by that user) with 0.
# Note: from here on an unrated movie is indistinguishable from a rating of 0,
# and it takes part in the correlation computed in the next step as such.
user_movie_matrix = user_movie_matrix.fillna(0)
user_movie_matrix.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


## 5. Using Collaborative Filtering: user-to-user similarity based on movie ratings

In [58]:
# DataFrame.corr correlates the columns pairwise; the columns here are userIds,
# so the result is a square user × user matrix of Pearson similarities
# (1.0 on the diagonal, where each user is compared with themselves).
user_user_matrix = user_movie_matrix.corr(method='pearson')
user_user_matrix

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.019400,0.053056,0.176920,0.120866,0.104418,0.143793,0.128547,0.055268,-0.000298,...,0.066256,0.149942,0.186978,0.056530,0.134412,0.121981,0.254200,0.262241,0.085434,0.098719
2,0.019400,1.000000,-0.002594,-0.003804,0.013183,0.016257,0.021567,0.023750,-0.003448,0.061880,...,0.198549,0.010888,-0.004030,-0.005345,-0.007919,0.011299,0.005813,0.032730,0.024373,0.089329
3,0.053056,-0.002594,1.000000,-0.004556,0.001887,-0.004577,-0.005634,0.001703,-0.003111,-0.005501,...,0.000150,-0.000585,0.011211,-0.004822,0.003678,-0.003246,0.012885,0.008096,-0.002963,0.015962
4,0.176920,-0.003804,-0.004556,1.000000,0.121018,0.065719,0.100602,0.054235,0.002417,0.015615,...,0.072848,0.114287,0.281866,0.039699,0.065493,0.164831,0.115118,0.116861,0.023930,0.062523
5,0.120866,0.013183,0.001887,0.121018,1.000000,0.294138,0.101725,0.426576,-0.004185,0.023471,...,0.061912,0.414931,0.095394,0.254117,0.141077,0.090158,0.145764,0.122607,0.258289,0.040372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.121981,0.011299,-0.003246,0.164831,0.090158,0.047506,0.172499,0.081913,0.057989,0.054877,...,0.153892,0.084208,0.224637,0.035251,0.106752,1.000000,0.115999,0.188354,0.052385,0.093851
607,0.254200,0.005813,0.012885,0.115118,0.145764,0.142169,0.173293,0.178133,0.003257,-0.004809,...,0.080034,0.187588,0.173025,0.126267,0.101138,0.115999,1.000000,0.258245,0.142533,0.098518
608,0.262241,0.032730,0.008096,0.116861,0.122607,0.137954,0.305439,0.175912,0.086229,0.048373,...,0.136316,0.174069,0.164479,0.133734,0.144896,0.188354,0.258245,1.000000,0.109563,0.248944
609,0.085434,0.024373,-0.002963,0.023930,0.258289,0.207124,0.084494,0.421627,-0.003937,0.014983,...,0.029664,0.331053,0.046000,0.232115,0.089810,0.052385,0.142533,0.109563,1.000000,0.033713


## 6. The 5 highest similarity scores for sample userId = 4 (the user itself ranks first with 1.0)

In [59]:
user_user_matrix.loc[4].sort_values(ascending=False).head(5)

userId
4      1.000000
391    0.299328
603    0.281866
156    0.273834
275    0.261452
Name: 4, dtype: float64

## 7. Converting the above data into a new DF and removing userId = 4 itself (its self-similarity of 1.0 is not a real neighbour)

In [60]:
# Five highest similarity scores for user 4, as a two-column frame
# (userId, similarity). The user's own row is still included at this point.
df_2 = (
    user_user_matrix.loc[4]
    .sort_values(ascending=False)
    .head(5)
    .rename('similarity')
    .rename_axis('userId')
    .reset_index()
)

In [61]:
# Keep every row except the target user's own entry
df_2 = df_2[df_2['userId'] != 4]
df_2

Unnamed: 0,userId,similarity
1,391,0.299328
2,603,0.281866
3,156,0.273834
4,275,0.261452


## 8. Creating a new DF with every movie rating given by the similar users

In [62]:
# Pull in every rating made by each similar user (one row per user/movie pair)
final_df = pd.merge(df_2, df, on='userId', how='left')
final_df

Unnamed: 0,userId,similarity,movieId,rating,title,genres
0,391,0.299328,1,3.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,391,0.299328,6,4.0,Heat (1995),Action|Crime|Thriller
2,391,0.299328,47,3.0,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
3,391,0.299328,50,4.0,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
4,391,0.299328,70,1.0,From Dusk Till Dawn (1996),Action|Comedy|Horror|Thriller
...,...,...,...,...,...,...
2125,275,0.261452,5745,5.0,"Four Seasons, The (1981)",Comedy|Drama
2126,275,0.261452,5884,1.0,Chopper Chicks in Zombietown (1989),Comedy|Horror
2127,275,0.261452,5938,5.0,Deathtrap (1982),Comedy|Crime|Mystery|Thriller
2128,275,0.261452,6122,5.0,Richard Pryor Live on the Sunset Strip (1982),Comedy|Documentary


## 9. Ranking the candidate movies with a score: user similarity multiplied by movie rating

In [63]:
# Weight each rating by how similar its author is to the target user
final_df['score'] = final_df['similarity'].mul(final_df['rating'])
final_df

Unnamed: 0,userId,similarity,movieId,rating,title,genres,score
0,391,0.299328,1,3.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0.897984
1,391,0.299328,6,4.0,Heat (1995),Action|Crime|Thriller,1.197312
2,391,0.299328,47,3.0,Seven (a.k.a. Se7en) (1995),Mystery|Thriller,0.897984
3,391,0.299328,50,4.0,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,1.197312
4,391,0.299328,70,1.0,From Dusk Till Dawn (1996),Action|Comedy|Horror|Thriller,0.299328
...,...,...,...,...,...,...,...
2125,275,0.261452,5745,5.0,"Four Seasons, The (1981)",Comedy|Drama,1.307259
2126,275,0.261452,5884,1.0,Chopper Chicks in Zombietown (1989),Comedy|Horror,0.261452
2127,275,0.261452,5938,5.0,Deathtrap (1982),Comedy|Crime|Mystery|Thriller,1.307259
2128,275,0.261452,6122,5.0,Richard Pryor Live on the Sunset Strip (1982),Comedy|Documentary,1.307259


## 10. Now, creating a DF of all the movies that have already been watched by our target userId = 4

In [64]:
# Every rating row that belongs to the target user
watched_df = df.loc[df['userId'].eq(4)]
watched_df

Unnamed: 0,userId,movieId,rating,title,genres
370,4,47,2.0,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
1499,4,235,2.0,Ed Wood (1994),Comedy|Drama
1569,4,260,5.0,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
1820,4,296,1.0,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
2957,4,441,1.0,Dazed and Confused (1993),Comedy
...,...,...,...,...,...
24886,4,4765,5.0,L.I.E. (2001),Drama
24889,4,4881,3.0,"Man Who Wasn't There, The (2001)",Crime|Drama
24906,4,4896,4.0,Harry Potter and the Sorcerer's Stone (a.k.a. ...,Adventure|Children|Fantasy
25013,4,4902,4.0,"Devil's Backbone, The (Espinazo del diablo, El...",Drama|Fantasy|Horror|Thriller|War


## 11. Next, removing already watched movies from our recommendations, so that we do not suggest the same movie again

In [65]:
# True for candidate rows whose movie the target user has already rated
cond = final_df['movieId'].isin(watched_df['movieId'])
# Remove those rows from final_df in place
final_df.drop(index=final_df.loc[cond].index, inplace=True)

In [66]:
# Rank the candidates by score, best first. A movie rated by several of the
# similar users appears once per rater in final_df, so keep only its
# best-scoring row; otherwise one title could take several of the five slots.
recommended_df = (
    final_df.sort_values(by='score', ascending=False)
    .drop_duplicates(subset='movieId')
    .head(5)[['title']]
    .reset_index(drop=True)
)

## 12. Top 5 recommended movies for userId = 4

In [67]:
recommended_df

Unnamed: 0,title
0,"Amelie (Fabuleux destin d'Amélie Poulain, Le) ..."
1,Cinema Paradiso (Nuovo cinema Paradiso) (1989)
2,Trainspotting (1996)
3,"Red Violin, The (Violon rouge, Le) (1998)"
4,Boys Don't Cry (1999)


## With the help of ChatGPT, the code was made dynamic: a single function that works for any user and runs all of the steps above in one call

In [68]:
import pandas as pd

def recommend_movies(user_id, top_n=5, ratings_df=None, movies_df=None):
    """Recommend up to ``top_n`` not-yet-rated movie titles for ``user_id``.

    User-based collaborative filtering: users are compared through the Pearson
    correlation of their rating vectors (unrated movies counted as 0), the
    ``top_n`` most similar *other* users are selected, and the movies they
    rated are ranked by ``similarity * rating``.

    Parameters
    ----------
    user_id
        Value of the ``userId`` column identifying the target user.
    top_n : int, default 5
        Number of nearest neighbours to use and maximum number of titles
        to return.
    ratings_df : pandas.DataFrame, optional
        Ratings with at least ``userId``, ``movieId`` and ``rating`` columns.
        Read from "ratings.csv" when omitted.
    movies_df : pandas.DataFrame, optional
        Movies with at least ``movieId`` and ``title`` columns.
        Read from "movies.csv" when omitted.

    Returns
    -------
    pandas.Series
        Series named ``title`` holding the recommended titles, best first,
        indexed 0..k-1. It holds fewer than ``top_n`` entries when the
        neighbours have rated fewer unseen movies than that.

    Raises
    ------
    KeyError
        If ``user_id`` has no ratings in the data.
    """
    # Load from disk only when the caller did not hand the frames in, so
    # repeated calls can reuse already-loaded data.
    if movies_df is None:
        movies_df = pd.read_csv("movies.csv")
    if ratings_df is None:
        ratings_df = pd.read_csv("ratings.csv")

    # Join ratings to movie metadata; the timestamp is not used. Dropping via
    # drop() returns a new frame, so caller-supplied frames are never mutated.
    df = pd.merge(ratings_df, movies_df, on="movieId").drop(
        columns="timestamp", errors="ignore"
    )

    # movie x user rating matrix; unrated cells become 0
    user_movie_matrix = pd.pivot_table(
        df, values='rating', index='movieId', columns='userId'
    ).fillna(0)

    if user_id not in user_movie_matrix.columns:
        raise KeyError(f"userId {user_id!r} has no ratings in the data")

    # corr() correlates columns pairwise -> user x user similarity matrix
    user_user_matrix = user_movie_matrix.corr(method='pearson')

    # Remove the target user BEFORE taking the top N. Their self-similarity of
    # 1.0 always ranks first, so slicing first would waste one neighbour slot
    # (and leave no neighbours at all for top_n=1).
    similar_users = (
        user_user_matrix.loc[user_id]
        .drop(labels=user_id)
        .dropna()  # undefined correlation (constant rating vector)
        .sort_values(ascending=False)
        .head(top_n)
        .rename('similarity')
        .rename_axis('userId')
        .reset_index()
    )

    # Every rating made by the selected neighbours, weighted by similarity
    candidates = similar_users.merge(df, on='userId', how='inner')
    candidates['score'] = candidates['similarity'] * candidates['rating']

    # Exclude movies the target user has already rated
    watched_ids = df.loc[df['userId'] == user_id, 'movieId']
    unseen = candidates[~candidates['movieId'].isin(watched_ids)]

    # A movie rated by several neighbours appears once per rater; after the
    # sort, drop_duplicates keeps its best-scoring row so no title repeats.
    ranked = unseen.sort_values(by='score', ascending=False).drop_duplicates(
        subset='movieId'
    )

    return ranked['title'].head(top_n).reset_index(drop=True)

# Example: recommendations for userId = 4, with top_n left at its default of 5
user_id = 4
recommended_movies = recommend_movies(user_id=user_id)
print(recommended_movies)

0    Amelie (Fabuleux destin d'Amélie Poulain, Le) ...
1       Cinema Paradiso (Nuovo cinema Paradiso) (1989)
2                                 Trainspotting (1996)
3            Red Violin, The (Violon rouge, Le) (1998)
4                                Boys Don't Cry (1999)
Name: title, dtype: object
