<h1>  Movies Recommendation System using Collaborative Filtering <h1>

In [1]:
import pandas as pd
import numpy as np
import os
from datetime import datetime

from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity

pd.set_option('display.max_rows',50)
pd.set_option('display.max_columns', 50)

In [2]:
info = pd.read_csv('C:\Users\bhava\Downloads/u.info' , sep=" ", header = None)
info.columns = ['Counts' , 'Type']

occupation = pd.read_csv('C:\Users\bhava\Downloads/u.occupation' , header = None)
occupation.columns = ['Occupations']

items = pd.read_csv('C:\Users\bhava\Downloads/u.item' , header = None , sep = "|" , encoding='latin-1')
items.columns = ['movie id' , 'movie title' , 'release date' , 'video release date' ,
              'IMDb URL' , 'unknown' , 'Action' , 'Adventure' , 'Animation' ,
              'Childrens' , 'Comedy' , 'Crime' , 'Documentary' , 'Drama' , 'Fantasy' ,
              'Film_Noir' , 'Horror' , 'Musical' , 'Mystery' , 'Romance' , 'Sci_Fi' ,
              'Thriller' , 'War' , 'Western']

data = pd.read_csv('C:\Users\bhava\Downloads/u.data', header= None , sep = '\t')
user = pd.read_csv('C:\Users\bhava\Downloads/u.user', header= None , sep = '|')
genre = pd.read_csv('C:\Users\bhava\Downloads/u.genre', header= None , sep = '|' )

genre.columns = ['Genre' , 'genre_id']
data.columns = ['user id' , 'movie id' , 'rating' , 'timestamp']
user.columns = ['user id' , 'age' , 'gender' , 'occupation' , 'zip code']

In [3]:

grouping_user = user

In [4]:
display(info)
display(user.shape)
display(items.shape)
display(data.shape)

Unnamed: 0,Counts,Type
0,943,users
1,1682,items
2,100000,ratings


(943, 5)

(1682, 24)

(100000, 4)

In [5]:

data = data.merge(user , on='user id')
data = data.merge(items , on='movie id')

In [6]:

def convert_time(x):
    return datetime.utcfromtimestamp(x).strftime('%d-%m-%Y')

def date_diff(date):
    d1 = date['release date'].split('-')[2]
    d2 = date['rating time'].split('-')[2]
    return abs(int(d2) - int(d1))


data.dropna(subset = ['release date'] , inplace = True)

user_details = data.groupby('user id').size().reset_index()
user_details.columns = ['user id' , 'number of user ratings']
data = data.merge(user_details , on='user id')

movie_details = data.groupby('movie id').size().reset_index()
movie_details.columns = ['movie id' , 'number of movie ratings']
data = data.merge(movie_details , on='movie id')

user_details = data.groupby('user id')['rating'].agg('mean').reset_index()
user_details.columns = ['user id' , 'average of user ratings']
data = data.merge(user_details , on='user id')

movie_details = data.groupby('movie id')['rating'].agg('mean').reset_index()
movie_details.columns = ['movie id' , 'average of movie ratings']
data = data.merge(movie_details , on='movie id')


user_details = data.groupby('user id')['rating'].agg('std').reset_index()
user_details.columns = ['user id' , 'std of user ratings']
data = data.merge(user_details , on='user id')

movie_details = data.groupby('movie id')['rating'].agg('std').reset_index()
movie_details.columns = ['movie id' , 'std of movie ratings']
data = data.merge(movie_details , on='movie id')

data['age_group'] = data['age']//10
data['rating time'] = data.timestamp.apply(convert_time)
data['time difference'] = data[['release date' , 'rating time']].apply(date_diff, axis =1)

data['total rating'] = (data['number of user ratings']*data['average of user ratings'] + data['number of movie ratings']*data['average of movie ratings'])/(data['number of movie ratings']+data['number of user ratings'])
data['rating_new'] = data['rating'] - data['total rating']

del movie_details
del user_details

In [7]:
pivot_table_user = pd.pivot_table(data=data,values='rating_new',index='user id',columns='movie id')
pivot_table_user = pivot_table_user.fillna(0)
pivot_table_movie = pd.pivot_table(data=data,values='rating',index='user id',columns='movie id')
pivot_table_movie = pivot_table_movie.fillna(0)

In [8]:
user_based_similarity = 1 - pairwise_distances(pivot_table_user.values, metric="cosine")
movie_based_similarity = 1 - pairwise_distances(pivot_table_movie.T.values, metric="cosine")

In [9]:
user_based_similarity = pd.DataFrame(user_based_similarity)
user_based_similarity.columns = user_based_similarity.columns+1
user_based_similarity.index = user_based_similarity.index+1

movie_based_similarity = pd.DataFrame(movie_based_similarity)
movie_based_similarity.columns = movie_based_similarity.columns+1
movie_based_similarity.index = movie_based_similarity.index+1

In [10]:
def rec_movie(movie_id, num_movies=10):
    temp_table = pd.DataFrame(columns=items.columns)
    movies = movie_based_similarity[movie_id].sort_values(ascending=False).index.tolist()[:num_movies + 1]
    for mov in movies:
        temp_table = pd.concat([temp_table, items[items['movie id'] == mov]], ignore_index=True)
    return temp_table

In [11]:
display(rec_movie(176))

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,176,Aliens (1986),01-Jan-1986,,http://us.imdb.com/M/title-exact?Aliens%20(1986),0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0
1,183,Alien (1979),01-Jan-1979,,http://us.imdb.com/M/title-exact?Alien%20(1979),0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0
2,195,"Terminator, The (1984)",01-Jan-1984,,"http://us.imdb.com/M/title-exact?Terminator,%2...",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0
3,174,Raiders of the Lost Ark (1981),01-Jan-1981,,http://us.imdb.com/M/title-exact?Raiders%20of%...,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,96,Terminator 2: Judgment Day (1991),01-Jan-1991,,http://us.imdb.com/M/title-exact?Terminator%20...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0
5,98,"Silence of the Lambs, The (1991)",01-Jan-1991,,http://us.imdb.com/M/title-exact?Silence%20of%...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
6,228,Star Trek: The Wrath of Khan (1982),01-Jan-1982,,http://us.imdb.com/M/title-exact?Star%20Trek:%...,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
7,172,"Empire Strikes Back, The (1980)",01-Jan-1980,,http://us.imdb.com/M/title-exact?Empire%20Stri...,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0
8,89,Blade Runner (1982),01-Jan-1982,,http://us.imdb.com/M/title-exact?Blade%20Runne...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
9,79,"Fugitive, The (1993)",01-Jan-1993,,"http://us.imdb.com/M/title-exact?Fugitive,%20T...",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [12]:
display(rec_movie(500))

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,500,Fly Away Home (1996),13-Sep-1996,,http://us.imdb.com/M/title-exact?Fly%20Away%20...,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,417,"Parent Trap, The (1961)",01-Jan-1961,,http://us.imdb.com/M/title-exact?Parent%20Trap...,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,99,Snow White and the Seven Dwarfs (1937),01-Jan-1937,,http://us.imdb.com/M/title-exact?Snow%20White%...,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,403,Batman (1989),01-Jan-1989,,http://us.imdb.com/M/title-exact?Batman%20(1989),0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
4,587,"Hour of the Pig, The (1993)",01-Jan-1993,,http://us.imdb.com/M/title-exact?Hour%20of%20t...,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0
5,419,Mary Poppins (1964),01-Jan-1964,,http://us.imdb.com/M/title-exact?Mary%20Poppin...,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0
6,418,Cinderella (1950),01-Jan-1950,,http://us.imdb.com/M/title-exact?Cinderella%20...,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
7,431,Highlander (1986),01-Jan-1986,,http://us.imdb.com/M/title-exact?Highlander%20...,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,71,"Lion King, The (1994)",01-Jan-1994,,"http://us.imdb.com/M/title-exact?Lion%20King,%...",0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
9,422,Aladdin and the King of Thieves (1996),01-Jan-1996,,http://us.imdb.com/M/title-exact?Aladdin%20and...,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [13]:
def get_similar_users(user_id, num_users=100):
    temp_table = pd.DataFrame(columns=user.columns)
    us = user_based_similarity[user_id].sort_values(ascending=False).index.tolist()[:num_users + 1]
    for u in us:
        temp_table = pd.concat([temp_table, user[user['user id'] == u]], ignore_index=True)
    return temp_table

def rec_movie_user_similarity(x, num_movies=10, num_users=100):
    similar_users = get_similar_users(x, num_users)
    similar_users.drop(columns=['age', 'gender', 'occupation', 'zip code'], inplace=True)
    ratings = similar_users.merge(pivot_table_movie, on='user id')
    ratings = ratings.set_index('user id')
    ratings.replace(0, np.nan, inplace=True)
    ratings.drop(ratings.index[0], inplace=True)
    
    ratings_mean = pd.DataFrame(ratings.mean(axis=0, skipna=True), columns=['ratings'])
    df_top_movies = ratings_mean.sort_values(by="ratings", ascending=False).head(num_movies)
    df_top_movies_indexes = df_top_movies.index.tolist()

    mov_table = pd.DataFrame(columns=items.columns)
    for mov in df_top_movies_indexes:
        mov_table = pd.concat([mov_table, items[items['movie id'] == mov]], ignore_index=True)

    return mov_table

In [14]:
display(rec_movie_user_similarity(771))

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,1311,Waiting to Exhale (1995),15-Jan-1996,,http://us.imdb.com/M/title-exact?Waiting%20to%...,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
1,1398,Anna (1996),13-Nov-1996,,http://us.imdb.com/M/title-exact?Anna%20(1996),0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,745,"Ruling Class, The (1972)",01-Jan-1972,,http://us.imdb.com/M/title-exact?Ruling%20Clas...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1242,"Old Lady Who Walked in the Sea, The (Vieille q...",01-Jan-1991,,http://us.imdb.com/M/title-exact?Vieille%20qui...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,889,"Tango Lesson, The (1997)",01-Jan-1997,,"http://us.imdb.com/M/title-exact?Tango+Lesson,...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
5,57,Priest (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Priest%20(1994),0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
6,989,Cats Don't Dance (1997),26-Mar-1997,,http://us.imdb.com/M/title-exact?Cats%20Don%27...,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
7,641,Paths of Glory (1957),01-Jan-1957,,http://us.imdb.com/M/title-exact?Paths%20of%20...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
8,793,Crooklyn (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Crooklyn%20(1...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
9,1018,Tie Me Up! Tie Me Down! (1990),01-Jan-1990,,http://us.imdb.com/Title?%A1%C1tame%21+(1990),0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [15]:
display(rec_movie_user_similarity(900))

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,974,Eye for an Eye (1996),01-Jan-1996,,http://us.imdb.com/Title?Eye+for+an+Eye+(1996),0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
1,1233,Nénette et Boni (1996),01-Jan-1996,,http://us.imdb.com/Title?N%E9nette+et+Boni+(1996),0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,785,Only You (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Only%20You%20...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
3,1104,C'est arrivé près de chez vous (1992),01-Jan-1992,,http://us.imdb.com/M/title-exact?C%27est%20arr...,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0
4,1099,"Red Firecracker, Green Firecracker (1994)",01-Jan-1994,,http://us.imdb.com/M/title-exact?Pao%20Da%20Sh...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
5,1462,"Thieves (Voleurs, Les) (1996)",25-Dec-1996,,"http://us.imdb.com/M/title-exact?Voleurs,%20Le...",0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0
6,1084,Anne Frank Remembered (1995),23-Feb-1996,,http://us.imdb.com/M/title-exact?Anne%20Frank%...,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
7,1467,"Saint of Fort Washington, The (1993)",01-Jan-1993,,http://us.imdb.com/M/title-exact?Saint%20of%20...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
8,500,Fly Away Home (1996),13-Sep-1996,,http://us.imdb.com/M/title-exact?Fly%20Away%20...,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,1394,Swept from the Sea (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Swept+from+th...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [16]:

grouping_user["gender"] = grouping_user["gender"].map({"M": 0, "F": 1})


ohe = OneHotEncoder()
occupation_encoded = ohe.fit_transform(grouping_user[["occupation"]]).toarray()
grouping_user = pd.concat([grouping_user, pd.DataFrame(occupation_encoded, columns=ohe.get_feature_names_out(["occupation"]))], axis=1)
grouping_user = grouping_user.drop(["occupation", "zip code"], axis=1)

In [17]:

def get_similar_users_new(x, num_users=100):
    df_similarity = grouping_user.drop("user id", axis=1).copy()
    cosine_similarities = cosine_similarity(df_similarity, df_similarity.iloc[x-1:x])
    
     
    df_cosine = pd.DataFrame({"user id": grouping_user["user id"], "cosine_similarity": cosine_similarities.flatten()})
    
    
    df_top_users = df_cosine.sort_values(by="cosine_similarity", ascending=False).head(num_users)
    df_top_users = df_top_users.drop(["cosine_similarity"], axis=1)
    
    return df_top_users

def rec_movie_user_similarity_new(x, num_movies=10, num_users=100):
    similar_users = get_similar_users_new(x, num_users)
    
    ratings = similar_users.merge(pivot_table_movie , on= 'user id')
    ratings = ratings.set_index('user id')
    ratings.replace(0, np.nan, inplace=True)
    ratings_mean = pd.DataFrame(ratings.mean(axis = 0 , skipna = True), columns=['ratings'])
    df_top_movies = ratings_mean.sort_values(by="ratings", ascending=False).head(num_movies)
    df_top_movies_indexes = df_top_movies.index.tolist()

    mov_table = pd.DataFrame(columns = items.columns)
    for mov in df_top_movies_indexes:
        mov_table = pd.concat([mov_table, items[items['movie id'] == mov]], ignore_index=True)

    return mov_table

In [18]:
display(rec_movie_user_similarity_new(771))

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,1137,Beautiful Thing (1996),09-Oct-1996,,http://us.imdb.com/M/title-exact?Beautiful%20T...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
1,1375,"Cement Garden, The (1993)",01-Jan-1993,,http://us.imdb.com/M/title-exact?Cement%20Gard...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,1467,"Saint of Fort Washington, The (1993)",01-Jan-1993,,http://us.imdb.com/M/title-exact?Saint%20of%20...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,884,Year of the Horse (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Year+of+the+H...,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,888,One Night Stand (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?One+Night+Sta...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
5,1175,Hugo Pool (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Hugo+Pool+(1997),0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
6,170,Cinema Paradiso (1988),01-Jan-1988,,http://us.imdb.com/M/title-exact?Nuovo%20cinem...,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0
7,1189,Prefontaine (1997),24-Jan-1997,,http://us.imdb.com/M/title-exact?Prefontaine%2...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
8,1396,Stonewall (1995),26-Jul-1996,,http://us.imdb.com/M/title-exact?Stonewall%20(...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
9,914,Wild Things (1998),14-Mar-1998,,http://us.imdb.com/Title?Wild+Things+(1998),0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0


In [19]:
display(rec_movie_user_similarity_new(771))

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,1137,Beautiful Thing (1996),09-Oct-1996,,http://us.imdb.com/M/title-exact?Beautiful%20T...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
1,1375,"Cement Garden, The (1993)",01-Jan-1993,,http://us.imdb.com/M/title-exact?Cement%20Gard...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,1467,"Saint of Fort Washington, The (1993)",01-Jan-1993,,http://us.imdb.com/M/title-exact?Saint%20of%20...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,884,Year of the Horse (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Year+of+the+H...,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,888,One Night Stand (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?One+Night+Sta...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
5,1175,Hugo Pool (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Hugo+Pool+(1997),0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
6,170,Cinema Paradiso (1988),01-Jan-1988,,http://us.imdb.com/M/title-exact?Nuovo%20cinem...,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0
7,1189,Prefontaine (1997),24-Jan-1997,,http://us.imdb.com/M/title-exact?Prefontaine%2...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
8,1396,Stonewall (1995),26-Jul-1996,,http://us.imdb.com/M/title-exact?Stonewall%20(...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
9,914,Wild Things (1998),14-Mar-1998,,http://us.imdb.com/Title?Wild+Things+(1998),0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0
