In [3]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter may also hide pandas parser/deprecation
# warnings raised by later cells -- consider narrowing it.
warnings.filterwarnings('ignore')

## Import Dataset

In [6]:
# Column names for ratings.dat. Because `names=` is supplied, pandas treats the
# first line of the file as data rather than as a header.
columns_names = ["user_id", "item_id", "rating", "timestamp"]

# Fields are separated by a literal "::". A multi-character separator is only
# supported by the Python parsing engine, so request it explicitly instead of
# relying on pandas' fallback. The previous '\::' matched the same text but
# contained an invalid string escape ("\:").
df = pd.read_csv(
    'archive/ratings.dat',
    sep='::',
    engine='python',
    names=columns_names,
)

In [7]:
# Preview the first rows of the ratings frame to check the parsed columns.
df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [8]:
# movies.dat uses the same "::" separator and is read without a header row.
# - engine='python' is required for a multi-character separator.
# - encoding='latin-1': with the default decoding, non-ASCII titles show up
#   with replacement characters in later output.
#   NOTE(review): assumes the file is ISO-8859-1 encoded -- confirm against
#   the dataset's documentation.
movies_title = pd.read_csv(
    'archive/movies.dat',
    sep='::',
    engine='python',
    header=None,
    encoding='latin-1',
)

In [10]:
# (rows, columns) of the raw movies table.
movies_title.shape

(3883, 3)

In [11]:
# Keep only the first two columns (movie id and title) and give them names.
# Selecting by position rather than by the labels 0 and 1 keeps this cell
# re-runnable: after the rename those integer labels no longer exist, so the
# label-based form raised KeyError on a second execution.
movies_title = movies_title.iloc[:, :2].copy()
movies_title.columns = ['item_id', 'title']

In [12]:
# Preview the renamed id/title lookup table.
movies_title.head()

Unnamed: 0,item_id,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


## Working on Given Data: Merging Titles, Aggregating Ratings, and Building the User–Movie Matrix

In [13]:
# Attach the movie title to every rating row.
# - Dropping any existing 'title' column first keeps the cell re-runnable:
#   merging an already-merged frame would otherwise yield title_x / title_y.
# - validate='many_to_one' makes pandas raise if an item_id appears more than
#   once in movies_title, which would silently duplicate rating rows.
df = pd.merge(
    df.drop(columns='title', errors='ignore'),
    movies_title,
    on='item_id',
    how='inner',
    validate='many_to_one',
)

In [14]:
# Display the merged frame (pandas truncates the middle rows in the rendering).
df

Unnamed: 0,user_id,item_id,rating,timestamp,title
0,1,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975)
1,2,1193,5,978298413,One Flew Over the Cuckoo's Nest (1975)
2,12,1193,4,978220179,One Flew Over the Cuckoo's Nest (1975)
3,15,1193,4,978199279,One Flew Over the Cuckoo's Nest (1975)
4,17,1193,5,978158471,One Flew Over the Cuckoo's Nest (1975)
...,...,...,...,...,...
1000204,5949,2198,5,958846401,Modulations (1998)
1000205,5675,2703,3,976029116,Broken Vessels (1998)
1000206,5780,2845,1,958153068,White Boys (1999)
1000207,5851,3607,5,957756608,One Little Indian (1973)


In [15]:
# Mean rating per title, as a one-column frame indexed by title.
# Selecting 'rating' before aggregating avoids averaging the other columns
# only to throw those results away.
ratings = df.groupby('title')['rating'].mean().to_frame()

In [16]:
# Preview the per-title mean ratings.
ratings.head()

Unnamed: 0_level_0,rating
title,Unnamed: 1_level_1
"$1,000,000 Duck (1971)",3.027027
'Night Mother (1986),3.371429
'Til There Was You (1997),2.692308
"'burbs, The (1989)",2.910891
...And Justice for All (1979),3.713568


In [17]:
# Number of ratings each title received, aligned to `ratings` by its title index.
# Counting only the 'rating' column yields a Series directly, so neither the
# all-column count nor the DataFrame wrapper is needed.
ratings['num of ratings'] = df.groupby('title')['rating'].count()

In [19]:
# Preview mean rating alongside the rating count for each title.
ratings.head()

Unnamed: 0_level_0,rating,num of ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"$1,000,000 Duck (1971)",3.027027,37
'Night Mother (1986),3.371429,70
'Til There Was You (1997),2.692308,52
"'burbs, The (1989)",2.910891,303
...And Justice for All (1979),3.713568,199


In [20]:
 # User x title matrix: one row per user_id, one column per title, each cell
 # holding that user's (mean) rating for the title, NaN where the user did not
 # rate it.
 # NOTE(review): this cell starts with a stray leading space; IPython tolerates
 # it, but it is an IndentationError if the code is exported to a plain script.
 moviemat = df.pivot_table(
     values='rating',
     index='user_id',
     columns='title',
     aggfunc='mean',
 )

In [21]:
# Display the user x title matrix (mostly NaN; pandas truncates rows and columns).
moviemat

title,"$1,000,000 Duck (1971)",'Night Mother (1986),'Til There Was You (1997),"'burbs, The (1989)",...And Justice for All (1979),1-900 (1994),10 Things I Hate About You (1999),101 Dalmatians (1961),101 Dalmatians (1996),12 Angry Men (1957),...,"Young Poisoner's Handbook, The (1995)",Young Sherlock Holmes (1985),Young and Innocent (1937),Your Friends and Neighbors (1998),Zachariah (1971),"Zed & Two Noughts, A (1985)",Zero Effect (1998),Zero Kelvin (Kj�rlighetens kj�tere) (1995),Zeus and Roxanne (1997),eXistenZ (1999)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,,3.0,,,,,2.0,4.0,,,...,,3.0,,,,,,,,2.0
6037,,,,,,,,,,4.0,...,,,,,,,,,,
6038,,,,,,,,,,,...,,,,,,,,,,
6039,,,,,,,,,,,...,,3.0,,,,,,,,


## Prediction Algorithm: Item–Item Rating Correlation

In [22]:
def predict_movies(movie_name, min_ratings=100):
    """Rank titles by how strongly their user ratings correlate with one movie.

    Reads two notebook-level objects: ``moviemat`` (the user x title rating
    matrix) and ``ratings`` (per-title frame with a 'num of ratings' column).

    Parameters
    ----------
    movie_name : str
        Column label in ``moviemat`` to compare every title against.
    min_ratings : int, optional
        Only titles whose 'num of ratings' is strictly greater than this value
        are kept. Defaults to 100, the threshold that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by title with columns 'Correlation' and 'num of ratings',
        sorted by 'Correlation' in descending order. The queried title is not
        excluded from the result.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of ``moviemat``.
    """
    try:
        target_ratings = moviemat[movie_name]
    except KeyError:
        # Same exception type as before, but with a message that says what failed.
        raise KeyError(
            "{!r} is not a title in the rating matrix".format(movie_name)
        ) from None

    # Column-wise correlation of every title with the target title's ratings.
    correlations = moviemat.corrwith(target_ratings)

    # Titles whose correlation could not be computed come back as NaN; drop
    # them, then attach each title's rating count.
    corr_movie = (
        correlations.to_frame('Correlation')
        .dropna()
        .join(ratings['num of ratings'])
    )

    # Keep only titles with enough ratings, strongest correlation first.
    popular = corr_movie[corr_movie['num of ratings'] > min_ratings]
    return popular.sort_values('Correlation', ascending=False)

In [28]:
# Rank all titles by rating correlation with 'Titanic (1997)'.
predictions = predict_movies('Titanic (1997)')

In [30]:
# Show the ten most strongly correlated titles (the queried title itself ranks first).
predictions.head(10)

Unnamed: 0_level_0,Correlation,num of ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Titanic (1997),1.0,1546
Titanic (1953),0.617493,219
"Flintstones in Viva Rock Vegas, The (2000)",0.518153,128
"Bodyguard, The (1992)",0.51187,626
"Apple Dumpling Gang Rides Again, The (1979)",0.510837,109
Home Alone (1990),0.470992,675
"Great Mouse Detective, The (1986)",0.467075,131
Autumn in New York (2000),0.467069,117
"Mirror Has Two Faces, The (1996)",0.46394,188
How Green Was My Valley (1941),0.463331,107
