# Popularity Based Recommendation System

In [1]:
import numpy as np
import pandas as pd

In [2]:
movie_names=pd.read_csv('movies.csv')
movie_names.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
ratings_data=pd.read_csv('ratings.csv')
ratings_data

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


In [4]:
movie_data=pd.merge(ratings_data, movie_names, on='movieId')

In [5]:
movie_data

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,31,2.5,1260759144,Dangerous Minds (1995),Drama
1,7,31,3.0,851868750,Dangerous Minds (1995),Drama
2,31,31,4.0,1273541953,Dangerous Minds (1995),Drama
3,32,31,4.0,834828440,Dangerous Minds (1995),Drama
4,36,31,3.0,847057202,Dangerous Minds (1995),Drama
...,...,...,...,...,...,...
99999,664,64997,2.5,1343761859,War of the Worlds (2005),Action|Sci-Fi
100000,664,72380,3.5,1344435977,"Box, The (2009)",Drama|Horror|Mystery|Sci-Fi|Thriller
100001,665,129,3.0,995232528,Pie in the Sky (1996),Comedy|Romance
100002,665,4736,1.0,1010197684,Summer Catch (2001),Comedy|Drama|Romance


In [6]:
movie_data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,31,2.5,1260759144,Dangerous Minds (1995),Drama
1,7,31,3.0,851868750,Dangerous Minds (1995),Drama
2,31,31,4.0,1273541953,Dangerous Minds (1995),Drama
3,32,31,4.0,834828440,Dangerous Minds (1995),Drama
4,36,31,3.0,847057202,Dangerous Minds (1995),Drama


In [7]:
movie_data.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
title        0
genres       0
dtype: int64

In [8]:
movie_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100004 entries, 0 to 100003
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100004 non-null  int64  
 1   movieId    100004 non-null  int64  
 2   rating     100004 non-null  float64
 3   timestamp  100004 non-null  int64  
 4   title      100004 non-null  object 
 5   genres     100004 non-null  object 
dtypes: float64(1), int64(3), object(2)
memory usage: 5.3+ MB


In [9]:
movie_data.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,100004.0,100004.0,100004.0,100004.0
mean,347.01131,12548.664363,3.543608,1129639000.0
std,195.163838,26369.198969,1.058064,191685800.0
min,1.0,1.0,0.5,789652000.0
25%,182.0,1028.0,3.0,965847800.0
50%,367.0,2406.5,4.0,1110422000.0
75%,520.0,5418.0,4.0,1296192000.0
max,671.0,163949.0,5.0,1476641000.0


# Criteria For Population Based Recommendation system

The criteria is based on:
1. Movies with highest rating
2. Number of views

<b>Average ratings of movies

In [10]:
movie_data.groupby('title')['rating'].mean().head()

title
"Great Performances" Cats (1998)           1.750000
$9.99 (2008)                               3.833333
'Hellboy': The Seeds of Creation (2004)    2.000000
'Neath the Arizona Skies (1934)            0.500000
'Round Midnight (1986)                     2.250000
Name: rating, dtype: float64

In [11]:
movie_data.groupby('title')['rating'].mean().sort_values(ascending=False).head(50)

title
Ivan Vasilievich: Back to the Future (Ivan Vasilievich menyaet professiyu) (1973)    5.0
Alien Escape (1995)                                                                  5.0
Boiling Point (1993)                                                                 5.0
Bone Tomahawk (2015)                                                                 5.0
Borgman (2013)                                                                       5.0
Survive and Advance (2013)                                                           5.0
Supercop 2 (Project S) (Chao ji ji hua) (1993)                                       5.0
Last Dragon, The (1985)                                                              5.0
Boy Crazy (2009)                                                                     5.0
Brandon Teena Story, The (1998)                                                      5.0
Straight-Jacket (2004)                                                               5.0
Storefront Hitc

In [12]:
movie_data

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,31,2.5,1260759144,Dangerous Minds (1995),Drama
1,7,31,3.0,851868750,Dangerous Minds (1995),Drama
2,31,31,4.0,1273541953,Dangerous Minds (1995),Drama
3,32,31,4.0,834828440,Dangerous Minds (1995),Drama
4,36,31,3.0,847057202,Dangerous Minds (1995),Drama
...,...,...,...,...,...,...
99999,664,64997,2.5,1343761859,War of the Worlds (2005),Action|Sci-Fi
100000,664,72380,3.5,1344435977,"Box, The (2009)",Drama|Horror|Mystery|Sci-Fi|Thriller
100001,665,129,3.0,995232528,Pie in the Sky (1996),Comedy|Romance
100002,665,4736,1.0,1010197684,Summer Catch (2001),Comedy|Drama|Romance


In [13]:
movie_data.groupby('title')['rating'].count().sort_values(ascending=False).head()

title
Forrest Gump (1994)                          341
Pulp Fiction (1994)                          324
Shawshank Redemption, The (1994)             311
Silence of the Lambs, The (1991)             304
Star Wars: Episode IV - A New Hope (1977)    291
Name: rating, dtype: int64

In [16]:
rating_means_count=pd.DataFrame(movie_data.groupby('title')['rating'].mean())

In [17]:
rating_means_count

Unnamed: 0_level_0,rating
title,Unnamed: 1_level_1
"""Great Performances"" Cats (1998)",1.750000
$9.99 (2008),3.833333
'Hellboy': The Seeds of Creation (2004),2.000000
'Neath the Arizona Skies (1934),0.500000
'Round Midnight (1986),2.250000
...,...
xXx (2002),2.478261
xXx: State of the Union (2005),1.000000
¡Three Amigos! (1986),3.258065
À nous la liberté (Freedom for Us) (1931),4.500000


In [18]:
rating_means_count['rating_Count']=pd.DataFrame(movie_data.groupby('title')['rating'].count())

In [19]:
rating_means_count

Unnamed: 0_level_0,rating,rating_Count
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"""Great Performances"" Cats (1998)",1.750000,2
$9.99 (2008),3.833333,3
'Hellboy': The Seeds of Creation (2004),2.000000,1
'Neath the Arizona Skies (1934),0.500000,1
'Round Midnight (1986),2.250000,2
...,...,...
xXx (2002),2.478261,23
xXx: State of the Union (2005),1.000000,1
¡Three Amigos! (1986),3.258065,31
À nous la liberté (Freedom for Us) (1931),4.500000,1


In [20]:
rating_means_count['rating']=round(rating_means_count['rating'],1)

In [21]:
rating_means_count

Unnamed: 0_level_0,rating,rating_Count
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"""Great Performances"" Cats (1998)",1.8,2
$9.99 (2008),3.8,3
'Hellboy': The Seeds of Creation (2004),2.0,1
'Neath the Arizona Skies (1934),0.5,1
'Round Midnight (1986),2.2,2
...,...,...
xXx (2002),2.5,23
xXx: State of the Union (2005),1.0,1
¡Three Amigos! (1986),3.3,31
À nous la liberté (Freedom for Us) (1931),4.5,1


In [29]:
rating_means_count=rating_means_count[(rating_means_count['rating']>4) & (rating_means_count['rating_Count']>100)]

In [30]:
rating_means_count

Unnamed: 0_level_0,rating,rating_Count
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",4.1,125
American Beauty (1999),4.2,220
Apocalypse Now (1979),4.1,112
Casablanca (1942),4.2,117
"Dark Knight, The (2008)",4.2,121
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964),4.2,105
Eternal Sunshine of the Spotless Mind (2004),4.1,106
Fargo (1996),4.3,224
Fight Club (1999),4.2,202
Forrest Gump (1994),4.1,341


In [31]:
rating_means_count=rating_means_count.sort_values(by='rating', ascending=False)

In [32]:
rating_means_count

Unnamed: 0_level_0,rating,rating_Count
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"Shawshank Redemption, The (1994)",4.5,311
"Godfather, The (1972)",4.5,200
"Usual Suspects, The (1995)",4.4,201
"Godfather: Part II, The (1974)",4.4,135
Fargo (1996),4.3,224
Pulp Fiction (1994),4.3,324
One Flew Over the Cuckoo's Nest (1975),4.3,144
Schindler's List (1993),4.3,244
Fight Club (1999),4.2,202
"Matrix, The (1999)",4.2,259
