# Introduction

This notebook contains `Popularity` based recommendation system. 
Details of how it works is available on my Hashnode Blog site.

Link is [here](http://)

# Import files

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv
/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv


# Read Dataset

In [2]:
credits = pd.read_csv("../input/tmdb-movie-metadata/tmdb_5000_credits.csv")
movies = pd.read_csv("../input/tmdb-movie-metadata/tmdb_5000_movies.csv")

# Basic Data Exploration

In [3]:
credits.columns

Index(['movie_id', 'title', 'cast', 'crew'], dtype='object')

In [4]:
movies.columns

Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count'],
      dtype='object')

In [5]:
credits.shape, movies.shape

((4803, 4), (4803, 20))

In [6]:
credits.head(2)

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."


In [7]:
movies.head(2)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500


## Combine both datasets

In [8]:
df = pd.merge(movies, credits, left_on="id", right_on="movie_id")

In [9]:
credits.shape, movies.shape, df.shape

((4803, 4), (4803, 20), (4803, 24))

## Check datatypes

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4803 entries, 0 to 4802
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4803 non-null   int64  
 1   genres                4803 non-null   object 
 2   homepage              1712 non-null   object 
 3   id                    4803 non-null   int64  
 4   keywords              4803 non-null   object 
 5   original_language     4803 non-null   object 
 6   original_title        4803 non-null   object 
 7   overview              4800 non-null   object 
 8   popularity            4803 non-null   float64
 9   production_companies  4803 non-null   object 
 10  production_countries  4803 non-null   object 
 11  release_date          4802 non-null   object 
 12  revenue               4803 non-null   int64  
 13  runtime               4801 non-null   float64
 14  spoken_languages      4803 non-null   object 
 15  status               

# Poplularity Based RS

In [11]:
df.head(3)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,spoken_languages,status,tagline,title_x,vote_average,vote_count,movie_id,title_y,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."


In [12]:
df.columns

Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title_x', 'vote_average',
       'vote_count', 'movie_id', 'title_y', 'cast', 'crew'],
      dtype='object')

## Calculate Weighted Average

In [13]:
v = df['vote_count']
m = df['vote_count'].quantile(0.7)
R = df['vote_average']
c = df['vote_average'].mean()

df['weighted_avg'] = ((R * v)+(c * m))/(v+m)

In [14]:
top_20 = df.sort_values('weighted_avg', ascending=False)

In [15]:
top_20[['original_title', 'vote_count' ,'vote_average', 'weighted_avg']].head(20)

Unnamed: 0,original_title,vote_count,vote_average,weighted_avg
1881,The Shawshank Redemption,8205,8.5,8.340775
3337,The Godfather,5893,8.4,8.192887
662,Fight Club,9413,8.3,8.171648
3232,Pulp Fiction,8428,8.3,8.157615
65,The Dark Knight,12002,8.2,8.102674
809,Forrest Gump,7927,8.2,8.056059
1818,Schindler's List,4329,8.3,8.038748
3865,Whiplash,4254,8.3,8.034695
96,Inception,13752,8.1,8.018611
1990,The Empire Strikes Back,5879,8.2,8.010426


In [16]:
top_20[['original_title', 'vote_count' ,'vote_average', 'weighted_avg', 'popularity']].head(20)

Unnamed: 0,original_title,vote_count,vote_average,weighted_avg,popularity
1881,The Shawshank Redemption,8205,8.5,8.340775,136.747729
3337,The Godfather,5893,8.4,8.192887,143.659698
662,Fight Club,9413,8.3,8.171648,146.757391
3232,Pulp Fiction,8428,8.3,8.157615,121.463076
65,The Dark Knight,12002,8.2,8.102674,187.322927
809,Forrest Gump,7927,8.2,8.056059,138.133331
1818,Schindler's List,4329,8.3,8.038748,104.469351
3865,Whiplash,4254,8.3,8.034695,192.528841
96,Inception,13752,8.1,8.018611,167.58371
1990,The Empire Strikes Back,5879,8.2,8.010426,78.51783


## Scaling using MinMax

In [17]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()
scaled = sc.fit_transform(top_20[['weighted_avg','popularity']])
scaled = pd.DataFrame(scaled, columns=['scaled_wt_avg', 'scaled_popularity'])
top_20[['scaled_wt_avg', 'scaled_popularity']] = scaled
top_20[['original_title', 'vote_count' ,'vote_average', 'weighted_avg', 'popularity', 'scaled_wt_avg','scaled_popularity']].head(20)

Unnamed: 0,original_title,vote_count,vote_average,weighted_avg,popularity,scaled_wt_avg,scaled_popularity
1881,The Shawshank Redemption,8205,8.5,8.340775,136.747729,0.404155,0.020092
3337,The Godfather,5893,8.4,8.192887,143.659698,0.377825,0.003182
662,Fight Club,9413,8.3,8.171648,146.757391,0.519508,0.030701
3232,Pulp Fiction,8428,8.3,8.157615,121.463076,0.379687,0.005801
65,The Dark Knight,12002,8.2,8.102674,187.322927,0.796938,0.067903
809,Forrest Gump,7927,8.2,8.056059,138.133331,0.494869,0.01745
1818,Schindler's List,4329,8.3,8.038748,104.469351,0.406806,0.00899
3865,Whiplash,4254,8.3,8.034695,192.528841,0.360269,0.007849
96,Inception,13752,8.1,8.018611,167.58371,0.755133,0.06294
1990,The Empire Strikes Back,5879,8.2,8.010426,78.51783,0.40027,0.030321
