# Popularity-Based Filtering

### Load the data

In [1]:
import pandas as pd

# Read the three source tables from CSV files given by relative path
# (resolved against the notebook's current working directory).
movies = pd.read_csv("movies.csv")
# NOTE(review): `credits` is loaded here but not referenced by any cell in this section.
credits = pd.read_csv("credits.csv")
ratings = pd.read_csv("ratings.csv")

In [2]:
# Preview the first five rows of the ratings table.
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


### Calculate a weighted rating

$$WR = \frac{v}{v + m} \cdot R + \frac{m}{v + m} \cdot C$$

v - number of votes for a movie

m - minimum number of votes required for a movie to be included in the ranking

R - average rating of the movie

C - average rating across all movies

In [3]:
# Cutoff m: the 90th percentile of vote_count across all movies.
m = movies["vote_count"].quantile(q=0.9)
m

1838.4000000000015

The 90th percentile of `vote_count` is about 1838: roughly 90% of the movies have fewer votes than this, and only the top ~10% by vote count reach the threshold `m`.

In [4]:
# C: mean vote_average over every movie in the dataset (missing values are skipped).
C = movies["vote_average"].mean(skipna=True)
C

6.092171559442016

In [5]:
# Keep only movies with at least m votes. Filter first and copy the result:
# this avoids deep-copying the full table just to discard most of it, and gives
# an independent frame that later cells can add columns to safely.
movies_filtered = movies.loc[movies["vote_count"] >= m].copy()

In [6]:
def weighted_rating(df, m=m, C=C):
    """Compute the weighted rating WR = v/(v+m) * R + m/(v+m) * C.

    Args:
        df: Object indexable by "vote_average" (R) and "vote_count" (v),
            e.g. a single DataFrame row or a whole DataFrame.
        m: Minimum-votes threshold. The default is the value of the global
            `m` at the time this cell is executed.
        C: Mean rating used as the prior. The default is the value of the
            global `C` at the time this cell is executed.

    Returns:
        The weighted rating, with the same shape as the indexed values.
    """
    avg_rating = df["vote_average"]
    votes = df["vote_count"]
    total = votes + m
    return ((votes / total) * avg_rating) + ((m / total) * C)

In [7]:
# weighted_rating only does column-wise arithmetic, so call it once on the whole
# frame instead of once per row through apply(axis=1); the resulting values are
# the same and the per-row Python overhead is avoided.
movies_filtered["weighted_rating"] = weighted_rating(movies_filtered)

In [8]:
# Ten highest-scoring movies, best first.
(
    movies_filtered
    .sort_values("weighted_rating", ascending=False)
    .loc[:, ["title", "vote_count", "weighted_rating"]]
    .head(10)
)

Unnamed: 0,title,vote_count,weighted_rating
1881,The Shawshank Redemption,8205,8.059258
662,Fight Club,9413,7.939256
65,The Dark Knight,12002,7.92002
3232,Pulp Fiction,8428,7.904645
96,Inception,13752,7.863239
3337,The Godfather,5893,7.851236
95,Interstellar,10867,7.809479
809,Forrest Gump,7927,7.803188
329,The Lord of the Rings: The Return of the King,8064,7.727243
1990,The Empire Strikes Back,5879,7.697884
