<a href="https://colab.research.google.com/github/stevengregori92/LearnRecSys/blob/main/ML_Engineering_Demographic_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

In [2]:
!gdown https://drive.google.com/uc?id=1kF6d9bJnZjehyPeoh6ixMETJnYbjFC3I
!unzip /content/rec.zip

Downloading...
From: https://drive.google.com/uc?id=1kF6d9bJnZjehyPeoh6ixMETJnYbjFC3I
To: /content/rec.zip
100% 18.1M/18.1M [00:00<00:00, 84.7MB/s]
Archive:  /content/rec.zip
  inflating: collaborative_filtering.csv  
  inflating: content_by_multiple.csv  
  inflating: content_by_synopsis.csv  
  inflating: demographic.csv         


In [3]:
class RecommendSystem:
  def __init__(self, data):
    self.df = pd.read_csv(data)

  def recommend(self, genre=None, duration=None, year=None, topk=10):
    df = self.df.copy()
    df = self.demographic_filter(df, genre=genre, duration=duration, year=year)
    df = self.compute_imdb_score(df)

    result = df.loc[:, 'title': 'release_year']
    result = result.sort_values('vote_average', ascending = False)
    result = result.head(topk)
    return result

  @staticmethod
  def demographic_filter(df, genre=None, duration=None, year=None):
    df = df.copy()

    if genre is not None:
      df = df[df[genre].all(axis=1)]
    if duration is not None:
      df = df[df.runtime.between(duration[0], duration[1])]
    if year is not None:
      df = df[df.release_year.between(year[0], year[1])]
    return df

  @staticmethod
  def compute_imdb_score(df, q=0.9):
    df = df.copy()
    m = df.vote_count.quantile(q)
    C = (df.vote_average * df.vote_count).sum() / df.vote_count.sum()

    df = df[df.vote_count >= m]
    df['score'] = df.apply(lambda x: (x.vote_average * x.vote_count + C*m) / (x.vote_count + m), axis = 1)

    return df

In [4]:
recsys = RecommendSystem(data='demographic.csv')
recsys.recommend(genre=['Action'], duration=(60, 150), year=(2015, 2020))

Unnamed: 0,title,genres,runtime,vote_average,vote_count,release_year
41905,Logan,Action; Drama; Science Fiction,137.0,7.6,6310.0,2017
25456,Kingsman: The Secret Service,Crime; Comedy; Action; Adventure,130.0,7.6,6069.0,2015
26482,Guardians of the Galaxy Vol. 2,Action; Adventure; Comedy; Science Fiction,137.0,7.6,4858.0,2017
44363,Dunkirk,Action; Drama; History; Thriller; War,107.0,7.5,2712.0,2017
26471,Star Wars: The Force Awakens,Action; Adventure; Science Fiction; Fantasy,136.0,7.5,7993.0,2015
41235,Rogue One: A Star Wars Story,Action; Adventure; Science Fiction,133.0,7.4,5111.0,2016
26480,Deadpool,Action; Adventure; Comedy,108.0,7.4,11444.0,2016
26469,Mad Max: Fury Road,Action; Adventure; Science Fiction; Thriller,120.0,7.3,9629.0,2015
26474,Avengers: Age of Ultron,Action; Adventure; Science Fiction,141.0,7.3,6908.0,2015
31822,Southpaw,Action; Drama,123.0,7.3,2112.0,2015
