In [6]:
import pandas as pd

In [50]:
class RecommendationSystem:
    """Demographic recommender: filter a table of rated items, then rank it.

    The table must provide ``vote_average`` and ``vote_count`` columns. The
    optional filters additionally read ``runtime``, ``release_year`` and
    whichever columns are named by ``genre``.
    """

    def __init__(self, data):
        """Store the item table.

        Parameters
        ----------
        data : pandas.DataFrame, str or os.PathLike
            A path is loaded with ``pd.read_csv``; any other object is stored
            as-is. The stored table is never modified: every method works on
            its own copy.
        """
        if isinstance(data, str) or hasattr(data, "__fspath__"):
            data = pd.read_csv(data)
        self.df = data

    def recommend(self, genre=None, duration=None, year=None, topk=10,
                  sort_by="vote_average"):
        """Return the ``topk`` best rows that pass the filters.

        Parameters
        ----------
        genre, duration, year
            Forwarded to :meth:`demographic_filter`; ``None`` disables a filter.
        topk : int
            Maximum number of rows returned.
        sort_by : str
            Column used for the descending ranking. Pass ``"score"`` to rank
            by the weighted rating added by :meth:`compute_imdb_score`.

        Returns
        -------
        pandas.DataFrame
            All input columns plus ``score``; empty if nothing passes.
        """
        candidates = self.demographic_filter(
            self.df, genre=genre, duration=duration, year=year
        )
        scored = self.compute_imdb_score(candidates)
        return scored.sort_values(sort_by, ascending=False).head(topk)

    @staticmethod
    def demographic_filter(df, genre=None, duration=None, year=None):
        """Return a filtered copy of ``df``; the input is left untouched.

        Parameters
        ----------
        genre : str or list of str, optional
            Column name(s); a row is kept only if all of them are truthy.
            (Assumes one indicator column per genre -- confirm against the
            dataset.)
        duration : (low, high), optional
            Inclusive bounds on ``runtime``.
        year : (low, high), optional
            Inclusive bounds on ``release_year``.
        """
        keep = pd.Series(True, index=df.index)

        if genre is not None:
            # A bare string would select a Series, which has no axis=1.
            genre_cols = [genre] if isinstance(genre, str) else list(genre)
            keep = keep & df[genre_cols].all(axis=1)
        if duration is not None:
            keep = keep & df["runtime"].between(duration[0], duration[1])
        if year is not None:
            keep = keep & df["release_year"].between(year[0], year[1])

        return df.loc[keep].copy()

    @staticmethod
    def compute_imdb_score(df, q=0.8):
        """Keep well-voted rows and add a weighted-rating ``score`` column.

        With ``v = vote_count`` and ``R = vote_average``::

            score = (R * v + C * m) / (v + m)

        where ``m`` is the ``q``-quantile of ``vote_count`` and ``C`` is the
        vote-weighted mean rating, both computed over the rows of ``df`` as
        passed in. Rows with ``vote_count < m`` are dropped.

        Returns a new DataFrame; ``df`` itself is not modified.
        """
        if df.empty:
            # Nothing to score; avoids a 0/0 when computing C.
            return df.assign(score=pd.Series(index=df.index, dtype="float64"))

        votes = df["vote_count"]
        m = votes.quantile(q)
        C = (df["vote_average"] * votes).sum() / votes.sum()

        # Explicit copy so adding a column never writes into a slice of `df`.
        qualified = df.loc[votes >= m].copy()
        qualified["score"] = (
            qualified["vote_average"] * qualified["vote_count"] + C * m
        ) / (qualified["vote_count"] + m)
        return qualified
    

In [1]:
from demographic_filter import RecommendationSystem

In [2]:
# Build the recommender from a CSV path.
# NOTE(review): RecommendationSystem is both defined in this notebook and
# imported from demographic_filter; which one is bound here depends on the
# order the cells were executed -- confirm.
recsysModels =  RecommendationSystem(data="dataset/demograpic_dataset.csv")

In [4]:
# Call with all defaults; per the signature defined in this notebook that
# means no genre/duration/year filter and topk=10.
recsysModels.recommend()

Unnamed: 0,title,genres,runtime,vote_average,vote_count,release_year
10298,Dilwale Dulhania Le Jayenge,Comedy; Drama; Romance,190.0,9.1,661.0,1995
38872,Planet Earth,Documentary,550.0,8.8,176.0,2006
7632,Sansho the Bailiff,Drama,124.0,8.7,68.0,1954
29087,The Jinx: The Life and Deaths of Robert Durst,Documentary,240.0,8.6,85.0,2015
33276,Human,Documentary,263.0,8.6,98.0,2015
828,The Godfather,Drama; Crime,175.0,8.5,6024.0,1972
314,The Shawshank Redemption,Drama; Crime,142.0,8.5,8358.0,1994
40018,Your Name.,Romance; Animation; Drama,106.0,8.5,1030.0,2016
39019,O.J.: Made in America,Documentary,463.0,8.5,73.0,2016
38873,Life,Documentary,500.0,8.5,65.0,2009


## Let's try a different dataset

In [32]:
# The three BX CSV files are read with the same dialect settings, so they are
# declared once and unpacked into each call.
BX_READ_OPTIONS = dict(sep=";", encoding='CP1252', escapechar='\\')

df_book_rate = pd.read_csv('dataset/BX-Book-Ratings.csv', **BX_READ_OPTIONS)
df_book = pd.read_csv('dataset/BX_Books.csv', **BX_READ_OPTIONS)
df_book_user = pd.read_csv('dataset/BX-Users.csv', **BX_READ_OPTIONS)

In [23]:
# Display the ratings table loaded from BX-Book-Ratings.csv.
df_book_rate

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [47]:
# Display the book table loaded from BX_Books.csv.
df_book

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton & Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
271374,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...
271375,0525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271376,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271377,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...


In [52]:
# Collapse the ratings to one row per ISBN, using the column names that
# RecommendationSystem reads: the mean rating becomes `vote_average` and the
# number of ratings becomes `vote_count`.
df_book_rate_full = (
    df_book_rate
    .groupby('ISBN')['Book-Rating']
    .agg(vote_average='mean', vote_count='count')
    .reset_index()
)
df_book_rate_full

Unnamed: 0,ISBN,vote_average,vote_count
0,0330299891,3.0,2
1,0375404120,1.5,2
2,0586045007,0.0,1
3,9022906116,3.5,2
4,9032803328,0.0,1
...,...,...,...
340551,cn113107,0.0,1
340552,ooo7156103,7.0,1
340553,§423350229,0.0,1
340554,´3499128624,8.0,1


In [53]:
# Attach the book metadata columns to the per-ISBN aggregates. This is an
# inner join (the pandas default), so only ISBNs present in both tables remain.
# NOTE: the result is bound to the same name as the input, so re-running this
# cell merges the metadata a second time.
df_book_rate_full = pd.merge(df_book_rate_full, df_book, how='inner', on='ISBN')
df_book_rate_full

Unnamed: 0,ISBN,vote_average,vote_count,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0000913154,8.0,1,The Way Things Work: An Illustrated Encycloped...,C. van Amerongen (translator),1967,Simon & Schuster,http://images.amazon.com/images/P/0000913154.0...,http://images.amazon.com/images/P/0000913154.0...,http://images.amazon.com/images/P/0000913154.0...
1,0001010565,0.0,2,Mog's Christmas,Judith Kerr,1992,Collins,http://images.amazon.com/images/P/0001010565.0...,http://images.amazon.com/images/P/0001010565.0...,http://images.amazon.com/images/P/0001010565.0...
2,0001046438,9.0,1,Liar,Stephen Fry,0,Harpercollins Uk,http://images.amazon.com/images/P/0001046438.0...,http://images.amazon.com/images/P/0001046438.0...,http://images.amazon.com/images/P/0001046438.0...
3,0001046713,0.0,1,Twopence to Cross the Mersey,Helen Forrester,1992,HarperCollins Publishers,http://images.amazon.com/images/P/0001046713.0...,http://images.amazon.com/images/P/0001046713.0...,http://images.amazon.com/images/P/0001046713.0...
4,000104687X,6.0,1,"T.S. Eliot Reading ""The Wasteland"" and Other P...",T.S. Eliot,1993,HarperCollins Publishers,http://images.amazon.com/images/P/000104687X.0...,http://images.amazon.com/images/P/000104687X.0...,http://images.amazon.com/images/P/000104687X.0...
...,...,...,...,...,...,...,...,...,...,...
270165,B000234N76,0.0,1,Falling Angels,Tracy Chevalier,2001,E P Dutton,http://images.amazon.com/images/P/B000234N76.0...,http://images.amazon.com/images/P/B000234N76.0...,http://images.amazon.com/images/P/B000234N76.0...
270166,B000234NC6,0.0,1,It Must've Been Something I Ate: The Return of...,Jeffrey Steingarten,2002,Knopf,http://images.amazon.com/images/P/B000234NC6.0...,http://images.amazon.com/images/P/B000234NC6.0...,http://images.amazon.com/images/P/B000234NC6.0...
270167,B00029DGGO,0.0,1,"Good Wife Strikes Back, The",Elizabeth Buchan,0,Viking Adult,http://images.amazon.com/images/P/B00029DGGO.0...,http://images.amazon.com/images/P/B00029DGGO.0...,http://images.amazon.com/images/P/B00029DGGO.0...
270168,B0002JV9PY,0.0,1,The Blockade Runners,Jules Verne,0,Digireads.com,http://images.amazon.com/images/P/B0002JV9PY.0...,http://images.amazon.com/images/P/B0002JV9PY.0...,http://images.amazon.com/images/P/B0002JV9PY.0...


In [54]:
# Build the recommender directly from the in-memory book table.
# NOTE(review): the TypeError recorded under this cell looks like what a
# constructor that forwards `data` to pd.read_csv would raise for a DataFrame;
# confirm which RecommendationSystem definition was live when it ran.
recsysModels =  RecommendationSystem(data=df_book_rate_full)

TypeError: argument of type 'method' is not iterable