In [146]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.decomposition import TruncatedSVD
import time
import atexit

class BookRecommendation(object):

    def __init__(self, verbose=0):
        """
        comment
        """
        init_timer = time.time()
        self.df_r, self.df_b = self._load_df(verbose=verbose)
        self.corr = self._init_corr(verbose=verbose)
        self.title_series = self.df_b["title"]
        if verbose > 0:
            print("Time to init:", time.time() - init_timer, "sec")
        self.base_update = False
        atexit.register(self.exit)
        self.rating_mean = self.df_r["rating"].mean()

    
    def exit(self):
        if self.base_update:
            df_r.to_csv("ratings.csv", index=False)
            self.base_update=False
        

    def _load_df(self, verbose=0):
        if verbose > 0:
            print("Loadind ratings.csv ...")
        df_r = pd.read_csv('ratings.csv')
        df_r.sort_values(by="user_id", inplace=True, ignore_index=True)
        df_r = df_r.reset_index(drop=True)
        if verbose > 0:
            print("Done")
            print("Loadind books.csv ...")
        df_b = pd.read_csv('books.csv')
        if verbose > 0:
            print("Done")
        return df_r, df_b


    def _init_corr(self, verbose=0):
        if verbose > 0:
            print("Loading matrix ...")
        df_r_pivot = self.df_r.pivot(index="book_id", columns ="user_id", values="rating")
        df_r_pivot = df_r_pivot.fillna(0)
        X = df_r_pivot.values
        if verbose > 0:
            print("Done")
            print("Fiting SVD ...")
        # SVD = TruncatedSVD(n_components=n_components, random_state=42)
        # matrix = SVD.fit_transform(X)
        if verbose > 0:
            print("Done")
            print("Loading corr ...")
        #corr = np.corrcoef(matrix)
        corr = np.corrcoef(X)
        if verbose > 0:
            print("Done")
        return corr
        

    def show_book_title_from_id(self, book_id):
        book_title = self.df_b.loc[self.df_b["book_id"] == book_id, "title"].values[0]
        print(book_id, book_title)


    def show_books(self, start, end):
        while start < end and start < len(self.title_series):
            print("book_id", start+1, "title", self.title_series[start])
            start += 1


    def related_books(self, book, n_books=10, unwanted_id=[]):
        idx = []
        if type(book) == str:
            book_id = self.df_b.loc[self.df_b["title"] == book, "book_id"].values[0]
        elif type(book) == int:
            book_id = book
        else:
            return idx
        
        book_corr = self.corr[book - 1]
        idx = (-book_corr).argsort()
        
        i = 0
        while i < len(idx):
            idx[i] += 1
            i += 1

        new_idx = []
        i = 0
        while len(new_idx) < n_books and i < len(idx):
            if idx[i] not in unanted_id:
                new_idx.append(idx[i])
            i += 1
            
        return new_idx


    def show_books_from_user_id(self, user_id):
        df_user = self.df_r[self.df_r["user_id"] == user_id]
        df_user = df_user.sort_values(by="rating", ascending=False, ignore_index=True)
        for i in df_user.index:
            print(self.df_b[self.df_b["book_id"] == df_user["book_id"][i]]["title"].values[0], df_user["rating"][i])


    def recommend_books_from_user_id(self, user_id, n_books=100):
        df_user = self.df_r[self.df_r["user_id"] == user_id]
        
        if df_user.shape[0] == 0:
            return self.popularity_recommender(n_books)
        
        df_user = df_user.sort_values(by="rating", ascending=False, ignore_index=True)
        
        user_read_books = df_user["book_id"].values.tolist()
        
        liked_books = df_user[df_user["rating"] >= self.rating_mean].count()
        
        book_list = []
        for i in df_user.index:
            if df_user["rating"][i] >= self.rating_mean:
                book_list += self.related_books(book_id=df_user["book_id"][i], n_books=max(1, n_books/liked_books), unwanted_id=user_read_books)
            
#         i = 0    
#         while i < len(df_user.index):
#             if df_user["rating"][df_user.index[i]]
#             i += 1
                
        if len(n_book_list) < n_books:
            n_book_list += self.popularity_recommender(n_books=n_books-len(n_book_list), unwanted_id=n_book_list+user_read_books)
            
        return n_book_list[:n_books]
    
    
    def popularity_recommender(self, n_books=50, unwanted_id=[]):
        # goal: create a dataframe of weighted ratings for each book, and return the 
        # indexes of the n_books best rated books.
        # (v*R + m*C) / (v+m)
        # v: number of votes for the book
        # m: minimum number of votes required to appear in the list
        # R: average rating of the book
        # C: mean value of all the votes
        # create a pandas Dataframe with book_id and the number of ratings for the book
        df_wr = self.df_r[["book_id", "rating"]].groupby("book_id", as_index=False).count().rename(columns={"rating": "v"})
        # add the mean score for each book
        df_wr["R"] = self.df_r[["book_id", "rating"]].groupby("book_id", as_index=False).mean()["rating"]
        # let m be the median of the numbers of votes (248)
        m = df_wr["v"].quantile(0.90)
        # print(m)
        C = self.df_r["rating"].mean()
        # compute the weighted ratings for each books
        df_wr["wr"] = df_wr.apply(lambda row: (row["v"]*row["R"] + m*C) / (row["v"] + m), axis=1)
        # sort the books by their weighted ratings
        df_wr = df_wr.sort_values(by="wr", ascending=False, ignore_index=True).reset_index(drop=True)
        book_list = []
        i = 0
        while len(book_list) < n_books and i < len(df_wr):
            if df_wr["book_id"][i] not in unwanted_id:
                book_list.append(df_wr["book_id"][i])
            i += 1
        return book_list

    
    def create_user(self, user_id, book_id, rating):
        if len(book_id) != len(rating) or self.df_r[self.df_r["user_id"] == user_id].shape[0] > 0:
            print("wrong value")
            print(self.df_r[self.df_r["user_id"] == user_id].shape)
            return
        _dict = {"user_id": [user_id]*len(book_id), "book_id": book_id, "rating": rating}
        df = pd.DataFrame(data=_dict)
        self.df_r = self.df_r.append(df, ignore_index=True)
        self.base_update = True
        # test
        display(self.df_r.tail())
    
    
    def del_user(self, user_id):
        print(self.df_r[self.df_r["user_id"] == user_id].index)
        self.df_r = self.df_r.drop(self.df_r[self.df_r["user_id"] == user_id].index)
        self.df_r.reset_index(drop=True)

In [147]:
book_recommendation = BookRecommendation(verbose=1)

Loadind ratings.csv ...
Done
Loadind books.csv ...
Done
Loading matrix ...
Done
Fiting SVD ...
Done
Loading corr ...
Done
Time to init: 57.913957834243774 sec


In [148]:
def show_books(start=0, end=10):
    print("\n", str(" "+str(start)+" ").center(50, "-"))
    book_recommendation.show_books(start, end)
    print(str(" "+str(end)+" ").center(50, "-"), "\n")

def show_related_books(book_id, n_books=10):
    book_recommendation.show_book_title_from_id(book_id)
    book_list = book_recommendation.related_books(book_id=book_id, n_books=n_books)
    print("\n", " 0 ".center(50, "-"))
    for i in book_list:
        book_recommendation.show_book_title_from_id(i)
    print("\n", str(" "+str(n_books)+" ").center(50, "-"))
    
def show_user(user_id):
    book_recommendation.show_books_from_user_id(user_id)

def recommend_user(user_id, n_books=10):
    print("\n", " already read ".center(50, "-"))
    book_recommendation.show_books_from_user_id(user_id)
    print("\n", " end ".center(50, "-"))
    print("\n", " recommendations ".center(50, "-"))
    book_list = book_recommendation.recommend_books_from_user_id(user_id, n_books=n_books)
    print("nb_books", len(book_list))
    for i in book_list:
        book_recommendation.show_book_title_from_id(i)
    print("\n", " end ".center(50, "-"))

def add_user(user_id=1000000, book_id=[1], rating=[5]):
    book_recommendation.create_user(user_id, book_id, rating)
    
def del_user(user_id=1000000):
    book_recommendation.del_user(user_id)

In [151]:
#show_books(0, 10)
#show_related_books(1, n_books=10)
#add_user(user_id=1000000, book_id=[1], rating=[1])
#del_user(user_id=1000000)
recommend_user(1000000, n_books=10)


 ------------------ already read ------------------
The Hunger Games (The Hunger Games, #1) 1

 ---------------------- end -----------------------

 ---------------- recommendations -----------------
nb_books 10
17 Catching Fire (The Hunger Games, #2)
20 Mockingjay (The Hunger Games, #3)
3 Twilight (Twilight, #1)
12 Divergent (Divergent, #1)
2 Harry Potter and the Sorcerer's Stone (Harry Potter, #1)
41 The Lightning Thief (Percy Jackson and the Olympians, #1)
51 City of Bones (The Mortal Instruments, #1)
31 The Help
73 The Host (The Host, #1)
25 Harry Potter and the Deathly Hallows (Harry Potter, #7)

 ---------------------- end -----------------------
