In [14]:
import numpy as np
import pandas as pd
import tqdm
import time
import random
import atexit
import difflib

from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise.model_selection import cross_validate

class BookRecommendation(object):
    """Book recommender backed by a Surprise SVD model.

    Ratings and book metadata are read from CSV files, an SVD model is
    cross-validated and then fitted on every rating at construction time.
    The in-memory ratings table can afterwards be edited with
    ``add_ratings`` / ``del_user`` and is written back to disk by
    ``update_database`` (also registered to run at interpreter exit).

    Attributes:
        df_r: ratings DataFrame; the columns used here are ``user_id``,
            ``book_id`` and ``rating``.
        df_b: books DataFrame; the columns used here are ``book_id``,
            ``title``, ``isbn``, ``authors`` and ``original_title``.
        svd: the SVD model fitted in ``_init_svd``.
        rating_mean: mean of ``df_r["rating"]`` computed once in
            ``__init__``; used as the recommendation threshold.
        base_update: True while ``df_r`` holds changes not yet saved.

    NOTE: neither ``add_ratings`` nor ``del_user`` refits ``svd`` or
    recomputes ``rating_mean``; predictions keep using the model built in
    ``__init__`` until a new instance is created.
    """

    # The ratings file is saved to the same place it was loaded from, so
    # that edits are visible the next time the class is instantiated.
    RATINGS_PATH = "databases/ratings.csv"
    BOOKS_PATH = "databases/books.csv"

    def __init__(self, verbose=0):
        """
        params:
            verbose -> int
                if verbose > 0:
                    print progress
        """
        init_timer = time.time()
        self.base_update = False
        self.df_r, self.df_b = self._load_df(verbose=verbose)
        self.rating_mean = self.df_r["rating"].mean()
        self.svd = self._init_svd(verbose=verbose)
        if verbose > 0:
            print("Time to init:", time.time() - init_timer, "sec")
        # Registered last so a failed construction never leaves a hook
        # behind; the hook saves pending rating edits at interpreter exit.
        atexit.register(self.exit)

    def update_database(self):
        """Write the ratings table to ``RATINGS_PATH`` if it has unsaved edits."""
        if self.base_update:
            self.df_r.to_csv(self.RATINGS_PATH, index=False)
            self.base_update = False

    def exit(self):
        """atexit hook: flush pending rating edits to disk."""
        self.update_database()

    def _load_df(self, verbose=0):
        """Read both CSV files.

        returns:
            (df_r, df_b) -> ratings sorted by ``user_id`` with a fresh
            0..n-1 index, and the books table as read from disk.
        """
        if verbose > 0:
            print("Loadind ratings.csv ...")
        # ignore_index=True already renumbers the rows after sorting.
        df_r = pd.read_csv(self.RATINGS_PATH).sort_values(by="user_id", ignore_index=True)
        if verbose > 0:
            print("Done")
            print("Loadind books.csv ...")
        df_b = pd.read_csv(self.BOOKS_PATH)
        if verbose > 0:
            print("Done")
        return df_r, df_b

    def _init_svd(self, verbose=0):
        """Cross-validate (3 folds, RMSE/MAE) and then fit an SVD on all ratings.

        returns:
            the SVD instance fitted on the full training set.
        """
        verbose = bool(verbose)
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(self.df_r[['user_id', 'book_id', 'rating']], reader)
        if verbose:
            print("Cross validation SVD ...")
        svd = SVD(verbose=verbose, n_epochs=10)
        cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=verbose)
        if verbose:
            print("Done\nFitting SVD ...")
        trainset = data.build_full_trainset()
        svd.fit(trainset)
        if verbose:
            print("Done")
        return svd

    def show_book_title_from_id(self, book_id):
        """Print ``book_id`` followed by its title (IndexError if the id is unknown)."""
        book_title = self.df_b.loc[self.df_b["book_id"] == book_id, "title"].values[0]
        print(book_id, book_title)

    def show_books(self, start, end):
        """Print the id and title of the books at row positions [start, end).

        Positions refer to row order in ``df_b``; the printed ``book_id`` is
        the value stored in that row. Out-of-range bounds are clipped and an
        empty or reversed range prints nothing.
        """
        start = max(start, 0)
        if end <= start:
            return
        page = self.df_b.iloc[start:end]
        for book_id, title in zip(page["book_id"], page["title"]):
            print("book_id", book_id, "title", title)

    def get_book_id(self, book_title):
        """
        Gets the book ID for a book title based on the closest match in the metadata dataframe.

        An exact title match is returned directly; otherwise the closest
        title according to ``difflib.get_close_matches`` is used.

        raises:
            IndexError -> no title is close enough to ``book_title``.
        """
        exact = self.df_b.loc[self.df_b["title"] == book_title, "book_id"]
        if not exact.empty:
            # Fast path: skips the difflib scan over every title.
            return exact.values[0]

        existing_titles = self.df_b["title"].tolist()
        closest_titles = difflib.get_close_matches(book_title, existing_titles, n=1)
        if not closest_titles:
            raise IndexError("no book title close to {!r}".format(book_title))
        return self.df_b.loc[self.df_b["title"] == closest_titles[0], "book_id"].values[0]

    def get_book_info(self, book_id):
        """
        Returns some basic information about a book given the book id and the metadata dataframe.

        returns:
            list of dicts (one per matching row, empty if the id is unknown)
            with keys book_id, isbn, authors, title, original_title.
        """
        columns = ['book_id', 'isbn', 'authors', 'title', 'original_title']
        book_info = self.df_b.loc[self.df_b['book_id'] == book_id, columns]
        return book_info.to_dict(orient='records')

    def predict_review(self, user_id, book_title):
        """
        Predicts the review (on a scale of 1-5) that a user would assign to a specific book.

        The title is resolved with ``get_book_id`` (closest match).
        """
        book_id = self.get_book_id(book_title)
        review_prediction = self.svd.predict(uid=user_id, iid=book_id)
        return review_prediction.est

    def generate_recommendation(self, user_id, n_books=10, best=False):
        """
        Generates a book recommendation for a user based on a rating threshold.

        params:
            user_id -> id of the user to recommend for
            n_books -> maximum number of titles returned
            best -> bool
                False: visit books in random order and stop as soon as
                    ``n_books`` of them have a predicted rating at or above
                    ``rating_mean``.
                True: score every book and return the ``n_books`` with the
                    highest predicted rating (slower, no threshold).

        returns:
            list of titles sorted by predicted rating, highest first.
        """
        if n_books <= 0:
            return []

        # Iterate over (book_id, title) rows directly: resolving each title
        # back to an id through fuzzy matching is quadratic in the catalogue
        # size and unnecessary when the id is already known.
        candidates = list(zip(self.df_b["book_id"].tolist(), self.df_b["title"].tolist()))
        random.shuffle(candidates)

        scored = []
        for book_id, title in tqdm.tqdm(candidates):
            rating = self.svd.predict(uid=user_id, iid=book_id).est
            if best or rating >= self.rating_mean:
                scored.append((rating, title))
            if not best and len(scored) >= n_books:
                break

        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [title for _, title in scored[:n_books]]

    def show_books_from_user_id(self, user_id):
        """Print ``title rating`` for every book rated by ``user_id``, best rated first."""
        df_user = self.df_r[self.df_r["user_id"] == user_id]
        df_user = df_user.sort_values(by="rating", ascending=False, ignore_index=True)
        for book_id, rating in zip(df_user["book_id"], df_user["rating"]):
            title = self.df_b.loc[self.df_b["book_id"] == book_id, "title"].values[0]
            print(title, rating)

    def add_ratings(self, user_id, book_id, rating):
        """
        Add ratings for ``user_id``; if user doesn't exist -> create user.

        params:
            user_id -> id of the (possibly new) user
            book_id -> sequence of book ids
            rating -> sequence of ratings, same length as ``book_id``

        Books the user has already rated are skipped, as are repeated ids
        within ``book_id`` (the first occurrence wins). The input sequences
        are never modified. Nothing happens when the inputs are empty, have
        different lengths, or contain no new book.
        """
        if len(book_id) == 0 or len(book_id) != len(rating):
            return

        seen = set(self.df_r.loc[self.df_r["user_id"] == user_id, "book_id"].tolist())
        new_books, new_ratings = [], []
        for book, score in zip(book_id, rating):
            if book in seen:
                continue
            seen.add(book)
            new_books.append(book)
            new_ratings.append(score)
        if not new_books:
            return

        df = pd.DataFrame(data={
            "user_id": [user_id] * len(new_books),
            "book_id": new_books,
            "rating": new_ratings,
        })
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        self.df_r = pd.concat([self.df_r, df], ignore_index=True)
        self.base_update = True

    def del_user(self, user_id):
        """Remove every rating of ``user_id`` and renumber the remaining rows."""
        mask = self.df_r["user_id"] == user_id
        if not mask.any():
            return
        self.df_r = self.df_r[~mask].reset_index(drop=True)
        # The table changed, so it must be saved by update_database().
        self.base_update = True

In [15]:
# Shared recommender instance used by the helper functions in the next cell.
# Construction loads both CSV files, then cross-validates and fits the SVD
# model; verbose=1 prints the progress shown in the output below.
book_recommendation = BookRecommendation(verbose=1)

Loadind ratings.csv ...
Done
Loadind books.csv ...
Done
Cross validation SVD ...
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8522  0.8528  0.8521  0.8524  0.0003  
MAE (testset)     0.6656  0.6664  0.6656  0.6658  0.0004  
Fit time          105.26  106.95  106.42  106.21  0.70    
Test time         28.92   26.53   28.17   27.87   1.00    


In [16]:
def show_books(start=0, end=10):
    """Print books ``start``..``end`` from the shared recommender between two banners.

    The top banner shows ``start`` and the bottom banner shows ``end``, each
    centred in a 50-character line of dashes.
    """
    top_banner = (" " + str(start) + " ").center(50, "-")
    print("\n", top_banner)

    book_recommendation.show_books(start, end)

    bottom_banner = (" " + str(end) + " ").center(50, "-")
    print(bottom_banner, "\n")
    
    
def show_user(user_id):
    """Print the books rated by ``user_id`` using the shared recommender."""
    recommender = book_recommendation
    recommender.show_books_from_user_id(user_id)

    
def recommend_user(user_id, n_books=10):
    """Print what ``user_id`` has already rated, then the recommended titles.

    Both sections are framed by dashed banners; the recommendation section
    also reports how many titles were returned.
    """
    def banner(label):
        # Every banner is preceded by a blank line and padded to 50 columns.
        print("\n", label.center(50, "-"))

    banner(" already read ")
    book_recommendation.show_books_from_user_id(user_id)
    banner(" end ")

    banner(" recommendations ")
    titles = book_recommendation.generate_recommendation(user_id, n_books=n_books)
    print("nb_books", len(titles))
    for title in titles:
        print(title)
    banner(" end ")

    
def add_ratings(user_id=1000000, book_id=None, rating=None):
    """Add ratings for ``user_id`` through the shared recommender.

    params:
        user_id -> id of the user (created if unknown)
        book_id -> sequence of book ids; defaults to [1]
        rating -> sequence of ratings matching ``book_id``; defaults to [5]

    The defaults are built per call instead of being shared list objects,
    and copies are handed to the recommender so that neither the defaults
    nor the caller's sequences can be altered by the call.
    """
    book_ids = [1] if book_id is None else list(book_id)
    ratings = [5] if rating is None else list(rating)
    book_recommendation.add_ratings(user_id, book_ids, ratings)
    
    
def del_user(user_id=1000000):
    """Delete every rating of ``user_id`` through the shared recommender."""
    recommender = book_recommendation
    recommender.del_user(user_id)

In [17]:
# Scratch cell: uncomment one of the helper calls below to try it out.
# NOTE(review): show_related_books is not defined in the cells shown here.
#show_books(0, 10)
#show_related_books(1, n_books=10)
#add_ratings(user_id=1000000, book_id=[1, 2, 3, 4, 5], rating=[3, 5, 5, 4, 5])
#show_user(user_id=1)
#del_user(user_id=1000000)
# Print the books user 1 has rated, then up to 10 recommendations for them.
recommend_user(1, n_books=10)


 ------------------ already read ------------------
The Shadow of the Wind (The Cemetery of Forgotten Books,  #1) 5
All the Light We Cannot See 5
Caleb's Crossing 5
Those Who Leave and Those Who Stay (The Neapolitan Novels #3) 5
The Story of a New Name (The Neapolitan Novels #2) 5
My Brilliant Friend (The Neapolitan Novels #1) 5
Ender's Game (Ender's Saga, #1) 5
Antigone (The Theban Plays, #3) 5
Divine Secrets of the Ya-Ya Sisterhood 5
Peace Like a River 5
The Kite Runner 5
The Alchemist 5
To Kill a Mockingbird 5
The Glass Castle 5
Steve Jobs 5
The Idiot 5
The Death of Ivan Ilych 5
The Paris Wife 5
Where the Wild Things Are 5
Life of Pi 5
Gilead (Gilead, #1) 5
Crime and Punishment 5
Of Mice and Men 4
Year of Wonders 4
The Giver (The Giver, #1) 4
World Without End (The Kingsbridge Series, #2) 4
East of Eden 4
Pride and Prejudice 4
Memoirs of a Geisha 4
Cry to Heaven 4
A Year in Provence 4
West with the Night 4
1984 4
The Stranger 4
The Help 4
Little Bee 4
Unaccustomed Earth 4
Moonwalki

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



KeyboardInterrupt: 