In [25]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.decomposition import TruncatedSVD
import time

class ContentRecommendation(object):
    """Book recommender driven by a book-to-book rating correlation matrix.

    Instance attributes (all assigned in ``__init__``):
        df_r: ratings frame; the columns read are ``user_id``, ``book_id``
            and ``rating``.
        df_b: books frame; the columns read are ``book_id`` and ``title``.
        corr: square array returned by ``np.corrcoef`` over per-book rating
            vectors (one row per book).
        title_series: the ``title`` column of ``df_b``.

    NOTE(review): every lookup treats row ``i`` of ``corr`` (and position ``i``
    of ``title_series``) as ``book_id == i + 1``. That only holds when book ids
    are the contiguous integers 1..N -- confirm against the CSV files.
    """

    def __init__(self, verbose=0, n_components=10,
                 ratings_path='ratings.csv', books_path='books.csv'):
        """Load both CSV files and precompute the correlation matrix.

        Args:
            verbose: values greater than 0 print progress and timing messages.
            n_components: forwarded to ``_init_corr``; currently has no effect
                because the SVD step is disabled there.
            ratings_path: CSV with ``user_id``, ``book_id``, ``rating`` columns.
            books_path: CSV with ``book_id`` and ``title`` columns.
        """
        init_timer = time.time()
        self.df_r, self.df_b = self._load_df(
            verbose=verbose, ratings_path=ratings_path, books_path=books_path)
        self.corr = self._init_corr(verbose=verbose, n_components=n_components)
        self.title_series = self.df_b["title"]
        if verbose > 0:
            print("Time to init:", time.time() - init_timer, "sec")

    def _load_df(self, verbose=0, ratings_path='ratings.csv', books_path='books.csv'):
        """Read the ratings and books CSV files.

        Returns:
            ``(df_r, df_b)``: ratings sorted by ``user_id`` with a fresh
            0..n-1 index, and the books frame exactly as read.
        """
        if verbose > 0:
            print("Loadind ratings.csv ...")
        # drop=True keeps the old row labels from leaking in as an "index" column.
        df_r = (
            pd.read_csv(ratings_path)
            .sort_values(by="user_id")
            .reset_index(drop=True)
        )
        if verbose > 0:
            print("Done")
            print("Loadind books.csv ...")
        df_b = pd.read_csv(books_path)
        if verbose > 0:
            print("Done")
        return df_r, df_b

    def _init_corr(self, verbose=0, n_components=20):
        """Build the book-by-book correlation matrix from ``self.df_r``.

        Ratings are pivoted to a user x book table, missing ratings become 0,
        and ``np.corrcoef`` is applied to the transposed table so that each
        row/column of the result corresponds to one book.

        Args:
            verbose: values greater than 0 print progress messages.
            n_components: accepted for interface compatibility only; the
                TruncatedSVD reduction it was meant for is disabled.

        Returns:
            Square ``numpy`` array of pairwise correlations between books.
        """
        if verbose > 0:
            print("Loading matrix ...")
        # DataFrame.pivot raises ValueError on repeated (user_id, book_id)
        # pairs, so keep only the last rating a user gave to a book.
        unique_ratings = self.df_r.drop_duplicates(
            subset=["user_id", "book_id"], keep="last")
        df_r_pivot = unique_ratings.pivot(
            index="user_id", columns="book_id", values="rating").fillna(0)
        X = df_r_pivot.values.T
        if verbose > 0:
            print("Done")
            print("Fiting SVD ...")
        # NOTE: no SVD is fitted here; the correlation is taken on the raw
        # rating vectors. The progress messages are kept so the verbose log
        # format does not change.
        if verbose > 0:
            print("Done")
            print("Loading corr ...")
        corr = np.corrcoef(X)
        if verbose > 0:
            print("Done")
        return corr

    def _title_from_id(self, book_id):
        """Return the title of the first ``df_b`` row whose ``book_id`` matches.

        Raises:
            IndexError: if no row of ``df_b`` has that ``book_id``.
        """
        titles = self.df_b.loc[self.df_b["book_id"] == book_id, "title"]
        if titles.empty:
            raise IndexError("unknown book_id: {!r}".format(book_id))
        return titles.values[0]

    def show_book_title_from_id(self, book_id):
        """Print ``book_id`` followed by its title.

        Raises:
            IndexError: if ``book_id`` is not present in ``df_b``.
        """
        print(book_id, self._title_from_id(book_id))

    def show_books(self, start, end):
        """Print the titles at positions ``start`` .. ``end - 1`` of ``title_series``.

        Each line shows ``position + 1`` as the book id. The range is clipped
        to the available titles; negative ``start`` is treated as 0.
        """
        stop = min(end, len(self.title_series))
        for pos in range(max(start, 0), stop):
            # .iloc: positional access, independent of the frame's index labels.
            print("book_id", pos + 1, "title", self.title_series.iloc[pos])

    def related_books(self, book_id=False, book_title=False, n_books=10):
        """Return ids of the ``n_books`` books most correlated with one book.

        The book is selected by ``book_id`` when that is truthy, otherwise by
        exact ``book_title`` match. The book itself is part of the ranking
        (its self-correlation is the diagonal entry of ``corr``).

        Returns:
            1-D integer ``numpy`` array of book ids, best match first; empty
            when neither ``book_id`` nor ``book_title`` is given.

        Raises:
            IndexError: if ``book_title`` matches no row of ``df_b`` or the
                resolved id is outside ``corr``.
        """
        if not book_id:
            if not book_title:
                return np.array([], dtype=int)
            matches = self.df_b.loc[self.df_b["title"] == book_title, "book_id"]
            if matches.empty:
                raise IndexError("unknown book title: {!r}".format(book_title))
            book_id = matches.values[0]

        book_corr = self.corr[book_id - 1]
        # Negate so argsort yields descending correlation; +1 maps rows to ids.
        return (-book_corr).argsort()[:n_books] + 1

    def show_books_from_user_id(self, user_id):
        """Print every book rated by ``user_id`` with its rating, lowest rating first."""
        print("User:", user_id)
        # Non-inplace sort: the filtered frame is a slice of df_r, and sorting
        # it in place triggers pandas' SettingWithCopy warning.
        df_user = self.df_r[self.df_r["user_id"] == user_id].sort_values(by="rating")
        for book_id, rating in zip(df_user["book_id"], df_user["rating"]):
            print(self._title_from_id(book_id), rating)

    def recommend_books_from_user_id(self, user_id, n_books=10):
        """Return up to ``n_books`` recommended book ids for ``user_id``.

        Users without any rating get ``popularity_recommender(n_books)``.
        Otherwise every book is scored by the rating-weighted sum of its
        correlations with the books the user rated; books the user already
        rated are excluded and the best-scoring ids are returned.

        Returns:
            List of ``int`` book ids, best first.
        """
        print("User:", user_id)
        df_user = self.df_r[self.df_r["user_id"] == user_id]
        if df_user.shape[0] == 0:
            # return books id
            return self.popularity_recommender(n_books)

        rated_rows = df_user["book_id"].to_numpy() - 1
        weights = df_user["rating"].to_numpy(dtype=float)
        # np.corrcoef yields NaN for constant rating vectors; count those as
        # "no correlation" so they cannot poison the weighted sum.
        scores = weights @ np.nan_to_num(self.corr[rated_rows])
        scores[rated_rows] = -np.inf  # never recommend an already-rated book
        ranking = np.argsort(-scores, kind="stable")
        n_candidates = scores.size - np.unique(rated_rows).size
        limit = max(0, min(n_books, n_candidates))
        return [int(row) + 1 for row in ranking[:limit]]

    def popularity_recommender(self, n_books):
        """Return the ids of the ``n_books`` most popular books.

        Books are ranked by number of ratings, then by mean rating, then by
        ascending ``book_id`` so the order is deterministic.

        Returns:
            List of ``int`` book ids; empty when ``n_books`` is not positive.
        """
        if n_books <= 0:
            return []
        stats = (
            self.df_r.groupby("book_id")["rating"]
            .agg(["count", "mean"])
            .reset_index()
            .sort_values(by=["count", "mean", "book_id"],
                         ascending=[False, False, True])
        )
        return [int(book_id) for book_id in stats["book_id"].head(n_books)]

In [26]:
# Build the recommender once for the cells below; verbose=1 prints the
# loading progress messages and the total initialisation time.
content_recommendation = ContentRecommendation(verbose=1)

Loadind ratings.csv ...
Done
Loadind books.csv ...
Done
Loading matrix ...
Done
Fiting SVD ...
Done
Loading corr ...
Done
Time to init: 65.72552037239075 sec


In [15]:
def show_books(start=0, end=10):
    """Print the titles in positions ``start`` to ``end`` between two dashed rulers.

    The rulers are 50 characters wide and carry ``start`` and ``end`` as
    labels; the listing itself is delegated to the notebook-level
    ``content_recommendation`` object.
    """
    header = " {} ".format(start).center(50, "-")
    footer = " {} ".format(end).center(50, "-")

    print("\n", header)
    content_recommendation.show_books(start, end)
    print(footer, "\n")

def show_related_books(book_id, n_books=10):
    """Print a book's title followed by the titles of its related books.

    The related ids come from ``content_recommendation.related_books``; the
    list is framed by a ruler labelled 0 above and one labelled ``n_books``
    below.
    """
    recommender = content_recommendation

    recommender.show_book_title_from_id(book_id)
    related_ids = recommender.related_books(book_id=book_id, n_books=n_books)

    print("\n", " 0 ".center(50, "-"))
    for related_id in related_ids:
        recommender.show_book_title_from_id(related_id)
    print("\n", " {} ".format(n_books).center(50, "-"))
    
def show_user(user_id):
    """Print every book rated by ``user_id`` using the shared recommender object."""
    recommender = content_recommendation
    recommender.show_books_from_user_id(user_id)

def recommend_user(user_id, n_books=10):
    """Print and return up to ``n_books`` recommended books for ``user_id``.

    The previous body was a copy of ``show_user``: it ignored ``n_books`` and
    only listed the books the user had already rated. This version asks the
    notebook-level ``content_recommendation`` object for recommendations and
    prints one ``book_id title`` line per recommended book.

    Returns:
        The book ids returned by ``recommend_books_from_user_id``.
    """
    book_ids = content_recommendation.recommend_books_from_user_id(
        user_id, n_books=n_books)
    for book_id in book_ids:
        content_recommendation.show_book_title_from_id(book_id)
    return book_ids
    

In [27]:
# Alternative demo calls, left disabled; uncomment to list the first titles
# or the books related to book 1.
#show_books(0, 10)
#show_related_books(1, n_books=10)
# Print every book rated by user 2 together with its rating.
show_user(2)

User: 2
['The Grapes of Wrath']
5
['Hamlet']
4
['Lean In: Women, Work, and the Will to Lead']
5
["Good to Great: Why Some Companies Make the Leap... and Others Don't"]
5
['The Catcher in the Rye']
4
['The Great Gatsby']
5
['The Sun Also Rises']
3
["The Hitchhiker's Guide to the Galaxy (Hitchhiker's Guide to the Galaxy, #1)"]
5
['Gone with the Wind']
3
['On the Road']
5
['Do Androids Dream of Electric Sheep?']
5
['Social Intelligence: The New Science of Human Relationships']
4
['Blink: The Power of Thinking Without Thinking']
5
['Outliers: The Story of Success']
5
['The Tipping Point: How Little Things Can Make a Big Difference']
5
['All Marketers Are Liars: The Power of Telling Authentic Stories in a Low-Trust World']
4
['Linchpin: Are You Indispensable?']
4
['Purple Cow: Transform Your Business by Being Remarkable']
4
['Men Are from Mars, Women Are from Venus']
2
['The Diary of a Young Girl']
5
['The Secret (The Secret, #1)']
2
['Steve Jobs']
5
["Harry Potter and the Sorcerer's Stone 