In [41]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.decomposition import TruncatedSVD
import time

class ContentRecommendation(object):

    def __init__(self, verbose=0):
        """
        comment
        """
        init_timer = time.time()
        self.df_r, self.df_b = self._load_df(verbose=verbose)
        self.corr = self._init_corr(verbose=verbose)
        self.title_series = self.df_b["title"]
        if verbose > 0:
            print("Time to init:", time.time() - init_timer, "sec")


    def _load_df(self, verbose=0):
        if verbose > 0:
            print("Loadind ratings.csv ...")
        df_r = pd.read_csv('ratings.csv')
        df_r.sort_values(by="user_id", inplace=True)
        df_r = df_r.reset_index()
        if verbose > 0:
            print("Done")
            print("Loadind books.csv ...")
        df_b = pd.read_csv('books.csv')
        if verbose > 0:
            print("Done")
        return df_r, df_b


    def _init_corr(self, verbose=0):
        if verbose > 0:
            print("Loading matrix ...")
        df_r_pivot = self.df_r.pivot(index="user_id", columns ="book_id", values="rating")
        df_r_pivot = df_r_pivot.fillna(0)
        X = df_r_pivot.values.T
        if verbose > 0:
            print("Done")
            print("Fiting SVD ...")
        # SVD = TruncatedSVD(n_components=n_components, random_state=42)
        # matrix = SVD.fit_transform(X)
        if verbose > 0:
            print("Done")
            print("Loading corr ...")
        #corr = np.corrcoef(matrix)
        corr = np.corrcoef(X)
        if verbose > 0:
            print("Done")
        return corr
        

    def show_book_title_from_id(self, book_id):
        book_title = self.df_b.loc[self.df_b["book_id"] == book_id, "title"].values[0]
        print(book_id, book_title)


    def show_books(self, start, end):
        while start < end and start < len(self.title_series):
            print("book_id", start+1, "title", self.title_series[start])
            start += 1


    def related_books(self, book_id=False, book_title=False, n_books=10):
        idx = []
        if book_id:
            book_corr = self.corr[book_id - 1]
            idx = (-book_corr).argsort()[:n_books]

        elif book_title:
            book_id = self.df_b.loc[self.df_b["title"] == book_title, "book_id"].values[0]
            book_corr = self.corr[book_id - 1]
            idx = (-book_corr).argsort()[:n_books]

        i = 0
        while i < len(idx):
            idx[i] += 1
            i += 1

        return idx


    def show_books_from_user_id(self, user_id):
        print("User:", user_id)
        df_user = self.df_r[self.df_r["user_id"] == user_id]
        df_user.sort_values(by="rating", inplace=True)
        for i in df_user.index:
            print(self.df_b[self.df_b["book_id"] == df_user["book_id"][i]]["title"].values[0], df_user["rating"][i])


    def recommend_books_from_user_id(self, user_id, n_books=10):
        print("User:", user_id)
        df_user = self.df_r[self.df_r["user_id"] == user_id]
        
        if df_user.shape[0] == 0:
            return self.popularity_recommender(n_books)
        
        df_user.sort_values(by="rating", inplace=True)
        book_list = []
        for i in df_user.index:
            book_list.append(self.related_books(book_id=df_user["book_id"][i]))
        return book_list
    
    
    def popularity_recommender(self, n_books):
        # todo
        return [1]

In [42]:
content_recommendation = ContentRecommendation(verbose=1)

Loadind ratings.csv ...
Done
Loadind books.csv ...
Done
Loading matrix ...
Done
Fiting SVD ...
Done
Loading corr ...
Done
Time to init: 63.64938473701477 sec


In [43]:
def show_books(start=0, end=10):
    print("\n", str(" "+str(start)+" ").center(50, "-"))
    content_recommendation.show_books(start, end)
    print(str(" "+str(end)+" ").center(50, "-"), "\n")

def show_related_books(book_id, n_books=10):
    content_recommendation.show_book_title_from_id(book_id)
    book_list = content_recommendation.related_books(book_id=book_id, n_books=n_books)
    print("\n", " 0 ".center(50, "-"))
    for i in book_list:
        content_recommendation.show_book_title_from_id(i)
    print("\n", str(" "+str(n_books)+" ").center(50, "-"))
    
def show_user(user_id):
    content_recommendation.show_books_from_user_id(user_id)

def recommend_user(user_id, n_books=10):
    content_recommendation.show_books_from_user_id(user_id)
    book_list = content_recommendation.recommend_books_from_user_id(user_id, n_books=10)
    i = 0
    while i < len(book_list):
        print(str("related to book " + str(i)).center(50, "-"))
        for x in book_list[i]:
            content_recommendation.show_book_title_from_id(x)
        i += 1

In [44]:
#show_books(0, 10)
#show_related_books(1, n_books=10)
#show_user(2)
recommend_user(1)

User: 1
One Hundred Years of Solitude 1
The Emperor's Children 1
Atlas Shrugged 1
Angela's Ashes (Frank McCourt, #1) 2
The Dante Club 2
Speaker for the Dead (Ender's Saga, #2) 2
Eat, Pray, Love 2
Bel Canto 2
Running with Scissors 2
The Scarlet Letter 2
The Time Traveler's Wife 2
The Namesake 3
Big Little Lies 3
Reading Lolita in Tehran 3
The Secret Life of Bees 3
The Memory Keeper's Daughter 3
Girl with a Pearl Earring 3
Little Women (Little Women, #1) 3
Brunelleschi's Dome: How a Renaissance Genius Reinvented Architecture 3
The Fountainhead 3
The Stone Diaries 3
The Count of Monte Cristo 3
The Shining (The Shining #1) 3
The Lovely Bones 3
Middlesex 3
The Curious Incident of the Dog in the Night-Time 3
The Five People You Meet in Heaven 3
The Red Tent 3
Three Cups of Tea: One Man's Mission to Promote Peace ... One School at a Time 3
Snow Flower and the Secret Fan 3
A Thousand Splendid Suns 3
The Girl with the Dragon Tattoo (Millennium, #1) 3
Stories I Only Tell My Friends 3
The Giving 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user.sort_values(by="rating", inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user.sort_values(by="rating", inplace=True)


The Color Purple 4
Midnight's Children 4
A Man Without a Country 4
Memoirs of a Geisha 4
A Year in Provence 4
The Help 4
The Art of Fielding 4
The Elegance of the Hedgehog 4
Unaccustomed Earth 4
Little Bee 4
The Chronicles of Narnia (Chronicles of Narnia, #1-7) 4
The Grapes of Wrath 4
The Picture of Dorian Gray 4
East of Eden 4
The Poisonwood Bible 4
Mystic River 4
The Last Lecture 4
The Cat's Table 4
World Without End (The Kingsbridge Series, #2) 4
The Invention of Wings 4
Born on a Blue Day: Inside the Extraordinary Mind of an Autistic Savant 4
The Forty Rules of Love 4
Jane Eyre 4
The Brothers Karamazov 4
Water for Elephants 4
Moonwalking with Einstein: The Art and Science of Remembering Everything 4
The Pillars of the Earth (The Kingsbridge Series, #1) 4
Love in the Time of Cholera 4
Year of Wonders 4
The Giver (The Giver, #1) 4
Gone with the Wind 4
People of the Book 4
The Idiot 5
The Death of Ivan Ilych 5
The Shadow of the Wind (The Cemetery of Forgotten Books,  #1) 5
Where the W