In [1]:
import pandas as pd
import numpy as np

In [2]:
class Graph(object):
    """Reading graph used to recommend highly rated but unpopular books.

    The graph structure lives in the node objects themselves: the methods
    below walk users through their ``author_list`` and ``shelf`` attributes
    and authors/books through their ``reader_list`` attribute.  This class
    only stores the list of edges it was given and implements the random
    walks on top of those nodes.
    """

    # Upper bound on random retries so that no search can spin forever.
    MAX_ATTEMPTS = 100
    # A shelf entry is recommendable only when rated strictly above this value.
    MIN_RATING = 4

    def __init__(self, reads):
        """Store the edge list.

        Args:
            reads: the edges of the graph (kept as given, not inspected here).
        """
        # Edges
        self.reads = reads

    @staticmethod
    def _popularity_of(entry):
        """Sort key for ``(book, popularity, rating)`` tuples: the popularity."""
        return entry[1]

    def _liked_books(self, shelf, exclude=()):
        """Return the well-rated books of a shelf, least popular first.

        Args:
            shelf: iterable of ``(book, rating)`` pairs.
            exclude: container of books that must not be returned.

        Returns:
            A list of ``(book, popularity, rating)`` tuples sorted by ascending
            popularity (ties keep shelf order).  Entries without a rating are
            skipped instead of raising on the ``None > 4`` comparison.
        """
        _liked = [(book, book.popularity, rating) for book, rating in shelf
                  if rating is not None and rating > self.MIN_RATING
                  and book not in exclude]
        _liked.sort(key=self._popularity_of)
        return _liked

    def _find_a_user(self, input_User, debug=False):
        """Pick a random other user who shares an author with ``input_User``.

        An author is chosen uniformly among the user's authors that have at
        least one other reader, then a reader is chosen uniformly among that
        author's readers other than ``input_User``.

        Args:
            input_User: node exposing ``author_list``; ``None`` is accepted.
            debug: print intermediate values when true.

        Returns:
            The selected user, or ``None`` when ``input_User`` is ``None``, has
            no authors, or none of its authors has another reader.
        """
        if input_User is None:
            return None

        _author_list = input_User.author_list
        if debug:
            print("Method: _find_a_user : `_author_list`: ", _author_list)

        # Only authors that lead somewhere: filtering up front replaces the
        # unbounded rejection loop that never ended for isolated users.
        _eligible = [author for author in _author_list
                     if any(reader != input_User for reader in author.reader_list)]
        if not _eligible:
            return None

        # np.random.randint(n) draws from [0, n): the upper bound is exclusive.
        _next_author = _eligible[np.random.randint(len(_eligible))]
        if debug:
            print("Method: _find_a_user : `_next_author` : ", _next_author)

        # Don't pick the user himself.
        _others = [reader for reader in _next_author.reader_list
                   if reader != input_User]
        _next_User = _others[np.random.randint(len(_others))]
        if debug:
            print("Method: _find_a_user : `_next_User`: ", _next_User)
        return _next_User

    def _book2book(self, input_Book, N=3, debug=False):
        """Recommend unpopular books starting from a book (not user based).

        Each recommendation is produced by sampling a random reader of
        ``input_Book`` and taking the least popular book that reader rated
        above ``MIN_RATING``.  Nothing is filtered out of the result: the input
        book itself and repeated books may appear.

        Args:
            input_Book: node exposing ``reader_list``.
            N: number of recommendations wanted.
            debug: print the sampled reader when true.

        Returns:
            A list of at most ``N`` books.  It is shorter than ``N`` (possibly
            empty) when the book has no readers or when ``MAX_ATTEMPTS``
            sampled readers had no well-rated book.
        """
        out_recs = []
        _reader_list = input_Book.reader_list
        if not _reader_list:
            return out_recs

        # Only unproductive draws count against the budget, so a request that
        # succeeds on every draw still performs exactly N draws.
        _misses = 0
        while len(out_recs) < N and _misses < self.MAX_ATTEMPTS:
            _rand_User = _reader_list[np.random.randint(len(_reader_list))]
            if debug:
                print("Method: _book2book : `_rand_User`: ", _rand_User)
            # NB: no filtering on read books, as there is no input user.
            _liked = self._liked_books(_rand_User.shelf)
            if not _liked:
                _misses += 1
                continue
            out_recs.append(_liked[0][0])

        return out_recs

    def _find_a_book(self, input_User, two_hop=False, debug=False):
        """Recommend one book to ``input_User``.

        A neighbouring user is drawn through a shared author (optionally one
        hop further), and the least popular book that neighbour rated above
        ``MIN_RATING`` and that ``input_User`` has not shelved is returned.
        A fresh neighbour is drawn on every attempt.

        Args:
            input_User: node exposing ``author_list`` and ``shelf``.
            two_hop: when true, hop from the neighbour to one of *its*
                neighbours; the first neighbour is kept if the second hop
                fails or lands back on ``input_User``.
            debug: print intermediate values when true.

        Returns:
            A book, or ``None`` when the user has no neighbour or no suitable
            book was found within ``MAX_ATTEMPTS`` draws.
        """
        if input_User is None:
            return None
        if debug:
            print("Method: _find_a_book : `input_User`: ", input_User)

        # Built once: membership tests against a set instead of rebuilding a
        # list of read books for every candidate book.
        _already_read = set(book for book, _rating in input_User.shelf)

        for _attempt in range(self.MAX_ATTEMPTS):
            _next_User = self._find_a_user(input_User, debug=debug)
            if _next_User is None:
                # No neighbour exists at all; another draw cannot change that.
                return None

            if two_hop:
                _two_hop = self._find_a_user(_next_User, debug=debug)
                if _two_hop is not None and _two_hop != input_User:
                    _next_User = _two_hop

            if debug:
                print("Method: _find_a_book : `_next_User`: ", _next_User)

            # Books the neighbour rated highly that the input user hasn't read.
            _unread_list = self._liked_books(_next_User.shelf, exclude=_already_read)
            _n_unread = len(_unread_list)
            if _n_unread == 0:
                continue

            if debug:
                if _n_unread > 1:
                    print("Method: _find_a_book : Most unpopular book title, popularity, and rating ",
                          _unread_list[0][0].book_id, _unread_list[0][1])
                    print("Method: _find_a_book : Most popular book title and popularity ",
                          _unread_list[_n_unread - 1][0].book_id, _unread_list[_n_unread - 1][1])
                else:
                    print("Method: _find_a_book : Most unpopular book title and popularity ",
                          _unread_list[0][0].book_id, _unread_list[0][1])

            # The list is sorted by ascending popularity: the first is the rarest.
            return _unread_list[0][0]

        return None

    def GrabNBooks(self, input_User, N=3, debug=False, two_hop=False):
        """Collect up to ``N`` distinct rare-book recommendations for a user.

        Args:
            input_User: node exposing ``author_list`` and ``shelf``.
            N: number of distinct books wanted.
            debug: forwarded to ``_find_a_book``.
            two_hop: forwarded to ``_find_a_book``.

        Returns:
            ``[]`` when ``N <= 0``; otherwise a list of distinct books holding
            ``N`` entries, or fewer if no more could be found within
            ``MAX_ATTEMPTS`` searches; ``None`` when nothing was found.
        """
        if N <= 0:
            return []

        RareBooks = []
        for _attempt in range(self.MAX_ATTEMPTS):
            _book = self._find_a_book(input_User, two_hop=two_hop, debug=debug)
            if _book is None:
                # The inner search already used up its own retry budget.
                break
            if _book in RareBooks:
                continue
            RareBooks.append(_book)
            if len(RareBooks) == N:
                return RareBooks
        return RareBooks or None


In [3]:
class User(object):
    """A reader node of the graph.

    Attributes are filled in from outside after construction:
    ``shelf`` holds the books read and ``author_list`` the authors read;
    both start out empty.
    """

    def __init__(self, user_id):
        # Identifier first, then two independent empty containers.
        self.user_id, self.shelf, self.author_list = user_id, [], []


In [4]:
class Book(object):
    """A book node of the graph.

    Args:
        book_id: identifier of the book.
        Author: the author node; its ``author_id`` is copied onto the book.
        ratings_5: number of people that rated the book a 5.
        popularity: fraction of all ratings that belong to this book.
        image_url: URL of the cover image.
    """

    def __init__(self, book_id, Author, ratings_5, popularity, image_url):
        self.book_id = book_id
        self.author = Author
        self.author_id = Author.author_id
        self.ratings_5 = ratings_5  # Number of people that rated the book a 5
        self.popularity = popularity  # What fraction of ratings does this book have?
        self.image_url = image_url
        self.reader_list = []  # Users that read the book, in first-seen order
        # Mirror of reader_list used only for O(1) duplicate detection; keep
        # the two in sync by registering readers through add_reader().
        self._reader_set = set()

    def add_reader(self, User):
        """Register ``User`` as a reader of this book; repeated calls are no-ops.

        The membership test runs against a set rather than scanning the list,
        so adding many readers to a popular book stays linear overall.
        """
        if User not in self._reader_set:
            self._reader_set.add(User)
            self.reader_list.append(User)  # User read this book


In [5]:
class Author(object):
    """An author node of the graph.

    Args:
        author_id: identifier of the author.
    """

    def __init__(self, author_id):
        self.author_id = author_id
        self.reader_list = []  # Users who read this author, in first-seen order
        # Mirror of reader_list used only for O(1) duplicate detection; keep
        # the two in sync by registering readers through add_reader().
        self._reader_set = set()

    def add_reader(self, User):
        """Register ``User`` as a reader of this author; repeated calls are no-ops.

        The membership test runs against a set rather than scanning the list,
        so adding many readers to a widely read author stays linear overall.
        """
        if User not in self._reader_set:
            self._reader_set.add(User)
            self.reader_list.append(User)  # User read this author


In [6]:
class Read(object):
    """An edge of the graph: one user read (and optionally rated) one book.

    Creating the edge also wires the nodes together: the book is put on the
    user's shelf, the author is added to the user's author list, and the user
    is registered as a reader of both the book and the author.

    Args:
        User: the reader node (exposes ``shelf`` and ``author_list``).
        Book: the book node (exposes ``add_reader``).
        Author: the author node (exposes ``add_reader``).
        rating: the user's rating of the book, optional.
    """

    def __init__(self, User, Book, Author, rating=None):

        # The shelf stores (book, rating) pairs, so the book has to be compared
        # with the first element of each pair; testing `Book not in shelf`
        # never matched and shelved the same book again on every repeated read.
        if not any(shelved_book == Book for shelved_book, _rating in User.shelf):
            User.shelf.append((Book, rating))  # User read this book and rated it.
        if Author not in User.author_list:
            User.author_list.append(Author)

        self.user = User
        self.book = Book
        self.author = Author
        self.rating = rating  # Optional

        Book.add_reader(User)
        Author.add_reader(User)


In [9]:
def BuildGraph(ratings_path="../dataset/ratings.csv", books_path="../dataset/books.csv"):
    """Load the ratings and books CSV files and build the reading graph.

    The graph contains User, Book and Author nodes connected by Read edges.
    Only books whose ``language_code`` is ``"eng"`` or ``"en-US"`` are kept,
    and only ratings of those books become edges (inner join on ``book_id``).

    Args:
        ratings_path: CSV with at least ``user_id``, ``book_id`` and ``rating``.
        books_path: CSV with at least ``book_id``, ``original_title``,
            ``authors``, ``language_code``, ``ratings_count``, ``ratings_5``
            and ``image_url``.

    Returns:
        ``(BigGraph, titles_dict)`` where ``BigGraph`` is the ``Graph`` holding
        every ``Read`` edge and ``titles_dict`` maps each lower-cased
        ``original_title`` to ``{"Book": <Book object>}``.  Books without an
        original title are left out of ``titles_dict``, and when several books
        share a title the first one wins.
    """
    uir = pd.read_csv(ratings_path)
    books = pd.read_csv(books_path)

    # .copy() because the boolean filter yields a slice of the loaded frame;
    # adding columns to that slice triggers pandas' SettingWithCopyWarning.
    books = books[books["language_code"].isin(["eng", "en-US"])].copy()
    books["author_id"] = (books["authors"].astype("category")).cat.codes  # Gives us an index

    # Popularity = this book's share of all ratings among the kept books.
    books["popularity_ratings"] = books["ratings_count"] / books["ratings_count"].sum()

    # Join the two DataFrames together
    uir = pd.merge(uir, books[["book_id", "original_title",
                               "author_id", "popularity_ratings", "ratings_5", "image_url"]], on=["book_id"])

    # Author Objects
    unique_authors = uir[["author_id"]].drop_duplicates()
    unique_authors["Author"] = [Author(aid) for aid in unique_authors["author_id"]]
    unique_authors = unique_authors.set_index("author_id", drop=True)

    # User Objects
    unique_users = uir[["user_id"]].drop_duplicates()
    unique_users["User"] = [User(uid) for uid in unique_users["user_id"]]
    unique_users = unique_users.set_index("user_id", drop=True)

    # {id: {"User": obj}} / {id: {"Author": obj}} lookup tables
    user_dict = unique_users.to_dict("index")
    author_dict = unique_authors.to_dict("index")

    # Book Objects
    unique_books = uir[["book_id", "original_title", "author_id", "ratings_5", "popularity_ratings",
                        "image_url"]].drop_duplicates()
    unique_books["Book"] = [Book(bid, author_dict[aid]["Author"], rat, pop, url) for bid, aid, rat, pop, url
                            in unique_books[
                                ["book_id", "author_id", "ratings_5", "popularity_ratings", "image_url"]].values]

    # {book_id: {"original_title": ..., "Book": obj}}
    _unique_books = unique_books.set_index("book_id", drop=True)
    _unique_books = _unique_books.drop(["author_id", "ratings_5", "popularity_ratings", "image_url"], axis=1)
    book_dict = _unique_books.to_dict("index")

    # {lower-cased title: {"Book": obj}} for looking books up by name
    _unique_titles = unique_books.copy()
    _unique_titles["original_title"] = _unique_titles["original_title"].str.lower()
    _unique_titles = _unique_titles.drop(["author_id", "book_id", "ratings_5", "popularity_ratings", "image_url"], axis=1)
    _unique_titles = _unique_titles.drop_duplicates("original_title").dropna()
    _unique_titles = _unique_titles.set_index("original_title", drop=True)
    titles_dict = _unique_titles.to_dict("index")

    # Creating a Read wires user, book and author together as a side effect.
    read_list = [Read(user_dict[u]["User"], book_dict[b]["Book"], author_dict[a]["Author"], rating=int(r))
                 for u, b, a, r in uir[["user_id", "book_id", "author_id", "rating"]].values]

    BigGraph = Graph(read_list)

    return BigGraph, titles_dict


In [10]:
import time

# Build the graph once and report the wall-clock time the construction took.
start_time = time.time()
BigGraph, titles_dict = BuildGraph()
elapsed_seconds = time.time() - start_time
print("--- %s seconds ---" % elapsed_seconds)

--- 406.46858501434326 seconds ---


In [11]:
# Look a book up by its (case-insensitive) title and ask for three
# book-to-book recommendations; `book` is the first of them.
title = "The Hunger Games"
book_object = titles_dict[title.lower()]["Book"]
book_list = BigGraph._book2book(book_object, N=3)
book = book_list[0]

# One item per line: the query book, the recommendations, the first pick.
print(
    book_object,
    "----------------------------------------------------",
    book_list,
    "----------------------------------------------------",
    book,
    sep="\n",
)

<__main__.Book object at 0x0000019C0DACA0C8>
----------------------------------------------------
[<__main__.Book object at 0x0000019C0DAA4788>, <__main__.Book object at 0x0000019C0DA84E08>, <__main__.Book object at 0x0000019C0D98E8C8>]
----------------------------------------------------
<__main__.Book object at 0x0000019C0DAA4788>
