In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
from google.colab import drive

In [None]:
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
cd /content/gdrive/My Drive/Book recommendations/

/content/gdrive/My Drive/Book recommendations


**Finding users with similar interests**

In [None]:
import pandas as pd

# Our own liked books; the first CSV column becomes the index.
# book_id is converted to str so it compares equal to the string ids
# parsed from the mapping and interactions files.
mybooks = pd.read_csv("liked_book1.csv", index_col=0)
mybooks = mybooks.assign(book_id=mybooks["book_id"].astype(str))
mybooks.head()

Unnamed: 0,user_id,book_id,rating,title
0,-1,2517439,5,"The Forever War (The Forever War, #1)"
1,-1,113576,5,The Smartest Guys in the Room: The Amazing Ris...
2,-1,35100,5,Battle Cry of Freedom
3,-1,228221,5,The Mask of Command
5,-1,17662739,5,"2001: A Space Odyssey (Space Odyssey, #1)"


In [None]:
!head book_id_map.csv

book_id_csv,book_id
0,34684622
1,34536488
2,34017076
3,71730
4,30422361
5,33503613
6,33517540
7,34467031
8,6383669


In [None]:
import csv

# The book ids in goodreads_interactions.csv differ from the book ids used in the
# JSON file, so book_id_map.csv is used to translate between them.
# csv_book_mapping maps csv id (str) -> book id (str); ids stay strings because the
# later lookups use the raw text fields read from the interactions file.
# DictReader consumes the "book_id_csv,book_id" header row, so the header is not
# stored as a bogus mapping entry.
with open("book_id_map.csv", "r", newline="") as f:
  csv_book_mapping = {row["book_id_csv"]: row["book_id"] for row in csv.DictReader(f)}

In [None]:
# Set of our liked book ids (strings), used for fast membership tests while
# scanning the interactions file.
book_set=set(mybooks["book_id"])

In [None]:
!wc -l goodreads_interactions.csv

228648343 goodreads_interactions.csv


In [None]:
# user_id -> number of interaction rows in which that user has a book from our list
overlap_users = {}

with open("goodreads_interactions.csv") as f:
  for line in f:
    # Every row must have exactly five comma-separated fields; the 3rd and 5th are ignored.
    user_id, csv_id, _, rating, _ = line.strip().split(",")
    # Translate the csv id into the book id used in our liked-books list
    # (None when the csv id has no entry in the mapping).
    book_id = csv_book_mapping.get(csv_id)
    if book_id not in book_set:
      continue
    overlap_users[user_id] = overlap_users.get(user_id, 0) + 1


In [None]:
# Number of distinct users that share at least one book with our list.
len(overlap_users)

316341

In [None]:
# Keep only the users whose overlap count is greater than 20% of the number of
# books in our own list.
min_overlap = mybooks.shape[0] / 5
filtered_overlap_users = {
    user for user, n_shared in overlap_users.items() if n_shared > min_overlap
}

In [None]:
# Number of users that passed the overlap threshold.
len(filtered_overlap_users)

1258

In [None]:
# One [user_id, book_id, rating] entry (all strings) for every interaction row that
# belongs to a user in filtered_overlap_users.
interactions_list = []

with open("goodreads_interactions.csv") as f:
  for line in f:
    user_id, csv_id, _, rating, _ = line.strip().split(",")
    if user_id not in filtered_overlap_users:
      continue
    # Store the mapped book id rather than the csv id; an unmapped csv id raises KeyError.
    book_id = csv_book_mapping[csv_id]
    interactions_list.append([user_id, book_id, rating])

In [None]:
# Total number of interaction rows collected for the filtered users.
len(interactions_list)

5638701

**Generating user/book matrix**

*Each row of the matrix is a different user, each column is a different book, and each cell holds the rating that user gave to that book.*

In [None]:
# Peek at one entry: [user_id, book_id, rating], all still strings.
interactions_list[0]

['282', '627206', '4']

In [None]:
# Turn the collected [user_id, book_id, rating] entries into a DataFrame.
interactions=pd.DataFrame(interactions_list, columns=["user_id","book_id","rating"])

In [None]:
# Put our own ratings in front of the other users' interactions so we get a row in the matrix too.
# NOTE(review): this cell is not idempotent - running it again prepends mybooks a second time.
# NOTE(review): the original index labels of both frames are kept, so labels presumably repeat - confirm this is harmless.
interactions=pd.concat([mybooks[["user_id","book_id","rating"]],interactions])

In [None]:
# Normalise dtypes after the concat: both id columns as strings, rating as a number.
interactions = interactions.assign(
    book_id=interactions["book_id"].astype(str),
    user_id=interactions["user_id"].astype(str),
    rating=pd.to_numeric(interactions["rating"]),
)

In [None]:
# Distinct user ids present in the combined frame (our own id is "-1").
interactions["user_id"].unique()

array(['-1', '282', '874', ..., '442043', '712588', '804100'],
      dtype=object)

In [None]:
# Integer code per distinct user_id; used as the row position in the ratings matrix.
user_categories = interactions["user_id"].astype("category")
interactions["user_index"] = user_categories.cat.codes
interactions["user_index"].unique()

array([   0,  555, 1216, ..., 1054, 1143, 1183], dtype=int16)

In [None]:
# Integer code per distinct book_id; used as the column position in the ratings matrix.
book_categories = interactions["book_id"].astype("category")
interactions["book_index"] = book_categories.cat.codes
interactions["book_index"].unique()

array([414880,  38971, 575858, ..., 759827, 631564, 552277], dtype=int32)

In [None]:
from scipy.sparse import coo_matrix

# User x book ratings matrix: rows are user_index, columns are book_index and the
# stored values are the ratings. Kept sparse in order to save memory.
row_idx = interactions["user_index"]
col_idx = interactions["book_index"]
ratings_mat_coo = coo_matrix((interactions["rating"], (row_idx, col_idx)))
ratings_mat_coo

<1259x802870 sparse matrix of type '<class 'numpy.int64'>'
	with 5638728 stored elements in COOrdinate format>

In [None]:
# Convert to CSR format, which supports the row indexing used in the similarity step.
ratings_mat=ratings_mat_coo.tocsr()

In [None]:
# Show our own rows (user_id "-1") to read off which user_index was assigned to us.
interactions[interactions["user_id"]=="-1"]

Unnamed: 0,user_id,book_id,rating,user_index,book_index
0,-1,2517439,5,0,414880
1,-1,113576,5,0,38971
2,-1,35100,5,0,575858
3,-1,228221,5,0,356004
5,-1,17662739,5,0,214285
6,-1,356824,5,0,581743
7,-1,12125412,5,0,59763
8,-1,139069,5,0,124430
10,-1,76680,5,0,722098
11,-1,1898,5,0,276178


In [None]:
# Row of the ratings matrix that belongs to us. Looked up from the user_index assigned
# to our user_id ("-1") rather than hard-coded, so it stays correct if the set of
# user ids (and therefore the category codes) changes. Raises IndexError if we have no rows.
my_index = int(interactions.loc[interactions["user_id"] == "-1", "user_index"].iloc[0])

In [None]:
# Cosine similarity between our row and every row of the ratings matrix, flattened to 1-D
# so that similarity[i] is the score for user_index i. Our own row is part of ratings_mat,
# so it is compared with itself as well.
from sklearn.metrics.pairwise import cosine_similarity
similarity =cosine_similarity(ratings_mat[my_index,:],ratings_mat).flatten()

In [None]:
import numpy as np
# Positions (user_index values) of the 10 largest similarity scores; argpartition does not
# return them in sorted order.
# NOTE(review): our own row is included in `similarity`, so it presumably occupies one of
# these 10 slots and fewer than 10 other users remain once "-1" is filtered out - confirm.
indices=np.argpartition(similarity,-10)[-10:]
#find positions of users with more similarity(top 10)

In [None]:
# All interaction rows that belong to the selected most-similar users (copied so later
# changes do not touch `interactions`).
similar_users=interactions[interactions["user_index"].isin(indices)].copy()

In [None]:
# Drop our own rows (user_id "-1") so only other users' ratings remain.
similar_users=similar_users[similar_users["user_id"]!="-1"]

In [None]:
# Inspect the similar users' interactions.
similar_users

Unnamed: 0,user_id,book_id,rating,user_index,book_index
74831,8640,105576,3,1210,15819
74832,8640,78982,5,1210,731940
74833,8640,6969361,3,1210,697632
74834,8640,105578,1,1210,15828
74835,8640,18490,4,1210,254449
...,...,...,...,...,...
5638521,712588,32388712,3,1143,543119
5638522,712588,16322,5,1143,183365
5638523,712588,860543,0,1143,759827
5638524,712588,853510,5,1143,756768


In [None]:
# Per book: number of rating rows from the similar users ("count") and their average ("mean").
# NOTE(review): rows with rating 0 are included in the mean; 0 presumably means
# "not rated" in this dataset - confirm whether they should be excluded.
ratings_by_book = similar_users.groupby("book_id")["rating"]
book_recs = ratings_by_book.agg(["count", "mean"])
book_recs

Unnamed: 0_level_0,count,mean
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,3.25
100322,1,0.00
100365,1,0.00
10046142,1,0.00
1005,3,0.00
...,...,...
9951089,1,0.00
99561,2,2.50
99664,1,4.00
9969571,2,2.00


**Generating recommendations**

In [None]:
# Book metadata; book_id is converted to str so it can be merged with the string
# book_id values in book_recs.
book_titles = pd.read_json("book_titles.json")
book_titles = book_titles.assign(book_id=book_titles["book_id"].astype(str))

In [None]:
# Attach the book metadata; the inner join drops books that have no entry in book_titles.
book_recs=book_recs.merge(book_titles,how="inner",on="book_id")

In [None]:
# Inspect the merged candidates (count/mean plus the metadata columns).
book_recs

Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,mod_title
0,1,4,3.25,Harry Potter and the Half-Blood Prince (Harry ...,1713866,https://www.goodreads.com/book/show/1.Harry_Po...,https://images.gr-assets.com/books/1361039191m...,harry potter and the halfblood prince harry po...
1,100322,1,0.00,Assata: An Autobiography,11057,https://www.goodreads.com/book/show/100322.Assata,https://images.gr-assets.com/books/1328857268m...,assata an autobiography
2,100365,1,0.00,The Mote in God's Eye,48736,https://www.goodreads.com/book/show/100365.The...,https://images.gr-assets.com/books/1399490037m...,the mote in gods eye
3,10046142,1,0.00,Dancing in the Glory of Monsters: The Collapse...,2391,https://www.goodreads.com/book/show/10046142-d...,https://images.gr-assets.com/books/1328757755m...,dancing in the glory of monsters the collapse ...
4,1005,3,0.00,Think and Grow Rich,87634,https://www.goodreads.com/book/show/1005.Think...,https://s.gr-assets.com/assets/nophoto/book/11...,think and grow rich
...,...,...,...,...,...,...,...,...
1961,9951089,1,0.00,"Truth, Beauty, and Goodness Reframed: Educatin...",115,https://www.goodreads.com/book/show/9951089-tr...,https://images.gr-assets.com/books/1328841809m...,truth beauty and goodness reframed educating f...
1962,99561,2,2.50,Looking for Alaska,804587,https://www.goodreads.com/book/show/99561.Look...,https://images.gr-assets.com/books/1394798630m...,looking for alaska
1963,99664,1,4.00,The Painted Veil,24606,https://www.goodreads.com/book/show/99664.The_...,https://images.gr-assets.com/books/1320421719m...,the painted veil
1964,9969571,2,2.00,Ready Player One,376328,https://www.goodreads.com/book/show/9969571-re...,https://images.gr-assets.com/books/1500930947m...,ready player one


In [None]:
# Favour books that are specific to us: popular among the similar users but not simply
# popular among all Goodreads users. The count is scaled by count / ratings.
# NOTE(review): `ratings` presumably is the book's total number of Goodreads ratings - confirm.
book_recs["adjusted_count"]=book_recs["count"]*(book_recs["count"]/book_recs["ratings"])

In [None]:
# Final ranking score: mean rating among similar users times the adjusted count.
book_recs["score"]=book_recs["mean"] * book_recs["adjusted_count"]

In [None]:
# Exclude every candidate whose book_id is already in our liked list.
already_liked = book_recs["book_id"].isin(mybooks["book_id"])
book_recs = book_recs[~already_liked]

In [None]:
# Normalised title, step 1: remove every character that is not a letter, digit or space.
mybooks["mod_title"]=mybooks["title"].str.replace("[^a-zA-Z0-9 ]","",regex=True)

In [None]:
# Normalised title, step 2: lower-case.
mybooks["mod_title"]=mybooks["mod_title"].str.lower()

In [None]:
# Normalised title, step 3: collapse runs of whitespace into a single space.
# The pattern is a raw string because "\s" in a plain string literal is an invalid
# escape sequence that Python warns about.
mybooks["mod_title"]=mybooks["mod_title"].str.replace(r"\s+"," ",regex=True)

In [None]:
# Also exclude candidates whose normalised title matches one of our liked books
# (same title under a different book_id).
same_title = book_recs["mod_title"].isin(mybooks["mod_title"])
book_recs = book_recs[~same_title]

In [None]:
# Keep only books that have more than 2 rating rows from the similar users.
book_recs = book_recs.loc[book_recs["count"].gt(2)]

In [None]:
# Keep only books whose mean rating among the similar users is above 3.5.
book_recs = book_recs.loc[book_recs["mean"].gt(3.5)]

In [None]:
# Rank the remaining candidates by score, best first.
top_recs=book_recs.sort_values("score",ascending=False)

In [90]:
# Keep at most the 10 best-scoring recommendations.
top_recs=top_recs.head(10)

**Displaying recommendations**

In [91]:
def make_clickable(val):
    """Return an HTML link labelled "Goodreads" that opens ``val`` in a new tab.

    Args:
        val: Target URL. It is converted with ``str`` and HTML-escaped before
            being placed in the ``href`` attribute.

    Returns:
        str: Markup for the ``<a>`` element.
    """
    from html import escape  # local import keeps this cell self-contained

    # Escape quotes, ampersands and angle brackets so the value cannot break out of the attribute.
    return '<a target="_blank" href="{}">Goodreads</a>'.format(escape(str(val), quote=True))

def show_image(val):
    """Return HTML for a 50px-wide thumbnail of ``val`` that links to the full image.

    Args:
        val: Image URL. It is converted with ``str`` and HTML-escaped before
            being placed in the ``href`` and ``src`` attributes.

    Returns:
        str: Markup for an ``<a>`` element wrapping an ``<img>`` element.
    """
    from html import escape  # local import keeps this cell self-contained

    src = escape(str(val), quote=True)
    # <img> is a void element, so it takes no closing tag.
    return '<a href="{0}"><img src="{0}" width=50></a>'.format(src)

# Render the table with the url column as a clickable link and cover_image as a thumbnail.
top_recs.style.format({'url': make_clickable, 'cover_image': show_image})

Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,mod_title,adjusted_count,score
1769,78983,3,4.333333,"Kane and Abel (Kane and Abel, #1)",75215,Goodreads,,kane and abel kane and abel 1,0.00012,0.000519
1917,9539,3,3.666667,"The Shadow Rising (Wheel of Time, #4)",124571,Goodreads,,the shadow rising wheel of time 4,7.2e-05,0.000265
643,19063,8,4.125,The Book Thief,1193697,Goodreads,,the book thief,5.4e-05,0.000221
1143,33,4,4.5,"The Lord of the Rings (The Lord of the Rings, #1-3)",396933,Goodreads,,the lord of the rings the lord of the rings 13,4e-05,0.000181
801,2318271,3,4.333333,The Last Lecture,245804,Goodreads,,the last lecture,3.7e-05,0.000159
1559,62291,3,5.0,"A Storm of Swords (A Song of Ice and Fire, #3)",477834,Goodreads,,a storm of swords a song of ice and fire 3,1.9e-05,9.4e-05
297,136251,4,3.75,"Harry Potter and the Deathly Hallows (Harry Potter, #7)",1784684,Goodreads,,harry potter and the deathly hallows harry potter 7,9e-06,3.4e-05
