# Recommendations

In [1]:
# mount google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [22]:
# copy files to runtime
!cp /content/drive/MyDrive/Colab_Notebooks/Projects/books_recommendation/datasets/goodreads_interactions.csv .
!cp /content/drive/MyDrive/Colab_Notebooks/Projects/books_recommendation/datasets/book_id_map.csv .
!cp /content/drive/MyDrive/Colab_Notebooks/Projects/books_recommendation/books_titles.json .

In [4]:
# list of liked books created by using search engine built earlier
liked_books = ["4408", "31147619", "29983711", "9401317", "9317691", "8153988", "20494944"]

In [5]:
liked_books

['4408', '31147619', '29983711', '9401317', '9317691', '8153988', '20494944']

In [6]:
# let's looks at book_id_map.csv
!head book_id_map.csv

book_id_csv,book_id
0,34684622
1,34536488
2,34017076
3,71730
4,30422361
5,33503613
6,33517540
7,34467031
8,6383669


In [7]:
# Book_id column in 2 dataset files are different - let's map those using book_id_map.csv

csv_book_mapping = {}

with open("book_id_map.csv", "r") as f:
    while True:
        line = f.readline()
        if not line:
            break
        csv_id, book_id = line.strip().split(",")
        csv_book_mapping[csv_id] = book_id

In [8]:
csv_book_mapping['0']

'34684622'

In [9]:
# number of book mappings
len(csv_book_mapping)

2360651

In [10]:
!wc -l goodreads_interactions.csv

228648343 goodreads_interactions.csv


In [11]:
!ls -lh goodreads_interactions.csv

-rw------- 1 root root 4.1G Feb 24 00:13 goodreads_interactions.csv


In [12]:
!head goodreads_interactions.csv

user_id,book_id,is_read,rating,is_reviewed
0,948,1,5,0
0,947,1,5,1
0,946,1,5,0
0,945,1,5,0
0,944,1,5,0
0,943,1,5,0
0,942,1,5,0
0,941,1,5,0
0,940,1,5,0


### finding users who like same books as us

In [13]:
#  use streaming method to load data

overlap_users = set()

with open("goodreads_interactions.csv", 'r') as f:
    while True:
        line = f.readline()
        if not line:
            break
        user_id, csv_id, _, rating, _ = line.split(",")
        
        if user_id in overlap_users:
            continue

        try:
            rating = int(rating)
        except ValueError:
            continue
        
        book_id = csv_book_mapping[csv_id]
        
        if book_id in liked_books and rating >= 4:
                overlap_users.add(user_id)

In [14]:
len(overlap_users)

2029

In [15]:
# find what books those users liked

rec_lines = []

with open("goodreads_interactions.csv", 'r') as f:
    while True:
        line = f.readline()
        if not line:
            break
        user_id, csv_id, _, rating, _ = line.split(",")
        
        if user_id in overlap_users:
            book_id = csv_book_mapping[csv_id]
            rec_lines.append([user_id, book_id, rating])

In [16]:
len(rec_lines)

1530257

In [17]:
# convert to dataframe

import pandas as pd

recs = pd.DataFrame(rec_lines, columns=["user_id", "book_id", "rating"])
recs["book_id"] = recs["book_id"].astype(str)

In [18]:
recs

Unnamed: 0,user_id,book_id,rating
0,284,977284,3
1,284,890054,4
2,284,837153,3
3,284,1586480,4
4,284,41814,5
...,...,...,...
1530252,873216,127455,0
1530253,873216,10365343,0
1530254,873216,16131077,0
1530255,873216,18781576,0


In [19]:
top_recs = recs["book_id"].value_counts().head(10)

In [20]:
top_recs = top_recs.index.values

In [23]:
books_titles = pd.read_json("books_titles.json")
books_titles["book_id"] = books_titles["book_id"].astype(str)

In [24]:
books_titles.head()

Unnamed: 0,book_id,title,ratings,url,image_url,mod_title
0,7327624,"The Unschooled Wizard (Sun Wolf and Starhawk, ...",140,https://www.goodreads.com/book/show/7327624-th...,https://images.gr-assets.com/books/1304100136m...,the unschooled wizard sun wolf and starhawk 12
1,6066819,Best Friends Forever,51184,https://www.goodreads.com/book/show/6066819-be...,https://s.gr-assets.com/assets/nophoto/book/11...,best friends forever
2,287141,The Aeneid for Boys and Girls,46,https://www.goodreads.com/book/show/287141.The...,https://s.gr-assets.com/assets/nophoto/book/11...,the aeneid for boys and girls
3,6066812,All's Fairy in Love and War (Avalon: Web of Ma...,98,https://www.goodreads.com/book/show/6066812-al...,https://images.gr-assets.com/books/1316637798m...,alls fairy in love and war avalon web of magic 8
4,287149,The Devil's Notebook,986,https://www.goodreads.com/book/show/287149.The...,https://images.gr-assets.com/books/1328768789m...,the devils notebook


### creating initial books recommendations

In [25]:
books_titles[books_titles["book_id"].isin(top_recs)]

Unnamed: 0,book_id,title,ratings,url,image_url,mod_title
53027,77203,The Kite Runner,1848782,https://www.goodreads.com/book/show/77203.The_...,https://images.gr-assets.com/books/1484565687m...,the kite runner
284473,2767052,"The Hunger Games (The Hunger Games, #1)",4899965,https://www.goodreads.com/book/show/2767052-th...,https://images.gr-assets.com/books/1447303603m...,the hunger games the hunger games 1
401395,5107,The Catcher in the Rye,2086945,https://www.goodreads.com/book/show/5107.The_C...,https://images.gr-assets.com/books/1398034300m...,the catcher in the rye
463463,4671,The Great Gatsby,2758812,https://www.goodreads.com/book/show/4671.The_G...,https://images.gr-assets.com/books/1490528560m...,the great gatsby
615314,5470,1984,2023937,https://www.goodreads.com/book/show/5470.1984,https://images.gr-assets.com/books/1348990566m...,1984
757376,38447,The Handmaid's Tale,648783,https://www.goodreads.com/book/show/38447.The_...,https://images.gr-assets.com/books/1498057733m...,the handmaids tale
790927,2657,To Kill a Mockingbird,3255518,https://www.goodreads.com/book/show/2657.To_Ki...,https://images.gr-assets.com/books/1361975680m...,to kill a mockingbird
878151,18143977,All the Light We Cannot See,498685,https://www.goodreads.com/book/show/18143977-a...,https://images.gr-assets.com/books/1451445646m...,all the light we cannot see
878545,3,Harry Potter and the Sorcerer's Stone (Harry P...,4765497,https://www.goodreads.com/book/show/3.Harry_Po...,https://images.gr-assets.com/books/1474154022m...,harry potter and the sorcerers stone harry pot...
1062354,29983711,Pachinko,8161,https://www.goodreads.com/book/show/29983711-p...,https://images.gr-assets.com/books/1462393298m...,pachinko


In [26]:
top_recs

array(['2767052', '29983711', '2657', '3', '4671', '18143977', '38447',
       '5470', '77203', '5107'], dtype=object)

### improving our book recommendations

In [27]:
all_recs = recs["book_id"].value_counts()

In [28]:
all_recs

2767052     1092
29983711    1089
2657        1074
3           1048
4671        1028
            ... 
21843400       1
18595019       1
22514204       1
22733082       1
18781576       1
Name: book_id, Length: 364169, dtype: int64

In [29]:
all_recs = all_recs.to_frame().reset_index()
all_recs.columns = ["book_id", "book_count"]

In [30]:
all_recs

Unnamed: 0,book_id,book_count
0,2767052,1092
1,29983711,1089
2,2657,1074
3,3,1048
4,4671,1028
...,...,...
364164,21843400,1
364165,18595019,1
364166,22514204,1
364167,22733082,1


In [31]:
all_recs.head(5)

Unnamed: 0,book_id,book_count
0,2767052,1092
1,29983711,1089
2,2657,1074
3,3,1048
4,4671,1028


In [32]:
# merge

all_recs = all_recs.merge(books_titles, how="inner", on="book_id")

In [33]:
all_recs

Unnamed: 0,book_id,book_count,title,ratings,url,image_url,mod_title
0,2767052,1092,"The Hunger Games (The Hunger Games, #1)",4899965,https://www.goodreads.com/book/show/2767052-th...,https://images.gr-assets.com/books/1447303603m...,the hunger games the hunger games 1
1,29983711,1089,Pachinko,8161,https://www.goodreads.com/book/show/29983711-p...,https://images.gr-assets.com/books/1462393298m...,pachinko
2,2657,1074,To Kill a Mockingbird,3255518,https://www.goodreads.com/book/show/2657.To_Ki...,https://images.gr-assets.com/books/1361975680m...,to kill a mockingbird
3,3,1048,Harry Potter and the Sorcerer's Stone (Harry P...,4765497,https://www.goodreads.com/book/show/3.Harry_Po...,https://images.gr-assets.com/books/1474154022m...,harry potter and the sorcerers stone harry pot...
4,4671,1028,The Great Gatsby,2758812,https://www.goodreads.com/book/show/4671.The_G...,https://images.gr-assets.com/books/1490528560m...,the great gatsby
...,...,...,...,...,...,...,...
328338,22707746,1,Names Can Never Hurt Me,297,https://www.goodreads.com/book/show/22707746-n...,https://images.gr-assets.com/books/1405051347m...,names can never hurt me
328339,21843400,1,Blackbird Knitting in a Bunny's Lair (Granby K...,604,https://www.goodreads.com/book/show/21843400-b...,https://images.gr-assets.com/books/1396575651m...,blackbird knitting in a bunnys lair granby kni...
328340,18595019,1,Bar None,25,https://www.goodreads.com/book/show/18595019-b...,https://images.gr-assets.com/books/1380480671m...,bar none
328341,22514204,1,Unexpected Trust (Unexpected #2),121,https://www.goodreads.com/book/show/22514204-u...,https://images.gr-assets.com/books/1403721300m...,unexpected trust unexpected 2


In [34]:
# create score
all_recs["score"] = all_recs["book_count"] * (all_recs["book_count"] / all_recs["ratings"])

In [35]:
all_recs.sort_values("score", ascending=False).head(10)

Unnamed: 0,book_id,book_count,title,ratings,url,image_url,mod_title,score
1,29983711,1089,Pachinko,8161,https://www.goodreads.com/book/show/29983711-p...,https://images.gr-assets.com/books/1462393298m...,pachinko,145.315648
238,4408,327,East of Eden,3447,https://www.goodreads.com/book/show/4408.East_...,https://images.gr-assets.com/books/1323882457m...,east of eden,31.020888
724,9317691,175,The Name of the Wind (The Kingkiller Chronicle...,1043,https://www.goodreads.com/book/show/9317691-th...,https://images.gr-assets.com/books/1360558233m...,the name of the wind the kingkiller chronicle 1,29.362416
236,32920226,328,"Sing, Unburied, Sing",4592,https://www.goodreads.com/book/show/32920226-s...,https://images.gr-assets.com/books/1499340866m...,sing unburied sing,23.428571
216,30753987,342,The Leavers,5602,https://www.goodreads.com/book/show/30753987-t...,https://images.gr-assets.com/books/1489158974m...,the leavers,20.878972
7617,26856502,27,"Vengeful (Villains, #2)",35,https://www.goodreads.com/book/show/26856502-v...,https://s.gr-assets.com/assets/nophoto/book/11...,vengeful villains 2,20.828571
1287,31147619,118,Homegoing,697,https://www.goodreads.com/book/show/31147619-h...,https://images.gr-assets.com/books/1491119004m...,homegoing,19.977044
5517,34927828,37,The Great Alone,70,https://www.goodreads.com/book/show/34927828-t...,https://images.gr-assets.com/books/1501852384m...,the great alone,19.557143
249,8153988,322,"The Eye of the World (Wheel of Time, #1)",5740,https://www.goodreads.com/book/show/8153988-th...,https://images.gr-assets.com/books/1465920672m...,the eye of the world wheel of time 1,18.063415
6011,35099035,34,Red Clocks,67,https://www.goodreads.com/book/show/35099035-r...,https://images.gr-assets.com/books/1494345016m...,red clocks,17.253731


In [36]:
all_recs[all_recs["book_count"] > 200].sort_values("score", ascending=False).head(10)

Unnamed: 0,book_id,book_count,title,ratings,url,image_url,mod_title,score
1,29983711,1089,Pachinko,8161,https://www.goodreads.com/book/show/29983711-p...,https://images.gr-assets.com/books/1462393298m...,pachinko,145.315648
238,4408,327,East of Eden,3447,https://www.goodreads.com/book/show/4408.East_...,https://images.gr-assets.com/books/1323882457m...,east of eden,31.020888
236,32920226,328,"Sing, Unburied, Sing",4592,https://www.goodreads.com/book/show/32920226-s...,https://images.gr-assets.com/books/1499340866m...,sing unburied sing,23.428571
216,30753987,342,The Leavers,5602,https://www.goodreads.com/book/show/30753987-t...,https://images.gr-assets.com/books/1489158974m...,the leavers,20.878972
249,8153988,322,"The Eye of the World (Wheel of Time, #1)",5740,https://www.goodreads.com/book/show/8153988-th...,https://images.gr-assets.com/books/1465920672m...,the eye of the world wheel of time 1,18.063415
441,33253215,236,The Heart's Invisible Furies,3629,https://www.goodreads.com/book/show/33253215-t...,https://images.gr-assets.com/books/1490803456m...,the hearts invisible furies,15.347479
71,30688435,533,Exit West,21378,https://www.goodreads.com/book/show/30688435-e...,https://images.gr-assets.com/books/1477324680m...,exit west,13.288848
251,32283423,321,American War,7776,https://www.goodreads.com/book/show/32283423-a...,https://images.gr-assets.com/books/1481494946m...,american war,13.251157
228,26025588,335,Behold the Dreamers,8793,https://www.goodreads.com/book/show/26025588-b...,https://images.gr-assets.com/books/1439643293m...,behold the dreamers,12.762993
345,17912498,266,The Queen of the Night,6115,https://www.goodreads.com/book/show/17912498-t...,https://images.gr-assets.com/books/1460425080m...,the queen of the night,11.570891


In [37]:
popular_recs = all_recs[all_recs["book_count"] > 200].sort_values("score", ascending=False)

In [38]:
# create functions

def make_clickable(val):
    return '<a target="_blank" href="{}">Goodreads</a>'.format(val, val)

def show_image(val):
    return '<a href="{}"><img src="{}" width=50></img></a>'.format(val, val)


popular_recs[~popular_recs["book_id"].isin(liked_books)].head(10).style.format({'url': make_clickable, 'cover_image': show_image})

Unnamed: 0,book_id,book_count,title,ratings,url,image_url,mod_title,score
236,32920226,328,"Sing, Unburied, Sing",4592,Goodreads,https://images.gr-assets.com/books/1499340866m/32920226.jpg,sing unburied sing,23.428571
216,30753987,342,The Leavers,5602,Goodreads,https://images.gr-assets.com/books/1489158974m/30753987.jpg,the leavers,20.878972
441,33253215,236,The Heart's Invisible Furies,3629,Goodreads,https://images.gr-assets.com/books/1490803456m/33253215.jpg,the hearts invisible furies,15.347479
71,30688435,533,Exit West,21378,Goodreads,https://images.gr-assets.com/books/1477324680m/30688435.jpg,exit west,13.288848
251,32283423,321,American War,7776,Goodreads,https://images.gr-assets.com/books/1481494946m/32283423.jpg,american war,13.251157
228,26025588,335,Behold the Dreamers,8793,Goodreads,https://images.gr-assets.com/books/1439643293m/26025588.jpg,behold the dreamers,12.762993
345,17912498,266,The Queen of the Night,6115,Goodreads,https://images.gr-assets.com/books/1460425080m/17912498.jpg,the queen of the night,11.570891
325,28114515,276,The Wangs vs. the World,7044,Goodreads,https://images.gr-assets.com/books/1451493647m/28114515.jpg,the wangs vs the world,10.81431
108,34273236,459,Little Fires Everywhere,21135,Goodreads,https://images.gr-assets.com/books/1490351351m/34273236.jpg,little fires everywhere,9.968346
104,28815371,467,The Mothers,22346,Goodreads,https://images.gr-assets.com/books/1460652458m/28815371.jpg,the mothers,9.759644


### Wrap up and next steps

1. collaborative filtering -- to improve recommendations