## Build a recommender system by using cosine simillarties score.

In [40]:
import numpy as np
import pandas as pd

In [41]:
df = pd.read_csv('H:/ExcelR Data Science/datasets_BA/book.csv',encoding='latin-1')
df.head()

Unnamed: 0.1,Unnamed: 0,User.ID,Book.Title,Book.Rating
0,1,276726,Classical Mythology,5
1,2,276729,Clara Callan,3
2,3,276729,Decision in Normandy,6
3,4,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,5,276737,The Mummies of Urumchi,6


In [42]:
df.drop(['Unnamed: 0'],axis=1,inplace=True)

In [43]:
df.head()

Unnamed: 0,User.ID,Book.Title,Book.Rating
0,276726,Classical Mythology,5
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6
3,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,276737,The Mummies of Urumchi,6


In [44]:
# number of rows and columns in df
df.shape

(10000, 3)

In [45]:
# dropping duplicates
df.drop_duplicates(keep=False,inplace=True)

In [46]:
# shape of df after dropping duplicate rows
df.shape

(9996, 3)

In [47]:
df.rename(columns={'User.ID':'user_id','Book.Title':'book_title','Book.Rating':'book_rating'},inplace=True)

In [48]:
df.head()

Unnamed: 0,user_id,book_title,book_rating
0,276726,Classical Mythology,5
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6
3,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,276737,The Mummies of Urumchi,6


In [49]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9996 entries, 0 to 9999
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   user_id      9996 non-null   int64 
 1   book_title   9996 non-null   object
 2   book_rating  9996 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 312.4+ KB


In [50]:
df['book_rating'].describe()

count    9996.000000
mean        7.566527
std         1.821849
min         1.000000
25%         7.000000
50%         8.000000
75%         9.000000
max        10.000000
Name: book_rating, dtype: float64

In [51]:
#number of unique users in the dataset
len(df['user_id'].unique())

2182

In [52]:
# Number of unique books
len(df['book_title'].unique())

9657

### Making a separate dataframe to keep books which have a rating of 6 and above

In [53]:
df1=df[df['book_rating']>=6]

In [54]:
df1.shape

(8500, 3)

In [55]:
#number of unique users in the dataset
len(df1['user_id'].unique())

1948

In [56]:
# Number of unique books
len(df['book_title'].unique())

9657

In [57]:
df1['book_rating'].describe()

count    8500.000000
mean        8.122941
std         1.285493
min         6.000000
25%         7.000000
50%         8.000000
75%         9.000000
max        10.000000
Name: book_rating, dtype: float64

The dataset contains 9657 unique book titles rated by 1948 unique users.

In [58]:
# Finding the top 10 popular books based on rating frequency
pop_books=df1['book_title'].value_counts()
pop_books[0:10]

The Subtle Knife (His Dark Materials, Book 2)      4
Fahrenheit 451                                     4
Stardust                                           4
The Hitchhiker's Guide to the Galaxy               3
Vanished                                           3
The Golden Compass (His Dark Materials, Book 1)    3
The Neverending Story                              3
The Hero and the Crown                             3
Strangers                                          3
Pride and Prejudice                                3
Name: book_title, dtype: int64

In [59]:
# Finding the average rating of each book
avg_rating=df1[['book_title','book_rating']].groupby(['book_title']).mean()
avg_rating

Unnamed: 0_level_0,book_rating
book_title,Unnamed: 1_level_1
"Jason, Madison &amp",8.0
Other Stories;Merril;1985;McClelland &amp,6.0
'48,7.0
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,8.0
...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,7.0
...,...
"\More More More,\ Said the Baby",7.0
\O\ Is for Outlaw,8.0
"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character",8.0
"\Well, there's your problem\: Cartoons",9.0


In [60]:
# Finding the top 10 popular books based on average rating
sort_rating=avg_rating.sort_values(by='book_rating',ascending=False)
sort_rating[0:10]

Unnamed: 0_level_0,book_rating
book_title,Unnamed: 1_level_1
I'm the One That I Want,10.0
Conduct Unbecoming,10.0
Cuentos de Eva Luna,10.0
Cuentos Para Pensar / Stories To Think About,10.0
Cuando El Calzado Es Comodo - Te Olvidas del Pie,10.0
Crystal Flame,10.0
Crystal Cave,10.0
The Senator's Daughter,10.0
Cruel Miracles,10.0
Cruel Doubt,10.0


## Collaborative Filtering

## 1. User-based filtering

In [61]:
# Creating user based pivot table
user_df=pd.pivot_table(df1,values='book_rating',index='user_id',columns='book_title')
user_df.iloc[:,0:10]

book_title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",1001 Brilliant Ways to Checkmate
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
8,,,,,,,,,,
9,,,,,,,,,,
10,,,,,,,,,,
12,,,,,,,,,,
14,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
278846,,,,,,,,,,
278849,,,,,,,,,,
278851,,,,,,,,,,
278852,,,,,,,,,,


In [62]:
#Impute those NaNs with 0 values
user_df.fillna(0, inplace=True)

In [63]:
user_df.iloc[:,0:10]

book_title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",1001 Brilliant Ways to Checkmate
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
278846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [64]:
from sklearn.metrics.pairwise import cosine_similarity
similarity_score=cosine_similarity(user_df)
similarity_score[0:10]

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [65]:
# Computing a cosine score matrix
cosine_score_df=pd.DataFrame(similarity_score)
cosine_score_df.index=user_df.index
cosine_score_df.columns=user_df.index
cosine_score_df

user_id,8,9,10,12,14,16,17,19,22,26,...,278831,278832,278836,278843,278844,278846,278849,278851,278852,278854
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
278849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
278851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
278852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [66]:
# Filling diagonal elements with 0s
arr=cosine_score_df.values
np.fill_diagonal(arr, 0)
cosine_score_df

user_id,8,9,10,12,14,16,17,19,22,26,...,278831,278832,278836,278843,278844,278846,278849,278851,278852,278854
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
#Most Similar Users
#idxmax() function returns index of first occurrence of maximum over requested axis
pair=cosine_score_df.idxmax(axis=1)[0:10]
pair

user_id
8          8
9          8
10         8
12         8
14         8
16         8
17         8
19    278418
22         8
26         8
dtype: int64

In [68]:
# creating dataframe containing user-most similar user pair
sim_user_df=pd.DataFrame(pair,columns=['similar_user'])
sim_user_df.iloc[0:10]

Unnamed: 0_level_0,similar_user
user_id,Unnamed: 1_level_1
8,8
9,8
10,8
12,8
14,8
16,8
17,8
19,278418
22,8
26,8


In [69]:
# Checking books rated by user id 6
user_8=df[df['user_id']==8]
user_8  

Unnamed: 0,user_id,book_title,book_rating
2399,8,Ancient Celtic Romances,5
2400,8,The Western way: A practical guide to the West...,5
2401,8,Wings,5
2402,8,Truckers,5
2403,8,Keepers of the Earth Teachers Guide,6
2404,8,The Celts Activity Book,6
2405,8,The Art Of Celtia,7


In [70]:
# Checking books rated by user id 9
user_9=df[df['user_id']==9]
user_9

Unnamed: 0,user_id,book_title,book_rating
2406,9,The Book of Kells: Selected Plates in Full Color,6


In [71]:
# Checking books rated by user id 10
user_10=df[df['user_id']==10]
user_10

Unnamed: 0,user_id,book_title,book_rating
2407,10,SINGLE &amp; SINGLE,6


In [72]:
# Checking books rated by user id 278418
user_278418=df[df['user_id']==278418]
user_278418

Unnamed: 0,user_id,book_title,book_rating
1905,278418,253,5
1906,278418,Brighton Rock,8
1907,278418,Irene's Last Waltz,8
1908,278418,Into Thin Air : A Personal Account of the Mt. ...,6
1909,278418,The Phantom of the Opera (Signet Classics (Pap...,5
...,...,...,...
2017,278418,Third Eye,8
2018,278418,Criminal Intent,9
2019,278418,Exiles Trilogy From Earth,5
2020,278418,HABIBI,3


In [73]:
# Checking books rated by user id 19
user_19=df[df['user_id']==19]
user_19

Unnamed: 0,user_id,book_title,book_rating
2417,19,The Murder Book,7


## 2. Item-based filtering

In [74]:
# Creating item based pivot table
item_df=user_df.T
item_df

user_id,8,9,10,12,14,16,17,19,22,26,...,278831,278832,278836,278843,278844,278846,278849,278851,278852,278854
book_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Jason, Madison &amp",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Other Stories;Merril;1985;McClelland &amp,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"\More More More,\ Said the Baby",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
\O\ Is for Outlaw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"\Well, there's your problem\: Cartoons",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [75]:
# calculating cosine similarity scores
similarity_score=cosine_similarity(item_df)
cosine_score_df=pd.DataFrame(similarity_score,index=item_df.index,columns=item_df.index)
cosine_score_df.iloc[:,0:10]

book_title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",1001 Brilliant Ways to Checkmate
book_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"Jason, Madison &amp",1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Other Stories;Merril;1985;McClelland &amp,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'48,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
"\More More More,\ Said the Baby",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
\O\ Is for Outlaw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"\Well, there's your problem\: Cartoons",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [76]:
# Filling diagonal elements with 0s
arr1=cosine_score_df.values
np.fill_diagonal(arr1, 0)
cosine_score_df

book_title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",1001 Brilliant Ways to Checkmate,...,Zombies of the Gene Pool,Zoot Suit and Other Plays,Zora Hurston and the Chinaberry Tree (Reading Rainbow Book),\Even Monkeys Fall from Trees\ and Other Japanese Proverbs,\I Won't Learn from You\: And Other Thoughts on Creative Maladjustment,"\More More More,\ Said the Baby",\O\ Is for Outlaw,"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character","\Well, there's your problem\: Cartoons",iI Paradiso Degli Orchi
book_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Jason, Madison &amp",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Other Stories;Merril;1985;McClelland &amp,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"\More More More,\ Said the Baby",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
\O\ Is for Outlaw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"\Well, there's your problem\: Cartoons",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [77]:
#Most Similar Books
sim_books=cosine_score_df.idxmax(axis=1)
sim_books[0:10]

book_title
 Jason, Madison &amp                                                             Other Stories;Merril;1985;McClelland &amp
 Other Stories;Merril;1985;McClelland &amp                                                             Jason, Madison &amp
'48                                                                      Angry Men, Passive Men: Understanding the Root...
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities                                A Child's Night Dream
...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR                                              11 Edward Street
01-01-00: A Novel of the Millennium                                             Alistair MacLean's Zeit der AttentÃ?Â¤ter.
1,401 More Things That P*Ss Me Off                                       3 Plays: Our Town, the Skin of Our Teeth, the ...
10 Commandments Of Dating                                                                                  A Time To Dance
100 G

In [78]:
# Creating a dataframe of most similar book pairs
sim_books_df=pd.DataFrame(sim_books,columns=['similar_book'])
sim_books_df.iloc[0:10]

Unnamed: 0_level_0,similar_book
book_title,Unnamed: 1_level_1
"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp
Other Stories;Merril;1985;McClelland &amp,"Jason, Madison &amp"
'48,"Angry Men, Passive Men: Understanding the Root..."
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,A Child's Night Dream
...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,11 Edward Street
01-01-00: A Novel of the Millennium,Alistair MacLean's Zeit der AttentÃ?Â¤ter.
"1,401 More Things That P*Ss Me Off","3 Plays: Our Town, the Skin of Our Teeth, the ..."
10 Commandments Of Dating,A Time To Dance
"100 Great Fantasy Short, Short Stories",Canon bang bang
1001 Brilliant Ways to Checkmate,A Chakra &amp; Kundalini Workbook: Psycho-Spir...
