# Recommendation System

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine, correlation

In [2]:
books=pd.read_csv("/book.csv",index_col=0,encoding="ISO-8859-1")
books.columns=["User_ID","Title","Rating"]
books.head()

Unnamed: 0,User_ID,Title,Rating
1,276726,Classical Mythology,5
2,276729,Clara Callan,3
3,276729,Decision in Normandy,6
4,276736,Flu: The Story of the Great Influenza Pandemic...,8
5,276737,The Mummies of Urumchi,6


In [3]:
books.shape

(10000, 3)

In [4]:
# number of unique users and rows as per recommender system
len(books.User_ID.unique())

2182

In [5]:
# number of unique books and columns as per recommender system
len(books.Title.unique())

9659

In [6]:
# to create crosstab as per recommender system
book= books.pivot_table(index="User_ID",columns="Title",values="Rating").reset_index(drop=True)
book

Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",...,Zora Hurston and the Chinaberry Tree (Reading Rainbow Book),\Even Monkeys Fall from Trees\ and Other Japanese Proverbs,\I Won't Learn from You\: And Other Thoughts on Creative Maladjustment,"\More More More,\ Said the Baby",\O\ Is for Outlaw,"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character","\Well, there's your problem\: Cartoons",iI Paradiso Degli Orchi,stardust,Ã?Â?bermorgen.
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2177,,,,,,,,,,,...,,,,,,,,,,
2178,,,,,,,,,,,...,,,,,,,,,,
2179,,,,,,,,,,,...,,,,,,,,7.0,,
2180,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# set unique user-id's from original dataset as index for new created pivot matrix
book.index = books.User_ID.unique()
book.head()

Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",...,Zora Hurston and the Chinaberry Tree (Reading Rainbow Book),\Even Monkeys Fall from Trees\ and Other Japanese Proverbs,\I Won't Learn from You\: And Other Thoughts on Creative Maladjustment,"\More More More,\ Said the Baby",\O\ Is for Outlaw,"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character","\Well, there's your problem\: Cartoons",iI Paradiso Degli Orchi,stardust,Ã?Â?bermorgen.
276726,,,,,,,,,,,...,,,,,,,,,,
276729,,,,,,,,,,,...,,,,,,,,,,
276736,,,,,,,,,,,...,,,,,,,,,,
276737,,,,,,,,,,,...,,,,,,,,,,
276744,,,,,,,,,,,...,,,,,,,,,,


In [8]:
#Impute those NaNs with 0 values inplace-within the dataset
book.fillna(0, inplace=True) 
book

Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",...,Zora Hurston and the Chinaberry Tree (Reading Rainbow Book),\Even Monkeys Fall from Trees\ and Other Japanese Proverbs,\I Won't Learn from You\: And Other Thoughts on Creative Maladjustment,"\More More More,\ Said the Baby",\O\ Is for Outlaw,"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character","\Well, there's your problem\: Cartoons",iI Paradiso Degli Orchi,stardust,Ã?Â?bermorgen.
276726,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276729,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276737,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162109,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0
162121,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Calculating Cosine Similarity between Users
bok= 1 - pairwise_distances( book.values,metric="cosine")
bok

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [10]:
#Store the results in a dataframe
book1= pd.DataFrame(bok)
#Set the index and column names to user ids 
book1.index = books.User_ID.unique()
book1.columns= books.User_ID.unique()
# similarity scores for customers
book1.iloc[0:5, 0:5]

Unnamed: 0,276726,276729,276736,276737,276744
276726,1.0,0.0,0.0,0.0,0.0
276729,0.0,1.0,0.0,0.0,0.0
276736,0.0,0.0,1.0,0.0,0.0
276737,0.0,0.0,0.0,1.0,0.0
276744,0.0,0.0,0.0,0.0,1.0


In [11]:
# correlation with self is always 1,so replace diagonal values of correlation matrix value with 0.
np.fill_diagonal(bok, 0)
book1.iloc[0:5, 0:5]

Unnamed: 0,276726,276729,276736,276737,276744
276726,0.0,0.0,0.0,0.0,0.0
276729,0.0,0.0,0.0,0.0,0.0
276736,0.0,0.0,0.0,0.0,0.0
276737,0.0,0.0,0.0,0.0,0.0
276744,0.0,0.0,0.0,0.0,0.0


In [12]:
#Most Similar Users based on columns 
book1.idxmax(axis=1)[0:]

276726    276726
276729    276726
276736    276726
276737    276726
276744    276726
           ...  
162107    276726
162109    276726
162113    161453
162121    276726
162129    276726
Length: 2182, dtype: int64

In [13]:
#Checking Details of 276737 & 276726 Users to Recommend
books[(books["User_ID"]==276737) | (books["User_ID"]==276726)]

Unnamed: 0,User_ID,Title,Rating
1,276726,Classical Mythology,5
5,276737,The Mummies of Urumchi,6


In [14]:
#Creating DataFrame of Two User_id for Join Statement
user1=books[(books["User_ID"]==162113)]
user2=books[(books["User_ID"]==161453)]
print(user1["Title"])
print(user2["Title"])

9990    The Cape Ann (Contemporary American Fiction)
Name: Title, dtype: object
8960    Bread, Tomato, Garlic: Quick Cooking With 3 Ma...
8961    The Ubiquitous Shrimp: From Simple to Exotic, ...
Name: Title, dtype: object


In [15]:
# join books read by both and then we can recommend the ones which have not been read.
pd.merge(user1,user2,on="Title",how='outer')

Unnamed: 0,User_ID_x,Title,Rating_x,User_ID_y,Rating_y
0,162113.0,The Cape Ann (Contemporary American Fiction),8.0,,
1,,"Bread, Tomato, Garlic: Quick Cooking With 3 Ma...",,161453.0,9.0
2,,"The Ubiquitous Shrimp: From Simple to Exotic, ...",,161453.0,8.0
