# Imports:

In [77]:
import numpy as np
import pandas as pd 

from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances

**Read in the data:**

In [78]:
# Load the ratings table from a path relative to the notebook's working directory.
recommend = pd.read_csv('data/recommend.csv')

In [79]:
# Preview the first five rows to check the loaded columns.
recommend.head()

Unnamed: 0,user_id,rating,title
0,29403,5,Where the Wild Things Are
1,37112,5,Harry Potter and the Deathly Hallows (Harry Po...
2,16114,4,Sphere
3,50684,4,"The Desert Spear (Demon Cycle, #2)"
4,43944,3,"Y: The Last Man, Vol. 1: Unmanned"


I am building an item-based collaborative recommender, where the items are books. The pivot table is set up as follows:

- Each book `title` is a row (the index)
- Each `user_id` is a column
- Each cell holds that user's `rating` of the book (0 where the user has not rated it)

In [80]:
# One row per book title, one column per user; a cell is that user's rating
# of the book, with 0 filled in wherever no rating exists.
recommend_piv = recommend.pivot_table(
    index='title',
    columns='user_id',
    values='rating',
    fill_value=0,
)

recommend_piv.head()

user_id,1,2,3,4,5,7,8,9,10,11,...,53413,53414,53415,53416,53417,53418,53419,53421,53422,53423
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Angels (Walsh Family, #3)",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
#GIRLBOSS,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
'Salem's Lot,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"'Tis (Frank McCourt, #2)",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"10% Happier: How I Tamed the Voice in My Head, Reduced Stress Without Losing My Edge, and Found Self-Help That Actually Works",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [81]:
# Rows are titles and columns are user_ids, so this is (n_titles, n_users).
recommend_piv.shape

(8018, 42659)

Create a sparse matrix:

In [82]:
# Most cells of the pivot table are the 0 fill value, so store the ratings
# in compressed-sparse-row form before computing pairwise distances.
ratings_array = recommend_piv.to_numpy()
recommend_sparse = sparse.csr_matrix(ratings_array)

Calculate cosine distance (1 − cosine similarity):

In [83]:
# Cosine distance is 1 - cosine similarity: a distance of 0 means identical
# rating vectors, and a distance of 1 means a similarity of 0.
distance = cosine_distances(recommend_sparse)

In [84]:
# Square matrix: one row and one column per row of the pivot table (i.e. per title).
distance.shape

(8018, 8018)

In [100]:
# Peek at the top-left 10x10 corner of the distance matrix, rounded to 2 decimals.
distance[:10, :10].round(2)

array([[0., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 0., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 0., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 0., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 0., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 0., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 0., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 0., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 0., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 0.]])

Create the recommender DataFrame

In [86]:
# Row labels of the pivot table (the book titles), in the same order as the
# rows of `recommend_sparse` and therefore of `distance`.
titles = recommend_piv.index
titles

Index([' Angels (Walsh Family, #3)', '#GIRLBOSS', ''Salem's Lot',
       ''Tis (Frank McCourt, #2)',
       '10% Happier: How I Tamed the Voice in My Head, Reduced Stress Without Losing My Edge, and Found Self-Help That Actually Works',
       '100 Bullets, Vol. 1: First Shot, Last Call', '100 Love Sonnets',
       '100 Selected Poems', '10th Anniversary (Women's Murder Club, #10)',
       '11 Birthdays (Willow Falls, #1)',
       ...
       'Zero to One: Notes on Startups, or How to Build the Future',
       'Zita the Spacegirl (Zita the Spacegirl, #1)', 'Zodiac', 'Zone One',
       'Zorba the Greek', 'Zorro',
       'for colored girls who have considered suicide/when the rainbow is enuf',
       'god is Not Great: How Religion Poisons Everything',
       'ttyl (Internet Girls, #1)', 'xxxHolic, Vol. 1 (xxxHOLiC, #1)'],
      dtype='object', name='title', length=8018)

In [88]:
# Label both axes of the square distance matrix with the book titles so a
# book's distances to every other book can be looked up by name.
recommend_df = pd.DataFrame(
    data=distance,
    index=titles,
    columns=titles,
)
recommend_df.head()

title,"Angels (Walsh Family, #3)",#GIRLBOSS,'Salem's Lot,"'Tis (Frank McCourt, #2)","10% Happier: How I Tamed the Voice in My Head, Reduced Stress Without Losing My Edge, and Found Self-Help That Actually Works","100 Bullets, Vol. 1: First Shot, Last Call",100 Love Sonnets,100 Selected Poems,"10th Anniversary (Women's Murder Club, #10)","11 Birthdays (Willow Falls, #1)",...,"Zero to One: Notes on Startups, or How to Build the Future","Zita the Spacegirl (Zita the Spacegirl, #1)",Zodiac,Zone One,Zorba the Greek,Zorro,for colored girls who have considered suicide/when the rainbow is enuf,god is Not Great: How Religion Poisons Everything,"ttyl (Internet Girls, #1)","xxxHolic, Vol. 1 (xxxHOLiC, #1)"
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Angels (Walsh Family, #3)",0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
#GIRLBOSS,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
'Salem's Lot,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"'Tis (Frank McCourt, #2)",1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"10% Happier: How I Tamed the Voice in My Head, Reduced Stress Without Losing My Edge, and Found Self-Help That Actually Works",1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Try it out...

In [89]:
# Case-insensitive, literal substring search over the book titles.
court_mask = titles.str.lower().str.contains('a court of', regex=False)
titles[court_mask]

Index(['A Court of Mist and Fury (A Court of Thorns and Roses, #2)',
       'A Court of Thorns and Roses (A Court of Thorns and Roses, #1)',
       'A Court of Wings and Ruin (A Court of Thorns and Roses, #3)'],
      dtype='object', name='title')

In [90]:
# Distances from this book to every title, closest first. The first row is
# the book itself (distance 0), so six rows gives five recommendations.
acotar_title = 'A Court of Thorns and Roses (A Court of Thorns and Roses, #1)'
acotar_distances = recommend_df[acotar_title]
acotar_distances.sort_values().head(6)

title
A Court of Thorns and Roses (A Court of Thorns and Roses, #1)           0.000000
Dawn of the Dreadfuls (Pride and Prejudice and Zombies, #0.5)           0.822441
Percy Jackson's Greek Gods (A Percy Jackson and the Olympians Guide)    0.847277
Nimona                                                                  0.890388
A Gathering of Shadows (Shades of Magic, #2)                            0.895949
The Crown of Embers (Fire and Thorns, #2)                               0.899734
Name: A Court of Thorns and Roses (A Court of Thorns and Roses, #1), dtype: float64

In [91]:
# Distances from this book to every title, closest first. The first row is
# the book itself (distance 0), so six rows gives five recommendations.
acomaf_title = 'A Court of Mist and Fury (A Court of Thorns and Roses, #2)'
acomaf_distances = recommend_df[acomaf_title]
acomaf_distances.sort_values().head(6)

title
A Court of Mist and Fury (A Court of Thorns and Roses, #2)    0.000000
Maybe Not (Maybe, #1.5)                                       0.817848
Alice's Adventures in Wonderland & Other Stories              0.846415
Origin (Lux, #4)                                              0.892237
Lumberjanes, Vol. 1: Beware the Kitten Holy                   0.902381
Spell Bound (Hex Hall, #3)                                    0.925400
Name: A Court of Mist and Fury (A Court of Thorns and Roses, #2), dtype: float64

In [94]:
# Case-insensitive, literal substring search over the book titles.
harry_mask = titles.str.lower().str.contains('harry', regex=False)
titles[harry_mask]

Index(['A Darkness More Than Night (Harry Bosch, #7; Terry McCaleb, #2; Harry Bosch Universe, #9)',
       'Angels Flight (Harry Bosch, #6; Harry Bosch Universe, #7)',
       'Blood Work (Harry Bosch Universe, #8; Terry McCaleb #1)',
       'Chasing Harry Winston',
       'City of Bones (Harry Bosch, #8; Harry Bosch Universe, #10)',
       'Cockroaches (Harry Hole, #2)',
       'Echo Park (Harry Bosch, #12; Harry Bosch Universe, #14)',
       'Harry Potter Boxed Set, Books 1-5 (Harry Potter, #1-5)',
       'Harry Potter Boxset (Harry Potter, #1-7)',
       'Harry Potter Collection (Harry Potter, #1-6)',
       'Harry Potter Schoolbooks Box Set: Two Classic Books from the Library of Hogwarts School of Witchcraft and Wizardry',
       'Harry Potter and the Chamber of Secrets (Harry Potter, #2)',
       'Harry Potter and the Cursed Child - Parts One and Two (Harry Potter, #8)',
       'Harry Potter and the Deathly Hallows (Harry Potter, #7)',
       'Harry Potter and the Goblet of Fire (H

In [95]:
# Distances from this book to every title, closest first. The first row is
# the book itself (distance 0), so six rows gives five recommendations.
hp1_title = "Harry Potter and the Sorcerer's Stone (Harry Potter, #1)"
hp1_distances = recommend_df[hp1_title]
hp1_distances.sort_values().head(6)

title
Harry Potter and the Sorcerer's Stone (Harry Potter, #1)    0.000000
Zita the Spacegirl (Zita the Spacegirl, #1)                 0.913395
Nigella Express: Good Food, Fast                            0.938139
The Alexandria Link (Cotton Malone, #2)                     0.941637
Child of the Prophecy (Sevenwaters, #3)                     0.946955
The Expats                                                  0.950512
Name: Harry Potter and the Sorcerer's Stone (Harry Potter, #1), dtype: float64

In [96]:
# Case-insensitive, literal substring search over the book titles.
outlander_mask = titles.str.lower().str.contains('outlander', regex=False)
titles[outlander_mask]

Index(['A Breath of Snow and Ashes (Outlander, #6)',
       'An Echo in the Bone (Outlander, #7)',
       'Dragonfly in Amber (Outlander, #2)', 'Drums of Autumn (Outlander, #4)',
       'Outlander (Outlander, #1)', 'The Fiery Cross (Outlander, #5)',
       'The Space Between (Outlander, #7.5)', 'Voyager (Outlander, #3)',
       'Written in My Own Heart's Blood (Outlander, #8)'],
      dtype='object', name='title')

In [98]:
# Distances from this book to every title, closest first. The first row is
# the book itself (distance 0), so ten rows gives nine recommendations.
outlander_title = "Outlander (Outlander, #1)"
outlander_distances = recommend_df[outlander_title]
outlander_distances.sort_values().head(10)

title
Outlander (Outlander, #1)                                                                                    0.000000
The Commitments                                                                                              0.891255
Proper Gauge (Wool, #2)                                                                                      0.903882
Feeling Good: The New Mood Therapy                                                                           0.906752
The Mermaids Singing (Tony Hill & Carol Jordan, #1)                                                          0.915084
InuYasha: Turning Back Time (InuYasha, #1)                                                                   0.924944
The Eight (The Eight #1)                                                                                     0.937216
Morning, Noon & Night                                                                                        0.937812
These Is My Words: The Diary of Sarah Agnes Prine,