# Show Recommender
> Author: Sharnique Beck

In [1]:
# import Libraries
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances

In [2]:
# Load the per-user ratings and the show catalogue from the local data folder
data = pd.read_csv('./data/ratings_data.csv')
shows = pd.read_csv('./data/k_titles.csv')

In [3]:
# Drop columns that will not be used.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second run no longer raises KeyError because the
# columns are already gone. `axis=1` was redundant next to `columns=`.
shows = shows.drop(columns=['url', 'container', '# ratings'], errors='ignore')
shows.head()

Unnamed: 0,title,rating
0,Thirty But Seventeen,9.58
1,Fates and Furies,9.14
2,The Last Empress,9.45
3,Encounter,9.59
4,My Strange Hero,9.56


In [4]:
# Preview the first five rows of the ratings table
data.head(5)

Unnamed: 0,rating,title,user,user_id
0,10,Thirty But Seventeen,la_fra,19253
1,10,W,la_fra,19253
2,10,Descendants of the Sun,la_fra,19253
3,10,Fight My Way,la_fra,19253
4,10,Thirty But Seventeen,jyotikababbar999_688,143260


In [5]:
# Dimensions of the ratings table as (rows, columns)
data.shape

(402426, 4)

## Create Pivot table

In [6]:
# Reshape the ratings into a title x user_id table; cells where a user
# has not rated a title are left as NaN
pivot = data.pivot_table(values='rating', index='title', columns='user_id')
pivot.head()

user_id,0,1,2,3,4,5,6,7,8,9,...,183640,183641,183642,183643,183644,183645,183646,183647,183648,183649
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
109 Strange Things,,,,,,,,,3.0,,...,,,,,,,,,,
12th Annual Soompi Awards,,,,,,,,,,,...,,,,,,,,,,
2 Days and 1 Night,,,,,,,10.0,,,,...,,,,,,,,,,
2014 Idol Star Athletics Champio...,,,,,,,,,,,...,,,,,,,,,,
2015 Idol Star Athletics Champio...,,,,,,,,,,,...,,,,,,,,,,


## Create Sparse Matrix

In [8]:
# Each row is a title, each column a user, each value that user's rating.
# Missing ratings (NaN) are replaced with 0 before storing the table in CSR format.
pivot_sparse = sparse.csr_matrix(pivot.fillna(value=0))

## Calculate Cosine Distance

In [11]:
# Compare every show with every other show in a square matrix.
# pairwise_distances with metric='cosine' yields cosine *distance*
# (1 - cosine similarity): 0 means identical rating patterns, and
# smaller values mean more similar shows.
recommender = pairwise_distances(X=pivot_sparse, metric='cosine')

In [14]:
# Peek at the distances among the first 5 shows
recommender[:5, :5]

array([[0.        , 1.        , 0.99183537, 1.        , 1.        ],
       [1.        , 0.        , 1.        , 1.        , 1.        ],
       [0.99183537, 1.        , 0.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 0.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 0.        ]])

In [16]:
# Wrap the distance matrix in a DataFrame labelled by show title on both axes
recommender_df = pd.DataFrame(recommender, columns=pivot.index, index=pivot.index)
recommender_df.head()

title,109 Strange Things,12th Annual Soompi Awards,2 Days and 1 Night,2014 Idol Star Athletics Champio...,2015 Idol Star Athletics Champio...,2016 DMC Festival,2016 Idol Star Athletics Champio...,2017 Idol Star Athletics Champio...,2017 KBS Drama Awards,2017 KBS Song Festival,...,YoGoBaLa,You Are The Best,You Are Too Much,You Drive Me Crazy,Youn's Kitchen 2,Your Honor,Your House Helper,You’re Beautiful,Yu Huiyeol’s Sketchbook,to. Jenny
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
109 Strange Things,0.0,1.0,0.991835,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,0.993757,1.0,0.949538,1.0,0.966719,0.979238,0.982405,1.0,0.967651
12th Annual Soompi Awards,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2 Days and 1 Night,0.991835,1.0,0.0,1.0,1.0,1.0,0.984146,1.0,1.0,1.0,...,1.0,1.0,1.0,0.976157,0.974806,0.954312,0.986439,0.987525,1.0,0.994297
2014 Idol Star Athletics Champio...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2015 Idol Star Athletics Champio...,1.0,1.0,1.0,1.0,0.0,1.0,0.885266,1.0,1.0,1.0,...,1.0,1.0,1.0,0.968942,1.0,1.0,1.0,1.0,1.0,1.0


## Recommender Results

In [30]:
# Search for titles containing the given text and display the top matches for each
search = "Flowers"
top_n = 10  # number of recommendations to show per matching title

# regex=False matches `search` literally (so characters like '(' or '+' are safe);
# na=False treats rows with a missing title as non-matching instead of failing.
matches = shows.loc[shows['title'].str.contains(search, regex=False, na=False), 'title']

for t in matches.values:
    # Filter the ratings once and reuse the result for both statistics
    title_ratings = data.loc[data['title'] == t, 'rating']
    print(t)
    print('Average Rating', title_ratings.mean())
    print('Number of Ratings', title_ratings.shape[0])
    print(' ')
    print('Recommendations:')
    if t in recommender_df.columns:
        # Values are cosine distances, so the smallest values are the closest shows.
        # The title itself is removed by label rather than by skipping position 0,
        # because another show tied at distance 0 could sort ahead of it.
        print(recommender_df[t].drop(labels=t).sort_values().head(top_n))
    else:
        # A title listed in `shows` but absent from the ratings has no column here
        print('No ratings available for this title')
    print(' ')
    print('***********************************************************')
    

Boys Over Flowers
Average Rating 9.40739289446186
Number of Ratings 7656
 
Recommendations:
title
Playful Kiss                  0.843600
My Love From the Star         0.890176
You’re Beautiful              0.891456
Pinocchio                     0.895207
The Legend of the Blue Sea    0.902742
Strong Woman Do Bong Soon     0.907004
Secret Garden                 0.907130
Oh My Venus                   0.907468
Coffee Prince                 0.908676
Fated to Love You             0.912778
Name: Boys Over Flowers, dtype: float64
 
***********************************************************
