In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the listening records from the CSV file sitting next to this notebook.
song_data_path = 'song_data.csv'
df = pd.read_csv(song_data_path)

In [3]:
# Number of distinct user_id values per artist; display the five largest.
listeners_per_artist = df.groupby('artist')['user_id'].nunique()
listeners_per_artist.sort_values(ascending=False).head(5)

artist
Kings Of Leon             7373
Coldplay                  6340
Harmonia                  5970
Björk                     5834
Florence + The Machine    5615
Name: user_id, dtype: int64

In [4]:
# Sum of listen_count per artist; display the five artists with the highest totals.
plays_per_artist = df.groupby('artist')['listen_count'].sum()
plays_per_artist.sort_values(ascending=False).head(5)

artist
Kings Of Leon             43218
Dwight Yoakam             40619
Björk                     38889
Coldplay                  35362
Florence + The Machine    33387
Name: listen_count, dtype: int64

In [5]:
# Sum of listen_count per artist; display the five artists with the lowest totals.
artist_play_totals = df.groupby('artist')['listen_count'].sum()
artist_play_totals.sort_values(ascending=True).head(5)

artist
William Tabbert                      14
Reel Feelings                        24
Beyoncé feat. Bun B and Slim Thug    26
Boggle Karaoke                       30
Diplo                                30
Name: listen_count, dtype: int64

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# scikit-learn 0.23 removed `jaccard_similarity_score` and the vendored
# `sklearn.externals.joblib`. The guarded imports below keep this cell working
# on both old and current releases instead of raising ImportError on a fresh kernel.
try:
    from sklearn.metrics import jaccard_similarity_score
except ImportError:
    # `jaccard_score` is the replacement, but its averaging semantics differ,
    # so it is deliberately NOT aliased to the old name: code that still calls
    # `jaccard_similarity_score` should fail loudly rather than change results.
    from sklearn.metrics import jaccard_score

try:
    # Standalone joblib is what current scikit-learn depends on.
    import joblib
except ImportError:
    # Very old environments only ship the copy vendored inside scikit-learn.
    from sklearn.externals import joblib

In [7]:
# Popularity table: one row per song with the number of rows that mention it.
# The 'count' aggregation counts non-null listen_count entries, so the resulting
# `listen_count` column holds a row count per song, not a sum of plays.
song_grouped = df.groupby('song')['listen_count'].count().reset_index()

# Each song's share of all counted rows, expressed in percent.
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] = (song_grouped['listen_count'] / grouped_sum) * 100

# Display: most-counted songs first, ties broken alphabetically by song.
song_grouped.sort_values(by=['listen_count', 'song'], ascending=[False, True])

Unnamed: 0,song,listen_count,percentage
7122,Sehr kosmisch - Harmonia,5970,0.534654
9077,Undo - Björk,5281,0.472950
9873,You\'re The One - Dwight Yoakam,4806,0.430410
2062,Dog Days Are Over (Radio Edit) - Florence + Th...,4536,0.406230
6769,Revelry - Kings Of Leon,4339,0.388587
3608,Horn Concerto No. 4 in E flat K495: II. Romanc...,3949,0.353660
7110,Secrets - OneRepublic,3916,0.350705
8840,Tive Sim - Cartola,3185,0.285239
2712,Fireflies - Charttraxx Karaoke,3171,0.283985
3480,Hey_ Soul Sister - Train,3132,0.280492


In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across kernel restarts.
train_data, test_data = train_test_split(
    df,
    test_size=0.20,
    random_state=0,
)

In [9]:
import Recommenders
import Evaluation

In [10]:
# Instantiate the popularity recommender from the project-local Recommenders
# module and initialise it with the training split.
# NOTE(review): the two strings are presumably the user-id and item column
# names expected by `create` — confirm against Recommenders.py.
pm = Recommenders.popularity_recommender_py()
pm.create(train_data, 'user_id', 'song')

In [11]:
# All distinct user ids in the full dataset (train and test rows together).
users = df['user_id'].unique()
# Ask the popularity model for recommendations for the user at index 5;
# the returned value is displayed as the cell output.
pm.recommend(users[5])

Unnamed: 0,user_id,song,score,Rank
7122,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Sehr kosmisch - Harmonia,4812,1.0
9077,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Undo - Björk,4211,2.0
9873,4bd88bfb25263a75bbdd467e74018f4ae570e5df,You\'re The One - Dwight Yoakam,3819,3.0
2062,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Dog Days Are Over (Radio Edit) - Florence + Th...,3624,4.0
6769,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Revelry - Kings Of Leon,3485,5.0
3608,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Horn Concerto No. 4 in E flat K495: II. Romanc...,3188,6.0
7110,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Secrets - OneRepublic,3103,7.0
8840,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Tive Sim - Cartola,2551,8.0
2712,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Fireflies - Charttraxx Karaoke,2525,9.0
3480,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Hey_ Soul Sister - Train,2505,10.0


In [12]:
# Same popularity model, different user (index 48 of `users`). The recorded
# output lists the same ten songs and scores as the cell for users[5].
pm.recommend(users[48])

Unnamed: 0,user_id,song,score,Rank
7122,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Sehr kosmisch - Harmonia,4812,1.0
9077,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Undo - Björk,4211,2.0
9873,edc8b7b1fd592a3b69c3d823a742e1a064abec95,You\'re The One - Dwight Yoakam,3819,3.0
2062,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Dog Days Are Over (Radio Edit) - Florence + Th...,3624,4.0
6769,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Revelry - Kings Of Leon,3485,5.0
3608,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Horn Concerto No. 4 in E flat K495: II. Romanc...,3188,6.0
7110,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Secrets - OneRepublic,3103,7.0
8840,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Tive Sim - Cartola,2551,8.0
2712,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Fireflies - Charttraxx Karaoke,2525,9.0
3480,edc8b7b1fd592a3b69c3d823a742e1a064abec95,Hey_ Soul Sister - Train,2505,10.0


In [13]:
# Instantiate the item-similarity recommender from the project-local
# Recommenders module and initialise it with the training split.
# NOTE(review): the two strings are presumably the user-id and item column
# names expected by `create` — confirm against Recommenders.py.
is_model = Recommenders.item_similarity_recommender_py()
is_model.create(train_data, 'user_id', 'song')

In [15]:
def _print_banner(rule, message):
    """Print `message` with the separator line `rule` above and below it."""
    print(rule)
    print(message)
    print(rule)


# Fetch the songs the item-similarity model holds for the first user.
user_id = users[0]
user_items = is_model.get_user_items(user_id)

_print_banner(
    "------------------------------------------------------------------------------------",
    "Training data songs for the user userid: %s:" % user_id,
)

# One song per line.
for song_title in user_items:
    print(song_title)

_print_banner(
    "----------------------------------------------------------------------",
    "Recommendation process going on:",
)

# Personalised recommendations for the same user; as the last expression of
# the cell, the result is displayed as the cell output.
is_model.recommend(user_id)

------------------------------------------------------------------------------------
Training data songs for the user userid: b80344d063b5ccb3212f76538f3d9e43d87dca9e:
------------------------------------------------------------------------------------
All That We Perceive - Thievery Corporation
Apuesta Por El Rock \'N\' Roll - Héroes del Silencio
Paper Gangsta - Lady GaGa
Neon - John Mayer
Oh No - Andrew Bird
Clarity - John Mayer
Breakout - Foo Fighters
Mykonos - Fleet Foxes
Love Song For No One - John Mayer
The Cove - Jack Johnson
Our Swords (Soundtrack Version) - Band Of Horses
Sehr kosmisch - Harmonia
Drive - Incubus
High and dry - Jorge Drexler
Love Shack - The B-52\'s
Heaven\'s gonna burn your eyes - Thievery Corporation feat. Emiliana Torrini
Holes To Heaven - Jack Johnson
Country Road - Jack Johnson / Paula Fuga
Generator - Foo Fighters
Are You In? - Incubus
Stronger - Kanye West
I\'ll Be Missing You (Featuring Faith Evans & 112)(Album Version) - Puff Daddy
Champion - Kanye Wes

KeyboardInterrupt: 

In [None]:
import time

# Share of users handed to calculate_measures for the precision/recall run.
# NOTE(review): presumably 0.04 means 4% of users — confirm against Evaluation.py.
user_sample = 0.04

# perf_counter() is a monotonic, high-resolution clock, so the elapsed time
# below cannot be distorted by system clock adjustments (unlike time.time()).
start = time.perf_counter()

# Instantiate the precision_recall_calculator class with both data splits and
# both recommender models (popularity and item-similarity).
pr = Evaluation.precision_recall_calculator(test_data, train_data, pm, is_model)

# Compute the precision and recall lists for each of the two models.
(pm_avg_precision_list, pm_avg_recall_list,
 ism_avg_precision_list, ism_avg_recall_list) = pr.calculate_measures(user_sample)

end = time.perf_counter()
# Elapsed time of the evaluation, in seconds.
print(end - start)