# Build a song recommender system

In [1]:
import turicreate
import turicreate.aggregate as agg

# Load some music data

In [2]:
# Load the listening-history SFrame stored next to this notebook.
song_data = turicreate.SFrame(
    './song_data.sframe/'
)

# Explore our data

In [3]:
# Echo the SFrame so the notebook renders a preview of its rows and columns.
song_data

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOAKIMP12A8C130995,1,The Cove,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBYHAJ12A6701BF1D,1,Constellations,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODACBL12A8C13C273,1,Learn To Fly,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODDNQT12A6D4F5F7E,5,Apuesta Por El Rock 'N' Roll ...,Héroes del Silencio
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODXRTY12AB0180F3B,1,Paper Gangsta,Lady GaGa
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFGUAY12AB017B0A8,1,Stacked Actors,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFRQTD12A81C233C0,1,Sehr kosmisch,Harmonia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOHQWYZ12A6D4FA701,1,Heaven's gonna burn your eyes ...,Thievery Corporation feat. Emiliana Torrini ...

song
The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia ...
Stronger - Kanye West
Constellations - Jack Johnson ...
Learn To Fly - Foo Fighters ...
Apuesta Por El Rock 'N' Roll - Héroes del ...
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters ...
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery ...


## Show the most popular songs in the dataset

In [5]:
# Call the SFrame's built-in show() on the full table (no output was captured in this notebook).
song_data.show()

In [6]:
# Call show() on just the 'song' column (no output was captured in this notebook).
song_data['song'].show()

# Count the number of unique users in the data

In [7]:
# Distinct user ids appearing anywhere in the data.
user_id_column = song_data['user_id']
users = user_id_column.unique()

In [8]:
# Number of distinct user ids.
len(users)

66346

# Create a song recommender

In [9]:
# Random split with fraction 0.8 going to the first part; the fixed seed keeps the split repeatable.
train_data, test_data = song_data.random_split(0.8, seed=0)

## Create a very simple popularity recommender

In [11]:
# Fit the popularity recommender on the training split.
# Users are keyed by 'user_id' and items by the combined 'song' column.
popularity_model = turicreate.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)

## Use the popularity model to make some predictions

In [12]:
# Popularity-model recommendations for the first entry of `users`.
first_user = users[0]
popularity_model.recommend(users=[first_user])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sehr kosmisch - Harmonia,4754.0,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Undo - Björk,4227.0,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,You're The One - Dwight Yoakam ...,3781.0,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Revelry - Kings Of Leon,3527.0,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Secrets - OneRepublic,3148.0,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Hey_ Soul Sister - Train,2538.0,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Tive Sim - Cartola,2521.0,10


In [13]:
# Popularity-model recommendations for the second entry of `users`
# (the output shown lists the same songs and scores as for the first user).
second_user = users[1]
popularity_model.recommend(users=[second_user])

user_id,song,score,rank
696787172dd3f5169dc94deef 97e427cee86147d ...,Sehr kosmisch - Harmonia,4754.0,1
696787172dd3f5169dc94deef 97e427cee86147d ...,Undo - Björk,4227.0,2
696787172dd3f5169dc94deef 97e427cee86147d ...,You're The One - Dwight Yoakam ...,3781.0,3
696787172dd3f5169dc94deef 97e427cee86147d ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
696787172dd3f5169dc94deef 97e427cee86147d ...,Revelry - Kings Of Leon,3527.0,5
696787172dd3f5169dc94deef 97e427cee86147d ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
696787172dd3f5169dc94deef 97e427cee86147d ...,Secrets - OneRepublic,3148.0,7
696787172dd3f5169dc94deef 97e427cee86147d ...,Hey_ Soul Sister - Train,2538.0,8
696787172dd3f5169dc94deef 97e427cee86147d ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
696787172dd3f5169dc94deef 97e427cee86147d ...,Tive Sim - Cartola,2521.0,10


# Build a recommender with personalization

In [15]:
# Fit the item-similarity recommender on the same training split,
# with the same user and item columns as the popularity model.
personalized_model = turicreate.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)

## Apply personalized model to make song recommendations

In [16]:
# Item-similarity recommendations for the first entry of `users`.
first_user = users[0]
personalized_model.recommend(users=[first_user])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Riot In Cell Block Number Nine - Dr Feelgood ...,0.0374999940395355,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sei Lá Mangueira - Elizeth Cardoso ...,0.0331632643938064,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,The Stallion - Ween,0.0322580635547637,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Rain - Subhumans,0.0314159244298934,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,West One (Shine On Me) - The Ruts ...,0.0306771993637084,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Back Against The Wall - Cage The Elephant ...,0.0301204770803451,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Life Less Frightening - Rise Against ...,0.0284431129693985,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,A Beggar On A Beach Of Gold - Mike And The ...,0.023002490401268,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Audience Of One - Rise Against ...,0.0193938463926315,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Blame It On The Boogie - The Jacksons ...,0.0189873427152633,10


In [17]:
# Item-similarity recommendations for the second entry of `users`.
second_user = users[1]
personalized_model.recommend(users=[second_user])

user_id,song,score,rank
696787172dd3f5169dc94deef 97e427cee86147d ...,Senza Una Donna (Without A Woman) - Zucchero / ...,0.0170265776770455,1
696787172dd3f5169dc94deef 97e427cee86147d ...,Oceanside - Angels Of Light & Akron/Family ...,0.0163826985018593,2
696787172dd3f5169dc94deef 97e427cee86147d ...,Bolinha De Papel - João Gilberto ...,0.0139285709176744,3
696787172dd3f5169dc94deef 97e427cee86147d ...,La Fokin Moda - Calle 13,0.0133134637560163,4
696787172dd3f5169dc94deef 97e427cee86147d ...,Wendy Clear - Blink-182,0.0127551002161843,5
696787172dd3f5169dc94deef 97e427cee86147d ...,Sin Coro - Calle 13,0.0118136874267033,6
696787172dd3f5169dc94deef 97e427cee86147d ...,Sin Exagerar - Calle 13 Featuring Tego Calderón ...,0.0114380546978541,7
696787172dd3f5169dc94deef 97e427cee86147d ...,Un Beso de Desayuno - Calle 13 ...,0.0110539283071245,8
696787172dd3f5169dc94deef 97e427cee86147d ...,No Hay Nadie Como Tú - Calle 13 Featuring Café ...,0.0108289122581481,9
696787172dd3f5169dc94deef 97e427cee86147d ...,La Tripleta - Calle 13,0.0103487159524645,10


# Apply model to find similar songs in the data set

In [18]:
# Songs the item-similarity model considers closest to the query song.
query_songs = ['With Or Without You - U2']
personalized_model.get_similar_items(query_songs)

song,similar,score,rank
With Or Without You - U2,I Still Haven't Found What I'm Looking For ...,0.0428571701049804,1
With Or Without You - U2,Hold Me_ Thrill Me_ Kiss Me_ Kill Me - U2 ...,0.033734917640686,2
With Or Without You - U2,Window In The Skies - U2,0.032835841178894,3
With Or Without You - U2,Vertigo - U2,0.030075192451477,4
With Or Without You - U2,Sunday Bloody Sunday - U2,0.0271317958831787,5
With Or Without You - U2,Bad - U2,0.0251798629760742,6
With Or Without You - U2,A Day Without Me - U2,0.0237154364585876,7
With Or Without You - U2,Another Time Another Place - U2 ...,0.0203251838684082,8
With Or Without You - U2,Walk On - U2,0.0202020406723022,9
With Or Without You - U2,Get On Your Boots - U2,0.0196850299835205,10


In [19]:
# Same similarity lookup for a different query song.
query_songs = ['Chan Chan (Live) - Buena Vista Social Club']
personalized_model.get_similar_items(query_songs)

song,similar,score,rank
Chan Chan (Live) - Buena Vista Social Club ...,Murmullo - Buena Vista Social Club ...,0.1881188154220581,1
Chan Chan (Live) - Buena Vista Social Club ...,La Bayamesa - Buena Vista Social Club ...,0.1871921420097351,2
Chan Chan (Live) - Buena Vista Social Club ...,Amor de Loca Juventud - Buena Vista Social Club ...,0.1848341226577758,3
Chan Chan (Live) - Buena Vista Social Club ...,Diferente - Gotan Project,0.0214592218399047,4
Chan Chan (Live) - Buena Vista Social Club ...,Mistica - Orishas,0.0205761194229125,5
Chan Chan (Live) - Buena Vista Social Club ...,Hotel California - Gipsy Kings ...,0.0193049907684326,6
Chan Chan (Live) - Buena Vista Social Club ...,Nací Orishas - Orishas,0.0191571116447448,7
Chan Chan (Live) - Buena Vista Social Club ...,Gitana - Willie Colon,0.0187969803810119,8
Chan Chan (Live) - Buena Vista Social Club ...,Le Moulin - Yann Tiersen,0.0187969803810119,9
Chan Chan (Live) - Buena Vista Social Club ...,Criminal - Gotan Project,0.0187793374061584,10


# Compare the models quantitatively
We now formally compare the popularity and the personalized models using precision-recall curves. 

In [21]:
%matplotlib inline
model_performance = turicreate.recommender.util.compare_models(test_data, [popularity_model, personalized_model], user_sample=.05)

compare_models: using 2931 users to estimate model performance
PROGRESS: Evaluate model M0





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.020129648584100997 | 0.005076958940827931 |
|   2    | 0.01876492664619583  | 0.009343511979131202 |
|   3    | 0.01865119981803711  | 0.013803909044441289 |
|   4    | 0.01722961446605253  | 0.017047056018396872 |
|   5    | 0.016854315933128652 | 0.021653829509510208 |
|   6    |  0.0160923461844649  | 0.024940165600349846 |
|   7    | 0.015499342009065673 | 0.02851553276322984  |
|   8    | 0.014372227908563632 |  0.0300831821046765  |
|   9    | 0.01414003563440617  | 0.03273872815838012  |
|   10   | 0.013920163766632581 |  0.0362400251857775  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.019447287615148405 | 0.006346929351023517 |
|   2    | 0.015694302285909244 | 0.009765528266040035 |
|   3    | 0.013647219379051518 | 0.012066863346290157 |
|   4    | 0.012197202320027285 | 0.014176385235750636 |
|   5    | 0.011190719890822244 | 0.016591332584167794 |
|   6    | 0.01063345843284432  | 0.018450372365418456 |
|   7    | 0.009894234049812337 | 0.01960402272582416  |
|   8    | 0.00921187308085977  | 0.021093327234575965 |
|   9    | 0.008643238940065955 | 0.021863715722467012 |
|   10   | 0.008188331627430916 | 0.022703140333641878 |
+--------+----------------------+----------------------+
[10 rows x 3 columns]



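In this run, the tables above do not show the personalized model outperforming the popularity baseline: the personalized model (M1) has slightly higher mean recall at cutoffs 1 and 2, but the popularity model (M0) has higher mean precision at every cutoff and higher mean recall from cutoff 3 onward.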

## Unique Users

In [29]:
# Keep only rows whose artist is exactly 'Kanye West', then count distinct listeners.
is_kanye_west = song_data['artist'] == 'Kanye West'
kwUsers = song_data[is_kanye_west]
len(kwUsers['user_id'].unique())

2522

In [30]:
# Distinct listeners among rows whose artist is exactly 'Foo Fighters'.
is_foo_fighters = song_data['artist'] == 'Foo Fighters'
u = song_data[is_foo_fighters]
len(u['user_id'].unique())

2055

In [31]:
# Distinct listeners among rows whose artist is exactly 'Taylor Swift'.
is_taylor_swift = song_data['artist'] == 'Taylor Swift'
u = song_data[is_taylor_swift]
len(u['user_id'].unique())

3246

In [32]:
# Distinct listeners among rows whose artist is exactly 'Lady GaGa' (spelling as stored in the data).
is_lady_gaga = song_data['artist'] == 'Lady GaGa'
u = song_data[is_lady_gaga]
len(u['user_id'].unique())

2928

In [39]:
## Most and Least Listened-To Artists

In [40]:
# Sum listen_count per artist, then sort ascending so the least-played artists come first.
# `agg` (turicreate.aggregate) is imported in the notebook's top import cell rather than here,
# so every dependency is visible in one place and later cells do not rely on this cell for it.
artist_total_plays = song_data.groupby(key_column_names='artist',
                                       operations={'total_plays': agg.SUM('listen_count')})
artist_total_plays.sort(key_column_names='total_plays', ascending=True)

artist,total_plays
William Tabbert,14
Reel Feelings,24
Beyoncé feat. Bun B and Slim Thug ...,26
Boggle Karaoke,30
Diplo,30
harvey summers,31
Nâdiya,36
Jody Bernal,38
Aneta Langerova,38
Kanye West / Talib Kweli / Q-Tip / Common / ...,38


In [41]:

# Same per-artist listen_count totals, ordered from most to least played.
plays_by_artist = song_data.groupby(key_column_names='artist',
                                    operations={'total_plays': agg.SUM('listen_count')})
plays_by_artist.sort(key_column_names='total_plays', ascending=False)

artist,total_plays
Kings Of Leon,43218
Dwight Yoakam,40619
Björk,38889
Coldplay,35362
Florence + The Machine,33387
Justin Bieber,29715
Alliance Ethnik,26689
OneRepublic,25754
Train,25402
The Black Keys,22184


## Most Recommended Songs

In [42]:
# Take at most 10000 distinct user ids from the test split.
test_user_ids = test_data['user_id'].unique()
subset_test_users = test_user_ids[:10000]

In [43]:
# Single highest-ranked recommendation (k=1) for each user in the subset.
personalized_model.recommend(users=subset_test_users, k=1)

user_id,song,score,rank
696787172dd3f5169dc94deef 97e427cee86147d ...,Senza Una Donna (Without A Woman) - Zucchero / ...,0.0170265776770455,1
2e9cbd200d67910aeb97b36ef e9cd4341cc06030 ...,Secrets - OneRepublic,0.0384363691012064,1
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Rabbit Heart (Raise It Up) - Florence + The ...,0.0799399726092815,1
fe85b96ba1983219b296f6b48 69dd29eb2b72ff9 ...,Secrets - OneRepublic,0.0788827141125996,1
95dc7e2b188b1148b2d25f4e6 b6e94afacc4efc3 ...,Bust a Move - Infected Mushroom ...,0.0534738540649414,1
9bd4c3ace8717adbd277541d9 79af6acf6de54f6 ...,JUAN LOCO - Rodrigo y Gabriela ...,0.0253521124521891,1
66515166dc465cb51af511db9 f6e708095af31a6 ...,Fell On Black Days - Soundgarden ...,0.0132082886993885,1
181d9c254ff957a396b8bbcec 10228f84cfa5c2c ...,Future Reflections - MGMT,0.0541004793984549,1
87fa091833add3a9b90404d4b ae5d2cd709f632b ...,RoboCop - Kanye West,0.0541125535964965,1
3930e92959235c4adb586ffc8 ed5429b9a304656 ...,Undo - Björk,0.039873685155596,1


In [46]:
# Count how often each song is the rank-1 recommendation across the subset of test users,
# then list the most frequently recommended songs first.
top_recommendations = personalized_model.recommend(users=subset_test_users, k=1)
song_counts = top_recommendations.groupby(key_column_names='song',
                                          operations={'count': turicreate.aggregate.COUNT()})
song_counts.sort(key_column_names='count', ascending=False)

song,count
Secrets - OneRepublic,400
Undo - Björk,390
Revelry - Kings Of Leon,193
You're The One - Dwight Yoakam ...,174
Fireflies - Charttraxx Karaoke ...,127
Sehr kosmisch - Harmonia,105
Hey_ Soul Sister - Train,95
Horn Concerto No. 4 in E flat K495: II. Romance ...,89
OMG - Usher featuring will.i.am ...,65
Dog Days Are Over (Radio Edit) - Florence + The ...,49
