# Build a song recommender system

In [2]:
import turicreate

# Load some music data

In [3]:
song_data = turicreate.SFrame('song_data.sframe/')

# Explore our data

In [7]:
song_data

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOAKIMP12A8C130995,1,The Cove,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBYHAJ12A6701BF1D,1,Constellations,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODACBL12A8C13C273,1,Learn To Fly,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODDNQT12A6D4F5F7E,5,Apuesta Por El Rock 'N' Roll ...,Héroes del Silencio
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODXRTY12AB0180F3B,1,Paper Gangsta,Lady GaGa
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFGUAY12AB017B0A8,1,Stacked Actors,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFRQTD12A81C233C0,1,Sehr kosmisch,Harmonia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOHQWYZ12A6D4FA701,1,Heaven's gonna burn your eyes ...,Thievery Corporation feat. Emiliana Torrini ...

song
The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia ...
Stronger - Kanye West
Constellations - Jack Johnson ...
Learn To Fly - Foo Fighters ...
Apuesta Por El Rock 'N' Roll - Héroes del ...
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters ...
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery ...


## Show the most popular songs in the dataset

In [12]:
song_data['song'].show()
len(song_data)

1116609

# Count the number of unique users in the data

In [10]:
users = song_data['user_id'].unique()

In [6]:
len(users)

66346

# Create a song recommender

In [13]:
train_data,test_data = song_data.random_split(.8,seed=0)

## Create a very simple popularity recommender

In [14]:
popularity_model = turicreate.popularity_recommender.create(train_data,
                                                           user_id = 'user_id',
                                                           item_id = 'song')

## Use the popularity model to make some predictions

In [9]:
popularity_model.recommend(users=[users[0]])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sehr kosmisch - Harmonia,4754.0,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Undo - Björk,4227.0,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,You're The One - Dwight Yoakam ...,3781.0,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Revelry - Kings Of Leon,3527.0,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Secrets - OneRepublic,3148.0,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Hey_ Soul Sister - Train,2538.0,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Tive Sim - Cartola,2521.0,10


In [10]:
popularity_model.recommend(users=[users[1]])

user_id,song,score,rank
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Sehr kosmisch - Harmonia,4754.0,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Undo - Björk,4227.0,2
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,You're The One - Dwight Yoakam ...,3781.0,3
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Revelry - Kings Of Leon,3527.0,5
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Secrets - OneRepublic,3148.0,7
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Hey_ Soul Sister - Train,2538.0,8
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Tive Sim - Cartola,2521.0,10


# Build a recommender with personalization

In [15]:
personalized_model = turicreate.item_similarity_recommender.create(train_data,
                                                                  user_id = 'user_id',
                                                                  item_id = 'song')

## Apply personalized model to make song recommendations

In [17]:
personalized_model.recommend(users=[users[0]])

user_id,song,score,rank
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Cuando Pase El Temblor - Soda Stereo ...,0.0194504536115206,1
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Fireflies - Charttraxx Karaoke ...,0.0144737317011906,2
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Love Is A Losing Game - Amy Winehouse ...,0.0142865960414593,3
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Marry Me - Train,0.0141334717090313,4
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Secrets - OneRepublic,0.013591665488023,5
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Sehr kosmisch - Harmonia,0.0133987894425025,6
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Te Hacen Falta Vitaminas - Soda Stereo ...,0.0129302831796499,7
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,OMG - Usher featuring will.i.am ...,0.0127778282532325,8
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Y solo se me ocurre amarte (Unplugged) - ...,0.0123411279458266,9
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,No Dejes Que... - Caifanes ...,0.0121042499175438,10


In [18]:
personalized_model.recommend(users=[users[1]])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Riot In Cell Block Number Nine - Dr Feelgood ...,0.0374999940395355,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sei Lá Mangueira - Elizeth Cardoso ...,0.0331632643938064,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,The Stallion - Ween,0.0322580635547637,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Rain - Subhumans,0.0314159244298934,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,West One (Shine On Me) - The Ruts ...,0.0306771993637084,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Back Against The Wall - Cage The Elephant ...,0.0301204770803451,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Life Less Frightening - Rise Against ...,0.0284431129693985,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,A Beggar On A Beach Of Gold - Mike And The ...,0.023002490401268,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Audience Of One - Rise Against ...,0.0193938463926315,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Blame It On The Boogie - The Jacksons ...,0.0189873427152633,10


# Apply model to find similar songs in the data set

In [20]:
personalized_model.get_similar_items(['Hotel California - Gipsy Kings'])

song,similar,score,rank
Hotel California - Gipsy Kings ...,Volare (Nel blu di pinto Di blu) - Gipsy Kings ...,0.0799999833106994,1
Hotel California - Gipsy Kings ...,Djobi_ Djoba - Gipsy Kings ...,0.0491803288459777,2
Hotel California - Gipsy Kings ...,I Heard It's The Softest Thing Ever (Album ...,0.0236686468124389,3
Hotel California - Gipsy Kings ...,Illumination - Miami Horror ...,0.0198019742965698,4
Hotel California - Gipsy Kings ...,Chan Chan (Live) - Buena Vista Social Club ...,0.0193049907684326,5
Hotel California - Gipsy Kings ...,Ana - Pixies,0.018691599369049,6
Hotel California - Gipsy Kings ...,Don't Play That Song (You Lied) (LP Version) - ...,0.0185185074806213,7
Hotel California - Gipsy Kings ...,The Horizon Has Been Defeated - Jack Johnson ...,0.0168067216873168,8
Hotel California - Gipsy Kings ...,Oh! Sweet Nuthin' (LP Version) - The Velvet ...,0.0168067216873168,9
Hotel California - Gipsy Kings ...,Fold - José González,0.0163934230804443,10


In [15]:
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])

song,similar,score,rank
Chan Chan (Live) - Buena Vista Social Club ...,Murmullo - Buena Vista Social Club ...,0.188118815422,1
Chan Chan (Live) - Buena Vista Social Club ...,La Bayamesa - Buena Vista Social Club ...,0.18719214201,2
Chan Chan (Live) - Buena Vista Social Club ...,Amor de Loca Juventud - Buena Vista Social Club ...,0.184834122658,3
Chan Chan (Live) - Buena Vista Social Club ...,Diferente - Gotan Project,0.0214592218399,4
Chan Chan (Live) - Buena Vista Social Club ...,Mistica - Orishas,0.0205761194229,5
Chan Chan (Live) - Buena Vista Social Club ...,Hotel California - Gipsy Kings ...,0.0193049907684,6
Chan Chan (Live) - Buena Vista Social Club ...,Nací Orishas - Orishas,0.0191571116447,7
Chan Chan (Live) - Buena Vista Social Club ...,Gitana - Willie Colon,0.018796980381,8
Chan Chan (Live) - Buena Vista Social Club ...,Le Moulin - Yann Tiersen,0.018796980381,9
Chan Chan (Live) - Buena Vista Social Club ...,Criminal - Gotan Project,0.0187793374062,10


# Compare the models quantitatively
We now formally compare the popularity and the personalized models using precision-recall curves. 

In [22]:
%matplotlib inline

In [23]:
model_performance = turicreate.recommender.util.compare_models(test_data, [popularity_model, personalized_model], user_sample=.05)

compare_models: using 2931 users to estimate model performance
PROGRESS: Evaluate model M0





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.025588536335721612 | 0.006206272403815905 |
|   2    | 0.020982599795291712 | 0.009931912925771672 |
|   3    | 0.021153190037529898 | 0.016300754478850724 |
|   4    | 0.020214943705220072 | 0.020912627309761435 |
|   5    | 0.018833162743091117 | 0.024868696260712638 |
|   6    | 0.01853747298987829  | 0.028946229099760332 |
|   7    | 0.017887605400399697 | 0.03232705446113841  |
|   8    | 0.01658990105765949  | 0.03447716845874473  |
|   9    | 0.015921755942226796 | 0.03690523623993428  |
|   10   | 0.015455475946775887 | 0.041335708531204995 |
+--------+----------------------+----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.05049471170249062  | 0.013799543456657067 |
|   2    | 0.04520641419310816  | 0.02493195136490941  |
|   3    | 0.03809848743318547  | 0.031363367289672324 |
|   4    | 0.033435687478676236 | 0.03628039716831943  |
|   5    | 0.030092118730808647 | 0.040746175239522266 |
|   6    | 0.02763561924257934  | 0.04451488179737922  |
|   7    | 0.025734756543354307 | 0.04809032896289809  |
|   8    | 0.02409587171613784  | 0.05137203720878234  |
|   9    | 0.02285909245991132  | 0.05455030819667466  |
|   10   | 0.02156260661890141  | 0.056991939502174946 |
+--------+----------------------+----------------------+
[10 rows x 3 columns]



The table shows that the personalized model provides much better performance.

# Quiz

In [24]:
train_data,test_data = song_data.random_split(.8,seed=0)

In [25]:
quiz_model = turicreate.item_similarity_recommender.create(train_data,
                                                                  user_id = 'user_id',
                                                                  item_id = 'song')

In [26]:
subset_test_users = test_data['user_id'].unique()[0:10000]

In [28]:
out=quiz_model.recommend(subset_test_users,k=1)

In [33]:
out.groupby('song', operations={'total_count': turicreate.aggregate.COUNT()})

song,total_count
The Climb - Miley Cyrus,3
Hey Daddy (Daddy's Home) - Usher ...,5
I Gotta Feeling - Black Eyed Peas ...,16
Cerdo - Molotov,1
Ich Will - Rammstein,9
Too Deep - Girl Talk,3
Dumpweed - Blink-182,2
Guys Like Me - Eric Church ...,2
I Can't Love You Back - Easton Corbin ...,2
Freedom - Akon,4


## Q1. Artist with most unique listening users 

In [21]:
artists = ['Kanye West', 'Foo Fighters', 'Taylor Swift', 'Lady GaGa']
result = {}
for artist in artists:
    result[artist] = len(song_data[song_data['artist'] == artist]['user_id'].unique())

print(result)

{'Kanye West': 2522, 'Foo Fighters': 2055, 'Taylor Swift': 3246, 'Lady GaGa': 2928}


## Q2. Most popular artist with highest listen count

In [26]:
artists = ['Kings of Leon', 'Coldplay', 'Taylor Swift', 'Lady GaGa']
counted_data = song_data.groupby('artist', operations={'total_listen_count': turicreate.aggregate.SUM('listen_count')})
result = {}

counted_data.apply(lambda )

# for artist in artists:
#     result[artist] = counted_data[counted_data]
#     result[artist] = song_data[song_data['artist'] == artist].groupby('listen_count', operations={'total_count': turicreate.aggregate.SUM()})
# print(result)
# song_data[song_data['artist'] == artist].groupby('artist', operations={'total_listen_count': turicreate.aggregate.SUM('listen_count')})


artist,total_listen_count
Coldplay,35362


In [18]:
extract=song_data[song_data['artist'].apply(lambda input: input in ['Kanye West', 'Foo Fighters', 'Taylor Swift', 'Lady GaGa'])]

In [100]:
len(extract)

17560

In [101]:
extract[extract['listen_count'] == extract['listen_count'].max()]

user_id,song_id,listen_count,title,artist,song
d2232ac7a1ec17b283b5dff24 3161902b2cb706c ...,SOTWSXL12A8C143349,333,Love Story,Taylor Swift,Love Story - Taylor Swift


In [93]:
extract[extract['listen_count']==extract['listen_count'].max()]

user_id,song_id,listen_count,title,artist,song
d2232ac7a1ec17b283b5dff24 3161902b2cb706c ...,SOTWSXL12A8C143349,333,Love Story,Taylor Swift,Love Story - Taylor Swift


In [97]:
extract2[extract2['listen_count']==extract2['listen_count'].min()]

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOMLMKI12A81C204BC,1,Champion,Kanye West
8fce200f3912e9608e3b1463c db9c3529aab5c08 ...,SOIBSWV12A6D4F6AB3,1,Through The Wire,Kanye West
a699816c7f4f8855808eee732 a5cbd1620091998 ...,SOYCYHP12A67021567,1,There She Goes Again,Velvet Underground & Nico
a56bf59af6edc5ae6c92d61dd d214989332864e8 ...,SONGNHO12AB0183915,1,Bad News,Kanye West
8fa25e588aeedaa539674babb 75729ac9f31f15e ...,SOOLPFK12A58A7BDE3,1,Flashing Lights,Kanye West
4926960fa1f2aa9f6c46bf591 e83bb4e3d7f6a6f ...,SOQAEHC12A67020A41,1,Roses,Kanye West
4926960fa1f2aa9f6c46bf591 e83bb4e3d7f6a6f ...,SOUXNNU12A67020A48,1,Hey Mama,Kanye West
d2f7afe12fbd26fc2bed00397 40fc6af579920c0 ...,SOIBSWV12A6D4F6AB3,1,Through The Wire,Kanye West
d2f7afe12fbd26fc2bed00397 40fc6af579920c0 ...,SOUCBEB12A6310E1F9,1,Jesus Walks,Kanye West

song
Stronger - Kanye West
Champion - Kanye West
Through The Wire - Kanye West ...
There She Goes Again - Velvet Underground & ...
Bad News - Kanye West
Flashing Lights - Kanye West ...
Roses - Kanye West
Hey Mama - Kanye West
Through The Wire - Kanye West ...
Jesus Walks - Kanye West


In [95]:
extract2=song_data[song_data['artist'].apply(lambda input: input in ['William Tabbert','Velvet Underground & Nico','Kanye West', 'The Cool Kids'])]

In [102]:
extract2

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOMLMKI12A81C204BC,1,Champion,Kanye West
5d5e0142e54c3bb7b69f548c2 ee55066c90700eb ...,SORFASW12A81C22AE7,2,Stronger,Kanye West
537340ff896dea11328910013 cfe759413e1eeb3 ...,SOBXHDL12A81C204C0,2,Stronger,Kanye West
7dd192c8bd4f27f573cb15e86 56442aadd7a9c01 ...,SOOLPFK12A58A7BDE3,5,Flashing Lights,Kanye West
8fce200f3912e9608e3b1463c db9c3529aab5c08 ...,SOBXHDL12A81C204C0,2,Stronger,Kanye West
8fce200f3912e9608e3b1463c db9c3529aab5c08 ...,SOIBSWV12A6D4F6AB3,1,Through The Wire,Kanye West
a699816c7f4f8855808eee732 a5cbd1620091998 ...,SOYCYHP12A67021567,1,There She Goes Again,Velvet Underground & Nico
a56bf59af6edc5ae6c92d61dd d214989332864e8 ...,SONGNHO12AB0183915,1,Bad News,Kanye West
8fa25e588aeedaa539674babb 75729ac9f31f15e ...,SOOLPFK12A58A7BDE3,1,Flashing Lights,Kanye West

song
Stronger - Kanye West
Champion - Kanye West
Stronger - Kanye West
Stronger - Kanye West
Flashing Lights - Kanye West ...
Stronger - Kanye West
Through The Wire - Kanye West ...
There She Goes Again - Velvet Underground & ...
Bad News - Kanye West
Flashing Lights - Kanye West ...


In [None]:
extract2['']