# Build a song recommender system

In [1]:
import turicreate

# Load some music data

In [3]:
# Load the listening records from the SFrame at this relative path.
song_data_path = 'song_data.sframe'
song_data = turicreate.SFrame(song_data_path)

# Explore our data

In [14]:
# Order the rows from the highest listen_count to the lowest; as the last
# expression in the cell, the sorted SFrame is what gets displayed.
songs_by_listen_count = song_data.sort('listen_count', ascending=False)
songs_by_listen_count

user_id,song_id,listen_count,title,artist
50996bbabb6f7857bf0c80194 35b5246a0e45cfd ...,SOUAGPQ12A8AE47B3A,920,Crack Under Pressure,Righteous Pigs
bb85bb79612e5373ac714fcd4 469cabeb5ed94e1 ...,SOZQSVB12A8C13C271,796,Paradise & Dreams,Darren Styles
c012ec364329bb08cbe3e62fe 76db31f8c5d8ec3 ...,SOBONKR12A58A7A7E0,683,You're The One,Dwight Yoakam
70caceccaa745b6f7bc2898a1 54538eb1ada4d5a ...,SOPREHY12AB01815F9,676,I'm On A Boat,The Lonely Island / T-Pain ...
d2232ac7a1ec17b283b5dff24 3161902b2cb706c ...,SOLGIWB12A58A77A05,649,Reelin' In The Years,Steely Dan
f5363481018dc87e8b06f9451 e99804610a594fa ...,SOVRIPE12A6D4FEA19,605,Can't Help But Wait (Album Version) ...,Trey Songz
f1bdbb9fb7399b402a09fa124 210dedf78e76034 ...,SOZPMJT12AAF3B40D1,585,The Quest,HYPOCRISY
70caceccaa745b6f7bc2898a1 54538eb1ada4d5a ...,SOJCRUY12A67ADA4C2,553,Fast Car (LP Version),Tracy Chapman
c012ec364329bb08cbe3e62fe 76db31f8c5d8ec3 ...,SOZOWON12A67ADA091,500,Glamorous,Fergie / Ludacris
2be4e2736f580dd4fe7b489f6 75935ccfad0a453 ...,SOXBCZH12A67ADAD77,500,Peace Train,Cat Stevens

song
Crack Under Pressure - Righteous Pigs ...
Paradise & Dreams - Darren Styles ...
You're The One - Dwight Yoakam ...
I'm On A Boat - The Lonely Island / T-Pain ...
Reelin' In The Years - Steely Dan ...
Can't Help But Wait (Album Version) - Trey ...
The Quest - HYPOCRISY
Fast Car (LP Version) - Tracy Chapman ...
Glamorous - Fergie / Ludacris ...
Peace Train - Cat Stevens


## Show the most popular songs in the dataset

In [19]:
# Rank songs by the number of rows in which each one appears. The 'song'
# column is the same item column the recommenders below are trained on;
# counting 'user_id' here would rank the most active users, not the songs.
song_counts = song_data['song'].value_counts()
song_counts.sort('count', ascending=False).show()

# Count the number of unique users and songs in the data

In [26]:
# Collect the distinct identifiers found in each ID column.
user_id_column = song_data['user_id']
song_id_column = song_data['song_id']

users = user_id_column.unique()
songs = song_id_column.unique()

In [28]:
# Print the number of distinct users first, then the number of distinct songs.
for unique_ids in (users, songs):
    print(len(unique_ids))

66346
10000


# Create a song recommender

In [29]:
# Randomly split the rows with fraction .8 going to the first part; the
# fixed seed keeps the split repeatable across runs.
split_parts = song_data.random_split(.8, seed=0)
train_data, test_data = split_parts

## Create a very simple popularity recommender

In [30]:
# Fit the popularity recommender on the training split, using 'user_id' as
# the user column and 'song' as the item column.
popularity_model = turicreate.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)

## Use the popularity model to make some predictions

In [31]:
# Recommendations from the popularity model for the first entry in `users`.
first_user = users[0]
popularity_model.recommend(users=[first_user])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sehr kosmisch - Harmonia,4754.0,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Undo - Björk,4227.0,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,You're The One - Dwight Yoakam ...,3781.0,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Revelry - Kings Of Leon,3527.0,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Secrets - OneRepublic,3148.0,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Hey_ Soul Sister - Train,2538.0,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Tive Sim - Cartola,2521.0,10


In [32]:
# Recommendations from the popularity model for the second entry in `users`.
second_user = users[1]
popularity_model.recommend(users=[second_user])

user_id,song,score,rank
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Sehr kosmisch - Harmonia,4754.0,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Undo - Björk,4227.0,2
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,You're The One - Dwight Yoakam ...,3781.0,3
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Revelry - Kings Of Leon,3527.0,5
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Secrets - OneRepublic,3148.0,7
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Hey_ Soul Sister - Train,2538.0,8
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Tive Sim - Cartola,2521.0,10


# Build a recommender with personalization

In [35]:
# Fit the item-similarity recommender on the training split, using 'user_id'
# as the user column and 'song' as the item column.
personalized_model = turicreate.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)

## Apply personalized model to make song recommendations

In [36]:
# Recommendations from the personalized model for the first entry in `users`.
first_user = users[0]
personalized_model.recommend(users=[first_user])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Riot In Cell Block Number Nine - Dr Feelgood ...,0.0374999940395355,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sei Lá Mangueira - Elizeth Cardoso ...,0.0331632643938064,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,The Stallion - Ween,0.0322580635547637,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Rain - Subhumans,0.0314159244298934,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,West One (Shine On Me) - The Ruts ...,0.0306771993637084,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Back Against The Wall - Cage The Elephant ...,0.0301204770803451,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Life Less Frightening - Rise Against ...,0.0284431129693985,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,A Beggar On A Beach Of Gold - Mike And The ...,0.023002490401268,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Audience Of One - Rise Against ...,0.0193938463926315,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Blame It On The Boogie - The Jacksons ...,0.0189873427152633,10


In [37]:
# Recommendations from the personalized model for the second entry in `users`.
second_user = users[1]
personalized_model.recommend(users=[second_user])

user_id,song,score,rank
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Grind With Me (Explicit Version) - Pretty Ricky ...,0.0459424376487731,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,There Goes My Baby - Usher ...,0.0331920742988586,2
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Panty Droppa [Intro] (Album Version) - Trey ...,0.031856620311737,3
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Nobody (Featuring Athena Cage) (LP Version) - ...,0.0278467655181884,4
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Youth Against Fascism - Sonic Youth ...,0.0262914180755615,5
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Nice & Slow - Usher,0.0239639401435852,6
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Making Love (Into The Night) - Usher ...,0.0238176941871643,7
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Naked - Marques Houston,0.0228925704956054,8
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,I.nner Indulgence - DESTRUCTION ...,0.0220767498016357,9
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Love Lost (Album Version) - Trey Songz ...,0.0204497694969177,10


# Apply model to find similar songs in the data set

In [38]:
# Look up the songs the personalized model considers closest to this one.
query_songs = ['With Or Without You - U2']
personalized_model.get_similar_items(query_songs)

song,similar,score,rank
With Or Without You - U2,I Still Haven't Found What I'm Looking For ...,0.0428571701049804,1
With Or Without You - U2,Hold Me_ Thrill Me_ Kiss Me_ Kill Me - U2 ...,0.033734917640686,2
With Or Without You - U2,Window In The Skies - U2,0.032835841178894,3
With Or Without You - U2,Vertigo - U2,0.030075192451477,4
With Or Without You - U2,Sunday Bloody Sunday - U2,0.0271317958831787,5
With Or Without You - U2,Bad - U2,0.0251798629760742,6
With Or Without You - U2,A Day Without Me - U2,0.0237154364585876,7
With Or Without You - U2,Another Time Another Place - U2 ...,0.0203251838684082,8
With Or Without You - U2,Walk On - U2,0.0202020406723022,9
With Or Without You - U2,Get On Your Boots - U2,0.0196850299835205,10


In [39]:
# Look up the songs the personalized model considers closest to this one.
query_songs = ['Chan Chan (Live) - Buena Vista Social Club']
personalized_model.get_similar_items(query_songs)

song,similar,score,rank
Chan Chan (Live) - Buena Vista Social Club ...,Murmullo - Buena Vista Social Club ...,0.1881188154220581,1
Chan Chan (Live) - Buena Vista Social Club ...,La Bayamesa - Buena Vista Social Club ...,0.1871921420097351,2
Chan Chan (Live) - Buena Vista Social Club ...,Amor de Loca Juventud - Buena Vista Social Club ...,0.1848341226577758,3
Chan Chan (Live) - Buena Vista Social Club ...,Diferente - Gotan Project,0.0214592218399047,4
Chan Chan (Live) - Buena Vista Social Club ...,Mistica - Orishas,0.0205761194229125,5
Chan Chan (Live) - Buena Vista Social Club ...,Hotel California - Gipsy Kings ...,0.0193049907684326,6
Chan Chan (Live) - Buena Vista Social Club ...,Nací Orishas - Orishas,0.0191571116447448,7
Chan Chan (Live) - Buena Vista Social Club ...,Gitana - Willie Colon,0.0187969803810119,8
Chan Chan (Live) - Buena Vista Social Club ...,Le Moulin - Yann Tiersen,0.0187969803810119,9
Chan Chan (Live) - Buena Vista Social Club ...,Criminal - Gotan Project,0.0187793374061584,10


# Compare the models quantitatively
We now formally compare the popularity and personalized models using precision and recall at cutoffs 1 through 10, estimated on a 5% sample of users (`user_sample=.05`).

In [67]:
%matplotlib inline
# Evaluate both models on the test split using a 5% sample of users. The
# list order determines the labels in the output: the first model is reported
# as M0 and the second as M1.
models_to_compare = [popularity_model, personalized_model]
model_performance = turicreate.recommender.util.compare_models(
    test_data,
    models_to_compare,
    user_sample=.05,
)



compare_models: using 2931 users to estimate model performance
PROGRESS: Evaluate model M0





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.02320027294438758  | 0.006561737856517794 |
|   2    | 0.021494370522006156 | 0.012063290684068579 |
|   3    | 0.020243375412259776 |  0.0160928882141779  |
|   4    | 0.019959058341862835 | 0.02140686799079842  |
|   5    | 0.01862845445240535  | 0.02484108493576255  |
|   6    | 0.017968838849084467 | 0.028379355490409756 |
|   7    | 0.017205244431447105 | 0.03232348937313112  |
|   8    | 0.01624872057318321  | 0.03429446278729799  |
|   9    | 0.015770120171348437 | 0.03741837884621922  |
|   10   | 0.01490958717161385  | 0.039900873038027654 |
+--------+----------------------+----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.026953258273626747 | 0.008091823828773681 |
|   2    | 0.02541794609348346  | 0.013349244106664783 |
|   3    | 0.022176731490958716 | 0.01781846229133845  |
|   4    | 0.01944728761514843  | 0.02093824519617766  |
|   5    | 0.01746844080518595  | 0.023184903916736076 |
|   6    | 0.01609234618446493  |  0.0254604005115776  |
|   7    | 0.015011941316956668 | 0.027656952964015425 |
|   8    | 0.01403104742408735  | 0.02963850755560068  |
|   9    | 0.013381856780014403 | 0.03164097123923124  |
|   10   | 0.012794268167860815 |  0.0332254973503694  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]



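In this run, the tables show that the personalized model (M1) has slightly higher mean precision and recall than the popularity model (M0) at cutoffs 1–3, but lower mean precision and recall at cutoffs 4–10. On this 5% user sample, the output above therefore does not show the personalized model providing much better performance.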