In [13]:
%matplotlib inline

import pandas
from sklearn.cross_validation import train_test_split
import numpy as np
import time
from sklearn.externals import joblib
import Recommenders as Recommenders
import Evaluation as Evaluation

In [6]:
# Remote inputs (fetched over HTTP, so this cell can be slow on first run):
#   - user/song/listen-count triplets, read as a tab-separated table without a header row
#   - per-song metadata as CSV
triplets_file = 'https://static.turi.com/datasets/millionsong/10000.txt'
songs_metadata_file = 'https://static.turi.com/datasets/millionsong/song_data.csv'

# Triplets: the file is read with header=None, so the column labels are assigned here.
song_df_1 = pandas.read_table(triplets_file, header=None)
song_df_1.columns = ['user_id', 'song_id', 'listen_count']

# Song metadata.
song_df_2 = pandas.read_csv(songs_metadata_file)

# Input frame for the recommenders: every triplet row, enriched with the first
# metadata row found for its song_id. The left join keeps triplets that have no
# metadata match.
song_df = song_df_1.merge(
    song_df_2.drop_duplicates(['song_id']),
    on="song_id",
    how="left",
)

In [3]:
# Peek at the first five rows of the merged frame.
song_df.head(n=5)

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


In [7]:
# For quick experiments, uncomment to work on the first 1000 rows only:
# song_df = song_df.head(1000)

# Human-readable label "<title> - <artist_name>", stored as a new 'song' column on song_df.
song_df['song'] = song_df['title'].map(str) + " - " + song_df['artist_name']

# Popularity table: 'listen_count' here is the number of rows per song label
# (agg 'count'), not the sum of the original per-row listen counts.
song_grouped = (
    song_df
    .groupby(['song'])
    .agg({'listen_count': 'count'})
    .reset_index()
)

# Each song's share of all counted rows, in percent.
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] = song_grouped['listen_count'] / grouped_sum * 100

# Display most-counted songs first; ties are ordered alphabetically by label.
song_grouped.sort_values(by=['listen_count', 'song'], ascending=[False, True])

Unnamed: 0,song,listen_count,percentage
7127,Sehr kosmisch - Harmonia,8277,0.41385
9084,Undo - Björk,7032,0.35160
2068,Dog Days Are Over (Radio Edit) - Florence + Th...,6949,0.34745
9880,You're The One - Dwight Yoakam,6412,0.32060
6774,Revelry - Kings Of Leon,6145,0.30725
7115,Secrets - OneRepublic,5841,0.29205
3613,Horn Concerto No. 4 in E flat K495: II. Romanc...,5385,0.26925
2717,Fireflies - Charttraxx Karaoke,4795,0.23975
3485,Hey_ Soul Sister - Train,4758,0.23790
8847,Tive Sim - Cartola,4548,0.22740


In [8]:
# Distinct user ids present in the merged interaction frame.
users = song_df['user_id'].unique()

In [10]:
# Number of distinct users.
len(users)

76353

In [11]:
# Distinct values of the 'song' label column ("<title> - <artist_name>"), and how many there are.
songs = song_df['song'].unique()
len(songs)

9953

In [12]:
# Hold out 20% of the rows as the test split; random_state is fixed so the
# shuffle (and therefore the split) is the same on every run.
# NOTE(review): this notebook imports train_test_split from sklearn.cross_validation,
# which current scikit-learn releases no longer ship (it lives in sklearn.model_selection).
train_data, test_data = train_test_split(
    song_df,
    test_size=0.20,
    random_state=0,
)
print(train_data.head(n=5))

                                          user_id             song_id  \
608812   7b8fbe766a49e5d7618452149dfab920621fc4fb  SOJJYDE12AF729FC16   
623729   d24956cd68ff84b6d0271286ae6866ee1c89ff77  SOPQGWI12A8C135DDB   
583106   da7b91b6cab1ca11227ee7720c4d2e03e8c31579  SOCOIIG12A58A7D151   
435735   5f633da6ad4845350949c3c76ce6c4ef6f167476  SOQQTBB12AB0182F1D   
1361953  01ad0fabd01af750700a1e80bb0055abcb3edd28  SOVYNVS12AC3DF64AB   

         listen_count                      title  \
608812              1     Two Is Better Than One   
623729              2              Royal Gregory   
583106              1                 Mr Sandman   
435735              2  A Days Work (feat. P.O.S)   
1361953             2      Rockin' Rollin' Stone   

                              release                             artist_name  \
608812                     Love Drunk  Boys Like Girls featuring Taylor Swift   
623729                             LP                               Holy Fuck   
58310

In [14]:
# Build the popularity recommender from the local Recommenders module, using only the
# training split; 'user_id' and 'song' are the column names handed to create().
# NOTE(review): presumably create() ranks songs by how often they occur in train_data — confirm in Recommenders.py.
pm = Recommenders.popularity_recommender_py()
pm.create(train_data, 'user_id', 'song')

In [15]:
# Ask the popularity model for recommendations for one example user (the entry at position 5 of `users`).
# NOTE(review): `users` was taken from the full song_df, so this user is not guaranteed to appear in train_data.
user_id = users[5]
pm.recommend(user_id)

Unnamed: 0,user_id,song,score,Rank
7127,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Sehr kosmisch - Harmonia,6630,1.0
9084,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Undo - Björk,5639,2.0
2068,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Dog Days Are Over (Radio Edit) - Florence + Th...,5592,3.0
9880,4bd88bfb25263a75bbdd467e74018f4ae570e5df,You're The One - Dwight Yoakam,5143,4.0
6774,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Revelry - Kings Of Leon,4938,5.0
7115,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Secrets - OneRepublic,4627,6.0
3613,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Horn Concerto No. 4 in E flat K495: II. Romanc...,4368,7.0
2717,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Fireflies - Charttraxx Karaoke,3835,8.0
3485,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Hey_ Soul Sister - Train,3819,9.0
8847,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Tive Sim - Cartola,3707,10.0
