## SONG RECOMMENDER SYSTEM

In [1]:
# First, load our dependencies.

In [2]:
%matplotlib inline
import pandas 
from sklearn.cross_validation import train_test_split
import numpy as np
import time
from sklearn.externals import joblib
import Recommenders as Recommenders
import Evaluation as Evaluation



## Loading the music data

In [3]:
# Read userid-songid-listen-copunt triplets
triplets_file = 'https://static.turi.com/datasets/millionsong/10000.txt'
song_metadata_file = "song_data.csv"
# DataFrame for our triplet file
song_df1 = pandas.read_table(triplets_file,header = None)
song_df1.columns = ['user_id','song_id','listen_count']
# Dataframe for meta data file
song_df2 = pandas.read_csv(song_metadata_file)
#Merge the above two dataframes to get a single input data frame for recommender system
song_df = pandas.merge(song_df1,song_df2.drop_duplicates(['song_id']),on="song_id",how='left')


In [4]:
song_df.head()
# To find length of data set
print("total numberof training data:",len(song_df))
# Create a subset of dataset
song_df = song_df.head(10000)



total numberof training data: 2000000


## Now , merge song title and song artist into one column for our analysis

In [5]:
song_df['song'] = song_df['title'].map(str)+"-"+song_df['artist_name']

## Let's show the most popular songs
 This step is not necessary. Just some feature engineering for ease of analysis.

In [6]:
song_grouped = song_df.groupby(['song']).agg({'listen_count':'count'}).reset_index()
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] =(song_grouped['listen_count']/(grouped_sum))*100
song_grouped.sort_values(['listen_count','song'],ascending = [0,1])

Unnamed: 0,song,listen_count,percentage
3660,Sehr kosmisch-Harmonia,45,0.45
4678,Undo-Björk,32,0.32
5105,You're The One-Dwight Yoakam,32,0.32
1071,Dog Days Are Over (Radio Edit)-Florence + The ...,28,0.28
3655,Secrets-OneRepublic,28,0.28
4378,The Scientist-Coldplay,27,0.27
4712,Use Somebody-Kings Of Leon,27,0.27
3476,Revelry-Kings Of Leon,26,0.26
1387,Fireflies-Charttraxx Karaoke,24,0.24
1862,Horn Concerto No. 4 in E flat K495: II. Romanc...,23,0.23


In [16]:
# TO count the number of unique users
users = song_df['user_id'].unique()
len(num_users)

365

In [8]:
# To count the number of unique songs:
songs = song_df['song'].unique()
len(songs)


5151

## Split the whole data into training and testing data 

In [9]:
train_data , test_data = train_test_split(song_df,test_size = 0.20, random_state = 0)
print(train_data.head(5))

                                       user_id             song_id  \
7389  94d5bdc37683950e90c56c9b32721edb5d347600  SOXNZOW12AB017F756   
9275  1012ecfd277b96487ed8357d02fa8326b13696a5  SOXHYVQ12AB0187949   
2995  15415fa2745b344bce958967c346f2a89f792f63  SOOSZAZ12A6D4FADF8   
5316  ffadf9297a99945c0513cd87939d91d8b602936b  SOWDJEJ12A8C1339FE   
356   5a905f000fc1ff3df7ca807d57edb608863db05d  SOAMPRJ12A8AE45F38   

      listen_count                 title  \
7389             2      Half Of My Heart   
9275             1  The Beautiful People   
2995             1     Sanctify Yourself   
5316             4     Heart Cooks Brain   
356             20                 Rorol   

                                                release      artist_name  \
7389                                     Battle Studies       John Mayer   
9275             Antichrist Superstar (Ecopac Explicit)   Marilyn Manson   
2995                             Glittering Prize 81/92     Simple Minds   
5316  Ever

In [10]:
# Simple popularity based recommender...Not exclusive for a particular user
pm = Recommenders.popularity_recommender()
pm.create(train_data,'user_id','song')

In [44]:
# Using recommender to make some recommendation
user_id = users[7]
pm.recommend(user_id)

Unnamed: 0,user_id,song,score,Rank
3194,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Sehr kosmisch-Harmonia,37,1.0
4083,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Undo-Björk,27,2.0
931,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Dog Days Are Over (Radio Edit)-Florence + The ...,24,3.0
4443,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,You're The One-Dwight Yoakam,24,4.0
3034,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Revelry-Kings Of Leon,21,5.0
3189,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Secrets-OneRepublic,21,6.0
4112,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Use Somebody-Kings Of Leon,21,7.0
1207,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Fireflies-Charttraxx Karaoke,20,8.0
1577,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Hey_ Soul Sister-Train,19,9.0
1626,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Horn Concerto No. 4 in E flat K495: II. Romanc...,19,10.0


## For a song recommender with personalization , we will use item similarity filter based recommender

In [50]:
is_model = Recommenders.item_similarity_recommender()
is_model.create(train_data,'user_id','song')


In [52]:
user_item = is_model.get_user_items(user_id)
print("Users history music data:\n",user_item)
print("Recommendations for User id",user_id,":")
is_model.recommend(user_id)

Users history music data:
 ['Swallowed In The Sea-Coldplay', 'Life In Technicolor ii-Coldplay', 'Life In Technicolor-Coldplay', 'The Scientist-Coldplay', 'Trouble-Coldplay', 'Strawberry Swing-Coldplay', 'Lost!-Coldplay', 'Clocks-Coldplay']
Recommendations for User id 9d6f0ead607ac2a6c2460e4d14fb439a146b7dec :
No. of unique songs for the user: 8
no. of unique songs in the training set: 4483
Non zero values in cooccurence_matrix :3429


Unnamed: 0,user_id,song,score,rank
0,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,She Just Likes To Fight-Four Tet,0.281579,1
1,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Warning Sign-Coldplay,0.281579,2
2,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,We Never Change-Coldplay,0.281579,3
3,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Puppetmad-Puppetmastaz,0.281579,4
4,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,God Put A Smile Upon Your Face-Coldplay,0.281579,5
5,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Susie Q-Creedence Clearwater Revival,0.281579,6
6,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,The Joker-Fatboy Slim,0.281579,7
7,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Korg Rhythm Afro-Holy Fuck,0.281579,8
8,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,This Unfolds-Four Tet,0.281579,9
9,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,high fives-Four Tet,0.281579,10


In [53]:
song = 'Yellow - Coldplay'
is_model.get_similar_items(['Yellow - Coldplay'])

no. of unique songs in the training set: 4483
Non zero values in cooccurence_matrix :0


Unnamed: 0,user_id,song,score,rank
0,,Cover My Eyes-La Roux,0.0,1
1,,The Carpal Tunnel Of Love-Fall Out Boy,0.0,2
2,,The Whole World-Outkast Featuring Killer Mike,0.0,3
3,,Go With The Flow-Queens Of The Stone Age,0.0,4
4,,She Just Likes To Fight-Four Tet,0.0,5
5,,Mourning Air-Portishead,0.0,6
6,,Break Through-Colbie Caillat,0.0,7
7,,Creepin Up The Backstairs-The Fratellis,0.0,8
8,,Warning Sign-Coldplay,0.0,9
9,,Your Arms Feel Like home-3 Doors Down,0.0,10
