# Import necessary libraries

In [1]:
%matplotlib inline

import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import time
import joblib
import Recommenders as Recommenders
import Evaluation as Evaluation

# Import datasets

In [2]:
# Read the two raw CSV files: the listen records and the song metadata table.
triplets_path = 'triplets_file/triplets_file.csv'
metadata_path = 'song_data/song_data.csv'
song_df1 = pd.read_csv(triplets_path)
song_df2 = pd.read_csv(metadata_path)

# Exploring the datasets

In [3]:
# Peek at the first five listen records.
song_df1.head(n=5)

Unnamed: 0,user_id,song_id,listen_count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1


In [4]:
# Peek at the first five rows of the song metadata.
song_df2.head(n=5)

Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


# Performing left-join operation on dataframes

In [5]:
# Left join: every row of song_df1 is kept and gains the metadata columns for
# its song_id. The metadata is de-duplicated on song_id first so that a song
# listed more than once cannot multiply the joined rows.
unique_song_rows = song_df2.drop_duplicates(['song_id'])
song_df = song_df1.merge(unique_song_rows, how="left", on="song_id")

In [6]:
# Inspect the first five rows of the merged frame.
song_df.head(n=5)

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


In [7]:
# Number of rows in the merged frame.
song_df.shape[0]

2000000

In [8]:
# Continue with a random subset of at most 20,000 rows of the merged frame.
# random_state pins the draw so that a fresh "Restart & Run All" reproduces the
# same subset (and therefore the same counts, split and recommendations below);
# min() keeps the cell from raising when fewer than 20,000 rows are available.
song_df = song_df.sample(n=min(20000, len(song_df)), random_state=0)

# Store the song name as "*title* - *artist name*" in a new column

In [9]:
# Build a display key of the form "<title> - <artist_name>".
# The title is passed through str() element-wise before concatenation.
title_text = song_df['title'].map(str)
song_df['song_name'] = title_text + " - " + song_df['artist_name']
song_df.head(n=5)

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year,song_name
1348300,f38c1490438e2df9c7affcfee4b7644077d7ae2a,SOCBSZW12AB01891C1,3,XRDS,Ritual Noise,Covenant,0,XRDS - Covenant
493973,9a95678196cc9d20f4f3ff7a4e9595dd4a6b7ab2,SOHHHYJ12AB0184D60,1,Ocean View,Goodbye Manhattan,Pieces Of A Dream,2007,Ocean View - Pieces Of A Dream
229141,44e0cf636b7f1b6fe5269e557927eb53ef29b730,SOMHWDR12A8C1380BA,1,Ocean Breathes Salty,Good News For People Who Love Bad News,Modest Mouse,2004,Ocean Breathes Salty - Modest Mouse
1141360,c6f2c209d305694a60cf66251dba4223ee1f9c0c,SOFRCGW12A81C21EA6,2,Hey There Delilah,Now That's What I Call Music! 68,Plain White T's,2005,Hey There Delilah - Plain White T's
850432,98e8cd9a8e4f0255a511a380a3ad0e3cc94c7f56,SOCBWRP12A8C1311DB,1,Hey,Wave Of Mutilation: Best Of Pixies,Pixies,1989,Hey - Pixies


# Calculating each song's percentage share of the listen records

In [10]:
# Per-song record counts and their share of the total.
# Note: 'count' tallies the rows recorded for each song_name; it does not add
# up the listen_count values themselves.
rows_per_song = song_df.groupby(['song_name']).agg({'listen_count': 'count'})
song_grouped = rows_per_song.reset_index()
listen_count_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] = (song_grouped['listen_count'] / listen_count_sum) * 100
song_grouped

Unnamed: 0,song_name,listen_count,percentage
0,#40 - DAVE MATTHEWS BAND,1,0.005
1,& Down - Boys Noize,9,0.045
2,' Cello Song - Nick Drake,4,0.020
3,'Round Midnight - Miles Davis,1,0.005
4,'Til We Die (Album Version) - Slipknot,6,0.030
...,...,...,...
7263,sun drums and soil - Four Tet,1,0.005
7264,teachme (Album Version) - Musiq Soulchild,1,0.005
7265,you were there with me - Four Tet,1,0.005
7266,¡Viva La Gloria! (Album Version) - Green Day,1,0.005


# Number of unique users in the sampled subset of the dataset

In [11]:
# Distinct user ids present in the working frame, and how many there are.
users = pd.unique(song_df['user_id'])
len(users)

15507

# Number of unique songs in the sampled subset of the dataset

In [12]:
# Distinct song_name values present in the working frame, and how many there are.
songs = pd.unique(song_df['song_name'])
len(songs)

7268

# Train-test split 

In [13]:
# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle.
split_frames = train_test_split(song_df, test_size=0.2, random_state=0)
train_data, test_data = split_frames
train_data.head(n=5)

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year,song_name
167978,03f7e4ac0d94229253aa4c5f6801e23d268ba1aa,SOPPPWN12A8C135DCD,1,Milkshake,LP,Holy Fuck,2007,Milkshake - Holy Fuck
1925651,20215b21d188499c7e4a3ab833f52ecef3be1bdd,SORXEUZ12AC3DF6E3F,2,Bend & Flush,All The Filth! (With Extra Filth),Pork Dukes,0,Bend & Flush - Pork Dukes
339762,532ba1de8872c56d2bc02f749f7ca10160ef2008,SOKCRPL12AB018D1A2,1,Someone's Missing,Congratulations,MGMT,2010,Someone's Missing - MGMT
1969526,e09032fff6dd20acb3771c2209b1a46e55d2cae7,SOOABBO12A6701DFDA,1,Flake,Brushfire Fairytales,Jack Johnson,2000,Flake - Jack Johnson
739423,155dfdd4cf9acb4d66423ae1e9013f007ce813fa,SONAZWN12A8151C957,1,White Winter Hymnal,White Winter Hymnal,Fleet Foxes,2008,White Winter Hymnal - Fleet Foxes


# Creating popularity based recommender model

In [14]:
# Instantiate the popularity recommender from the local Recommenders module and
# build it from the training split only (test_data is not passed in).
# NOTE(review): 'user_id' and 'song_name' are columns of train_data; presumably
# create() treats them as the user and item keys — confirm in Recommenders.py.
popularity_recommender = Recommenders.popularity_recommender_py()
popularity_recommender.create(train_data, 'user_id', 'song_name')

In [15]:
# Pick the first demo user reproducibly, and only from users that appear in the
# training split: a user whose rows all landed in test_data would leave the
# later item-similarity demo without any training history.
# A dedicated seeded generator is used so the global NumPy random state is
# left untouched.
train_users = train_data['user_id'].unique()
random_user = np.random.default_rng(0).choice(train_users)
popularity_recommender.recommend(random_user)

Unnamed: 0,user_id,song_name,score,Rank
4754,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Sehr kosmisch - Harmonia,61,1.0
6026,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Undo - Björk,59,2.0
1369,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Dog Days Are Over (Radio Edit) - Florence + Th...,54,3.0
6571,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,You're The One - Dwight Yoakam,51,4.0
4517,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Revelry - Kings Of Leon,45,5.0
2319,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Hey_ Soul Sister - Train,44,6.0
4747,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Secrets - OneRepublic,44,7.0
1795,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Fireflies - Charttraxx Karaoke,39,8.0
2408,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Horn Concerto No. 4 in E flat K495: II. Romanc...,39,9.0
1468,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Drop The World - Lil Wayne / Eminem,38,10.0


In [31]:
# Pick a second demo user reproducibly, restricted to users that appear in the
# training split so the later item-similarity demo has training history to use.
# The seed differs from the first demo user's so the two draws are independent;
# a dedicated generator leaves the global NumPy random state untouched.
train_users = train_data['user_id'].unique()
random_user2 = np.random.default_rng(1).choice(train_users)
popularity_recommender.recommend(random_user2)

Unnamed: 0,user_id,song_name,score,Rank
4754,46da7b0cca93a98e88a6366c4670691f37d1761f,Sehr kosmisch - Harmonia,61,1.0
6026,46da7b0cca93a98e88a6366c4670691f37d1761f,Undo - Björk,59,2.0
1369,46da7b0cca93a98e88a6366c4670691f37d1761f,Dog Days Are Over (Radio Edit) - Florence + Th...,54,3.0
6571,46da7b0cca93a98e88a6366c4670691f37d1761f,You're The One - Dwight Yoakam,51,4.0
4517,46da7b0cca93a98e88a6366c4670691f37d1761f,Revelry - Kings Of Leon,45,5.0
2319,46da7b0cca93a98e88a6366c4670691f37d1761f,Hey_ Soul Sister - Train,44,6.0
4747,46da7b0cca93a98e88a6366c4670691f37d1761f,Secrets - OneRepublic,44,7.0
1795,46da7b0cca93a98e88a6366c4670691f37d1761f,Fireflies - Charttraxx Karaoke,39,8.0
2408,46da7b0cca93a98e88a6366c4670691f37d1761f,Horn Concerto No. 4 in E flat K495: II. Romanc...,39,9.0
1468,46da7b0cca93a98e88a6366c4670691f37d1761f,Drop The World - Lil Wayne / Eminem,38,10.0


# Creating item-similarity based recommender model

In [17]:
# Instantiate the item-similarity recommender from the local Recommenders module
# and build it from the training split only (test_data is not passed in).
# NOTE(review): 'user_id' and 'song_name' are columns of train_data; presumably
# create() treats them as the user and item keys — confirm in Recommenders.py.
item_similarity_recommender=Recommenders.item_similarity_recommender_py()
item_similarity_recommender.create(train_data,'user_id','song_name')

In [18]:
# Show the songs the model holds for random_user, then request personalised
# recommendations for that same user.
wide_rule = "------------------------------------------------------------------------------------"
narrow_rule = "----------------------------------------------------------------------"

user_items = item_similarity_recommender.get_user_items(random_user)

print(wide_rule)
print("Training data songs for the user userid: %s:" % random_user)
print(wide_rule)

# One song per line.
for user_item in user_items:
    print(user_item)

print(narrow_rule)
print("Recommendation process going on:")
print(narrow_rule)

# The last expression is the cell's displayed result.
item_similarity_recommender.recommend(random_user)

------------------------------------------------------------------------------------
Training data songs for the user userid: c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a:
------------------------------------------------------------------------------------
White Shadows - Coldplay
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique songs for the user: 1
no. of unique songs in the training set: 6617
Non zero values in cooccurence_matrix :3


Unnamed: 0,user_id,song_name,score,rank
0,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Ego - Beyoncé,0.2,1
1,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,A Rush Of Blood To The Head - Coldplay,0.142857,2
2,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Serve The Servants - Nirvana,0.0,3
3,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Method Man - Method Man,0.0,4
4,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Lemme Get That - Rihanna,0.0,5
5,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,It's Tricky - RUN-DMC,0.0,6
6,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,Tango del Pecado - Calle 13 Featuring Bajofond...,0.0,7
7,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,When We Dance - Sting,0.0,8
8,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,All These Things I Hate [revolve Around Me] [a...,0.0,9
9,c3f324c4daaf525dc7323b6e54e3cfb18fcdf32a,The Pulse - Digitalism,0.0,10


In [32]:
# Show the songs the model holds for random_user2, then request personalised
# recommendations for that same user.
wide_rule = "------------------------------------------------------------------------------------"
narrow_rule = "----------------------------------------------------------------------"

user_items = item_similarity_recommender.get_user_items(random_user2)

print(wide_rule)
print("Training data songs for the user userid: %s:" % random_user2)
print(wide_rule)

# One song per line.
for user_item in user_items:
    print(user_item)

print(narrow_rule)
print("Recommendation process going on:")
print(narrow_rule)

# The last expression is the cell's displayed result.
item_similarity_recommender.recommend(random_user2)

------------------------------------------------------------------------------------
Training data songs for the user userid: 46da7b0cca93a98e88a6366c4670691f37d1761f:
------------------------------------------------------------------------------------
The Way Things Go - Octopus Project
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique songs for the user: 1
no. of unique songs in the training set: 6617
Non zero values in cooccurence_matrix :9


Unnamed: 0,user_id,song_name,score,rank
0,46da7b0cca93a98e88a6366c4670691f37d1761f,Heavyweight - Infected Mushroom,0.1,1
1,46da7b0cca93a98e88a6366c4670691f37d1761f,Skyscraper - Train,0.090909,2
2,46da7b0cca93a98e88a6366c4670691f37d1761f,Caring Is Creepy - The Shins,0.083333,3
3,46da7b0cca93a98e88a6366c4670691f37d1761f,Just The Two Of Us (WSM Compilation Edit) - Gr...,0.083333,4
4,46da7b0cca93a98e88a6366c4670691f37d1761f,Responsible Stu - Octopus Project,0.076923,5
5,46da7b0cca93a98e88a6366c4670691f37d1761f,If It's Love - Train,0.076923,6
6,46da7b0cca93a98e88a6366c4670691f37d1761f,Day 'N' Nite - Kid Cudi,0.071429,7
7,46da7b0cca93a98e88a6366c4670691f37d1761f,Sehr kosmisch - Harmonia,0.014286,8
8,46da7b0cca93a98e88a6366c4670691f37d1761f,Serve The Servants - Nirvana,0.0,9
9,46da7b0cca93a98e88a6366c4670691f37d1761f,Method Man - Method Man,0.0,10


# Recommending similar songs

In [20]:
# Ask the item-similarity model for songs similar to a single seed song.
seed_songs = ['Secrets - OneRepublic']
item_similarity_recommender.get_similar_items(seed_songs)

no. of unique songs in the training set: 6617
Non zero values in cooccurence_matrix :16


Unnamed: 0,user_id,song_name,score,rank
0,,The Sporting Life - The Decemberists,0.022727,1
1,,Face To Face (Demon Remix) - Daft Punk,0.022222,2
2,,One And Only - Mariah Carey / Twista,0.022222,3
3,,Kickstarts - Example,0.022222,4
4,,Satisfaction - Benny Benassi Presents The Biz,0.022222,5
5,,She Loves You - Peter Sellers,0.021739,6
6,,Wie Jetzt - Dynamite Deluxe,0.021739,7
7,,The World - Empire Of The Sun,0.021277,8
8,,How You Remind Me - Nickelback,0.018868,9
9,,A-Punk (Album) - Vampire Weekend,0.018519,10


In [22]:
# Same query for another seed song; get_similar_items is given a list, so the
# single name is wrapped in one.
song = 'White Shadows - Coldplay'
seed_list = [song]
item_similarity_recommender.get_similar_items(seed_list)

no. of unique songs in the training set: 6617
Non zero values in cooccurence_matrix :3


Unnamed: 0,user_id,song_name,score,rank
0,,Ego - Beyoncé,0.2,1
1,,A Rush Of Blood To The Head - Coldplay,0.142857,2
2,,Serve The Servants - Nirvana,0.0,3
3,,Method Man - Method Man,0.0,4
4,,Lemme Get That - Rihanna,0.0,5
5,,It's Tricky - RUN-DMC,0.0,6
6,,Tango del Pecado - Calle 13 Featuring Bajofond...,0.0,7
7,,When We Dance - Sting,0.0,8
8,,All These Things I Hate [revolve Around Me] [a...,0.0,9
9,,The Pulse - Digitalism,0.0,10
