### Importing modules

In [1]:
import pandas as pd
import numpy as np
import recommenders as rc

### Loading the dataset

In [2]:
# Read the two raw CSV inputs, in order: the listening records first, then the
# song data that is joined onto them by `song_id` in the next cell.
song_df_1, song_df_2 = (
    pd.read_csv(csv_path)
    for csv_path in ('./triplets_file.csv', './song_data.csv')
)

In [3]:
# Attach the song data to every listening record. The right-hand frame is first
# reduced to one row per `song_id` (first occurrence kept), so the left join
# cannot multiply rows of `song_df_1`.
song_df = song_df_1.merge(
    song_df_2.drop_duplicates(subset=['song_id']),
    how='left',
    on='song_id',
)
song_df.head()

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


### Data preprocessing

In [4]:
# Build a readable "<artist_name> - <title>" label per row; it is the key used
# for grouping and recommending below. A missing artist or title yields a
# missing label.
song_df['song'] = song_df['artist_name'].add(' - ').add(song_df['title'])
song_df.head()

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year,song
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0,Jack Johnson - The Cove
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976,Paco De Lucia - Entre Dos Aguas
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007,Kanye West - Stronger
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005,Jack Johnson - Constellations
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999,Foo Fighters - Learn To Fly


In [5]:
# Count the rows recorded for each song. Note that 'count' tallies the number
# of non-null `listen_count` entries per song; it does NOT sum the listens.
# The result keeps the column name `listen_count`.
song_grouped = song_df.groupby('song')['listen_count'].count().reset_index()
song_grouped.head()

Unnamed: 0,song,listen_count
0,!!! - Sweet Life,90
1,'N Sync/Phil Collins - Trashin' The Camp (Phil...,94
2,+ / - {Plus/Minus} - The Queen of Nothing,314
3,+44 - Lycanthrope,96
4,+44 - Make You Smile,130


In [6]:
# Express each song's count as a percentage of the total across all songs,
# then show the songs from most to least counted (ties broken alphabetically).
total_listen_count = song_grouped['listen_count'].sum()
song_grouped['percentage'] = song_grouped['listen_count'].div(total_listen_count).mul(100)
song_grouped.sort_values(by=['listen_count', 'song'], ascending=[False, True])

Unnamed: 0,song,listen_count,percentage
3812,Harmonia - Sehr kosmisch,8277,0.41385
1112,Björk - Undo,7032,0.35160
3288,Florence + The Machine - Dog Days Are Over (Ra...,6949,0.34745
2724,Dwight Yoakam - You're The One,6412,0.32060
4911,Kings Of Leon - Revelry,6145,0.30725
...,...,...,...
7232,Ricardo Arjona - Historia Del Portero,51,0.00255
9286,Three Days Grace - Scared,51,0.00255
434,Amparanoia - Don´t Leave Me Now,50,0.00250
4587,Juanes - No Creo En El Jamas,48,0.00240


### Popularity recommendation engine

In [7]:
# Instantiate the popularity-based recommender provided by the `recommenders`
# module (imported as `rc`); it is fitted in the next cell.
pe = rc.popularity_recommender()

In [8]:
# Fit the recommender on the merged listening data.
# NOTE(review): the 2nd and 3rd arguments are presumably the user and item
# column names — confirm against the `recommenders` module.
pe.create(song_df, 'user_id', 'song')

In [9]:
# Get recommendations based on song popularity. No user is passed here; the
# displayed result is a table of songs with a `score` and a `rank` column,
# ordered by rank.
pe.recommend()

Unnamed: 0,song,score,rank
3812,Harmonia - Sehr kosmisch,8277,1
1112,Björk - Undo,7032,2
3288,Florence + The Machine - Dog Days Are Over (Ra...,6949,3
2724,Dwight Yoakam - You're The One,6412,4
4911,Kings Of Leon - Revelry,6145,5
...,...,...,...
7232,Ricardo Arjona - Historia Del Portero,51,9949
9286,Three Days Grace - Scared,51,9950
434,Amparanoia - Don´t Leave Me Now,50,9951
4587,Juanes - No Creo En El Jamas,48,9952
