# **Import Libraries**

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from scipy.spatial.distance import cdist
import warnings
import import_ipynb
from Utility import *

warnings.simplefilter(action='ignore', category=FutureWarning)
import random

warnings.filterwarnings("ignore")
from sklearn.cluster import KMeans

importing Jupyter notebook from Utility.ipynb


## Read Music Data by Artist

In [2]:
# Load the per-artist table from disk and preview its first rows.
artist_features_path = "datasets/data_by_artist.csv"
data = pd.read_csv(artist_features_path)
data.head()

Unnamed: 0,mode,count,acousticness,artists,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,key
0,1,9,0.590111,"""Cats"" 1981 Original London Cast",0.467222,250318.555556,0.394003,0.0114,0.290833,-14.448,0.210389,117.518111,0.3895,38.333333,5
1,1,26,0.862538,"""Cats"" 1983 Broadway Cast",0.441731,287280.0,0.406808,0.081158,0.315215,-10.69,0.176212,103.044154,0.268865,30.576923,5
2,1,7,0.856571,"""Fiddler On The Roof” Motion Picture Chorus",0.348286,328920.0,0.286571,0.024593,0.325786,-15.230714,0.118514,77.375857,0.354857,34.857143,0
3,1,27,0.884926,"""Fiddler On The Roof” Motion Picture Orchestra",0.425074,262890.962963,0.24577,0.073587,0.275481,-15.63937,0.1232,88.66763,0.37203,34.851852,0
4,1,7,0.510714,"""Joseph And The Amazing Technicolor Dreamcoat""...",0.467143,270436.142857,0.488286,0.0094,0.195,-10.236714,0.098543,122.835857,0.482286,43.0,5


In [3]:
# Export a four-column subset of the artist table to artist_data.csv.
artist_columns = ['valence', 'artists', 'count', 'popularity']
artist_data = data.loc[:, artist_columns]
artist_data.to_csv('artist_data.csv')

In [4]:
# Seed the sampler so the same subset of rows is drawn on every Restart & Run All.
random.seed(42)
p = 0.02  # probability of keeping each data row, i.e. roughly a 2% sample
# Row 0 (the header) is always kept; every other row is skipped with probability 1 - p.
# on_bad_lines='skip' silently drops malformed rows; it replaces the deprecated
# error_bad_lines=False / warn_bad_lines=False pair, which pandas 2.0 removed.
df_playlist_spotify = pd.read_csv(r"datasets/spotify_dataset.csv", on_bad_lines='skip',
                                  skiprows=lambda i: i > 0 and random.random() > p)
df_playlist_spotify.head()

Unnamed: 0,user_id,"""artistname""","""trackname""","""playlistname"""
0,9cc0cfd4d7d7885102480dd99e7a90d6,Tiffany Page,7 Years Too Late,HARD ROCK 2010
1,9cc0cfd4d7d7885102480dd99e7a90d6,Paul McCartney,Live And Let Die,HARD ROCK 2010
2,9cc0cfd4d7d7885102480dd99e7a90d6,Tom Petty And The Heartbreakers,American Girl,IOW 2012
3,9cc0cfd4d7d7885102480dd99e7a90d6,Noah And The Whale,L.I.F.E.G.O.E.S.O.N.,IOW 2012
4,07f0fc3be95dcd878966b1f9572ff670,C418,Thirteen,C418


In [5]:
# Normalise the header: strip the quote characters, the 'name' suffix and any
# spaces, in that order, so the columns become user_id / artist / track / playlist.
cleaned_columns = df_playlist_spotify.columns
for fragment in ('"', 'name', ' '):
    cleaned_columns = cleaned_columns.str.replace(fragment, '')
df_playlist_spotify.columns = cleaned_columns
df_playlist_spotify.columns

Index(['user_id', 'artist', 'track', 'playlist'], dtype='object')

In [None]:
### For the recommender system, we keep only the artists that appear at least 50 times

In [6]:
# Count playlist rows per artist, then keep only the rows whose artist occurs
# at least 50 times. Row order and index labels are preserved.
rows_per_artist = df_playlist_spotify['artist'].value_counts()
frequent_artists = rows_per_artist.index[rows_per_artist >= 50]
df_playlist = df_playlist_spotify[df_playlist_spotify['artist'].isin(frequent_artists)]
print(df_playlist)

                                 user_id                           artist  \
1       9cc0cfd4d7d7885102480dd99e7a90d6                   Paul McCartney   
2       9cc0cfd4d7d7885102480dd99e7a90d6  Tom Petty And The Heartbreakers   
3       9cc0cfd4d7d7885102480dd99e7a90d6               Noah And The Whale   
7       07f0fc3be95dcd878966b1f9572ff670                          Ratatat   
10      07f0fc3be95dcd878966b1f9572ff670                Infected Mushroom   
...                                  ...                              ...   
258306  2302bf9c64dc63d88a750215ed187f2c                            JAY Z   
258307  2302bf9c64dc63d88a750215ed187f2c                  Imagine Dragons   
258308  2302bf9c64dc63d88a750215ed187f2c                             Bush   
258309  2302bf9c64dc63d88a750215ed187f2c               The Rolling Stones   
258310  2302bf9c64dc63d88a750215ed187f2c  Tom Petty And The Heartbreakers   

                             track          playlist  
1                 Li

In [7]:
# For every row, look up how many distinct artists its user has, and keep only
# the rows of users with at least 10 distinct artists.
artists_per_user = df_playlist.groupby('user_id')['artist'].transform('nunique')
df_playlist = df_playlist[artists_per_user >= 10]
print(df_playlist)

                                 user_id             artist  \
7       07f0fc3be95dcd878966b1f9572ff670            Ratatat   
10      07f0fc3be95dcd878966b1f9572ff670  Infected Mushroom   
12      07f0fc3be95dcd878966b1f9572ff670              Flume   
14      07f0fc3be95dcd878966b1f9572ff670          In Flames   
19      07f0fc3be95dcd878966b1f9572ff670        Murray Gold   
...                                  ...                ...   
258296  488ead471813ae982119153f792e4a3e              JAY Z   
258298  488ead471813ae982119153f792e4a3e            Tove Lo   
258299  488ead471813ae982119153f792e4a3e         Katy Perry   
258302  488ead471813ae982119153f792e4a3e            Madonna   
258303  488ead471813ae982119153f792e4a3e            Shakira   

                                                track playlist  
7                                       Tacobel Canon  Electro  
10                                        Heavyweight   Soirée  
12                  Sleepless (feat. Jezzabell D

In [8]:
# Count how many playlist rows each (user_id, artist) pair has and list the
# pairs from most to least frequent. reset_index(name='freq') names the count
# column directly instead of renaming the default integer column 0.
df_freq = (
    df_playlist.groupby(['user_id', 'artist'])
    .size()
    .reset_index(name='freq')
    .sort_values(['freq'], ascending=False)
)
df_freq.head()

Unnamed: 0,user_id,artist,freq
7152,26b51e580277e131f87e4c7ee4c0887a,Vitamin String Quartet,75
11609,414050deadb38aafd8d4ad22ca634055,Vitamin String Quartet,49
43796,fa849dabeb14a2800ad5130907fc5018,Ella Fitzgerald,44
43841,fa849dabeb14a2800ad5130907fc5018,Peggy Lee,39
8692,2e221a9fa50e6487e8a3b7c7ac8c806a,Ennio Morricone,37


In [9]:
# Build an artist lookup table: each distinct artist (in order of first
# appearance in df_freq) gets a sequential integer artist_id starting at 0.
df_artist = (
    pd.Series(df_freq["artist"].unique(), name='artist')
    .rename_axis('artist_id')
    .reset_index()
)
df_artist.head()

Unnamed: 0,artist_id,artist
0,0,Vitamin String Quartet
1,1,Ella Fitzgerald
2,2,Peggy Lee
3,3,Ennio Morricone
4,4,Frank Sinatra


In [10]:
# Inputs for the recommender: the favourite artist's name and how many
# recommendations to request.
favorite_artist = 'Ella Fitzgerald'
n_recommendations = 10
recommended_artist_and_songs = recommend_artist_and_songs(favorite_artist, n_recommendations)
# Show the result; the recorded output lists one song per recommended artist.
print(recommended_artist_and_songs)

                 artist                                              songs
0       Lata Mangeshkar                                  Aa Ab Laut Chalen
1           Dean Martin                                    'Til I Find You
2       Ella Fitzgerald                                         'Deed I Do
3        The Beach Boys  "Cassius" Love Vs. "Sonny" Wilson - Remastered...
4  Ludwig van Beethoven  12 Variations on "Ein Mädchen oder Weibchen", ...
5    The Rolling Stones  (I Can't Get No) Satisfaction - (Original Sing...
6          Irina Salkow              Kapitel 1 - Der Page und die Herzogin
7         Fleetwood Mac                            Albatross - 2018 Master
8             Bob Dylan  (Quinn the Eskimo) The Mighty Quinn - Live at ...
9           Miles Davis                                    'Round Midnight


In [11]:
# TODO: refactor and clean up