## Music Recommendation System (Machine Learning)

This project is aimed upon building a music recommendation system that gives the user recommendations on music based on his music taste by analysing his previously heard music and playlist. This project is done in two ways, using 'User - to - User Recommendation' and 'Item - to - Item Recommendation'. Birch, MiniBatchKMeans and KMeans algorithms are being used along with 'Surprise' module to compute the similarity between recommendations and user's already existing playlist for evaluation

### Obtaining Data

In [1]:
import pandas as pd
import numpy as np

In [2]:
final = pd.read_csv('datasets/final/final.csv')
metadata = pd.read_csv('datasets/final/metadata.csv')

### Model Selection - K Means Algorithm

In [3]:
from sklearn.cluster import KMeans
from sklearn.utils import shuffle

In [4]:
final = shuffle(final)

In [5]:
X = final.loc[[i for i in range(0, 6000)]]
Y = final.loc[[i for i in range(6000, final.shape[0])]]

In [6]:
X = shuffle(X)
Y = shuffle(Y)

In [7]:
metadata.head()

Unnamed: 0,track_id,album_title,artist_name,genre,track_title
0,2,AWOL - A Way Of Life,AWOL,HipHop,Food
1,3,AWOL - A Way Of Life,AWOL,HipHop,Electric Ave
2,5,AWOL - A Way Of Life,AWOL,HipHop,This World
3,10,Constant Hitmaker,Kurt Vile,Pop,Freeway
4,134,AWOL - A Way Of Life,AWOL,HipHop,Street Music


In [8]:
metadata = metadata.set_index('track_id')

In [9]:
X.drop(['label'], axis= 1, inplace= True)

KeyError: "['label'] not found in axis"

In [9]:
kmeans = KMeans(n_clusters=6)

In [10]:
Y.head()

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Holiday,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental
11234,11234,59252,0.03455,0.562571,0.577579,2.8e-05,0.243006,0.337121,100.171,0.240919,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10238,10238,47463,0.97811,0.481496,0.163413,0.014302,0.11795,0.447975,171.979,0.519498,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11134,11134,56883,1.2e-05,0.365231,0.851602,0.083557,0.08921,0.031173,108.848,0.515668,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8809,8809,39536,0.178197,0.40067,0.868561,0.738472,0.10991,0.068722,96.113,0.038607,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6842,6842,29447,0.301083,0.395698,0.722168,0.936578,0.097972,0.043087,141.988,0.109104,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
def fit(df, algo, flag=0):
    if flag:
        algo.fit(df)
    else:
         algo.partial_fit(df)          
    df['label'] = algo.labels_
    return (df, algo)

In [12]:
def predict(t, Y):
    y_pred = t[1].predict(Y)
    mode = pd.Series(y_pred).mode()
    return t[0][t[0]['label'] == mode.loc[0]]

In [13]:
def recommend(recommendations, meta, Y):
    dat = []
    for i in Y['track_id']:
        dat.append(i)
    genre_mode = meta.loc[dat]['genre'].mode()
    artist_mode = meta.loc[dat]['artist_name'].mode()
    return meta[meta['genre'] == genre_mode.iloc[0]], meta[meta['artist_name'] == artist_mode.iloc[0]], meta.loc[recommendations['track_id']]

In [14]:
t = fit(X, kmeans, 1)

In [15]:
recommendations = predict(t, Y)

In [16]:
output = recommend(recommendations, metadata, Y)

In [17]:
genre_recommend, artist_name_recommend, mixed_recommend = output[0], output[1], output[2]

In [18]:
genre_recommend.shape

(3892, 4)

In [19]:
artist_name_recommend.shape

(52, 4)

In [20]:
mixed_recommend.shape

(1142, 4)

In [21]:
# Genre wise recommendations
genre_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
153,Arc and Sender,Arc and Sender,Rock,Hundred-Year Flood
154,Arc and Sender,Arc and Sender,Rock,Squares And Circles
155,unreleased demo,Arc and Sender,Rock,Maps of the Stars Homes
169,Boss of Goth,Argumentix,Rock,Boss of Goth
170,Nightmarcher,Argumentix,Rock,Industry Standard Massacre


In [22]:
# Artist wise recommendations
artist_name_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34660,Zehu,51%,AvantGarde|International|Blues|Jazz|,Hadri Ha'Kat
34661,Zehu,51%,AvantGarde|International|Blues|Jazz|,Blender Tzivoni
34662,Zehu,51%,AvantGarde|International|Blues|Jazz|,Naniah
34663,Zehu,51%,AvantGarde|International|Blues|Jazz|,Yoter Miday
34664,Zehu,51%,AvantGarde|International|Blues|Jazz|,"Yamim, Lielot"


In [23]:
# Mixed Recommendations
mixed_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
19923,Food Miles,THF Drenching,AvantGarde|International|,A Tour Of US Recession
13675,netBloc Vol. 01: The Opening Salvo,Pablie,AvantGarde|International|,Trying Jazz
16158,netBloc Vol. 23: We Invented The Compilation,Entertainment for the Braindead,Pop,Animals
24316,Live at Jetto Festival,Daan Hendriks,AvantGarde|International|,Give Me Your Love (illegal remix)
21897,Neuro Science EP,B1t Crunch3r vs Killeralien vs Phonetic System,Electronic,Mentalism feat. Dubbwune


In [24]:
recommendations.head()

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental,label
5040,5040,19923,0.670348,0.576863,0.543091,0.967985,0.116525,0.407882,115.695,0.966856,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3414,3414,13675,0.715131,0.784469,0.357312,0.794843,0.103969,0.068457,90.02,0.80291,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4109,4109,16158,0.987536,0.454306,0.086323,0.916047,0.139758,0.037731,111.842,0.199447,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
5982,5982,24316,0.091983,0.625877,0.556248,0.81844,0.122404,0.097589,192.024,0.697436,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
5482,5482,21897,0.00606,0.82977,0.937159,0.867768,0.112238,0.089005,140.077,0.607665,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2


In [25]:
artist_name_recommend['artist_name'].value_counts()

51%    52
Name: artist_name, dtype: int64

In [26]:
genre_recommend['genre'].value_counts()

Rock    3892
Name: genre, dtype: int64

In [27]:
genre_recommend['artist_name'].value_counts()

Glove Compartment               65
Blah Blah Blah                  62
Mors Ontologica                 50
Les Baudouins Morts             38
Kraus                           35
                                ..
Alone in 1982                    1
Ostrich Tuning                   1
The Dalai Lama Rama Fa Fa Fa     1
The Rusty Bells                  1
Lost Boy                         1
Name: artist_name, Length: 725, dtype: int64

#### Testing

In [28]:
testing = Y.iloc[6:12]['track_id']

In [29]:
testing

8944     40038
10956    52221
9872     45139
7468     32456
8018     34529
11034    54282
Name: track_id, dtype: int64

In [30]:
ids = testing.loc[testing.index]

In [31]:
songs = metadata.loc[testing.loc[list(testing.index)]]

In [32]:
songs

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
40038,"Oya Wa Touhoku, Oto Kumori",Hiiragi Fukuda,AvantGarde|International|Blues|,Nisoku Hokou(Bipedal Locomotion)
52221,Live at WFMU on Beastin' The Airwaves! With Ke...,Grooms,Rock,3D Voices
45139,netlabelism.com - Compilation 02/11,Photophob,Electronic,Backyard Prophet
32456,"Live at Primavera Sound Festival, 05/27/10",Sic Alps,Rock,Stories
34529,Sowilo (Double Disc),Gorowski,Electronic,I'll Get By (Feat. Crepesuzette)
54282,Live at WFMU on Beastin' The Airwaves with Kei...,Abstract Artimus,Rock,Unearthly Sky


In [33]:
re = predict(t, Y.iloc[6:12])

In [34]:
output = recommend(re, metadata, Y.iloc[6:12])

In [35]:
ge_re, ge_ar, ge_mix = output[0], output[1], output[2]

In [36]:
ge_re.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
153,Arc and Sender,Arc and Sender,Rock,Hundred-Year Flood
154,Arc and Sender,Arc and Sender,Rock,Squares And Circles
155,unreleased demo,Arc and Sender,Rock,Maps of the Stars Homes
169,Boss of Goth,Argumentix,Rock,Boss of Goth
170,Nightmarcher,Argumentix,Rock,Industry Standard Massacre


In [37]:
ge_ar.head(10)

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
54282,Live at WFMU on Beastin' The Airwaves with Kei...,Abstract Artimus,Rock,Unearthly Sky
54284,Live at WFMU on Beastin' The Airwaves with Kei...,Abstract Artimus,Rock,At Large
54285,Live at WFMU on Beastin' The Airwaves with Kei...,Abstract Artimus,Rock,Quitters Face
54286,Live at WFMU on Beastin' The Airwaves with Kei...,Abstract Artimus,Rock,Anger and Envy
54287,Live at WFMU on Beastin' The Airwaves with Kei...,Abstract Artimus,Rock,27 Club
54288,Live at WFMU on Beastin' The Airwaves with Kei...,Abstract Artimus,Rock,Torture Chile


In [38]:
ge_mix.head(10)

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
17881,Infiltrator,Covox,Electronic,Psychic Youth
20741,Antique Phonograph Music Program 07/14/2009,Venetian Trio,OldTime|Historic,Evening Chimes
20576,@ ISSUE 10/22/09,Sim Cain,Blues,One
5264,Live at WFMU on Mike's Show on 7/11/2002,Dälek,HipHop,Classical Homicide
19602,Antique Phonograph Music Program 09/22/2009,Billy Murray And Chorus,OldTime|Historic,Underneath the Cotton Moon
19648,Live on Solid Gold Hell with Sue P. on WFMU 10...,Wizardry,Rock,Wayfarer
807,Fun With Your New Head,Guinea Worms,Rock,Spider Season
6469,Keyif Live on WFMU from the 2009 Golden Festiv...,Orkestra Keyif,International,Derya'da Deryaliklar
23801,Blub,Bulb,AvantGarde|International|,La marche de l'ours
1917,The Philadelphia Parking Authority Must Die,The Twin Atlas,Folk,Inverted Torches


In [39]:
ge_re.shape

(3892, 4)

In [40]:
ge_ar.shape

(6, 4)

In [41]:
ge_mix.shape

(2807, 4)

### Model Selection - MiniBatchKMeans

In [42]:
from sklearn.cluster import MiniBatchKMeans

In [43]:
mini = MiniBatchKMeans(n_clusters = 6)

In [44]:
X.drop('label', axis=1, inplace=True)

In [45]:
# Let's divide the intital dataset into pieces to demonstrate online learning
part_1, part_2, part_3 = X.iloc[0: 2000], X.iloc[2000:4000], X.iloc[4000:6000]

In [46]:
for i in [part_1, part_2, part_3]:
    t = fit(i, mini)
    mini = t[1]
    i = t[0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_


In [47]:
X = pd.concat([part_1, part_2, part_3])

In [48]:
X.columns

Index(['Unnamed: 0', 'track_id', 'acousticness', 'danceability', 'energy',
       'instrumentalness', 'liveness', 'speechiness', 'tempo', 'valence',
       ...
       'Salsa', 'NuJazz', 'HipHop Beats', 'Modern Jazz', 'Turkish', 'Tango',
       'Fado', 'Christmas', 'Instrumental', 'label'],
      dtype='object', length=931)

In [49]:
X.head(3)

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental,label
2329,2329,9787,0.03221,0.446872,0.809473,0.340068,0.233547,0.115823,125.668,0.344056,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
3820,3820,15136,0.247748,0.067595,0.443744,0.969501,0.12995,0.049286,175.511,0.07955,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
488,488,1272,0.635033,0.24734,0.999023,0.885709,0.20476,0.079676,189.539,0.03165,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5


In [50]:
X['label'].value_counts()

0    2836
3    1189
5     946
1     833
2     195
4       1
Name: label, dtype: int64

In [51]:
recommendations = predict((X, mini), Y)

In [None]:
output = recommend(recommendations, metadata, Y)

In [None]:
genre_recommend_mini, artist_name_recommend_mini, mixed_mini = output[0], output[1], output[2]

In [None]:
genre_recommend_mini.shape

In [None]:
artist_name_recommend_mini.shape

In [None]:
# Genre wise recommendations
genre_recommend_mini.head()

In [None]:
# Artist wise recommendations
artist_name_recommend_mini.head()

In [None]:
# Mixed Recommendations
mixed_mini.head()

### Model Selection - Birch

In [None]:
from sklearn.cluster import Birch

In [None]:
birch = Birch(n_clusters = 6)

In [None]:
X.drop('label', axis=1, inplace=True)

In [None]:
# Let's divide the intital dataset into pieces to demonstrate online learning
part_1, part_2, part_3 = X.iloc[0: 2000], X.iloc[2000:4000], X.iloc[4000:6000]

In [None]:
for i in [part_1, part_2, part_3]:
    t = fit(i, birch)
    mini = t[1]
    i = t[0]

In [None]:
X = pd.concat([part_1, part_2, part_3])

In [None]:
X.columns

In [None]:
X.head(3)

In [None]:
X['label'].value_counts()

In [None]:
recommendations = predict((X, birch), Y)

In [None]:
output = recommend(recommendations, metadata, Y)

In [None]:
genre_recommend_birch, artist_name_recommend_birch, mixed_birch = output[0], output[1], output[2]

In [None]:
genre_recommend_birch.shape

In [None]:
artist_name_recommend_birch.shape

In [None]:
# Genre wise recommendations
genre_recommend_birch.head()

In [None]:
# Artist wise recommendations
artist_name_recommend_birch.head()

In [None]:
# Mixed Recommendations
mixed_birch.head()