## Music Recommendation System (Machine Learning)

This project is aimed upon building a music recommendation system that gives the user recommendations on music based on his music taste by analysing his previously heard music and playlist. This project is done in two ways, using 'User - to - User Recommendation' and 'Item - to - Item Recommendation'. Birch, MiniBatchKMeans and KMeans algorithms are being used along with 'Surprise' module to compute the similarity between recommendations and user's already existing playlist for evaluation

### Obtaining Data

In [79]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [80]:
final = pd.read_csv('datasets/final/final.csv')
metadata = pd.read_csv('datasets/final/metadata.csv')

### Model Selection - K Means Algorithm

In [81]:
from sklearn.cluster import KMeans
from sklearn.utils import shuffle

In [89]:
final = shuffle(final)

In [83]:
X = final.loc[[i for i in range(0, 6000)]]
Y = final.loc[[i for i in range(6000, final.shape[0])]]

In [84]:
X = shuffle(X)
Y = shuffle(Y)

In [87]:
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size=0.3,random_state=42)

ValueError: Found input variables with inconsistent numbers of samples: [6000, 7129]

In [7]:
metadata.head()

Unnamed: 0,track_id,album_title,artist_name,genre,track_title
0,2,AWOL - A Way Of Life,AWOL,HipHop,Food
1,3,AWOL - A Way Of Life,AWOL,HipHop,Electric Ave
2,5,AWOL - A Way Of Life,AWOL,HipHop,This World
3,10,Constant Hitmaker,Kurt Vile,Pop,Freeway
4,134,AWOL - A Way Of Life,AWOL,HipHop,Street Music


In [8]:
metadata = metadata.set_index('track_id')

In [9]:
X.drop(['label'], axis= 1, inplace= True)

KeyError: "['label'] not found in axis"

In [10]:
kmeans = KMeans(n_clusters=6)

In [11]:
Y.head()

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Holiday,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental
12055,12055,85957,0.194504,0.307904,0.439775,0.000154,0.155741,0.032184,117.839,0.039938,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11982,11982,82907,0.009056,0.320597,0.598259,0.000885,0.105224,0.032706,178.146,0.826332,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12998,12998,122712,0.995472,0.75087,0.447152,0.893677,0.132846,0.070192,145.394,0.888371,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10449,10449,48492,0.966743,0.452448,0.199366,0.944537,0.102844,0.03527,144.876,0.819464,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6817,6817,29021,0.574051,0.714634,0.813965,0.89244,0.204575,0.052757,120.53,0.799301,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
def fit(df, algo, flag=0):
    if flag:
        algo.fit(df)
    else:
         algo.partial_fit(df)          
    df['label'] = algo.labels_
    return (df, algo)

In [13]:
def predict(t, Y):
    y_pred = t[1].predict(Y)
    mode = pd.Series(y_pred).mode()
    return t[0][t[0]['label'] == mode.loc[0]]

In [14]:
def recommend(recommendations, meta, Y):
    dat = []
    for i in Y['track_id']:
        dat.append(i)
    genre_mode = meta.loc[dat]['genre'].mode()
    artist_mode = meta.loc[dat]['artist_name'].mode()
    return meta[meta['genre'] == genre_mode.iloc[0]], meta[meta['artist_name'] == artist_mode.iloc[0]], meta.loc[recommendations['track_id']]

In [15]:
t = fit(X, kmeans, 1)

In [16]:
recommendations = predict(t, Y)

In [17]:
output = recommend(recommendations, metadata, Y)

In [18]:
genre_recommend, artist_name_recommend, mixed_recommend = output[0], output[1], output[2]

In [19]:
genre_recommend.shape

(3892, 4)

In [20]:
artist_name_recommend.shape

(52, 4)

In [21]:
mixed_recommend.shape

(1150, 4)

In [22]:
# Genre wise recommendations
genre_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
153,Arc and Sender,Arc and Sender,Rock,Hundred-Year Flood
154,Arc and Sender,Arc and Sender,Rock,Squares And Circles
155,unreleased demo,Arc and Sender,Rock,Maps of the Stars Homes
169,Boss of Goth,Argumentix,Rock,Boss of Goth
170,Nightmarcher,Argumentix,Rock,Industry Standard Massacre


In [23]:
# Artist wise recommendations
artist_name_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34660,Zehu,51%,AvantGarde|International|Blues|Jazz|,Hadri Ha'Kat
34661,Zehu,51%,AvantGarde|International|Blues|Jazz|,Blender Tzivoni
34662,Zehu,51%,AvantGarde|International|Blues|Jazz|,Naniah
34663,Zehu,51%,AvantGarde|International|Blues|Jazz|,Yoter Miday
34664,Zehu,51%,AvantGarde|International|Blues|Jazz|,"Yamim, Lielot"


In [24]:
# Mixed Recommendations
mixed_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
17738,Inconceivable,Magical Unicellular Music,Rock,Inconceivable
20360,This is the Sound!,Burnkit2600,AvantGarde|International|Blues|Jazz|,Call of the Final Five
15742,Terabyte,Carson Day,Electronic,Gentle
19428,Atlas Sound Live at ATP-NY 2009 on WFMU,Atlas Sound,AvantGarde|International|,Shelia
4696,"Live at WFMU on Dan Bodah's Show Oct 19, 2008",Dan Friel,Electronic,Ghost Town Pt1


In [25]:
recommendations.head()

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental,label
4452,4452,17738,0.205731,0.559022,0.392238,0.932055,0.285795,0.040134,120.129,0.031587,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
5179,5179,20360,0.103773,0.564786,0.815586,0.765864,0.09644,0.033317,112.004,0.05597,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3965,3965,15742,0.627579,0.63835,0.652164,0.949948,0.086517,0.046199,120.075,0.09671,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4839,4839,19428,0.867633,0.751303,0.383047,0.746167,0.965176,0.151845,118.047,0.295027,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
1600,1600,4696,5e-06,0.395954,0.071486,0.948992,0.081442,0.055404,130.007,0.470819,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2


In [26]:
artist_name_recommend['artist_name'].value_counts()

51%    52
Name: artist_name, dtype: int64

In [27]:
genre_recommend['genre'].value_counts()

Rock    3892
Name: genre, dtype: int64

In [28]:
genre_recommend['artist_name'].value_counts()

Glove Compartment               65
Blah Blah Blah                  62
Mors Ontologica                 50
Les Baudouins Morts             38
Kraus                           35
                                ..
Alone in 1982                    1
Ostrich Tuning                   1
The Dalai Lama Rama Fa Fa Fa     1
The Rusty Bells                  1
Lost Boy                         1
Name: artist_name, Length: 725, dtype: int64

#### Testing

In [29]:
testing = Y.iloc[6:12]['track_id']

In [30]:
testing

11131     56841
8752      39283
6629      28334
12348     98669
12960    121591
9910      45516
Name: track_id, dtype: int64

In [31]:
ids = testing.loc[testing.index]

In [32]:
songs = metadata.loc[testing.loc[list(testing.index)]]

In [33]:
songs

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
56841,Live at WFMU on Diane's Kamikaze Fun Machine N...,The Might Could,Rock,Coming Clean
39283,AHX To Excess,geir tjelta,Electronic,the fugitive
28334,Explicit Beats,4di,AvantGarde|International|Blues|,Rifle Expert
98669,Live at the 2014 Golden Festival,Scott Wilson & Efendi,International,Bir Dimrt
121591,Live on WFMU's Three Chord Monte with Joe Belo...,Kurt Baker,Pop,Everybody Knows
45516,Evils,Plusplus,Folk,We wont be mowing today


In [34]:
re = predict(t, Y.iloc[6:12])

In [35]:
output = recommend(re, metadata, Y.iloc[6:12])

In [36]:
ge_re, ge_ar, ge_mix = output[0], output[1], output[2]

In [37]:
ge_re.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1574,Please Throw Me Back in The Ocean,Sir Lord Von Raven,AvantGarde|International|Blues|,The Glass Castle
1575,Please Throw Me Back in The Ocean,Sir Lord Von Raven,AvantGarde|International|Blues|,Take it or Leave it
1892,Tommy Jay's Tall Tales Of Trauma,Tommy Jay,AvantGarde|International|Blues|,I Was There
3394,Live at WFMU on Liz Berg's Show on 5/19/2008,Bonde Do Role,AvantGarde|International|Blues|,Gasolina / Contamida
3396,Live at WFMU on Liz Berg's Show on 5/19/2008,Bonde Do Role,AvantGarde|International|Blues|,Solta o Franco


In [38]:
ge_ar.head(10)

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
28332,Explicit Beats,4di,AvantGarde|International|,Telephone (skit)
28334,Explicit Beats,4di,AvantGarde|International|Blues|,Rifle Expert
28348,Explicit Beats,4di,AvantGarde|International|,It's Good For Me
28350,Explicit Beats,4di,AvantGarde|International|,Emanon - Do You Know? (4di Remix)
28355,Explicit Beats,4di,AvantGarde|International|,Outro
28356,Explicit Beats,4di,AvantGarde|International|,Kommunion Pt. 2
28358,Explicit Beats,4di,AvantGarde|International|,Im Labor Pt.1
28359,Explicit Beats,4di,AvantGarde|International|,Im Labor Pt.2 / Reden- und Machen Interlude
28360,Explicit Beats,4di,AvantGarde|International|,Cocaine Blunts (F.Radsch)
28361,Explicit Beats,4di,AvantGarde|International|,Unterholz


In [39]:
ge_mix.head(10)

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
17738,Inconceivable,Magical Unicellular Music,Rock,Inconceivable
20360,This is the Sound!,Burnkit2600,AvantGarde|International|Blues|Jazz|,Call of the Final Five
15742,Terabyte,Carson Day,Electronic,Gentle
19428,Atlas Sound Live at ATP-NY 2009 on WFMU,Atlas Sound,AvantGarde|International|,Shelia
4696,"Live at WFMU on Dan Bodah's Show Oct 19, 2008",Dan Friel,Electronic,Ghost Town Pt1
12338,netBloc Vol. 17: Refined Excursions For The Di...,Brigitte Bijoux,Electronic,Seduction Douce
20234,Classic Material Vol. 3: UI Radio,CM aka Creative,HipHop,Ghetto Music
17720,Le Voyage,Pigeons & Crazy Porridgemakers,Rock,Haughty Dwarf
19726,Shlo-Fi EP,Shlohmo,HipHop,Ghosts pt. 2
21009,Ornitorrinco Voador,Retrigger,Electronic,Brand New Cadillac (Vocals by Gabi Lima)


In [40]:
ge_re.shape

(1507, 4)

In [41]:
ge_ar.shape

(11, 4)

In [42]:
ge_mix.shape

(1150, 4)

### Model Selection - MiniBatchKMeans

In [43]:
from sklearn.cluster import MiniBatchKMeans

In [44]:
mini = MiniBatchKMeans(n_clusters = 6)

In [45]:
X.drop('label', axis=1, inplace=True)

In [46]:
# Let's divide the intital dataset into pieces to demonstrate online learning
part_1, part_2, part_3 = X.iloc[0: 2000], X.iloc[2000:4000], X.iloc[4000:6000]

In [47]:
for i in [part_1, part_2, part_3]:
    t = fit(i, mini)
    mini = t[1]
    i = t[0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_


In [48]:
X = pd.concat([part_1, part_2, part_3])

In [49]:
X.columns

Index(['Unnamed: 0', 'track_id', 'acousticness', 'danceability', 'energy',
       'instrumentalness', 'liveness', 'speechiness', 'tempo', 'valence',
       ...
       'Salsa', 'NuJazz', 'HipHop Beats', 'Modern Jazz', 'Turkish', 'Tango',
       'Fado', 'Christmas', 'Instrumental', 'label'],
      dtype='object', length=931)

In [50]:
X.head(3)

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental,label
173,173,539,0.993984,0.274216,0.080029,0.928708,0.117161,0.060822,204.539,0.32658,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3609,3609,14365,0.089991,0.741833,0.461279,0.891303,0.117768,0.064761,163.877,0.565653,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4452,4452,17738,0.205731,0.559022,0.392238,0.932055,0.285795,0.040134,120.129,0.031587,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2


In [51]:
X['label'].value_counts()

0    2835
2    1157
3    1149
1     594
4     156
5     109
Name: label, dtype: int64

In [52]:
recommendations = predict((X, mini), Y)

In [53]:
output = recommend(recommendations, metadata, Y)

In [54]:
genre_recommend_mini, artist_name_recommend_mini, mixed_mini = output[0], output[1], output[2]

In [55]:
genre_recommend_mini.shape

(3892, 4)

In [56]:
artist_name_recommend_mini.shape

(52, 4)

In [57]:
# Genre wise recommendations
genre_recommend_mini.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
153,Arc and Sender,Arc and Sender,Rock,Hundred-Year Flood
154,Arc and Sender,Arc and Sender,Rock,Squares And Circles
155,unreleased demo,Arc and Sender,Rock,Maps of the Stars Homes
169,Boss of Goth,Argumentix,Rock,Boss of Goth
170,Nightmarcher,Argumentix,Rock,Industry Standard Massacre


In [58]:
# Artist wise recommendations
artist_name_recommend_mini.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34660,Zehu,51%,AvantGarde|International|Blues|Jazz|,Hadri Ha'Kat
34661,Zehu,51%,AvantGarde|International|Blues|Jazz|,Blender Tzivoni
34662,Zehu,51%,AvantGarde|International|Blues|Jazz|,Naniah
34663,Zehu,51%,AvantGarde|International|Blues|Jazz|,Yoter Miday
34664,Zehu,51%,AvantGarde|International|Blues|Jazz|,"Yamim, Lielot"


In [59]:
# Mixed Recommendations
mixed_mini.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
17738,Inconceivable,Magical Unicellular Music,Rock,Inconceivable
20360,This is the Sound!,Burnkit2600,AvantGarde|International|Blues|Jazz|,Call of the Final Five
15742,Terabyte,Carson Day,Electronic,Gentle
19428,Atlas Sound Live at ATP-NY 2009 on WFMU,Atlas Sound,AvantGarde|International|,Shelia
4696,"Live at WFMU on Dan Bodah's Show Oct 19, 2008",Dan Friel,Electronic,Ghost Town Pt1


### Model Selection - Birch

In [60]:
from sklearn.cluster import Birch

In [61]:
birch = Birch(n_clusters = 6)

In [62]:
X.drop('label', axis=1, inplace=True)

In [63]:
# Let's divide the intital dataset into pieces to demonstrate online learning
part_1, part_2, part_3 = X.iloc[0: 2000], X.iloc[2000:4000], X.iloc[4000:6000]

In [64]:
for i in [part_1, part_2, part_3]:
    t = fit(i, birch)
    mini = t[1]
    i = t[0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = algo.labels_


In [65]:
X = pd.concat([part_1, part_2, part_3])

In [66]:
X.columns

Index(['Unnamed: 0', 'track_id', 'acousticness', 'danceability', 'energy',
       'instrumentalness', 'liveness', 'speechiness', 'tempo', 'valence',
       ...
       'Salsa', 'NuJazz', 'HipHop Beats', 'Modern Jazz', 'Turkish', 'Tango',
       'Fado', 'Christmas', 'Instrumental', 'label'],
      dtype='object', length=931)

In [67]:
X.head(3)

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental,label
173,173,539,0.993984,0.274216,0.080029,0.928708,0.117161,0.060822,204.539,0.32658,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3609,3609,14365,0.089991,0.741833,0.461279,0.891303,0.117768,0.064761,163.877,0.565653,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4452,4452,17738,0.205731,0.559022,0.392238,0.932055,0.285795,0.040134,120.129,0.031587,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3


In [68]:
X['label'].value_counts()

1    2830
3    1139
4     892
0     867
2     196
5      76
Name: label, dtype: int64

In [69]:
recommendations = predict((X, birch), Y)

In [70]:
output = recommend(recommendations, metadata, Y)

In [71]:
genre_recommend_birch, artist_name_recommend_birch, mixed_birch = output[0], output[1], output[2]

In [72]:
genre_recommend_birch.shape

(3892, 4)

In [73]:
artist_name_recommend_birch.shape

(52, 4)

In [74]:
# Genre wise recommendations
genre_recommend_birch.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
153,Arc and Sender,Arc and Sender,Rock,Hundred-Year Flood
154,Arc and Sender,Arc and Sender,Rock,Squares And Circles
155,unreleased demo,Arc and Sender,Rock,Maps of the Stars Homes
169,Boss of Goth,Argumentix,Rock,Boss of Goth
170,Nightmarcher,Argumentix,Rock,Industry Standard Massacre


In [75]:
# Artist wise recommendations
artist_name_recommend_birch.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34660,Zehu,51%,AvantGarde|International|Blues|Jazz|,Hadri Ha'Kat
34661,Zehu,51%,AvantGarde|International|Blues|Jazz|,Blender Tzivoni
34662,Zehu,51%,AvantGarde|International|Blues|Jazz|,Naniah
34663,Zehu,51%,AvantGarde|International|Blues|Jazz|,Yoter Miday
34664,Zehu,51%,AvantGarde|International|Blues|Jazz|,"Yamim, Lielot"


In [76]:
# Mixed Recommendations
mixed_birch.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
17738,Inconceivable,Magical Unicellular Music,Rock,Inconceivable
20360,This is the Sound!,Burnkit2600,AvantGarde|International|Blues|Jazz|,Call of the Final Five
15742,Terabyte,Carson Day,Electronic,Gentle
19428,Atlas Sound Live at ATP-NY 2009 on WFMU,Atlas Sound,AvantGarde|International|,Shelia
4696,"Live at WFMU on Dan Bodah's Show Oct 19, 2008",Dan Friel,Electronic,Ghost Town Pt1
