In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import os
import scipy
import warnings 
warnings.filterwarnings("ignore")
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from tqdm import tqdm

In [43]:
data = pd.read_csv("Dataset/preprocessed_data.csv")

In [44]:
data.columns

Index(['artist', 'track_name', 'acousticness', 'danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'duration_ms', 'popularity'],
      dtype='object')

In [49]:
data.drop(columns='artist',inplace=True)

In [5]:
df = data.pivot_table(index='track_name')

In [6]:
numerical = data.select_dtypes(exclude='object')

### Nearest Neighbors algorithm

In [7]:
model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')

In [8]:
model_knn.fit(df)

In [40]:
query_index = np.random.choice(df.shape[0])
print(query_index)
distances, indices = model_knn.kneighbors(df.iloc[query_index,:].values.reshape(1, -1), n_neighbors = 20)

146316


In [41]:
for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for : {0}\n'.format(df.index[query_index]))
    else:
        print('{0}:{1}'.format(i, df.index[indices.flatten()[i]], distances.flatten()[i]))

Recommendations for : Stars Fell On Alabama - Remastered

1:Dos Cruces
2:Try Again
3:But That's Alright
4:Everything I Love
5:You Ain't Goin' Nowhere - Take 1
6:Like Sonny - Alternate Take 5
7:Shine On (Shine All Your Sweet Love on Me)
8:I'm a Survivor
9:Dark as a Dungeon
10:Swing Low, Sweet Chariot
11:I'd Rather Love You
12:Someone To Love Me (The Prisoner's Song)
13:Take Good Care Of Her
14:Turn, Turn, Turn (To Everything There Is a Season)
15:On the Horizon - Mono
16:Goodnight My Love
17:Fragilidad
18:One Place - Acoustic
19:春宵吟


### K Means Clustering

Using K Means cluster to create a dependent variable

In [59]:
cluster_model = KMeans(n_clusters=20, max_iter = 1000, random_state= 10)
cluster = cluster_model.fit_predict(numerical)
data['cluster'] = cluster

In [60]:
class SpotifyRecommendation():
    def __init__(self, dataset):
        self.dataset = dataset
    def recommend(self, songs, amount=1):
        distance = []
        song = self.dataset[(self.dataset.track_name.str.lower() == songs.lower())].head(1).values[0]
        rec = self.dataset[self.dataset.track_name.str.lower() != songs.lower()]
        for songs in tqdm(rec.values):
            d = 0
            for col in np.arange(len(rec.columns)):
                if not col in [0]:
                    d = d + np.absolute(float(song[col]) - float(songs[col]))
            distance.append(d)
        rec['distance'] = distance
        rec = rec.sort_values('distance')
        rec.reset_index(drop=True,inplace= True)
        columns = ['track_name']
        return rec[columns][:amount]

In [63]:
recommendations = SpotifyRecommendation(data)
recommendations.recommend("I See the Want to in Your Eyes", 20)

100%|██████████| 289325/289325 [00:07<00:00, 38137.24it/s]


Unnamed: 0,track_name
0,Let It Be Me
1,Stand By Your Man
2,Silver Bells
3,Zazueira (with Stanley Turrentine)
4,Stars Fell On Alabama - Remastered
5,春風太無情
6,Broken Lady
7,Family Bible
8,If I May
9,Carabela


In [66]:
import pandas as pd
import numpy as np
import os 
import warnings
warnings.filterwarnings("ignore")
from sklearn.cluster import KMeans
from tqdm import tqdm

data = pd.read_csv("Dataset/preprocessed_data.csv")

model_data = data.drop(columns= 'artist')

numerical = model_data.select_dtypes(exclude='object')

cluster_model = KMeans(n_clusters= 25, max_iter= 1250, random_state= 35)
cluster = cluster_model.fit_predict(numerical)
model_data['cluster'] = cluster


class SpotifyRecommendation():
    def __init__(self, dataset):
        self.dataset = dataset
    def recommend(self, songs, amount = 5):
        distance = []
        song_data = self.dataset[(self.dataset.track_name.str.lower() == songs.lower())].head(1).values[0]
        rec_data = self.dataset[(self.dataset.track_name.str.lower() != songs.lower())]

        for songs in tqdm(rec_data.values):
            dist = 0
            for col in np.arange(len(rec_data.columns)):
                if not col in [0]:
                    dist = dist + np.abs(float(song_data[col]) - float(songs[col]))
            distance.append(dist)
        
        rec_data['distance'] = distance

        rec_sorted = rec_data.sort_values('distance')
        rec_sorted.reset_index(drop = True, inplace = True)

        columns = ['track_name']
        return rec_sorted[columns][:amount]


In [67]:
recommendation = SpotifyRecommendation(model_data)

recommendation.recommend("I See the Want to in Your Eyes", 10)

100%|██████████| 289325/289325 [00:05<00:00, 51149.54it/s]


Unnamed: 0,track_name
0,Let It Be Me
1,Stand By Your Man
2,Silver Bells
3,Zazueira (with Stanley Turrentine)
4,Stars Fell On Alabama - Remastered
5,春風太無情
6,Broken Lady
7,Family Bible
8,If I May
9,Carabela
