# Kimiya Ghanai Machine Learning

## Music Recommendation System using Spotfy dataset

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Import Spotify Dataset

In [2]:
df= pd.read_csv('Spotify Dataset.csv')
df.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
0,0.0594,1921,0.982,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",0.279,831667,0.211,0,4BJqT0PrAfrxzMOxytFOIz,0.878,10,0.665,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,1921,0.0366,80.954
1,0.963,1921,0.732,['Dennis Day'],0.819,180533,0.341,0,7xPhfUan2yNtyFG0cUWkt8,0.0,7,0.16,-12.441,1,Clancy Lowered the Boom,5,1921,0.415,60.936
2,0.0394,1921,0.961,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,0.328,500062,0.166,0,1o6I8BglA6ylDMrIELygv1,0.913,3,0.101,-14.85,1,Gati Bali,5,1921,0.0339,110.339
3,0.165,1921,0.967,['Frank Parker'],0.275,210000,0.309,0,3ftBPsC5vPBKxYSee08FDH,2.8e-05,5,0.381,-9.316,1,Danny Boy,3,1921,0.0354,100.109
4,0.253,1921,0.957,['Phil Regan'],0.418,166693,0.193,0,4d6HGyGT8e121BsdKmw9v6,2e-06,3,0.229,-10.096,1,When Irish Eyes Are Smiling,2,1921,0.038,101.665


In [3]:
df.isnull().sum()

valence             0
year                0
acousticness        0
artists             0
danceability        0
duration_ms         0
energy              0
explicit            0
id                  0
instrumentalness    0
key                 0
liveness            0
loudness            0
mode                0
name                0
popularity          0
release_date        0
speechiness         0
tempo               0
dtype: int64

### Normalization Data

In [16]:
from sklearn.preprocessing import MinMaxScaler
datatypes = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
normaldata = df.select_dtypes(include=datatypes)
for col in normaldata.columns:
    MinMaxScaler(col)

### Prediction with KMeans

In [6]:
from sklearn.cluster import KMeans
km = KMeans(n_clusters=10)
features = km.fit_predict(normaldata)
df['features'] = features
MinMaxScaler(df['features'])

0,1,2
,feature_range,"0 1 1..., dtype: int32"
,copy,True
,clip,False


### Recommendation Class

In [7]:
from tqdm import tqdm
class Spotify_Recommendation():
    def __init__(self, dataset):
        self.dataset = dataset
    def recommend(self, songs, amount=1):
        distance = []
        song = self.dataset[self.dataset.name.str.lower() == songs.lower()].head(1)
        rec = self.dataset[self.dataset.name.str.lower() != songs.lower()]
        numeric_cols = rec.select_dtypes(include=[np.number]).columns
        song_values = song[numeric_cols].values[0]
        for row in tqdm(rec[numeric_cols].values):
            d = np.sum(np.abs(song_values - row))
            distance.append(d)
        rec = rec.copy()
        rec['distance'] = distance
        rec = rec.sort_values('distance')
        columns = ['artists', 'name']
        return rec[columns].head(amount)
recommendations = Spotify_Recommendation(df)

### User Input

In [14]:
usersong = input("Song name:")
recommendations.recommend(usersong, 10)

Song name: cake by the ocean


100%|███████████████████████████████████████████████████████████████████████| 170652/170652 [00:02<00:00, 64599.92it/s]


Unnamed: 0,artists,name
56510,['Post Malone'],"Yours Truly, Austin Post"
38022,['Queen Naija'],Medicine
19456,"['Jeremy Zucker', 'Chelsea Cutler']",you were good to me
18524,['Jason Aldean'],Burnin' It Down
56372,['Elliot Root'],"June, After Dark"
123722,['BoA'],MASAYUME CHASING
35220,['George Strait'],Cowboys Like Us
155106,['NCT 127'],Regular - English Version
70523,['Shania Twain'],When
91887,"['Johnny Orlando', 'kenzie']",What If (I Told You I Like You)
