# **Analysis The Music Data**

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

In [3]:
music = pd.read_csv("Music Info.csv")

In [4]:
pd.set_option('display.max_colwidth', None)
music.head(2)

Unnamed: 0,track_id,name,artist,spotify_preview_url,spotify_id,tags,genre,year,duration_ms,danceability,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,TRIOREW128F424EAF0,Mr. Brightside,The Killers,https://p.scdn.co/mp3-preview/4d26180e6961fd46866cd9106936ea55dfcbaa75?cid=774b29d4f13844c495f206cafdad9c86,09ZQ5TmUG8TSL56n0knqrj,"rock, alternative, indie, alternative_rock, indie_rock, 00s",,2004,222200,0.355,...,1,-4.36,1,0.0746,0.00119,0.0,0.0971,0.24,148.114,4
1,TRRIVDJ128F429B0E8,Wonderwall,Oasis,https://p.scdn.co/mp3-preview/d012e536916c927bd6c8ced0dae75ee3b7715983?cid=774b29d4f13844c495f206cafdad9c86,06UfBBDISthj1ZJAtX4xjj,"rock, alternative, indie, pop, alternative_rock, british, 90s, love, britpop",,2006,258613,0.409,...,2,-4.373,1,0.0336,0.000807,0.0,0.207,0.651,174.426,4


In [5]:
pd.set_option('display.max_colwidth', None)
music.sample(2)[['tags','genre']]

Unnamed: 0,tags,genre
26515,"progressive_metal, doom_metal, gothic_metal",
45685,"electronic, instrumental, chillout, soundtrack, new_age",


In [6]:
music.shape

(50683, 21)

In [7]:
(music.isnull().sum()/music.shape[0])*100

track_id                0.000000
name                    0.000000
artist                  0.000000
spotify_preview_url     0.000000
spotify_id              0.000000
tags                    2.223625
genre                  55.906320
year                    0.000000
duration_ms             0.000000
danceability            0.000000
energy                  0.000000
key                     0.000000
loudness                0.000000
mode                    0.000000
speechiness             0.000000
acousticness            0.000000
instrumentalness        0.000000
liveness                0.000000
valence                 0.000000
tempo                   0.000000
time_signature          0.000000
dtype: float64

In [8]:
# fill missing value by forward fill
music['tags'].fillna(method='ffill', inplace=True)
music['genre'].fillna(method='bfill', inplace=True)

music.isnull().sum()

track_id               0
name                   0
artist                 0
spotify_preview_url    0
spotify_id             0
tags                   0
genre                  0
year                   0
duration_ms            0
danceability           0
energy                 0
key                    0
loudness               0
mode                   0
speechiness            0
acousticness           0
instrumentalness       0
liveness               0
valence                0
tempo                  0
time_signature         0
dtype: int64

In [9]:
music.sample(2)

Unnamed: 0,track_id,name,artist,spotify_preview_url,spotify_id,tags,genre,year,duration_ms,danceability,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
18284,TRDPRQN128F933064C,The Looking Glass,Dream Theater,https://p.scdn.co/mp3-preview/0e58b7d56fdb49470e02fc51e147417d0a5f1824?cid=774b29d4f13844c495f206cafdad9c86,5l79BZlSp7p8rBiUAvtTLI,"progressive_rock, progressive_metal",Rock,2013,293066,0.518,...,9,-3.972,1,0.0661,3.7e-05,2.6e-05,0.219,0.598,137.98,4
7162,TRPWQKH128F9306D12,Certainly,Erykah Badu,https://p.scdn.co/mp3-preview/f0199dbe6b33d01d227905c2139b3904860b34dc?cid=774b29d4f13844c495f206cafdad9c86,0ZpAK0kBE6YY9LOJsDWmyJ,"female_vocalists, jazz, soul, rnb",Pop,1997,283226,0.717,...,7,-9.891,1,0.193,0.0779,0.0109,0.244,0.179,71.074,4


In [10]:
music.shape

(50683, 21)

In [11]:
music['name'].nunique()

50683

# **Created Reccomdation Model (with help of KNN)**

In [12]:
music.columns
# ['name', 'artist','tags', 'genre', 'year', 'duration_ms', 'danceability', 'energy', 'key','loudness', 'mode', 'speechiness', 'acousticness', 
#                'instrumentalness','liveness', 'valence', 'tempo', 'time_signature']

#-----------------------------------#
# [['name', 'artist','tags', 'genre', 'energy','loudness', 'acousticness', 'liveness', 'valence', 'tempo', 'time_signature']]

Index(['track_id', 'name', 'artist', 'spotify_preview_url', 'spotify_id',
       'tags', 'genre', 'year', 'duration_ms', 'danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'time_signature'],
      dtype='object')

In [13]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

songs = music[['name', 'artist','tags', 'genre', 'energy','loudness','valence', 'tempo']]
data = music[['name', 'artist','tags', 'genre', 'energy','loudness','valence', 'tempo']]

# encoding with label
label = LabelEncoder()
label_data = data.apply(label.fit_transform)

#standarding the value
stand = StandardScaler()
stand_data  = stand.fit_transform(label_data)


In [14]:
data = pd.DataFrame(stand_data, columns = data.columns)
data

Unnamed: 0,name,artist,tags,genre,energy,loudness,valence,tempo
0,0.176339,1.263570,0.847271,0.720337,0.877182,0.992761,-0.467338,1.007711
1,1.645624,0.463311,0.960002,0.720337,0.781099,0.989150,0.763913,1.570995
2,-1.154473,0.435361,0.701551,0.720337,0.537197,0.598657,0.440372,-0.080445
3,1.038349,-0.601235,0.858915,0.720337,-0.061474,-0.245654,0.281598,-0.705448
4,-1.112712,0.659779,0.859797,0.720337,-0.926220,-0.536997,-0.874760,-1.201640
...,...,...,...,...,...,...,...,...
50678,0.667012,1.708706,-1.443682,-1.483052,0.932615,1.347983,0.074891,1.395564
50679,-1.163426,-1.656738,1.132008,-1.483052,0.045695,0.313702,0.161768,1.818280
50680,1.229109,1.685689,0.288029,-1.483052,1.087826,1.340206,-0.341517,1.230471
50681,-0.145650,1.708706,0.288029,-1.483052,0.818054,1.116630,0.665053,-0.499208


In [15]:
corr_data = data.corr()
trg = np.triu(corr_data)
trg = pd.DataFrame(trg)
trg

Unnamed: 0,0,1,2,3,4,5,6,7
0,1.0,0.004046,0.004364,0.005833,-0.00333,-0.001658,0.001128,0.001668
1,0.0,1.0,0.060985,0.052753,0.014895,0.008778,0.04943,0.0178
2,0.0,0.0,1.0,0.24354,0.101776,0.132694,0.178731,0.022773
3,0.0,0.0,0.0,1.0,0.03841,0.082408,0.104962,0.011902
4,0.0,0.0,0.0,0.0,1.0,0.785636,0.229157,0.226793
5,0.0,0.0,0.0,0.0,0.0,1.0,0.19342,0.17266
6,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.087268
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [16]:
# train model 
from sklearn.neighbors import NearestNeighbors

feature = data[['artist','tags', 'genre', 'energy','loudness','valence', 'tempo']]

knn = NearestNeighbors(n_neighbors=7, metric='euclidean')
knn.fit(feature)


# recommend songs for user
def your_songs(song_no):
    
	#find the song data
	distances , indices = knn.kneighbors([feature.loc[song_no]])

	#recommding similarted to those songs
	recommend_songs = songs.iloc[indices[0][1:]]

	#song + recommeding songs
	search_song = songs.loc[song_no]['name']
	song_name = recommend_songs['name'].to_list()

	song_name.insert(0,search_song)

	return song_name

your_songs(23)

['Use Somebody',
 'Better Than Me',
 'Nothing But Time',
 'Running Out Of Time',
 'This is Home',
 'Jet Black',
 'I Will Protect You']

In [17]:
# save the model
# import pickle 
# pickle.dump(knn, open('music_model.pkl', 'wb'))

# **User**

In [25]:
# user  = music[['artist','tags', 'genre', 'energy','loudness','valence', 'tempo']]
user  = music[['name','artist','genre', 'spotify_preview_url']]
user.isnull().sum()

name                   0
artist                 0
genre                  0
spotify_preview_url    0
dtype: int64

In [26]:

songs_artist =  music[['name','artist','genre', 'spotify_preview_url']]
songs_artist.to_csv('songs_artist.csv', index=False)

In [24]:
# feature.to_csv('feature_data.csv', index=False)