In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors  
import tensorflow as tf  
from sklearn.preprocessing import LabelEncoder



## 🎵 **Genre-Emotion to Features Mapping**

| Genre             | Emotion(s)        | Energy   | Danceability | Positiveness | Speechiness | Liveness | Acousticness | Instrumentalness |
|-------------------|-------------------|----------|--------------|--------------|-------------|----------|--------------|------------------|
| pop               | joy, love, sadness | High     | High         | High         | Medium      | Low      | Medium       | Low              |
| rock              | joy, anger, love   | High     | Medium       | Medium       | Low         | Medium   | Medium       | Low              |
| hip hop           | anger, joy, love   | High     | Medium       | Medium       | High        | Low      | Low          | Low              |
| indie             | sadness, love, joy | Medium   | High         | Medium       | Low         | Medium   | High         | Low              |
| dance             | joy, love, excitement | High   | High         | High         | Low         | Low      | Low          | Low              |
| rap               | anger, confidence, pride | High | Medium      | Low          | High        | Low      | Low          | Low              |
| r&b               | love, sadness, desire | Medium | Medium      | Medium       | High        | Low      | Low          | Low              |
| electronic        | joy, excitement, energy | High  | High         | High         | Medium      | Low      | Low          | Low              |
| soul              | love, sadness, nostalgia | Low   | Low          | Medium       | Low         | High     | High         | Low              |
| alternative       | sadness, joy, melancholy | Medium | Medium     | Medium       | Medium      | Low      | High         | Medium           |
| metal             | anger, rage, despair | High    | Low          | Low          | Low         | Low      | Low          | High             |
| classical         | peace, sadness, awe | Low     | Low          | Low          | Low         | High     | High         | Low              |
| jazz              | calm, nostalgia, contentment | Low  | Low         | Medium       | Low         | High     | High         | Medium           |
| country           | love, heartbreak, reflection | Medium | Medium   | Medium       | Low         | Low      | High         | Low              |
| folk              | calm, thoughtfulness, nostalgia | Low | Low       | Medium       | Low         | High     | High         | Low              |


In [43]:
import pandas as pd

# Column layout of the genre lookup table: the genre, its typical emotions,
# then one qualitative level (Low / Medium / High) per audio feature.
columns = [
    'Genre',
    'Emotion(s)',
    'Energy',
    'Danceability',
    'Positiveness',
    'Speechiness',
    'Liveness',
    'Acousticness',
    'Instrumentalness',
]

# One row per genre; values are listed in the same order as `columns`.
data = [
    ['pop', 'joy, love, sadness', 'High', 'High', 'High', 'Medium', 'Low', 'Medium', 'Low'],
    ['rock', 'joy, anger, love', 'High', 'Medium', 'Medium', 'Low', 'Medium', 'Medium', 'Low'],
    ['hip hop', 'anger, joy, love', 'High', 'Medium', 'Medium', 'High', 'Low', 'Low', 'Low'],
    ['indie', 'sadness, love, joy', 'Medium', 'High', 'Medium', 'Low', 'Medium', 'High', 'Low'],
    ['dance', 'joy, love, excitement', 'High', 'High', 'High', 'Low', 'Low', 'Low', 'Low'],
    ['rap', 'anger, confidence, pride', 'High', 'Medium', 'Low', 'High', 'Low', 'Low', 'Low'],
    ['r&b', 'love, sadness, desire', 'Medium', 'Medium', 'Medium', 'High', 'Low', 'Low', 'Low'],
    ['electronic', 'joy, excitement, energy', 'High', 'High', 'High', 'Medium', 'Low', 'Low', 'Low'],
    ['soul', 'love, sadness, nostalgia', 'Low', 'Low', 'Medium', 'Low', 'High', 'High', 'Low'],
    ['alternative', 'sadness, joy, melancholy', 'Medium', 'Medium', 'Medium', 'Medium', 'Low', 'High', 'Medium'],
    ['metal', 'anger, rage, despair', 'High', 'Low', 'Low', 'Low', 'Low', 'Low', 'High'],
    ['classical', 'peace, sadness, awe', 'Low', 'Low', 'Low', 'Low', 'High', 'High', 'Low'],
    ['jazz', 'calm, nostalgia, contentment', 'Low', 'Low', 'Medium', 'Low', 'High', 'High', 'Medium'],
    ['country', 'love, heartbreak, reflection', 'Medium', 'Medium', 'Medium', 'Low', 'Low', 'High', 'Low'],
    ['folk', 'calm, thoughtfulness, nostalgia', 'Low', 'Low', 'Medium', 'Low', 'High', 'High', 'Low'],
]

# Assemble the table and write it out without the positional row index.
df = pd.DataFrame(data=data, columns=columns)
df.to_csv(path_or_buf='../data/category.csv', index=False)


In [47]:
# Read the raw track table from disk.
data = pd.read_csv('../data/light_spotify_dataset.csv')

# Number of distinct values per column, printed as plain text.
print(data.nunique())

# (rows, columns) of the loaded frame, shown as the cell result.
data.shape

artist               30607
song                159729
emotion                 10
variance             38455
Genre                 2562
Release Date            93
Key                     24
Tempo                  167
Loudness              2953
Explicit                 2
Popularity             100
Energy                 101
Danceability            94
Positiveness           100
Speechiness             96
Liveness               100
Acousticness           101
Instrumentalness       101
dtype: int64


(236988, 18)

In [48]:
# Show the loaded table; the recorded output below is a truncated preview
# (first and last rows separated by an ellipsis row).
data

Unnamed: 0,artist,song,emotion,variance,Genre,Release Date,Key,Tempo,Loudness,Explicit,Popularity,Energy,Danceability,Positiveness,Speechiness,Liveness,Acousticness,Instrumentalness
0,ABBA,She's My Kind Of Girl,joy,0.447619,pop,2014,F Maj,128,-6.00,No,31,78,56,60,3,31,7,0
1,ABBA,"Andante, Andante",love,0.202222,pop,1980,A# Maj,102,-10.72,No,59,36,52,38,2,7,68,0
2,ABBA,As Good As New,sadness,0.300881,pop,1979,E Maj,139,-5.70,No,50,78,85,97,3,8,20,2
3,ABBA,Bang,joy,0.355000,pop,1975,F Maj,132,-3.00,No,52,76,50,89,3,32,3,0
4,ABBA,Bang-A-Boomerang,joy,0.355000,pop,1975,F Maj,132,-3.00,No,52,76,50,89,3,32,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236983,(Eri Sasaki),Gate Of Steiner,sadness,0.833514,"Unknown,Unknown,Unknown",2018,D min,148,-6.44,No,41,61,41,37,4,14,20,0
236984,(Elena Tsagrinou),El Diablo,love,0.833514,"Unknown,Unknown,Unknown",2021,A min,114,-7.78,No,51,66,66,62,13,84,0,0
236985,(Dima Bilan),Believe,joy,0.833514,"Unknown,Unknown,Unknown",2009,C min,134,-6.72,No,36,73,55,24,5,22,5,0
236986,(Ani Lorak),Shady Lady,joy,0.833514,"Unknown,Unknown,Unknown",2009,F# min,128,-13.00,No,37,70,71,77,5,7,5,0


In [49]:
# Missing-data audit: list every column that still contains null entries.
# Rows without a song title are removed first; `data` is rebound to the
# filtered frame, so later cells see the filtered rows.
data = data.dropna(subset=['song'])

# Null count per column (a Series indexed by column name).
missing_data = data.isnull().sum()
missing_data_table = pd.DataFrame({
    'Column Name': missing_data.index,
    'Missing Values': missing_data.values,
    'Percentage Missing': (missing_data / len(data)) * 100
})

# Keep only columns with at least one null, largest count first.
# The sort is chained and reassigned instead of using inplace=True, so the
# boolean-filtered slice is never mutated in place.
missing_data_table = (
    missing_data_table[missing_data_table['Missing Values'] > 0]
    .sort_values(by='Missing Values', ascending=False)
)
print(missing_data_table)

Empty DataFrame
Columns: [Column Name, Missing Values, Percentage Missing]
Index: []


In [50]:
# Turn the categorical columns into integer codes so the table is numeric.

# Drop rows without a song title (a no-op when they were already removed).
data = data.dropna(subset=['song'])

# Keep the titles aside (they share the index of `data`) before removing the
# free-text column from the feature table.
Song_Name = data['song']
data = data.drop(columns=['song'])

# One encoder per column. A single shared LabelEncoder is refit on every
# fit_transform call, so only the last column's classes_ would survive and
# the integer codes of the other columns could not be mapped back to labels.
label_encoders = {}
for column in ['artist', 'emotion', 'Genre', 'Key']:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Backward compatibility: `label_encoder` ends up fitted on 'Key', exactly as
# it did when one instance was reused for all four columns in this order.
label_encoder = label_encoders['Key']

# 'Yes'/'No' flag -> 1/0. NOTE(review): Series.map with a dict yields NaN for
# any value outside the mapping - confirm the column only holds 'Yes'/'No'.
data['Explicit'] = data['Explicit'].map({'Yes': 1, 'No': 0})


data

Unnamed: 0,artist,emotion,variance,Genre,Release Date,Key,Tempo,Loudness,Explicit,Popularity,Energy,Danceability,Positiveness,Speechiness,Liveness,Acousticness,Instrumentalness
0,523,4,0.447619,2033,2014,16,128,-6.00,0,31,78,56,60,3,31,7,0
1,523,5,0.202222,2033,1980,2,102,-10.72,0,59,36,52,38,2,7,68,0
2,523,7,0.300881,2033,1979,14,139,-5.70,0,50,78,85,97,3,8,20,2
3,523,4,0.355000,2033,1975,16,132,-3.00,0,52,76,50,89,3,32,3,0
4,523,4,0.355000,2033,1975,16,132,-3.00,0,52,76,50,89,3,32,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236983,4,7,0.833514,629,2018,11,148,-6.44,0,41,61,41,37,4,14,20,0
236984,3,5,0.833514,629,2021,1,114,-7.78,0,51,66,66,62,13,84,0,0
236985,2,4,0.833514,629,2009,7,134,-6.72,0,36,73,55,24,5,22,5,0
236986,1,4,0.833514,629,2009,19,128,-13.00,0,37,70,71,77,5,7,5,0


In [51]:
# Write the encoded table to disk; the row index is not included in the file.
data.to_csv(path_or_buf='../data/preprocess.csv', index=False)
