In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.multioutput import MultiOutputClassifier
import numpy as np

In [None]:
# Mount Google Drive at /content/drive; the CSV files read below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Turkish Music Dataset

In [None]:
# Read the acoustic-feature table from the mounted Drive folder.
df = pd.read_csv("/content/drive/MyDrive/Thesis-Project/Acoustic Features.csv")

# Column 0 carries the emotion class; every remaining column is a feature.
y = df.iloc[:, 0].values
X = df.iloc[:, 1:].values

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics on the training rows only, then apply them to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# RBF-kernel support vector classifier fitted on the standardized training features.
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42).fit(X_train_scaled, y_train)

# Score the held-out rows.
y_pred = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)


Accuracy: 0.75
Classification Report:
               precision    recall  f1-score   support

       angry       0.88      0.75      0.81        20
       happy       0.83      0.95      0.88        20
       relax       0.67      0.80      0.73        20
         sad       0.62      0.50      0.56        20

    accuracy                           0.75        80
   macro avg       0.75      0.75      0.74        80
weighted avg       0.75      0.75      0.74        80



# Multi-Label Music Emotion Dataset

In [None]:
# Load the multi-label emotions table (CSV stored in the project's arff folder) into df2.
df2 = pd.read_csv("/content/drive/MyDrive/Thesis-Project/arff/emotions.csv")

In [None]:
# Non-null count for every column, as a quick check for missing values.
df2.count()

Unnamed: 0,0
Mean_Acc1298_Mean_Mem40_Centroid,593
Mean_Acc1298_Mean_Mem40_Rolloff,593
Mean_Acc1298_Mean_Mem40_Flux,593
Mean_Acc1298_Mean_Mem40_MFCC_0,593
Mean_Acc1298_Mean_Mem40_MFCC_1,593
...,...
happy-pleased,593
relaxing-calm,593
quiet-still,593
sad-lonely,593


In [None]:
# Inspect the first row to see what a single feature/label record looks like.
df2.iloc[0]

Unnamed: 0,0
Mean_Acc1298_Mean_Mem40_Centroid,0.034741
Mean_Acc1298_Mean_Mem40_Rolloff,0.089665
Mean_Acc1298_Mean_Mem40_Flux,0.091225
Mean_Acc1298_Mean_Mem40_MFCC_0,-73.302422
Mean_Acc1298_Mean_Mem40_MFCC_1,6.215179
...,...
happy-pleased,1.000000
relaxing-calm,1.000000
quiet-still,0.000000
sad-lonely,0.000000


In [None]:
# Non-null counts for columns 0-71, the slice used as the feature matrix below.
df2.iloc[:, 0:72].count()

Unnamed: 0,0
Mean_Acc1298_Mean_Mem40_Centroid,593
Mean_Acc1298_Mean_Mem40_Rolloff,593
Mean_Acc1298_Mean_Mem40_Flux,593
Mean_Acc1298_Mean_Mem40_MFCC_0,593
Mean_Acc1298_Mean_Mem40_MFCC_1,593
...,...
BH_HighPeakBPM,593
BH_HighLowRatio,593
BHSUM1,593
BHSUM2,593


In [None]:
# Non-null counts for columns 72-77, the six emotion label columns used as targets below.
df2.iloc[:, 72:78].count()

Unnamed: 0,0
amazed-suprised,593
happy-pleased,593
relaxing-calm,593
quiet-still,593
sad-lonely,593
angry-aggresive,593


In [None]:
# Columns 0-71 are the audio features; columns 72-77 are the six 0/1 emotion label columns,
# so y is a 2-D multi-label indicator matrix rather than a single class column.
X = df2.iloc[:, :72].to_numpy()
y = df2.iloc[:, 72:78].to_numpy()

In [None]:
# Tally 0s and 1s over the whole label matrix (all samples and all label columns pooled together).
class_counts = pd.Series(y.ravel()).value_counts()
print(class_counts)

0    2450
1    1108
Name: count, dtype: int64


In [None]:
# Split the dataset into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature values (statistics are learned from the training rows only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train one RBF-kernel SVM per label column via MultiOutputClassifier
svm_model = MultiOutputClassifier(SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42))
svm_model.fit(X_train_scaled, y_train)

# Make predictions (one 0/1 column per label)
y_pred = svm_model.predict(X_test_scaled)

# Evaluate performance.
# With multi-label targets accuracy_score is the *subset* accuracy: a sample only counts as
# correct when every one of its labels is predicted correctly, so it is much stricter than
# the per-label scores in the report.
accuracy = accuracy_score(y_test, y_pred)

# Name the report rows after the label columns of df2 instead of the anonymous 0..5.
label_names = [str(col) for col in df2.columns[72:78]]

# Rows with no predicted (or no true) labels have an undefined precision/recall.
# zero_division=0 scores them as 0 - the same value the default setting uses - but without
# raising UndefinedMetricWarning on every run.
report = classification_report(
    y_test,
    y_pred,
    target_names=label_names,
    zero_division=0,
)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)


Accuracy: 0.3025210084033613
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.50      0.59        36
           1       0.70      0.21      0.33        33
           2       0.71      0.87      0.78        53
           3       0.74      0.87      0.80        23
           4       0.93      0.42      0.58        31
           5       0.83      0.59      0.69        41

   micro avg       0.75      0.59      0.66       217
   macro avg       0.77      0.58      0.63       217
weighted avg       0.77      0.59      0.63       217
 samples avg       0.67      0.60      0.61       217



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Spotify Dataset

In [None]:
# Load the labelled Spotify song table from the mounted Drive folder into df3.
df3 = pd.read_csv("/content/drive/MyDrive/Thesis-Project/278k_song_labelled.csv")

In [None]:
# Preview the first rows and the column names of the raw table.
df3.head()

Unnamed: 0.1,Unnamed: 0,duration (ms),danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,spec_rate,labels
0,0,195000.0,0.611,0.614,-8.815,0.0672,0.0169,0.000794,0.753,0.52,128.05,3.446154e-07,2.0
1,1,194641.0,0.638,0.781,-6.848,0.0285,0.0118,0.00953,0.349,0.25,122.985,1.464234e-07,1.0
2,2,217573.0,0.56,0.81,-8.029,0.0872,0.0071,8e-06,0.241,0.247,170.044,4.00785e-07,1.0
3,3,443478.0,0.525,0.699,-4.571,0.0353,0.0178,8.8e-05,0.0888,0.199,92.011,7.959809e-08,0.0
4,4,225862.0,0.367,0.771,-5.863,0.106,0.365,1e-06,0.0965,0.163,115.917,4.693131e-07,1.0


In [None]:
# Remove the row-index column(s) that were written into the CSV; pandas names such header-less
# columns "Unnamed: ...". Dropping by name instead of by position keeps this cell idempotent:
# running it again is a no-op, whereas dropping columns[0] removes one more real column
# (e.g. "duration (ms)") on every re-run.
df3 = df3.drop(columns=[col for col in df3.columns if str(col).startswith("Unnamed")])

In [None]:
# Preview again to check the result of the column drop above.
df3.head()

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,spec_rate,labels
0,0.611,0.614,-8.815,0.0672,0.0169,0.000794,0.753,0.52,128.05,3.446154e-07,2.0
1,0.638,0.781,-6.848,0.0285,0.0118,0.00953,0.349,0.25,122.985,1.464234e-07,1.0
2,0.56,0.81,-8.029,0.0872,0.0071,8e-06,0.241,0.247,170.044,4.00785e-07,1.0
3,0.525,0.699,-4.571,0.0353,0.0178,8.8e-05,0.0888,0.199,92.011,7.959809e-08,0.0
4,0.367,0.771,-5.863,0.106,0.365,1e-06,0.0965,0.163,115.917,4.693131e-07,1.0


In [None]:
# Column dtypes and non-null counts; a non-null count below the row count means missing values.
df3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 181785 entries, 0 to 181784
Data columns (total 11 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   danceability      181785 non-null  float64
 1   energy            181785 non-null  float64
 2   loudness          181785 non-null  float64
 3   speechiness       181785 non-null  float64
 4   acousticness      181785 non-null  float64
 5   instrumentalness  181785 non-null  float64
 6   liveness          181785 non-null  float64
 7   valence           181785 non-null  float64
 8   tempo             181784 non-null  float64
 9   spec_rate         181784 non-null  float64
 10  labels            181784 non-null  float64
dtypes: float64(11)
memory usage: 15.3 MB


In [None]:
# Separate features and target variable for the Spotify data.
# The columns are taken from df3 (not from `df`, which holds the Turkish music table) and are
# selected by name rather than by position, so the "labels" target can never end up inside X
# and a stray CSV row-index column ("Unnamed: ...") is never used as a feature.
spotify_features = [
    col for col in df3.columns
    if col != "labels" and not str(col).startswith("Unnamed")
]
X = df3[spotify_features].values  # Features (every column except "labels")
y = df3["labels"].values          # Target labels (emotion classes)

In [None]:
# Smallest feature value; NumPy returns nan here if any entry of X is NaN.
X.min()

nan

In [None]:
# Largest feature value; NumPy returns nan here if any entry of X is NaN.
X.max()

nan

In [None]:
# Display the (truncated) feature array to see where the NaN entries are.
X

array([[0.00000000e+00, 1.95000000e+05, 6.11000000e-01, ...,
        5.20000000e-01, 1.28050000e+02, 3.44615385e-07],
       [1.00000000e+00, 1.94641000e+05, 6.38000000e-01, ...,
        2.50000000e-01, 1.22985000e+02, 1.46423415e-07],
       [2.00000000e+00, 2.17573000e+05, 5.60000000e-01, ...,
        2.47000000e-01, 1.70044000e+02, 4.00785024e-07],
       ...,
       [8.57630000e+04, 3.23067000e+05, 7.53000000e-01, ...,
        6.64000000e-01, 9.21070000e+01, 6.56210631e-07],
       [8.57640000e+04, 1.69000000e+05, 8.63000000e-01, ...,
        5.69000000e-01, 9.54280000e+01, 1.55029586e-06],
       [8.57650000e+04, 2.53600000e+05, 5.96000000e-01, ...,
                   nan,            nan,            nan]])

In [None]:
# Display the (truncated) label array.
y

array([ 2.,  1.,  1., ...,  1.,  1., nan])

In [None]:
# Boolean row mask (despite the name, not integer indices): True wherever a sample has a NaN
# in any feature or in its label.
nan_indices = np.logical_or(np.isnan(X).any(axis=1), np.isnan(y))

# Keep only the complete rows; the same mask is applied to X and y so they stay aligned.
X_clean, y_clean = X[~nan_indices], y[~nan_indices]

In [None]:
# Lowest label value remaining after the NaN rows were removed.
y_clean.min()

0.0

In [None]:
# Highest label value remaining after the NaN rows were removed.
y_clean.max()

3.0

In [None]:
# Number of samples per class after NaN removal, to see how balanced the classes are.
# Wrapping in pd.Series works whether y_clean is a NumPy array or already a pandas Series.
class_counts = pd.Series(y_clean).value_counts()
print(class_counts)

1.0    28815
0.0    25176
2.0    19501
3.0    12273
Name: count, dtype: int64


In [None]:
# Hold out 20% of the cleaned rows for testing; stratify so both partitions keep the class mix.
X_train, X_test, y_train, y_test = train_test_split(
    X_clean,
    y_clean,
    test_size=0.2,
    stratify=y_clean,
    random_state=42,
)

# Scaling statistics come from the training partition only and are reused for the test partition.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit an RBF-kernel SVM on the standardized training features.
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42).fit(X_train_scaled, y_train)

# Predict the held-out rows and summarize the results.
y_pred = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)


Accuracy: 0.8694105987290853
Classification Report:
               precision    recall  f1-score   support

         0.0       0.90      0.88      0.89      5035
         1.0       0.82      0.86      0.84      5763
         2.0       0.87      0.81      0.84      3900
         3.0       0.94      0.95      0.94      2455

    accuracy                           0.87     17153
   macro avg       0.88      0.88      0.88     17153
weighted avg       0.87      0.87      0.87     17153

