# Music Genre Classification - ECS 171 Final Project

In [1]:
import kagglehub
import os
import pandas as pd
import numpy as np

In [7]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import (accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, confusion_matrix)
from sklearn.model_selection import KFold


from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import label_binarize
from tensorflow.keras.regularizers import l2


In [9]:
path = kagglehub.dataset_download("vicsuperman/prediction-of-music-genre")
print("Files in dataset folder:", os.listdir(path))
df = pd.read_csv(os.path.join(path, "music_genre.csv"))
df.head()

Files in dataset folder: ['music_genre.csv']


Unnamed: 0,instance_id,artist_name,track_name,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,obtained_date,valence,music_genre
0,32894.0,Röyksopp,Röyksopp's Night Out,27.0,0.00468,0.652,-1.0,0.941,0.792,A#,0.115,-5.201,Minor,0.0748,100.889,4-Apr,0.759,Electronic
1,46652.0,Thievery Corporation,The Shining Path,31.0,0.0127,0.622,218293.0,0.89,0.95,D,0.124,-7.043,Minor,0.03,115.002,4-Apr,0.531,Electronic
2,30097.0,Dillon Francis,Hurricane,28.0,0.00306,0.62,215613.0,0.755,0.0118,G#,0.534,-4.617,Major,0.0345,127.994,4-Apr,0.333,Electronic
3,62177.0,Dubloadz,Nitro,34.0,0.0254,0.774,166875.0,0.7,0.00253,C#,0.157,-4.498,Major,0.239,128.014,4-Apr,0.27,Electronic
4,24907.0,What So Not,Divide & Conquer,32.0,0.00465,0.638,222369.0,0.587,0.909,F#,0.157,-6.266,Major,0.0413,145.036,4-Apr,0.323,Electronic


In [10]:
# remove unnecessary columns
df.drop(['instance_id', 'artist_name', 'track_name', 'obtained_date'], axis=1, inplace=True, errors='ignore')

# remove rows with any missing values
df.dropna(inplace=True)

# reset the index after dropping rows
df.reset_index(drop=True, inplace=True)

# handle -1 or other placeholder strings in tempo
df['tempo'] = pd.to_numeric(df['tempo'], errors='coerce')
df.dropna(subset=['tempo'], inplace=True) 
df.reset_index(drop=True, inplace=True)

# one hot encode categorical features
df = pd.get_dummies(df, columns=['key', 'mode'], drop_first=True)

# define features x and target y
X = df.drop('music_genre', axis=1)
y_labels = df['music_genre'] # string labels

X_initial = df.drop('music_genre', axis=1)
y_labels = df['music_genre']

if 'instrumentalness' in X_initial.columns:
  X = X_initial.drop('instrumentalness', axis=1)

# encode target labels y to numerical values
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y_labels)
num_classes = len(label_encoder.classes_)

# split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# one-hot encode y_train for neural network (categorical_crossentropy expects this)
y_train_nn = to_categorical(y_train, num_classes=num_classes)
y_test_nn = to_categorical(y_test, num_classes=num_classes) # also for NN evaluation

# store feature names after one-hot encoding for NN input_dim
feature_names = X.columns.tolist()
input_dim_nn = X_train_scaled.shape[1]

print(f"number of features: {input_dim_nn}")
print(f"number of classes: {num_classes}")
print(f"x_train_scaled shape: {X_train_scaled.shape}")
print(f"y_train_nn shape: {y_train_nn.shape}")

number of features: 22
number of classes: 10
x_train_scaled shape: (31514, 22)
y_train_nn shape: (31514, 10)


In [11]:
def evaluate_classification_model(model_name, y_true_encoded, y_pred_encoded, y_prob=None):
    print(f"\n{model_name} eval")

    # calc accuracy
    accuracy = accuracy_score(y_true_encoded, y_pred_encoded)
    print(f"accuracy: {accuracy:.4f}")

    # calc f1 score, use weighetd cause its a common mlticlass adaptation, also zero div just incase theres not a pred
    f1 = f1_score(y_true_encoded, y_pred_encoded, average='weighted', zero_division=0)
    print(f"f1 score: {f1:.4f}")

    #calc precision
    precision = precision_score(y_true_encoded, y_pred_encoded, average='weighted', zero_division=0)
    print(f"precision: {precision:.4f}")

    #calculate recall
    recall = recall_score(y_true_encoded, y_pred_encoded, average='weighted', zero_division=0)
    print(f"recall: {recall:.4f}")

    #calc roc auc if probabilities are provided
    if y_prob is not None:
        # ensure y_true is binarized for roc_auc_score in multiclass
        y_true_binarized = label_binarize(y_true_encoded, classes=range(num_classes))
        roc_auc = roc_auc_score(y_true_binarized, y_prob, average='weighted', multi_class='ovr')
        print(f"roc auc score: {roc_auc:.4f}")

    #calculate confusion matrix
    conf_matrix = confusion_matrix(y_true_encoded, y_pred_encoded)
    print("confusion matrix:\n", conf_matrix)

In [12]:
# model 1: sgdclassifier
sgd_model = SGDClassifier(loss='log_loss',penalty='l2', alpha=0.0001, max_iter=1000, eta0=0.01, learning_rate='constant', tol=1e-3)

# pred labels and probabilities
sgd_model.fit(X_train_scaled, y_train)

y_pred_sgd = sgd_model.predict(X_test_scaled)

y_prob_sgd = sgd_model.predict_proba(X_test_scaled)

# evaluate sgdclassifier
evaluate_classification_model("sgdclassifier", y_test, y_pred_sgd, y_prob_sgd)


sgdclassifier eval
accuracy: 0.4624
f1 score: 0.4503
precision: 0.4538
recall: 0.4624
roc auc score: 0.8693
confusion matrix:
 [[ 234   18   44   10  262  216   86  105   55  319]
 [  26  710  123  186   62  180    2   49    0   11]
 [  45  220  400   35  176  138    2  247    2   76]
 [  14   54   14 1149   20   50    0   38    0   11]
 [  45   51  126   17  615  112   27  117    7  229]
 [  62  124   77   28   75  735   63  104   20   52]
 [  85    0    6    1   50  100  514   54  433  113]
 [  34   51  130  162  169  173   52  520   19   46]
 [ 111    0    2    1   64   56  440   31  456  190]
 [ 113    6    8    7  182   30    7   66   37  912]]


In [13]:
#model 2: random forest classififer
#n_jobs=-1 to use all available cores for faster training.
rf_model = RandomForestClassifier( n_estimators=50, random_state=42,n_jobs=-1,
                                  )
rf_model.fit(X_train_scaled, y_train)

y_pred_rf = rf_model.predict(X_test_scaled)
y_prob_rf = rf_model.predict_proba(X_test_scaled)
evaluate_classification_model("RandomForestClassifier", y_test, y_pred_rf, y_prob_rf)


RandomForestClassifier eval
accuracy: 0.5174
f1 score: 0.5166
precision: 0.5190
recall: 0.5174
roc auc score: 0.8996
confusion matrix:
 [[ 440   17   35    5  154   84  113  103   90  308]
 [  35  948  110  121   32   71    1   20    1   10]
 [  70  137  621   23  132   81    2  202    2   71]
 [  26   41   32 1149   15   27    0   55    0    5]
 [ 114   31   58    3  694   66   21  106   14  239]
 [ 107   87   80   15   58  725   31  153   37   47]
 [  59    0    1    0   11   14  498   14  688   71]
 [  65   21  171   99   84  145   40  665   21   45]
 [  69    0    0    0   12    9  704    3  420  134]
 [ 203    7   26    2  132    6   54   30   80  828]]


In [15]:
# model 3: nn with gridsearch 

def NNmodel_struct(learning_rate=0.001, dropout_rate=0.2, reg_lambda_l2=0.01, neurons_layer1=128, neurons_layer2=64):
    model = Sequential()

    # input layer

    # X_train_scaled.shape[1] is the # of features after scaling and onehot encoding
    model.add(Dense(neurons_layer1, activation='relu', input_shape=(X_train_scaled.shape[1],), kernel_regularizer=l2(reg_lambda_l2)))
    model.add(Dropout(dropout_rate))

    # using fixed 2 hidden layers
    model.add(Dense(neurons_layer2, activation='relu', kernel_regularizer=l2(reg_lambda_l2)))
    model.add(Dropout(dropout_rate))

    # output layer for multi-class classification
    model.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)

    # use cross entropy cuz its the best for this architecutre
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# define par grid for manual search
param_grid_nn_manual = {
    'neurons_layer1': [64, 128],
    'neurons_layer2': [32, 64],
    'dropout_rate': [0.2, 0.3],
    'learning_rate': [0.001, 0.0005],
    'reg_lambda_l2': [0.01, 0.001],
    'batch_size': [32, 64],
    'epochs': [25, 50]
}

# store results from manual grid search
results_nn_manual = []
best_nn_accuracy = 0.0
best_nn_params = {}
best_nn_model_manual = None

# for kfold cross validation in the manual loop
n_folds = 2
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

print("starting gridsearch for nn")

# go thru all combos of parameters
# itertools.product to create all combinations causde the gridsearch lib wasnt working
import itertools
keys, values = zip(*param_grid_nn_manual.items())
param_combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

for i, params in enumerate(param_combinations):
    print(f"testing combo {i+1}/{len(param_combinations)}: {params}")
    fold_accuracies = []

    # k fold cross-validation loop
    for fold, (train_index, val_index) in enumerate(kf.split(X_train_scaled, y_train_nn)):
        print(f"  fold {fold+1}/{n_folds}")
        X_train_fold, X_val_fold = X_train_scaled[train_index], X_train_scaled[val_index]
        y_train_fold, y_val_fold = y_train_nn[train_index], y_train_nn[val_index] # use one-hot encoded targets

        # create and compile the model with current params
        # extract specific params for model creation and others for fit
        model_params = {k: params[k] for k in ['neurons_layer1', 'neurons_layer2', 'dropout_rate', 'learning_rate', 'reg_lambda_l2']}
        current_model = NNmodel_struct(**model_params)

        # train the model
        current_model.fit(X_train_fold, y_train_fold, epochs=params['epochs'], batch_size=params['batch_size'], verbose=0) 

        # eval on the validation fold
        _, accuracy_val = current_model.evaluate(X_val_fold, y_val_fold, verbose=0)
        fold_accuracies.append(accuracy_val)

    # average acc across folds
    avg_fold_accuracy = np.mean(fold_accuracies)
    print(f"  avg validation accuracy: {avg_fold_accuracy:.4f}")
    results_nn_manual.append({'params': params, 'accuracy': avg_fold_accuracy})

    # update best model if current is better
    if avg_fold_accuracy > best_nn_accuracy:
        best_nn_accuracy = avg_fold_accuracy
        best_nn_params = params

print(f"\ngridsearch complete.")
print(f"best nn validation accuracy: {best_nn_accuracy:.4f}")
print(f"best nn params: {best_nn_params}")

print("training the best nn model")
final_model_params = {k: best_nn_params[k] for k in ['neurons_layer1', 'neurons_layer2', 'dropout_rate', 'learning_rate', 'reg_lambda_l2']}
best_nn_model_manual = NNmodel_struct(**final_model_params)

# use one hot
best_nn_model_manual.fit(X_train_scaled, y_train_nn, epochs=best_nn_params['epochs'], batch_size=best_nn_params['batch_size'], verbose=1) # show progress for final model

# pred labels and probabilities with the best nn model
y_prob_nn_manual = best_nn_model_manual.predict(X_test_scaled)
#convert probabilities to class label
y_pred_nn_manual_encoded = np.argmax(y_prob_nn_manual, axis=1)

#evaluate the best nn model
evaluate_classification_model("neural network", y_test, y_pred_nn_manual_encoded, y_prob_nn_manual)

starting gridsearch for nn
testing combo 1/128: {'neurons_layer1': 64, 'neurons_layer2': 32, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'reg_lambda_l2': 0.01, 'batch_size': 32, 'epochs': 25}
  fold 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  fold 2/2
  avg validation accuracy: 0.5257
testing combo 2/128: {'neurons_layer1': 64, 'neurons_layer2': 32, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'reg_lambda_l2': 0.01, 'batch_size': 32, 'epochs': 50}
  fold 1/2
  fold 2/2
  avg validation accuracy: 0.5348
testing combo 3/128: {'neurons_layer1': 64, 'neurons_layer2': 32, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'reg_lambda_l2': 0.01, 'batch_size': 64, 'epochs': 25}
  fold 1/2
  fold 2/2
  avg validation accuracy: 0.5283
testing combo 4/128: {'neurons_layer1': 64, 'neurons_layer2': 32, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'reg_lambda_l2': 0.01, 'batch_size': 64, 'epochs': 50}
  fold 1/2
  fold 2/2
  avg validation accuracy: 0.5296
testing combo 5/128: {'neurons_layer1': 64, 'neurons_layer2': 32, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'reg_lambda_l2': 0.001, 'batch_size': 32, 'epochs': 25}
  fold 1/2
  fold 2/2
  avg validation accuracy: 0.5462
testing combo 6/128: {'neurons_layer1': 64, 'neurons_layer2': 32, 