In [35]:
%matplotlib inline
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

import numpy as np
import math
from matplotlib import pyplot as plt

In [36]:

# Load the feature table from CSV.
# NOTE(review): this is an absolute, machine-specific path; it will not resolve on another machine.
df = pd.read_csv('/Users/chadakeng/Desktop/NumCom/project/Music-Genre-Classifier/features_30_sec.csv')

# Genres the classifier should distinguish.
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

# Keep only the rows whose `label` value is one of the genres listed above.
df = df.loc[df['label'].isin(genres)]

# Target (y) is the genre label; features (X) are all remaining columns
# once the two non-numeric columns are removed.
y = df['label']
X = df.drop(columns=['filename', 'label'])

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1000,
)

In [37]:
# Standardize the features. The scaling statistics are learned from the training
# split only, then the same transformation is applied to both splits so no
# information from the test split leaks into the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


Softmax Function
Note: softmax is the core of multinomial logistic regression, which this notebook needs in order to classify more than two genres. Background: https://www.youtube.com/watch?v=Mi992wr6zKc&ab_channel=GopalPrasadMalakar

In [38]:
def softmax(z):
    """Turn raw class scores into probabilities that sum to 1.

    Softmax generalizes the sigmoid to several classes (jazz, rock, pop,
    blues, ...) instead of a single yes/no decision for one genre.

    Parameters
    ----------
    z : array-like
        Either one score vector of shape (n_classes,) or a batch of score
        vectors of shape (n_samples, n_classes).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``z``. A 1-D input sums to 1; for a
        2-D input every row sums to 1.
    """
    z = np.asarray(z)
    # A single score vector is normalized over its only axis; a batch is
    # normalized row by row (one row per sample).
    axis = 0 if z.ndim == 1 else 1
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    exp_z = np.exp(z - np.max(z, axis=axis, keepdims=True))
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)

Multinomial Logistic Regression

In [39]:
class MultinomialLogisticRegression():
    """Multinomial (softmax) logistic regression trained by batch gradient descent.

    Attributes
    ----------
    lr : float
        Learning rate (step size) of each gradient-descent update.
    n_iters : int
        Number of gradient-descent iterations run by ``fit``.
    weights : numpy.ndarray or None
        Shape (n_classes, n_features) after ``fit``; ``None`` before.
    bias : numpy.ndarray or None
        Shape (n_classes,) after ``fit``; ``None`` before.
    classes_ : numpy.ndarray
        Sorted unique labels seen in ``fit``. Column ``k`` of the model
        corresponds to ``classes_[k]``. Only set once ``fit`` has run.
    """

    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr  # learning rate
        self.n_iters = n_iters  # number of iterations of the gradient descent loop
        # Unknown until fit() has seen the data, so start as None.
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn weights and bias from training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Numeric feature matrix.
        y : array-like, shape (n_samples,)
            Class label of each sample.

        Returns
        -------
        MultinomialLogisticRegression
            ``self``, so that calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from
            the number of labels.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D (n_samples, n_features); got %d dimension(s)" % X.ndim
            )
        n_samples, n_features = X.shape
        y = np.asarray(y).ravel()
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if len(y) != n_samples:
            raise ValueError(
                "X has %d samples but y has %d labels" % (n_samples, len(y))
            )

        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)  # how many different genres we have

        self.weights = np.zeros((n_classes, n_features))
        self.bias = np.zeros(n_classes)

        # Binary indicator matrix: row i has a 1 in the column of sample i's class.
        y_encoded = self._one_hot(y)

        for _ in range(self.n_iters):
            # Difference between predicted probabilities and the true indicators.
            error = self._forward(X) - y_encoded
            # Average gradient over the samples, laid out directly in the
            # (n_classes, n_features) shape of the weights.
            self.weights -= self.lr * (error.T @ X) / n_samples
            self.bias -= self.lr * error.mean(axis=0)
        return self

    def predict(self, X):
        """Return the most probable class index for each row of ``X``.

        The returned integers are positions into ``self.classes_``, not the
        labels themselves; map them with ``self.classes_[index]``.

        Raises
        ------
        AttributeError
            If called before ``fit``. This is the same exception type the
            unfitted model raised before, now with an explicit message.
        """
        if self.weights is None or self.bias is None:
            raise AttributeError(
                "model is not fitted yet; call fit() before predict()"
            )
        return np.argmax(self._forward(np.asarray(X, dtype=float)), axis=1)

    def _forward(self, X):
        """Return class probabilities of shape (n_samples, n_classes) for 2-D ``X``."""
        return softmax(np.dot(X, self.weights.T) + self.bias)

    def _one_hot(self, y):
        """Encode labels as a float indicator matrix of shape (len(y), n_classes).

        Column ``k`` corresponds to ``self.classes_[k]``. A label that is not
        in ``self.classes_`` yields an all-zero row.
        """
        labels = np.asarray(y).ravel()
        # Compare every label with every known class in one broadcasted step.
        return (labels[:, None] == self.classes_[None, :]).astype(float)

X (Input Matrix): This is a matrix where each row represents a data sample and each column represents a feature. If you have n_samples data points and n_features features, the dimension of X would be n_samples x n_features.

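Weights: This is a matrix with one row per class and one column per feature, so its dimension is n_classes x n_features. (A single weight vector of size n_features x 1 is the binary logistic regression case; here each genre gets its own weight vector.) The bias is a vector of length n_classes.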

Gradient descent does not compute coefficients of the softmax function itself; it adjusts the weights w_i and biases b that form the linear combination fed into the softmax function.

In [40]:
# Build the classifier and fit it on the training split.
model = MultinomialLogisticRegression(lr=0.01, n_iters=1000)
model.fit(X_train, y_train)

# predict() yields positions into model.classes_, not genre names.
predictions = model.predict(X_test)

# Translate each predicted position back into its genre name.
label_mapping = dict(enumerate(model.classes_))
string_predictions = [label_mapping[position] for position in predictions]

# Show the confusion matrix followed by the per-genre precision/recall report.
print(
    confusion_matrix(y_test, string_predictions),
    classification_report(y_test, string_predictions),
    sep='\n',
)

[[10  1  1  0  0  2  0  0  0  0]
 [ 0 17  1  0  0  1  0  0  0  0]
 [ 3  1 13  1  0  1  0  2  0  3]
 [ 1  0  0  9  5  0  1  1  2  4]
 [ 0  0  0  0  9  1  1  0  3  0]
 [ 1  4  0  1  0 20  0  2  1  0]
 [ 1  0  0  1  0  0 15  0  0  1]
 [ 0  0  0  0  0  0  0 18  2  1]
 [ 1  0  1  0  4  0  0  1 17  0]
 [ 4  0  0  1  1  1  3  0  0  4]]
              precision    recall  f1-score   support

       blues       0.48      0.71      0.57        14
   classical       0.74      0.89      0.81        19
     country       0.81      0.54      0.65        24
       disco       0.69      0.39      0.50        23
      hiphop       0.47      0.64      0.55        14
        jazz       0.77      0.69      0.73        29
       metal       0.75      0.83      0.79        18
         pop       0.75      0.86      0.80        21
      reggae       0.68      0.71      0.69        24
        rock       0.31      0.29      0.30        14

    accuracy                           0.66       200
   macro avg       