In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn import preprocessing

In [26]:
# Load data
# NOTE: unpickling can execute arbitrary code; only load a
# "rawFeaturesGTZAN.pkl" that comes from a trusted source.
df = pd.read_pickle("rawFeaturesGTZAN.pkl")

# Feature table: every column except "path" and "class".
data = df.drop(["path", "class"], axis=1)
y = df["class"].to_numpy()

# Split into train and test
# The split is drawn on row positions first so that the scaler can be fitted
# on the training rows only. Fitting it on all rows would leak test-set
# statistics (mean / std) into the training features. For a fixed
# random_state the stratified partition depends only on the number of rows
# and on y, so the same rows land in train / test as when splitting X itself.
train_idx, test_idx = train_test_split(
    np.arange(len(data)),
    test_size=0.25,
    random_state=42,
    shuffle=True,
    stratify=y,
)

scaler = preprocessing.StandardScaler()
scaler.fit(data.iloc[train_idx])
# `data` remains a fully standardised frame, now using training-set statistics.
data[data.columns] = scaler.transform(data)

X = data.to_numpy()
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

In [27]:
# Turn the 1-D label vector into a single column so it can be appended to
# the right of the training feature matrix.
y_trainExpanded = y_train[:, np.newaxis]
trainDataMat = np.hstack((X_train, y_trainExpanded))

In [28]:
# Column labels of the feature table followed by the label column.
headers = [*data.columns, "class"]
# Wrap the combined feature/label matrix in a labelled frame and write it out.
trainData = pd.DataFrame(trainDataMat, columns=headers)
trainData.to_csv("trainGTZAN.csv")

In [18]:
# Read "name;importance" records, one per line, from the semicolon-separated
# importance file.
with open("featureImportance.csv") as handle:
    records = [line.rstrip().split(";") for line in handle]
featureImportance = [[fields[0], float(fields[1])] for fields in records]
score = np.array([importance for _, importance in featureImportance])

# Keep the features whose importance is at least the mean importance.
THRESHOLD = score.mean()
overThreshold = [entry for entry in featureImportance if entry[1] >= THRESHOLD]
# NOTE(review): every "." in a feature name is replaced with "-". This yields
# 'spectral-bandwith-std', while a column listing printed elsewhere in this
# notebook shows 'spectral-bandwith.std' -- confirm the labels match the
# DataFrame columns before using them to select or drop columns.
overThresholdLabels = [name.rstrip().replace(".", "-") for name, _ in overThreshold]
print(overThresholdLabels)

['spectral-contrast7-mean', 'spectral-rolloff-std', 'mfcc1-mean', 'spectral-bandwith-mean', 'spectral-rolloff-mean', 'rmse-mean', 'spectral-contrast5-mean', 'spectral-centroid-std', 'rmse-std', 'spectral-centroid-mean', 'mfcc12-mean', 'spectral-bandwith-std', 'mfcc10-mean', 'mfcc4-mean', 'spectral-contrast4-mean', 'mfcc6-mean', 'mfcc8-mean', 'chroma7-mean', 'mfcc4-std', 'mfcc2-mean', 'zero-cross-rate-std', 'chroma5-mean', 'mfcc7-std', 'zero-cross-rate-mean', 'spectral-contrast3-mean', 'mfcc13-mean', 'mfcc6-std', 'mfcc14-mean', 'mfcc15-mean', 'mfcc5-std', 'chroma4-mean', 'chroma6-mean', 'spectral-contrast3-std', 'spectral-contrast6-mean']


In [7]:
def evalRandomForest(X_train, X_test, y_train, n=100):
    """Fit a random forest on the training data and predict labels for X_test.

    Parameters:
        X_train: training features passed to ``fit``.
        X_test: features to predict on.
        y_train: training labels passed to ``fit``.
        n: number of trees (``n_estimators``); defaults to 100.

    Returns:
        The predictions produced by the fitted forest for ``X_test``.
    """
    # random_state is fixed so repeated runs build the same forest.
    forest = RandomForestClassifier(n_estimators=n, random_state=42)
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
    
def getMetrics(y_pred, y_test, modelName):
    """Print accuracy and macro-averaged precision, recall and F1 for a model.

    Parameters:
        y_pred: predicted labels.
        y_test: true labels.
        modelName: name shown in the printed report header.

    Returns:
        None. The four metrics are printed, each rounded to 4 decimals.
    """
    # All scorers take (y_true, y_pred). accuracy_score was previously called
    # with the arguments swapped; the value is the same because accuracy is
    # symmetric, but the order now matches the other three calls.
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    print(f"Model: {modelName}")
    print(f"- CA: {round(acc,4)}")
    print(f"- Precision: {round(precision,4)}")
    print(f"- Recall: {round(recall,4)}")
    print(f"- F1: {round(f1,4)}")

In [25]:
# dataOverThreshold = data.drop(labels=overThresholdLabels, axis=1)

Index(['path', 'rmse-mean', 'rmse-std', 'zero-cross-rate-mean',
       'zero-cross-rate-std', 'tempo', 'spectral-centroid-mean',
       'spectral-centroid-std', 'spectral-bandwith-mean',
       'spectral-bandwith.std', 'spectral-rolloff-mean',
       'spectral-rolloff-std', 'spectral-contrast1-mean',
       'spectral-contrast1-std', 'spectral-contrast2-mean',
       'spectral-contrast2-std', 'spectral-contrast3-mean',
       'spectral-contrast3-std', 'spectral-contrast4-mean',
       'spectral-contrast4-std', 'spectral-contrast5-mean',
       'spectral-contrast5-std', 'spectral-contrast6-mean',
       'spectral-contrast6-std', 'spectral-contrast7-mean',
       'spectral-contrast7-std', 'chroma1-mean', 'chroma1-std', 'chroma2-mean',
       'chroma2-std', 'chroma3-mean', 'chroma3-std', 'chroma4-mean',
       'chroma4-std', 'chroma5-mean', 'chroma5-std', 'chroma6-mean',
       'chroma6-std', 'chroma7-mean', 'chroma7-std', 'chroma8-mean',
       'chroma8-std', 'chroma9-mean', 'chroma9-std'