#### **Below are presented models that classify music genres using classic machine learning methods, i.e. KNN. Training and prediction are made on raw audio, mel-spectrogram and stft datasets with the adjustment of various parameters.**

##### Libraries that need to be imported

In [1]:
import os
import numpy as np
import pandas as pd
from typing import Tuple

from sklearn.ensemble import RandomForestClassifier
from  sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from skimage.io import imread

# KNN (K-Nearest Neighbors)

##### Before fitting the model it was necessary to process and label each data. For this purpose the function below was made.

In [2]:
# Data processing and labeling
def process_and_label(data_path: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Process and label the data.

    Args:
        data_path (str): The path to the data.

    Returns:
        Tuple[np.ndarray, np.ndarray]: A tuple containing the processed data and labels.
    """
    genres = os.listdir(data_path)

    X = []
    y = []

    for genre_id, genre in enumerate(genres):
        genre_path = os.path.join(data_path, genre)
        for image_file in os.listdir(genre_path):
            if image_file.endswith(".png"):
                image = imread(os.path.join(genre_path, image_file))
                # image_resized = resize(image, (100, 100), anti_aliasing=True)
                X.append(image.flatten())
                y.append(genre_id)

    X = np.array(X)
    y = np.array(y)
    
    return X, y

##### For each dataset classifier KNeighborsClassifier takes as n_neighbors parameter values appropriately 5, 10, 15 and 20.

## Raw-Audio

In [24]:
# Raw_audio processing and splitting
data_path = "./Data/images_augmented/raw_audio"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

##### 5 Nearest Neighbors

In [25]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
raw_accuracy_5 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(raw_accuracy_5))

Accuracy: 0.21


##### 10 Nearest Neighbors

In [26]:
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
raw_accuracy_10 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(raw_accuracy_10))

Accuracy: 0.20


##### 15 Nearest Neighbors

In [27]:
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
raw_accuracy_15 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(raw_accuracy_15))

Accuracy: 0.21


##### 20 Nearest Neighbors

In [28]:
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
raw_accuracy_20 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(raw_accuracy_20))

Accuracy: 0.20


## Mel-Spectrogram

In [29]:
# Mel-Spectrogram processing and splitting
data_path = "./Data/images_augmented/mel_spectrogram"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

##### 5 Nearest Neighbors

In [30]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
mel_accuracy_5 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(mel_accuracy_5))

Accuracy: 0.41


##### 10 Nearest Neighbors

In [31]:
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
mel_accuracy_10 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(mel_accuracy_10))

Accuracy: 0.39


##### 15 Nearest Neighbors

In [32]:
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
mel_accuracy_15 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(mel_accuracy_15))

Accuracy: 0.38


##### 20 Nearest Neighbors

In [33]:
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
mel_accuracy_20 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(mel_accuracy_20))

Accuracy: 0.39


## STFT (Short-Time Fourier Transform)

In [14]:
# STFT processing and splitting
data_path = "./Data/images_augmented/stft"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

##### 5 Nearest Neighbors

In [8]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
stft_accuracy_5 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(stft_accuracy_5))

Accuracy: 0.55


##### 10 Nearest Neighbors

In [9]:
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
stft_accuracy_10 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(stft_accuracy_10))

Accuracy: 0.51


##### 15 Nearest Neighbors

In [10]:
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
stft_accuracy_15 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(stft_accuracy_15))

Accuracy: 0.51


##### 20 Nearest Neighbors

In [15]:
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
stft_accuracy_20 = accuracy_score(y_test, y_pred)
print("Accuracy:{:.2f}".format(stft_accuracy_20))

Accuracy:0.49


In [61]:
raw_accuracy_5 = float(raw_accuracy_5)
raw_accuracy_10 = float(raw_accuracy_10)
raw_accuracy_15 = float(raw_accuracy_15)
raw_accuracy_20= float(raw_accuracy_20)

In [62]:
mel_accuracy_5 = float(mel_accuracy_5)
mel_accuracy_10 = float(mel_accuracy_10)
mel_accuracy_15 = float(mel_accuracy_15)
mel_accuracy_20= float(mel_accuracy_20)

In [64]:
stft_accuracy_5 = float(stft_accuracy_5)
stft_accuracy_10 = float(stft_accuracy_10)
stft_accuracy_15 = float(stft_accuracy_15)
stft_accuracy_20= float(stft_accuracy_20)

In [65]:
df = pd.DataFrame(index=["Raw audio", "Mel-Spectrogram", "STFT"], columns=["5", "10", "15", "20"])
df["5"] = [raw_accuracy_5, mel_accuracy_5, stft_accuracy_5]
df["10"] = [raw_accuracy_10, mel_accuracy_10, stft_accuracy_10]
df["15"] = [raw_accuracy_15, mel_accuracy_15, stft_accuracy_15]
df["20"] = [raw_accuracy_20, mel_accuracy_20, stft_accuracy_20]

##### Scores for all datasets with different n_neighbor parameter

In [66]:
df.to_csv('classic_methods_scores.csv')
df

Unnamed: 0,5,10,15,20
Raw audio,0.21,0.2,0.21,0.2
Mel-Spectrogram,0.41,0.39,0.38,0.39
STFT,0.55,0.51,0.51,0.49


# Random Forest

## Raw audio

In [3]:
# Raw_audio processing and splitting
data_path = "./Data/images_augmented/raw_audio"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
clf = RandomForestClassifier(max_depth=10)
clf.fit(X, y)
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(acc))

Accuracy: 0.85


## Mel-Spectrogram

In [5]:
# Mel-Spectrogram processing and splitting
data_path = "./Data/images_augmented/mel_spectrogram"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
clf = RandomForestClassifier(max_depth=10)
clf.fit(X, y)
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(acc))

Accuracy: 1.00


## STFT (Short-Time Fourier Transform)

In [7]:
# STFT processing and splitting
data_path = "./Data/images_augmented/stft"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
clf = RandomForestClassifier(max_depth=10)
clf.fit(X, y)
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(acc))

Accuracy: 1.00


# Decision Tree

## Raw audio

In [3]:
data_path = "./Data/images_augmented/raw_audio"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=2, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_1 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_1))

Accuracy: 0.12


In [6]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=5, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_2 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_2))

Accuracy: 0.13


In [7]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=10, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_3 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_3))

Accuracy: 0.16


In [4]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=20, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_4 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_4))

Accuracy: 0.16


## Mel-Spectrogram

In [5]:
data_path = "./Data/images_augmented/mel_spectrogram"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=2, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_mel1 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_mel1))

Accuracy: 0.24


In [7]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=5, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_mel2 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_mel2))

Accuracy: 0.28


In [8]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=10, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_mel3 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_mel3))

Accuracy: 0.31


## STFT (Short-Time Fourier Transform)

In [9]:
# STFT processing and splitting
data_path = "./Data/images_augmented/stft"
X, y = process_and_label(data_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=2, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_stft1 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_stft1))

Accuracy: 0.23


In [11]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=5, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_stft2 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_stft2))

Accuracy: 0.30


In [12]:
dt_classifier = DecisionTreeClassifier(random_state=42, max_depth=10, min_samples_split=5, min_samples_leaf=5)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)

dt_accuracy_stft3 = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(dt_accuracy_stft3))

Accuracy: 0.32
