## Music Genre Analysis

### Imports

In [None]:
# Library imports

import pandas as pd

# Lift the column limit so wide DataFrames are displayed without truncation.
pd.set_option("display.max_columns", None)

### Importing the dataset and basic EDA

In [None]:
# Dataset import

# Read the feature table from the CSV file (path is relative to the working directory).
df = pd.read_csv("./Data/features_30_sec.csv")
# Preview the first rows as the cell output.
df.head()

In [None]:
# Dimensions of the dataset as (rows, columns)
df.shape

In [None]:
# Column names, dtypes and non-null counts
df.info()

#### General data explanation

Each of the musical characteristics is split into a pair of '_mean' and '_var' labeled columns:

- **mean** - represents the mean of the characteristic's values
- **var** - represents the variance of the characteristic's values

Characteristics of the dataset samples:

- **chroma_stft** - (Chroma Short-Time Fourier Transformation) - represents energy distribution of pitches in audio signals
- **rms** - (Root Mean Square) - represents the amplitude of the audio signal
- **spectral_centroid** - indicates where the center of mass of the spectrum is located and gives an idea about the sound brightness
- **spectral_bandwidth** - describes the width of the spectral band around the spectral centroid
- **rolloff** - (Spectral roll-off) - represents the frequency below which a specified percentage of the total spectral energy is contained
- **zero_crossing_rate** - indicates the rate at which the signal changes its sign; related to the noisiness or percussiveness of the sound
- **harmony** - represents the overall tonal quality and harmonic content of the sound
- **perceptr** - (Perceptual spread) - represents the perceived spread of the sound frequency
- **tempo** - represents the pace or speed of the music sample, measured in beats per minute (BPM)
- **mfcc** - (Mel-Frequency Cepstral Coefficients) - coefficient representation of the short-term power spectrum of the sound signal

In [None]:
# Summary statistics, transposed so that each feature is shown as a row
df.describe().transpose()

In [None]:
# Check for missing values

# Number of missing (NaN) entries in each column
df.isna().sum()

In [None]:
# Number of rows for each distinct value of the 'label' column
df['label'].value_counts().reset_index()

In [None]:
from matplotlib import pyplot as plt

# Keep only the feature columns: the two string columns are excluded

df_trimmed = df.drop(columns=["filename", "label"])
columns_to_plot = df_trimmed.columns

# One subplot row per feature, stacked vertically in a single tall figure

fig, axes = plt.subplots(nrows=len(columns_to_plot), ncols=1, figsize=(12, 60))

# Draw every feature's histogram on its own axes, titled with the column name

for ax, feature in zip(axes, columns_to_plot):
    ax.hist(df_trimmed[feature], color='blue', alpha=0.7)
    ax.set_title(feature)

plt.tight_layout()
plt.show()


In [None]:
import re

# Keep only the first MFCC pair (mfcc1_mean / mfcc1_var) and drop every
# higher-numbered mfcc*_mean / mfcc*_var column.

columns = df_trimmed.columns

columns_to_drop = []

for column in columns:
    if column.startswith("mfcc") and ("_mean" in column or "_var" in column):
        # The digits right after "mfcc" are the coefficient index
        match = re.search(r"mfcc(\d+)_", column)
        if match and int(match.group(1)) > 1:
            columns_to_drop.append(column)

# Rebind to a new frame instead of mutating in place, so the cell's effect is
# explicit and re-running it stays harmless.
df_trimmed = df_trimmed.drop(columns=columns_to_drop)

print(df_trimmed.columns)
print(f'Columns: {len(df_trimmed.columns)}')

In [None]:
# Draw the correlation matrix

import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlations between the columns left in df_trimmed
correlation_matrix = df_trimmed.corr()

# Annotated heatmap drawn on an explicitly created axes
fig, ax = plt.subplots(figsize=(16, 8))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation matrix")
plt.show()

### Visualizing the data

In [None]:
# Get every unique genre in the dataset

# Distinct values of the 'label' column; used below as the genre folder names
genres = df['label'].unique()
print(genres)

In [None]:
# Create a dictionary with file names for each music genre

import os

# Map every genre to the .wav file names found in its folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the listing identical across runs and machines.
audio_dir_contents = {
    genre: sorted(
        file
        for file in os.listdir(f"./Data/genres_original/{genre}")
        if file.endswith(".wav")
    )
    for genre in genres
}

print(audio_dir_contents)

In [None]:
# Select a random file from each music genre

import random

# Pick one file per genre with a dedicated, seeded generator so the same
# samples (and therefore the same plots below) are produced on every run.
# The candidates are sorted first so the pick does not depend on listing order.
selection_rng = random.Random(42)

random_audio_selection = {
    genre: selection_rng.choice(sorted(files))
    for genre, files in audio_dir_contents.items()
}

print(random_audio_selection)

In [None]:
# Load the audio with Librosa

import librosa

# Decode the selected clip of every genre
audios = {}
sample_rates = {}
for genre, file_name in random_audio_selection.items():
    clip_path = f'./Data/genres_original/{genre}/{file_name}'
    # sr=45600 asks librosa to resample the clip to that rate while loading.
    # NOTE(review): 45600 Hz is an unusual rate — confirm it is intended.
    samples, rate = librosa.load(clip_path, sr=45600)
    audios[genre] = samples
    # The rate is stored under the key '<genre>:sr'
    sample_rates[f'{genre}:sr'] = rate

In [None]:
# Add a color dictionary for audio plots

# Maps a genre name to the matplotlib color name used when plotting its audio.
audio_colors = {
    'blues': 'blue',
    'classical': 'purple',
    'country': 'brown',
    'disco': 'gray',
    'hiphop': 'red',
    'jazz': 'violet',
    'metal': 'black',
    'pop': 'pink',
    'reggae': 'green',
    'rock': 'orange'
}

#### Audio plot descriptions

- **Waveplot** - represents the change of air pressure over time, i.e. the loudness of the audio sample at a given moment
- **STFT plot** - represents frequency components and their change over time
- **Spectrogram plot** - represents how the frequency content of a signal changes over time
- **Spectral roll-off plot** - represents the distribution of energy across the frequency spectrum
- **MFCC** - represents the frequency composition and the dynamics of the audio samples

In [None]:
# Waveplots

# Explicit submodule import: older librosa releases do not expose
# `librosa.display` through a plain `import librosa`.
import librosa.display

for audio in audios.keys():
    plt.figure(figsize=(12, 4))
    plt.title(f'Waveplot for {audio} music genre', size=15)
    # Pass the rate the clip was loaded with. Without `sr`, waveshow assumes
    # 22050 Hz and the time axis would not match the loaded audio.
    librosa.display.waveshow(
        audios[audio],
        sr=sample_rates[f'{audio}:sr'],
        color=audio_colors[audio],
    )
    plt.show()

In [None]:
# STFT plots

for genre, samples in audios.items():
    # Sample rates are stored under '<genre>:sr'; look the rate up by key
    # instead of relying on the two dictionaries sharing the same order.
    rate = sample_rates[f'{genre}:sr']
    # The STFT is complex-valued. Plot its magnitude explicitly rather than
    # handing complex data to specshow.
    stft_magnitude = abs(librosa.stft(samples))
    plt.figure(figsize=(12, 4))
    plt.title(f'Short Time Fourier Transformation plot for {genre} music genre', size=15)
    librosa.display.specshow(stft_magnitude, sr=rate, x_axis='time', y_axis='hz')
    plt.colorbar()
    # Render each figure as it is produced, like the other plotting cells
    plt.show()

In [None]:
# Spectrogram plots

# Both dictionaries were filled in the same loop, so iterating them in
# parallel pairs every genre with the key of its sample rate.
for genre, rate_key in zip(audios, sample_rates):
    magnitude = abs(librosa.stft(audios[genre]))
    # Convert the STFT magnitude to decibels before plotting
    stft_db = librosa.amplitude_to_db(magnitude)
    plt.figure(figsize=(12, 4))
    plt.title(f'Spectrogram plot for {genre} music genre', size=15)
    librosa.display.specshow(stft_db, sr=sample_rates[rate_key], x_axis='time', y_axis='hz')
    plt.colorbar()

In [None]:
# Spectral roll-offs plots

# Both dictionaries were filled in the same loop, so iterating them in
# parallel pairs every genre with the key of its sample rate.
for genre, rate_key in zip(audios, sample_rates):
    # Only the first row of the returned array is plotted
    rolloff_per_frame = librosa.feature.spectral_rolloff(
        y=audios[genre], sr=sample_rates[rate_key]
    )[0]
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.set_title(f'Spectral Rolloff plot for {genre} music genre', size=15)
    ax.plot(rolloff_per_frame, color=audio_colors[genre])
    ax.set_xlabel('Frame')
    ax.set_ylabel('Spectral Rolloff')
    plt.show()

In [None]:
# MFCC plots

# Both dictionaries were filled in the same loop, so iterating them in
# parallel pairs every genre with the key of its sample rate.
for genre, rate_key in zip(audios, sample_rates):
    rate = sample_rates[rate_key]
    coefficients = librosa.feature.mfcc(y=audios[genre], sr=rate)
    plt.figure(figsize=(12, 4))
    plt.title(f'MFCC plot for {genre} music genre', size=15)
    librosa.display.specshow(coefficients, sr=rate, x_axis='time')
    plt.show()

### Building the model

#### Splitting the data

In [None]:
from sklearn.model_selection import train_test_split

# Target (y) is the 'label' column; features (X) are all columns except the
# two string columns

y = df['label']
X = df.drop(columns=['filename', 'label'])

# Stratified 60 / 20 / 20 split: hold out 40% of the rows first, then cut the
# held-out part in half to get the validation and test sets

X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

#### Scaling the data

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to all three splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

#### Training the model (SVM)

In [None]:
from sklearn.svm import SVC

# Fit one SVM per kernel on the scaled training data (fit() returns the
# fitted estimator itself, so construction and training can be chained)

svm_linear = SVC(kernel='linear').fit(X_train_scaled, y_train)
svm_rbf = SVC(kernel='rbf').fit(X_train_scaled, y_train)

# Show the RBF estimator as the cell output
svm_rbf

In [None]:
# Parameters the linear-kernel SVM was created with
svm_linear.get_params()

In [None]:
# Parameters the RBF-kernel SVM was created with
svm_rbf.get_params()

#### Testing the model (SVM)

In [None]:
from sklearn.metrics import classification_report

# Evaluate the linear kernel SVM on the validation split.
# y_true must be the ground-truth labels and y_pred the model output:
# swapping them exchanges precision with recall and makes the support column
# count predictions instead of actual samples.

y_pred_svm_linear = svm_linear.predict(X=X_val_scaled)
print(f'Classification report for the linear kernel:\n {classification_report(y_true=y_val, y_pred=y_pred_svm_linear)}')

# Evaluate the RBF kernel SVM on the validation split

y_pred_svm_rbf = svm_rbf.predict(X=X_val_scaled)
print(f'Classification report for the RBF kernel:\n {classification_report(y_true=y_val, y_pred=y_pred_svm_rbf)}')

#### Summary chart

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

def extract_scores(report):
    """Return the (precision, recall, f1-score) values of one report entry.

    `report` is a mapping holding the keys 'precision', 'recall' and
    'f1-score'. A missing key raises KeyError.
    """
    return tuple(report[metric] for metric in ('precision', 'recall', 'f1-score'))

# Dictionary-form reports for both models, computed against y_val

report_svm_linear = classification_report(y_true=y_val, y_pred=y_pred_svm_linear, output_dict=True)
report_svm_rbf = classification_report(y_true=y_val, y_pred=y_pred_svm_rbf, output_dict=True)

# Take precision / recall / F1 from the 'macro avg' entry of each report

precision_svm_linear, recall_svm_linear, f1_score_svm_linear = extract_scores(report_svm_linear['macro avg'])
precision_svm_rbf, recall_svm_rbf, f1_score_svm_rbf = extract_scores(report_svm_rbf['macro avg'])

# One row per model, one column per metric

df_scores = pd.DataFrame(
    [
        ('Linear', precision_svm_linear, recall_svm_linear, f1_score_svm_linear),
        ('RBF', precision_svm_rbf, recall_svm_rbf, f1_score_svm_rbf),
    ],
    columns=['Model', 'Precision', 'Recall', 'F1-Score'],
)

# Grouped bar chart comparing the two kernels; rot=0 keeps the model names horizontal

ax = df_scores.plot(
    x='Model',
    y=['Precision', 'Recall', 'F1-Score'],
    kind='bar',
    figsize=(12, 6),
    rot=0,
)
ax.set_title('Comparison of the SVM Model Performance')
ax.set_ylabel('Score')
ax.set_xlabel('Model')
plt.show()