# Chroma Based Christmas Carol Classification v1: Original Tracks

This model uses CQT method for chroma feature extraction and qMax for distance calculation.

In [8]:
from ChromaCoverId.chroma_features import ChromaFeatures
from ChromaCoverId.chroma_features import display_chroma
import ChromaCoverId.cover_similarity_measures as sims
import matplotlib.pyplot as plt

In [9]:
import os
import librosa
import numpy as np
import re

# Function to extract features from an audio file
def extract_features(audio_file_path):
    audio_features = ChromaFeatures(audio_file=audio_file_path, mono=True, sample_rate=44100) 
    return audio_features.chroma_cqt()

# Directory containing your audio files
audio_dir = "koledy/samples"

# List to store feature matrix X and label vector y
X = []
y = []

tuples2D3D = {}

# Regex pattern to extract label from the filename
pattern = re.compile(r"^(.+?)_\d{1,2}_\d+\.wav$")

# Iterate through audio files in the directory
for filename in os.listdir(audio_dir):
    if filename.endswith(".wav"):
        # Use regex to extract label from the filename
        match = pattern.match(filename)
        if match:
            label = match.group(1)
            
            # Extract features from the audio file
            features = extract_features(os.path.join(audio_dir, filename))
            features_flattened = features.flatten()
            tuples2D3D[np.array2string(features_flattened)]= features
            
            # Append features and label to X and y
            X.append(features_flattened)
            y.append(label)
# Convert lists to NumPy arrays
X = np.array(X)
y = np.array(y)

# Now, X contains your feature matrix, and y contains your label vector


== Audio vector of koledy/samples/w_zlobie_lezy_18_5.wav loaded with shape (441000,) and sample rate 44100 ==
== Audio vector of koledy/samples/gdy_sliczna_panna_40_21.wav loaded with shape (441000,) and sample rate 44100 ==
== Audio vector of koledy/samples/dzisiaj_w_betlejem_32_14.wav loaded with shape (441000,) and sample rate 44100 ==
== Audio vector of koledy/samples/do_szopy_hej_pasterze_16_1.wav loaded with shape (441000,) and sample rate 44100 ==
== Audio vector of koledy/samples/gdy_sliczna_panna_15_20.wav loaded with shape (441000,) and sample rate 44100 ==
== Audio vector of koledy/samples/aniol_pasterzom_mowil_1_4.wav loaded with shape (441000,) and sample rate 44100 ==
== Audio vector of koledy/samples/aniol_pasterzom_mowil_0_22.wav loaded with shape (441000,) and sample rate 44100 ==
== Audio vector of koledy/samples/pojdzmy_wszyscy_do_stajenki_0_14.wav loaded with shape (441000,) and sample rate 44100 ==
== Audio vector of koledy/samples/a_wczora_z_wieczora_24_2.wav load

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# X is  feature matrix and y is  label vector
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Assuming your distance function is named 'calculate_distance'
def calculate_distance(x, y):
    # Reshape back to 3D
    x3D = tuples2D3D[np.array2string(x)]
    y3D = tuples2D3D[np.array2string(y)]
    # Compute cross recurrent plot from two chroma audio feature vectors
    sim_matrix = sims.cross_recurrent_plot(x3D, y3D)
    #Computing qmax audio similarity measure (distance)
    qmax, _ = sims.qmax_measure(sim_matrix)
    return qmax


# Instantiate and train the KNN classifier with custom distance metric
knn_classifier = KNeighborsClassifier(n_neighbors=3, metric=calculate_distance)
knn_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.8942307692307693


### Example predictions for new data

##### Prediction no.1 for  `Kapela Beskidy - Oj malućki malućki.wav`

In [11]:
import IPython
new_audio_file_path = "validation_data/Kapela Beskidy - Oj malućki malućki.wav"
IPython.display.Audio(new_audio_file_path)

In [12]:
new_features = extract_features(new_audio_file_path)
new_features_flattened = new_features.flatten()


tuples2D3D[np.array2string(features_flattened)] = features

X_validate = []
X_validate.append(features_flattened)
X_validate = np.array(X_validate)


# Make predictions using the trained KNN classifier
predicted_label = knn_classifier.predict(X_validate)

print(f"Predicted Label: {predicted_label[0]}")


== Audio vector of validation_data/Kapela Beskidy - Oj malućki malućki.wav loaded with shape (442748,) and sample rate 44100 ==
Predicted Label: oj_maluski_maluski


The prediction was correct!

##### Prediction no.2 for  `Gdy śliczna Panna - Mazowsze.wav`

In [13]:
import IPython
new_audio_path2 = "validation_data/Gdy śliczna Panna - Mazowsze.wav"
IPython.display.Audio(new_audio_path2)

In [14]:
new_features2 = extract_features(new_audio_path2)
new_features2_flattened = new_features2.flatten()


tuples2D3D[np.array2string(new_features2_flattened)] = new_features2

X_validate2 = []
X_validate2.append(new_features2_flattened)
X_validate2 = np.array(X_validate2)


# Make predictions using the trained KNN classifier
p_label2 = knn_classifier.predict(X_validate2)

print(f"Predicted Label: {p_label2}")

== Audio vector of validation_data/Gdy śliczna Panna - Mazowsze.wav loaded with shape (441000,) and sample rate 44100 ==
Predicted Label: ['bog_sie_rodzi']


Sadly, the prediction is incorrect...

##### Prediction no.3 for `Fasolki - Przybiezeli do Betlejem`

In [19]:
import IPython
new_audio_path3 = "validation_data/Elżbieta Zającówna - Przybieżeli do Betlejem.wav"
IPython.display.Audio(new_audio_path3)

In [21]:
new_features3 = extract_features(new_audio_path3)
new_features3_flattened = new_features3.flatten()


tuples2D3D[np.array2string(new_features3_flattened)] = new_features3

X_validate3 = []
X_validate3.append(new_features3_flattened)
X_validate3 = np.array(X_validate3)


# Make predictions using the trained KNN classifier
p_label3 = knn_classifier.predict(X_validate3)

print(f"Predicted Label: {p_label3}")

== Audio vector of validation_data/Elżbieta Zającówna - Przybieżeli do Betlejem.wav loaded with shape (441000,) and sample rate 44100 ==
Predicted Label: ['gdy_sliczna_panna']


Sadly, the prediction is incorrect. The model is probably too well fit to the training data.

### Summary

The accuracy level for various interpretations of Polish christmas carols is quite high, `~89%`. However the model seems to be overfitted, as it does not predict well for samples performed by artists outside of the training dataset.