## SVM and KKN with Distances Between Peaks as Features

In [53]:
import os
import numpy as np
from scipy.io import wavfile
from scipy.signal import find_peaks
import pandas as pd
from sklearn.utils import shuffle


### Load major and minor data

In [54]:
data_major = []
data_minor = []
sample_rates = []
data_length = []
for num in range(502):
    sample_rate, audio_data = wavfile.read(f"Audio_Files/Major/Major_{num}.wav")
    data_major.append(audio_data)
    sample_rates.append(sample_rate)
    data_length.append(len(audio_data))

for num in range(358):
    try:
        sample_rate, audio_data = wavfile.read(f"Audio_Files/Minor/Minor_{num}.wav")
    except:
        print(f"Minor_{num}.wav not found. Skipping...")
        continue
    data_minor.append(audio_data)
    sample_rates.append(sample_rate)
    data_length.append(len(audio_data))

Minor_251.wav not found. Skipping...


  sample_rate, audio_data = wavfile.read(f"Audio_Files/Major/Major_{num}.wav")


### Determine FFT Parameters (FFT size, Sampling Rate)

In [55]:
# The max data length amoung all data
max(data_length)

101429

In [56]:
fft_size = 2**17
fft_size
# Use this fft size

131072

In [57]:
np.sum(sample_rates == sample_rates[0] * np.ones((1,len(sample_rates)))) == len(sample_rates)
# Sampling rate for all audio file is the same

True

In [58]:
sample_rate = sample_rates[0]
sample_rate    

44100

In [59]:
fft_resoluion = sample_rate/fft_size
fft_resoluion

0.336456298828125

In [60]:
frequency_bins = np.fft.rfftfreq(n=fft_size,d=1/sample_rate)
num_peaks = 10

In [61]:
note_frequencies = [
    27.5000, 29.1352, 30.8677, 32.7032, 34.6478, 36.7081, 38.8909, 41.2034, 43.6535,
    46.2493, 48.9994, 51.9131, 55.0000, 58.2705, 61.7354, 65.4064, 69.2957, 73.4162,
    77.7817, 82.4069, 87.3071, 92.4986, 97.9989, 103.826, 110.000, 116.541, 123.471,
    130.813, 138.591, 146.832, 155.563, 164.814, 174.614, 184.997, 195.998, 207.652,
    220.000, 233.082, 246.942, 261.626, 277.183, 293.665, 311.127, 329.628, 349.228,
    369.994, 391.995, 415.305, 440.000, 466.164, 493.883, 523.251, 554.365, 587.330,
    622.254, 659.255, 698.456, 739.989, 783.991, 830.609, 880.000, 932.328, 987.767,
    1046.50, 1108.73, 1174.66, 1244.51, 1318.51, 1396.91, 1479.98, 1567.98, 1661.22,
    1760.00, 1864.66, 1975.53, 2093.00, 2217.46, 2349.32, 2489.02, 2637.02, 2793.83,
    2959.96, 3135.96, 3322.44, 3520.00, 3729.31, 3951.07, 4186.01
]

In [62]:
note_ranges = []
for i in range(len(note_frequencies)):
    if i == 0: 
        low = 0
    else:
        low = (note_frequencies[i - 1] + note_frequencies[i]) / 2
    if i == len(note_frequencies) - 1:  
        high = np.inf
    else:
        high = (note_frequencies[i] + note_frequencies[i + 1]) / 2
    note_ranges.append((low, high))

In [63]:
def match_frequencies_to_notes(frequencies):
    note_matches = []
    for freq in frequencies:
        matched = False
        for i, (low, high) in enumerate(note_ranges):
            if low <= freq < high:
                note_matches.append(i) 
                matched = True
                break
        if not matched:
            note_matches.append(None)  
    return note_matches

In [64]:
features_label = []
for data in data_major:
    freq_data = np.abs(np.fft.rfft(data, n=fft_size))
    peak_indices, _ = find_peaks(freq_data)
    sorted_peak_indices = peak_indices[np.argsort(freq_data[peak_indices])[::-1]]
    sorted_freq = frequency_bins[sorted_peak_indices]
    unqie_note_matches = pd.unique(match_frequencies_to_notes(sorted_freq))
    unqie_note_matches = unqie_note_matches[unqie_note_matches != 0]
    unqie_note_matches = unqie_note_matches[:num_peaks]
    unqie_note_matches = np.sort(unqie_note_matches)
    notes_with_label = np.append(unqie_note_matches,1)
    features_label.append(notes_with_label)

  unqie_note_matches = pd.unique(match_frequencies_to_notes(sorted_freq))


In [65]:
for data in data_minor:
    freq_data = np.abs(np.fft.rfft(data, n=fft_size))
    peak_indices, _ = find_peaks(freq_data)
    sorted_peak_indices = peak_indices[np.argsort(freq_data[peak_indices])[::-1]]
    sorted_freq = frequency_bins[sorted_peak_indices]
    unqie_note_matches = pd.unique(match_frequencies_to_notes(sorted_freq))
    unqie_note_matches = unqie_note_matches[unqie_note_matches != 0]
    unqie_note_matches = unqie_note_matches[:num_peaks]
    unqie_note_matches = np.sort(unqie_note_matches)
    notes_with_label = np.append(unqie_note_matches,0)
    features_label.append(notes_with_label)

  unqie_note_matches = pd.unique(match_frequencies_to_notes(sorted_freq))


In [385]:
features_label_shuffled = shuffle(features_label)


In [386]:
df = pd.DataFrame(features_label_shuffled)

In [387]:
output_file = "data_preprocessed_v2.csv"
df.to_csv(output_file, index=False)

In [388]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV


In [389]:
X = np.array(features_label_shuffled)[:, :-1]
for i in range(7):
    X[:,i] = X[:,i] - X[:,i+1] # Calcuate the distances between notes
X = X[:,:6] # Use first 6 distances => 6 features
y = np.array(features_label_shuffled)[:,-1]

In [390]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

### Normalization

In [392]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


### SVM Training and Testing

In [393]:
param_grid = {
    'C': [1,2,3,4,5,10,15,20, 100],
    'gamma': np.linspace(2,6,100),
    'kernel': ['rbf']
}
grid = GridSearchCV(SVC(class_weight='balanced',random_state=42), param_grid, refit=True, cv=5)
grid.fit(X_train, y_train)

print("Best Parameters: ", grid.best_params_)
print("Best Cross-Validation Score: ", grid.best_score_)

best_svm = grid.best_estimator_


Best Parameters:  {'C': 2, 'gamma': 2.0, 'kernel': 'rbf'}
Best Cross-Validation Score:  0.8529567333121759


In [394]:
y_pred = best_svm.predict(X_test)


In [395]:
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.9011627906976745


### KNN Training and Testing

In [396]:
from sklearn.neighbors import KNeighborsClassifier


In [397]:
param_grid = {
    'n_neighbors': [1, 2,3, 5, 7, 9],
    'weights': ['uniform', 'distance'], 
    'p': [1, 2]  #(1 = Manhattan, 2 = Euclidean)
}

grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)
print("Best Cross-Validation Accuracy:", grid.best_score_)

best_knn = grid.best_estimator_
y_pred = best_knn.predict(X_test)

from sklearn.metrics import accuracy_score
print("Test Accuracy:", accuracy_score(y_test, y_pred))


Best Parameters: {'n_neighbors': 9, 'p': 1, 'weights': 'distance'}
Best Cross-Validation Accuracy: 0.8646355654289642
Test Accuracy: 0.872093023255814
