In [38]:
import librosa
import librosa.feature
import matplotlib.pyplot as plt
import os
from sklearn import neighbors
from sklearn import neural_network
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import numpy as np

In [39]:
def plot_song(data):
    plt.plot(data)
    plt.xlim([0, len(data)])
    plt.xlabel("Time")
    plt.ylabel("Amplitude")
    plt.tight_layout()
    plt.show()

In [62]:
def import_song(path_to_song):
    data, samplerate = librosa.load(path_to_song)
    return data, samplerate

def show_song(path_to_song):
    data, samplerate = import_song(path_to_song)
    plot_song(data)

def get_song_tempo(path_to_song):
    data, samplerate = import_song(path_to_song)
    song_onset = librosa.onset.onset_strength(y=data, sr=samplerate)
    tempo = librosa.feature.tempo(onset_envelope=song_onset, sr=samplerate)
    return tempo

def get_song_pitches(path_to_song):
    data, samplerate = import_song(path_to_song)
    pitches, magnitudes = librosa.piptrack(y=data, sr=samplerate)
    return pitches, magnitudes

def get_max_pitches(pitches, magnitudes, number_of_notes, time=30):
    initial_chord = magnitudes[:, time]
    max_index = np.argpartition(initial_chord, -number_of_notes)[-number_of_notes:]
    max_pitches = pitches[max_index, time]
    scale_to_fourth_octave(max_pitches)
    return np.sort(max_pitches)

'''
All I care about is predicting the chord accurately, not the octave information
I scale each note to be in the 4th octave (440 - 800 Hz), this allows:
1) More samples
2) More accurate predictions using KNN since each chord is now 1 cluster as opposed to 1 cluster per octave.
For instance, without this, if we get an A6 chord it's nearest neighbor would be G5,
it would not be able to use A3, A4, A5, etc information.
3) Overtones get scaled down as well

For example, a C triad which is C (522 Hz) E (660 Hz) G (784 Hz) may have overtones that are factors of these.
I.E. the three dominant pitches could be 522 Hz, 784 Hz, and 1044 Hz. This will at least allow me to scale the
over tones down to the proper range
'''

def scale_to_fourth_octave(pitches):
    scale_low_notes_up(pitches)
    scale_high_notes_down(pitches)

def scale_low_notes_up(pitches):
    if 0 not in pitches:
        scale_up_factor = np.floor(np.log2(440 / pitches))
        scale_up_factor[scale_up_factor < 0] = 0
        pitches *= 2 ** scale_up_factor

def scale_high_notes_down(pitches):
    if 0 not in pitches:
        scale_down_factor = np.ceil(np.log2(pitches / 880))
        scale_down_factor[scale_down_factor < 0] = 0
        pitches /= 2 ** scale_down_factor



In [65]:
NOTE_COUNT = 4
PATH_TO_AUDIO_DATA = "C://Users//Arthur//Desktop//audio_augmented_x10"

'''
Data source: https://zenodo.org/records/5217057

File data has the naming convention:
- 3 octaves (3, 4, 5).
- 12 base notes per octave: Cn, Df, Dn, Ef, En, Fn, Gf, Gn, Af, An, Bf, Bn. (n is natural, f is flat).
- 4 triad types per note: major (j), minor (n), diminished (d), augmented (a). No inversions.
- 3 volumes per triad: forte (f), metsoforte (m), piano (p).
- Metadata is in the name of the chord. For example: "piano_4_Af_d_m_45.wav" is a piano chord, (4) 4th octave,
(Af) A flat base note, (d) diminished, (m) metsoforte, 45th example.
'''

MAJOR_FILE_NAME_SHORTHAND = "_j_"
MINOR_FILE_NAME_SHORTHAND = "_n_"

def initialize_model():
    training_labels, training_pitches, validation_labels, validation_pitches = split_training_validation_data()

    model = neighbors.KNeighborsClassifier(n_neighbors=4)
    model.fit(training_pitches, training_labels)
    validate_model(model, validation_labels, validation_pitches)
    return model

def split_training_validation_data():
    training_labels, training_pitches, validation_labels, validation_pitches = [], [], [], []

    for file in os.listdir(PATH_TO_AUDIO_DATA):
        # Focus on major/minor for now - soon will remove this and train on entire dataset (diminished, augmented)
        if chord_is_major_or_minor(file):
            if file_is_training(file):
                append_data(training_labels, training_pitches, file)
            else:
                append_data(validation_labels, validation_pitches, file)

    return training_labels, training_pitches, validation_labels, validation_pitches

def chord_is_major_or_minor(file_name):
    return MAJOR_FILE_NAME_SHORTHAND or MINOR_FILE_NAME_SHORTHAND in file_name

# Want to save some data for verification.
# 100 samples per chord type, so we train on the first 80 and save the last 20 for validation.
def file_is_training(file_name):
    return get_file_number(file_name) <= 80

def get_file_number(file_name):
    return int(file_name[-6:-4])

def append_data(labels, pitches_data, file):
    chord = get_chord_name(file)
    pitches, magnitudes = get_song_pitches(f"{PATH_TO_AUDIO_DATA}//{file}")
    max_pitches = get_max_pitches(pitches, magnitudes, NOTE_COUNT)
    pitches_data.append(max_pitches)
    labels.append(chord)

def get_chord_name(file_name):
    chord_root = file_name[8:10]
    chord_type = assign_chord_type(file_name)
    chord_name = f"{chord_root} {chord_type}"
    return chord_name

def assign_chord_type(file_name):
    if MAJOR_FILE_NAME_SHORTHAND in file_name:
        chord_type = "Major"
    else:
        chord_type = "Minor"
    return chord_type

def validate_model(model, validation_labels, validation_pitches):
    predictions = model.predict(validation_pitches)
    print(f"accuracy is {accuracy_score(validation_labels, predictions)}")
    print(f"F1 is {f1_score(validation_labels, predictions, average='micro')}")

In [66]:
trained_model = initialize_model()

accuracy is 0.9885477582846004
F1 is 0.9885477582846004


In [67]:
import math
def get_prediction(pitches, magnitudes, time_stamp):
    max_pitches = get_max_pitches(pitches, magnitudes, NOTE_COUNT, time_stamp)
    chord_prediction = trained_model.predict([max_pitches])
    return chord_prediction[0]

def test_song(song_path):
    pitches, magnitudes = get_song_pitches(song_path)
    song_length = len(pitches[0])

    song_tempo_int = song_length // 3 # Need a more sophisticated way of finding chord changes
                                      # good enough while I figure out the 3 chord progression samples
    predictions = []

    for time_stamp in range(0, song_length - 3, song_tempo_int):
        prediction = get_prediction(pitches, magnitudes, time_stamp)
        predictions.append(prediction)
    print(predictions)

# C major, E minor, G major on each downbeat (3 beats), 120 bpm
# Predicts the C triad as A flat
# A flat contains similar notes so needs some refinement
test_song("Songs/C_Em_G_progression.wav")

# A minor, C major, E minor on each downbeat (3 beats), 120 bpm
test_song("Songs/Am_C_Em_progression.wav")


['Af Major', 'En Minor', 'Gn Major']
['An Minor', 'Cn Major', 'En Minor']
