In [89]:
import librosa
import librosa.feature
import matplotlib.pyplot as plt
import os
from sklearn import neighbors
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import numpy as np

In [90]:
def plot_song(data):
    plt.plot(data)
    plt.xlim([0, len(data)])
    plt.xlabel("Time")
    plt.ylabel("Amplitude")
    plt.tight_layout()
    plt.show()

In [91]:
def import_song(path_to_song):
    data, samplerate = librosa.load(path_to_song)
    return data, samplerate

def show_song(path_to_song):
    data, samplerate = import_song(path_to_song)
    plot_song(data)

def get_song_tempo(path_to_song):
    data, samplerate = import_song(path_to_song)
    song_onset = librosa.onset.onset_strength(y=data, sr=samplerate)
    tempo = librosa.feature.tempo(onset_envelope=song_onset, sr=samplerate)
    return tempo

def get_song_pitches(path_to_song):
    data, samplerate = import_song(path_to_song)
    pitches, magnitudes = librosa.piptrack(y=data, sr=samplerate)
    return pitches, magnitudes

def get_max_pitches(pitches, magnitudes, number_of_notes, time=30):
    initial_chord = magnitudes[:, time]
    max_index = np.argpartition(initial_chord, -number_of_notes)[-number_of_notes:]
    max_pitches = pitches[max_index, time]
    return np.sort(max_pitches)

In [94]:
# Can get away with 3 for Triads, but I made this number is higher
# in case I want to expand to predict 7th, 9ths, and more complex chords
NOTE_COUNT = 5

'''
Data source: https://zenodo.org/records/5217057

File data has the naming convention:
- 3 octaves (3, 4, 5).
- 12 base notes per octave: Cn, Df, Dn, Ef, En, Fn, Gf, Gn, Af, An, Bf, Bn. (n is natural, f is flat).
- 4 triad types per note: major (j), minor (n), diminished (d), augmented (a). No inversions.
- 3 volumes per triad: forte (f), metsoforte (m), piano (p).
- Metadata is in the name of the chord. For example: "piano_4_Af_d_m_45.wav" is a piano chord, (4) 4th octave,
(Af) A flat base note, (d) diminished, (m) metsoforte, 45th example.
'''

MAJOR_FILE_NAME_SHORTHAND = "_j_"
MINOR_FILE_NAME_SHORTHAND = "_n_"

PATH_TO_AUDIO_DATA = "C://Users//Arthur//Desktop//audio_augmented_x10"

def initialize_model():
    training_labels, training_pitches, validation_labels, validation_pitches = split_training_validation_data()

    model = neighbors.KNeighborsClassifier(n_neighbors=4)
    model.fit(training_pitches, training_labels)
    validate_model(model, validation_labels, validation_pitches)
    return model

def validate_model(model, validation_labels, validation_pitches):
    predictions = model.predict(validation_pitches)
    print(f"accuracy is {accuracy_score(validation_labels, predictions)}")
    print(f"F1 is {f1_score(validation_labels, predictions, average='micro')}")

def split_training_validation_data():
    training_labels, training_pitches, validation_labels, validation_pitches = [], [], [], []

    for file in os.listdir(PATH_TO_AUDIO_DATA):
        # Focus on major/minor for now - soon will remove this and train on entire dataset (diminished, augmented)
        if chord_is_major_or_minor(file):
            if file_is_training(file):
                append_data(training_labels, training_pitches, file)
            else:
                append_data(validation_labels, validation_pitches, file)

    return training_labels, training_pitches, validation_labels, validation_pitches

def chord_is_major_or_minor(file_name):
    return MAJOR_FILE_NAME_SHORTHAND or MINOR_FILE_NAME_SHORTHAND in file_name

# Want to save some data for verification.
# 100 samples per chord type, so we train on the first 80 and save the last 20 for validation.
def get_file_number(file_name):
    return int(file_name[-6:-4])

def file_is_training(file_name):
    return get_file_number(file_name) <= 80

def append_data(labels, pitches_data, file):
    chord = get_chord_name(file)
    pitches, magnitudes = get_song_pitches(f"{PATH_TO_AUDIO_DATA}//{file}")
    max_pitches = get_max_pitches(pitches, magnitudes, NOTE_COUNT)
    pitches_data.append(max_pitches)
    labels.append(chord)

def get_chord_name(file_name):
    chord_root = file_name[8:10]
    chord_type = assign_chord_type(file_name)
    chord_name = f"{chord_root} {chord_type}"
    return chord_name

def assign_chord_type(file_name):
    if MAJOR_FILE_NAME_SHORTHAND in file_name:
        chord_type = "Major"
    else:
        chord_type = "Minor"
    return chord_type

In [95]:
trained_model = initialize_model()

accuracy is 0.9919590643274854
F1 is 0.9919590643274854


In [107]:
import math

def get_prediction(pitches, magnitudes, time_stamp):
    max_pitches = get_max_pitches(pitches, magnitudes, NOTE_COUNT, time_stamp)
    chord_prediction = trained_model.predict([max_pitches])
    return chord_prediction[0]

def test_song(song_path):
    pitches, magnitudes = get_song_pitches(song_path)
    song_length = len(pitches[0])

    song_tempo = get_song_tempo(song_path)
    song_tempo_int = int(math.floor(song_tempo))

    predictions = []
    for time_stamp in range(0, song_length, song_tempo_int):
        predictions.append(get_prediction(pitches, magnitudes, time_stamp))

    print(predictions)

test_song("Songs/Heartaches By The Number In Ab.wav")

['An Minor', 'Fn Minor', 'Ef Major', 'An Major', 'Df Minor', 'Af Minor', 'Af Minor', 'Gf Minor', 'En Minor', 'Fn Minor', 'Gn Minor', 'Ef Minor', 'Cn Minor', 'En Minor', 'Ef Minor', 'Cn Major', 'Gf Minor', 'En Major', 'Fn Minor', 'Cn Minor', 'Gn Minor', 'Gf Minor', 'Gf Minor', 'Cn Minor', 'Af Minor', 'Df Major', 'Af Major', 'Cn Major', 'Dn Minor', 'Df Major', 'Gf Minor', 'En Major', 'An Major', 'An Major', 'An Minor', 'Bf Minor', 'Ef Major', 'Gn Minor', 'Df Minor', 'Dn Major', 'Cn Minor']
