## Installation and Imports



In [None]:
!pip install sklearn librosa tensorflow mir_eval

In [2]:
#!/usr/bin/env python3
import argparse

from numpy.lib.npyio import save
from ACR_Training.Models import MLP, MLP_scalered, CRNN, CRNN_1, MLP2RNN, CRNN_2, BassVsThird
from ACR_Training.Datasets import IsophonicsDataset
from ACR_Training.SegmentationModels import SegmentationCRNN, EncoderDecoderSegmentation, colorize_spectrograms, chord_graphical_segmentations 
import sklearn
import sys

In [3]:
#!/usr/bin/env python3
parser = argparse.ArgumentParser()
# Directories, destinations, folders, files
parser.add_argument("--isophonics_audio_directory", default="./Datasets/Isophonics/AUDIO", type=str, help="Path to ISOPHONICS directory with audio files.")
parser.add_argument("--isophonics_annotations_directory", default="./Datasets/Isophonics/ANNOTATIONS", type=str, help="Path to ISOPHONICS directory with chord annotations.")
parser.add_argument("--billboard_audio_directory", default="./Datasets/Billboard/AUDIO", type=str, help="Path to BILLBOARD directory with audio files.")
parser.add_argument("--billboard_annotations_directory", default="./Datasets/Billboard/ANNOTATIONS", type=str, help="Path to BILLBOARD directory with chord annotations.")
parser.add_argument("--isophonics_prep_dest", default="./PreprocessedDatasets/isophonics_new.ds", type=str, help="Preprocessed ISOPHONICS dataset destination.")
parser.add_argument("--billboard_prep_dest", default="./PreprocessedDatasets/billboard_new.ds", type=str, help="Preprocessed BILLBOARD dataset destination.")

# Dataset preprocessing args
parser.add_argument("--dataset", default="isophonics", type=str, help="Dataset we want to preprocess, {isophonics, billboard}")
#           Isophonics
parser.add_argument("--sample_rate", default=44100, type=int, help="Sample rate for each song.")
parser.add_argument("--hop_length", default=512, type=int, help="10*(sample_rate/hop_length) is a number of miliseconds between two frames.")
parser.add_argument("--window_size", default=8, type=int, help="Spectrograms on left, and also spectrogram on right of the time bin -> window_size*2 + 1 spectrograms grouped together.")
parser.add_argument("--flattened_window", default=False, type=bool, help="Whether the spectrogram window should be flatten to one array or it sould be array of spectrograms.")
parser.add_argument("--ms_intervals", default=430.6640625, type=float, help="Miliseconds between generated spectrograms.")
parser.add_argument("--to_skip", default=10, type=int, help="How many spectrogram we want to skip when creating spectrogram window.")
parser.add_argument("--norm_to_C", default=True, type=bool, help="Whether we want to transpose all songs to C key (or D dorian, .. A minor, ...)")
parser.add_argument("--spectrogram_type", default="cqt", type=str, help="Spectrogram types, {cqt,log_mel}")
#           Billboard
parser.add_argument("--n_frames", default=1000, type=int, help="Length of song subsequence we are consinder when predicting chords to keep some context.")

# Training args
parser.add_argument("--test_size", default=0.3, type=lambda x:int(x) if x.isdigit() else float(x), help="Test set size.")
parser.add_argument("--epochs", default=100, type=int, help="Number of epochs.")
parser.add_argument("--seed", default=42, type=int, help="Random seed.")


args = parser.parse_args([] if "__file__" not in globals() else None)

# DEMO

In [4]:
import numpy as np
from ACR_Training.Models import MLP_scalered
from ACR_Training.Spectrograms import log_mel_spectrogram
from ACR_Pipeline.KeyRecognizer import KeyRecognizer
from ACR_Pipeline.DataPreprocessor import DataPreprocessor
from ACR_Pipeline.ChordVoter import ChordVoter

def pipeline(waveform, sample_rate, hop_length, window_size, spectrogram_type):
    """Run the two-pass chord recognition pipeline on one audio waveform.

    The stages, in execution order:
      1. Load the basic and the C-transposed MLP models from
         ``./ACR_Pipeline/models``.
      2. Preprocess the audio without transposition and predict chords with
         the basic model.
      3. Count how often each predicted chord occurs and estimate the key
         from those counts.
      4. Preprocess the audio again, this time normalised to C using the
         estimated key, and predict chords with the C-transposed model.
      5. Vote on one chord per beat, transpose the result from 'C' back to
         the estimated key and convert chord indices to notations.

    Args:
        waveform: Audio samples, forwarded to the preprocessor and the voter.
        sample_rate: Sample rate of ``waveform``.
        hop_length: Hop length forwarded to the preprocessor and the voter.
        window_size: Window size forwarded to the preprocessor.
        spectrogram_type: Passed on as ``spectrogram_generator`` (the demo
            cell supplies the ``log_mel_spectrogram`` function).

    Returns:
        The result of ``DataPreprocessor.chord_indices_to_notations`` -- in
        the notebook's recorded output, a list of chord label strings.
    """
    # Models are loaded first so a missing model file fails before any
    # audio processing starts.
    untransposed_model = MLP_scalered.load('./ACR_Pipeline/models/basic_mlp.model')
    transposed_model = MLP_scalered.load('./ACR_Pipeline/models/C_transposed_mlp.model')

    # Arguments common to both preprocessing passes.
    shared_preprocess_args = dict(
        waveform=waveform,
        sample_rate=sample_rate,
        hop_length=hop_length,
        window_size=window_size,
        spectrogram_generator=spectrogram_type,
    )

    # Pass 1: untransposed features -> rough chord histogram -> key estimate.
    # The estimated key is a key signature rather than a tonic (A minor,
    # C major and D dorian are treated alike).
    features = DataPreprocessor.flatten_preprocess(**shared_preprocess_args, norm_to_C=False)
    rough_prediction = untransposed_model.predict(features)
    labels, occurrences = np.unique(rough_prediction, return_counts=True)
    estimated_key = KeyRecognizer.estimate_key(dict(zip(labels, occurrences)))

    # Pass 2: features transposed to C -> chord prediction in C.
    features_in_c = DataPreprocessor.flatten_preprocess(
        **shared_preprocess_args,
        norm_to_C=True,
        key=estimated_key
    )
    prediction_in_c = transposed_model.predict(features_in_c)

    # Collapse the predictions to one chord per beat.
    voted_in_c = ChordVoter.vote_for_beats(
        chord_sequence=prediction_in_c,
        waveform=waveform,
        sample_rate=sample_rate,
        hop_length=hop_length
    )

    # Undo the transposition so the chords are in the song's own key.
    voted_in_song_key = DataPreprocessor.transpose(
        chord_sequence=voted_in_c,
        from_key='C',
        to_key=estimated_key
    )

    return DataPreprocessor.chord_indices_to_notations(voted_in_song_key)

In [6]:
import librosa
from ACR_Training.Spectrograms import log_mel_spectrogram
# Load audio, resampled to 44.1 kHz.
# `sr` must be passed by keyword: it is keyword-only in librosa >= 0.10, where
# the positional form raises a TypeError; the keyword form works everywhere.
y, sr = librosa.load('Help!.wav', sr=44100)

# Predict chords
sequence = pipeline(
    waveform=y,
    sample_rate=sr,
    hop_length=22050,
    window_size=5,
    spectrogram_type=log_mel_spectrogram
)
# Blank lines visually separate the result from library warnings in the output.
print()
print()
print(sequence)
print()
print()

  "Empty filters detected in mel frequency basis. "




['A', 'A', 'A', 'A', 'A', 'A', 'B:min', 'B:min', 'B:min', 'A', 'B:min', 'B:min', 'A', 'A', 'A', 'A', 'B:min', 'A', 'A', 'B:min', 'A', 'A', 'A', 'A', 'A', 'B:min', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B:min', 'B:min', 'A', 'A', 'B:min', 'B:min', 'A', 'A', 'A', 'B:min', 'B:min', 'B:min', 'B:min', 'A', 'A', 'A', 'A', 'A', 'A', 'B:min', 'B:min', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B:min', 'A', 'A', 'A', 'A', 'B:min', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B:min', 'A', 'A', 'A', 'A', 'B:min', 'A', 'A', 'A', 'B:min', 'A', 'A', 'B:min', 'B:min', 'A', 'A', 'B:min', 'B:min', 'A', 'A', 'B:min', 'B:min', 'A', 'A', 'B:min', 'A', 'A', 'B:min', 'B:min', 'A', 'A', 'A', 'B:min', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B:min', 'A', 'B:min', 'A', 'B:min']




  "Empty filters detected in mel frequency basis. "
