In [1]:
import numpy as np
import medleydb as mdb
from medleydb import download
import librosa
import os
import matplotlib.pyplot as plt
%matplotlib inline



In [2]:
def get_hcqt_params():
    """Return the fixed analysis settings shared by the HCQT helpers.

    Returns
    -------
    tuple
        ``(bins_per_octave, n_octaves, harmonics, sr, fmin, hop_length)``,
        in that order. ``harmonics`` is a freshly built list on every call.
    """
    hop_length = 128
    fmin = 32.7
    sr = 22050
    # Multipliers 1..6 applied to ``fmin`` by the HCQT computation.
    harmonics = list(range(1, 7))
    n_octaves = 5
    bins_per_octave = 120
    return (bins_per_octave, n_octaves, harmonics, sr, fmin, hop_length)

In [3]:
def compute_hcqt(audio_fpath):
    """Compute a normalized log-magnitude harmonic CQT for an audio file.

    One constant-Q transform is computed per harmonic ``h`` returned by
    ``get_hcqt_params()``, each starting at ``fmin * h``. The transforms are
    stacked along a new leading axis, converted to a dB-like scale and
    min-max rescaled.

    Parameters
    ----------
    audio_fpath : str
        Path of the audio file, handed to ``librosa.load``.

    Returns
    -------
    np.ndarray
        Stacked log-magnitude CQTs with the harmonic index on the first axis
        and the time frame on the last axis. Values are shifted so the
        minimum is 0 and, when the signal is not constant, scaled so the
        maximum is 1.
    """
    bins_per_octave, n_octaves, harmonics, sr, f_min, hop_length = get_hcqt_params()
    y, fs = librosa.load(audio_fpath, sr=sr)

    cqt_list = [
        librosa.cqt(
            y, sr=fs, hop_length=hop_length, fmin=f_min*float(h),
            n_bins=bins_per_octave*n_octaves,
            bins_per_octave=bins_per_octave
        )
        for h in harmonics
    ]

    # Trim every harmonic to the shortest frame count so they can be stacked.
    # A fresh list is built: removing items from the list while iterating
    # over it (as done previously) silently dropped every other harmonic.
    min_time = min(cqt.shape[1] for cqt in cqt_list)
    cqt_list = [cqt[:, :min_time] for cqt in cqt_list]

    # The small offset keeps log10 finite for zero-magnitude bins.
    log_hcqt = 20.0*np.log10(np.abs(np.array(cqt_list)) + 0.0001)
    log_hcqt = log_hcqt - np.min(log_hcqt)

    # A constant (e.g. silent) signal has zero range; skip the division
    # instead of filling the output with NaN.
    peak = np.max(log_hcqt)
    if peak > 0:
        log_hcqt = log_hcqt / peak
    return log_hcqt

In [4]:
def get_freq_grid():
    """Return the center frequencies of the CQT bins for the first harmonic.

    The grid has ``bins_per_octave * n_octaves`` entries starting at the
    ``fmin`` value from ``get_hcqt_params()``.

    Returns
    -------
    np.ndarray
        Frequencies as produced by ``librosa.cqt_frequencies``.
    """
    bins_per_octave, n_octaves, harmonics, sr, f_min, hop_length = get_hcqt_params()
    # ``fmin`` is passed by keyword: newer librosa releases reject it as a
    # positional argument, while older ones accept the keyword form as well.
    freq_grid = librosa.cqt_frequencies(
        bins_per_octave*n_octaves, fmin=f_min, bins_per_octave=bins_per_octave
    )
    return freq_grid

def get_time_grid(n_time_frames):
    """Return the time stamp of each of the first ``n_time_frames`` frames.

    Parameters
    ----------
    n_time_frames : int
        Number of frames; frame indices ``0 .. n_time_frames - 1`` are
        converted.

    Returns
    -------
    np.ndarray
        Output of ``librosa.core.frames_to_time`` for those frame indices,
        using the ``sr`` and ``hop_length`` from ``get_hcqt_params()``.
    """
    params = get_hcqt_params()
    sample_rate, hop = params[3], params[5]
    frame_indices = range(n_time_frames)
    return librosa.core.frames_to_time(
        frame_indices, sr=sample_rate, hop_length=hop)

In [5]:
def grid_to_bins(grid, start_bin_val, end_bin_val):
    """Turn a grid of center values into a list of bin edges.

    The interior edges are the midpoints between neighbouring grid values;
    ``start_bin_val`` and ``end_bin_val`` are used as the outermost edges.

    Parameters
    ----------
    grid : np.ndarray
        1-D array of grid values.
    start_bin_val : float
        Value placed before the midpoints.
    end_bin_val : float
        Value placed after the midpoints.

    Returns
    -------
    np.ndarray
        Array with ``len(grid) + 1`` edges.
    """
    lower, upper = grid[:-1], grid[1:]
    midpoints = (upper + lower) / 2.0
    edges = [[start_bin_val], midpoints, [end_bin_val]]
    return np.concatenate(edges)

In [6]:
def create_annotation_target(freq_grid, time_grid, annotation_times, annotation_freqs):
    """Rasterize (time, frequency) annotation points onto a binary grid.

    Each annotation point is assigned to the grid cell whose bin contains it
    (bin edges come from ``grid_to_bins`` with 0.0 as the lower edge and the
    last grid value as the upper edge) and that cell is set to 1.

    Points outside the bin range on either axis (including NaN) are ignored.
    Previously they either raised ``IndexError`` (values at or beyond the
    upper edge) or wrapped around to the last row/column (negative values).
    A value exactly on the upper edge is placed in the last cell.

    Parameters
    ----------
    freq_grid : np.ndarray
        Frequency grid values (rows of the target).
    time_grid : np.ndarray
        Time grid values (columns of the target).
    annotation_times : array-like
        Time of each annotation point.
    annotation_freqs : array-like
        Frequency of each annotation point; same length as
        ``annotation_times``.

    Returns
    -------
    np.ndarray
        Array of shape ``(len(freq_grid), len(time_grid))`` holding 1 in
        annotated cells and 0 elsewhere.
    """
    time_bins = grid_to_bins(time_grid, 0.0, time_grid[-1])
    freq_bins = grid_to_bins(freq_grid, 0.0, freq_grid[-1])

    annotation_times = np.asarray(annotation_times, dtype=float)
    annotation_freqs = np.asarray(annotation_freqs, dtype=float)

    n_freqs = len(freq_grid)
    n_times = len(time_grid)

    # Keep only points that fall inside the closed bin range on both axes.
    # NOTE(review): frequencies in [0, first edge) still land in row 0; if
    # the annotations use 0 Hz to mean "unvoiced", those frames end up marked
    # in the lowest row - confirm against the annotation format.
    in_range = (
        (annotation_times >= time_bins[0]) & (annotation_times <= time_bins[-1])
        & (annotation_freqs >= freq_bins[0]) & (annotation_freqs <= freq_bins[-1])
    )

    annot_time_idx = np.digitize(annotation_times[in_range], time_bins) - 1
    annot_freq_idx = np.digitize(annotation_freqs[in_range], freq_bins) - 1

    # np.digitize uses half-open bins, so a value equal to the final edge is
    # reported one past the last bin; fold it back into the last cell.
    annot_time_idx = np.minimum(annot_time_idx, n_times - 1)
    annot_freq_idx = np.minimum(annot_freq_idx, n_freqs - 1)

    annotation_target = np.zeros((n_freqs, n_times))
    annotation_target[annot_freq_idx, annot_time_idx] = 1

    return annotation_target

In [7]:
def get_all_pitch_annotations(mtrack):
    """Collect the pitch annotations of every stem of a multitrack.

    For each stem in ``mtrack.stems`` the ``pitch_annotation`` attribute is
    used when it is not ``None``; otherwise ``pitch_estimate_pyin`` is used.
    Stems with neither (or with an empty annotation) are skipped.

    Parameters
    ----------
    mtrack : object
        Object exposing a ``stems`` mapping whose values carry
        ``pitch_annotation`` and ``pitch_estimate_pyin`` attributes. Each
        annotation is a sequence of rows; the first column is collected as
        time and the second as frequency.

    Returns
    -------
    annot_times : np.ndarray
        Concatenated first columns of all used annotations.
    annot_freqs : np.ndarray
        Concatenated second columns of all used annotations.
        Both arrays are empty when no stem provides an annotation.
    """
    annot_times = []
    annot_freqs = []
    for stem in mtrack.stems.values():
        annot = stem.pitch_annotation
        if annot is None:
            # Only touch the pYIN estimate when it is actually needed.
            annot = stem.pitch_estimate_pyin
        if annot is None:
            continue

        annot = np.array(annot).T
        if annot.size == 0:
            # An empty annotation has no rows to index.
            continue
        annot_times.append(annot[0])
        annot_freqs.append(annot[1])

    # np.concatenate raises on an empty list, so handle "nothing annotated".
    if not annot_times:
        return np.array([]), np.array([])

    return np.concatenate(annot_times), np.concatenate(annot_freqs)

In [8]:
def plot_annot_target(annot_target, hcqt, annot_times, annot_freqs):
    """Show the input, the target and the raw annotation in one figure.

    Three stacked panels are drawn: ``hcqt`` as an image, ``annot_target``
    as an image, and ``annot_freqs`` against ``annot_times`` as pixel
    markers.

    Parameters
    ----------
    annot_target : array-like
        2-D target, drawn in the second panel.
    hcqt : array-like
        2-D image drawn in the first panel.
    annot_times, annot_freqs : array-like
        x and y values of the third panel.
    """
    plt.figure(figsize=(15,30))

    # Panels 1 and 2 are both images drawn with the origin at the bottom.
    for panel, image in enumerate((hcqt, annot_target), start=1):
        plt.subplot(3, 1, panel)
        plt.imshow(image, origin='lower')
        plt.axis('auto')
        plt.axis('tight')

    # Panel 3: the raw annotation points.
    plt.subplot(3, 1, 3)
    plt.plot(annot_times, annot_freqs, ',')
    plt.axis('tight')

    plt.show()

In [9]:
def get_input_output_pairs(mtrack, plot=True):
    """Build the HCQT input and annotation target for a multitrack mix.

    The HCQT is computed from ``mtrack.mix_path`` and the target is
    rasterized from the annotations of all stems
    (``get_all_pitch_annotations``).

    Parameters
    ----------
    mtrack : object
        Multitrack exposing ``mix_path`` and the stem annotations.
    plot : bool, optional
        When true (the default, matching the previous behaviour) the first
        harmonic of the HCQT, the target and the raw annotation are plotted.
        Mirrors the ``plot`` flag of ``get_input_output_pairs_solo_pitch``.

    Returns
    -------
    hcqt : np.ndarray
        Output of ``compute_hcqt`` for the mix.
    annot_target : np.ndarray
        Output of ``create_annotation_target`` on the matching grids.
    """
    hcqt = compute_hcqt(mtrack.mix_path)

    freq_grid = get_freq_grid()
    # The time grid needs one entry per frame of the HCQT.
    time_grid = get_time_grid(len(hcqt[0][0]))

    annot_times, annot_freqs = get_all_pitch_annotations(mtrack)

    annot_target = create_annotation_target(
        freq_grid, time_grid, annot_times, annot_freqs
    )
    if plot:
        plot_annot_target(annot_target, hcqt[0], annot_times, annot_freqs)
    return hcqt, annot_target

In [10]:
def get_input_output_pairs_solo_pitch(audio_path, annot_times, annot_freqs, plot=False):
    """Build the HCQT input and annotation target for one audio file.

    Parameters
    ----------
    audio_path : str
        Audio file handed to ``compute_hcqt``.
    annot_times, annot_freqs : array-like
        Annotation points rasterized by ``create_annotation_target``.
    plot : bool, optional
        When true, plot the first harmonic of the HCQT together with the
        target and the raw annotation.

    Returns
    -------
    tuple
        ``(hcqt, annot_target, freq_grid, time_grid)``.
    """
    features = compute_hcqt(audio_path)

    # One time-grid entry per frame of the HCQT.
    n_frames = len(features[0][0])
    frequencies = get_freq_grid()
    times = get_time_grid(n_frames)

    target = create_annotation_target(
        frequencies, times, annot_times, annot_freqs
    )

    if not plot:
        return features, target, frequencies, times

    plot_annot_target(target, features[0], annot_times, annot_freqs)
    return features, target, frequencies, times

In [None]:
# Export one .npz of (HCQT input, annotation target, frequency grid, time grid)
# per track, using the predominant stem and its pitch annotation.
save_dir = "../output/training_data/"

# np.savez does not create missing directories; without this every save
# would fail and be reported only as a generic failure below.
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

# Track ids that are skipped outright.
# NOTE(review): presumably these failed in an earlier run - confirm.
failed_tracks = [
    'ChrisJacoby_BoothShotLincoln',
    'HezekiahJones_BorrowedHeart',
    'Handel_TornamiAVagheggiar',
    'JoelHelander_Definition',
    'JoelHelander_ExcessiveResistancetoChange',
    'JoelHelander_IntheAtticBedroom'
]

mtracks = mdb.load_all_multitracks(dataset_version=['V1'])
for mtrack in mtracks:
    print(mtrack.track_id)

    if mtrack.track_id in failed_tracks:
        continue

    stem = mtrack.predominant_stem
    if stem is None:
        continue

    data = stem.pitch_annotation
    save_path = os.path.join(
        save_dir,
        "{}_STEM_{}.npz".format(mtrack.track_id, stem.stem_idx)
    )

    if data is None:
        continue

    print("    > Stem {} {}".format(stem.stem_idx, stem.instrument))
    annot = np.array(data).T

    # Already exported on a previous run.
    if os.path.exists(save_path):
        continue

    if not os.path.exists(stem.audio_path):
        print("        >downloading stem...")
        download.download_stem(mtrack, stem.stem_idx)
        print("         done!")

    try:
        data_in, data_out, freq, time = get_input_output_pairs_solo_pitch(
            stem.audio_path, annot[0], annot[1]
        )

        np.savez(save_path, data_in=data_in, data_out=data_out, freq=freq, time=time)
    except Exception as exc:
        # Keep going with the next track, but say what went wrong. Catching
        # Exception (not a bare except) lets Ctrl-C / kernel interrupt through.
        print("    > Something failed :(")
        print("      {}: {}".format(type(exc).__name__, exc))


AClassicEducation_NightOwl
    > Stem 8 ['male singer']
AimeeNorwich_Child
    > Stem 4 ['female singer']
AimeeNorwich_Flying
    > Stem 4 ['clean electric guitar']
AlexanderRoss_GoodbyeBolero
    > Stem 6 ['male singer']
AlexanderRoss_VelvetCurtain
    > Stem 6 ['male singer']
AmarLal_Rest
    > Stem 1 ['clean electric guitar']
AmarLal_SpringDay1
    > Stem 1 ['acoustic guitar']
Auctioneer_OurFutureFaces
    > Stem 8 ['male singer']
AvaLuna_Waterduct
    > Stem 8 ['male singer']
BigTroubles_Phantom
    > Stem 4 ['male singer']
BrandonWebster_DontHearAThing
    > Stem 2 ['female singer']
BrandonWebster_YesSirICanFly
    > Stem 2 ['male singer']
CelestialShore_DieForUs
    > Stem 1 ['male singer']
ChrisJacoby_BoothShotLincoln
ChrisJacoby_PigsFoot
    > Stem 2 ['mandolin']
ClaraBerryAndWooldog_AirTraffic
    > Stem 8 ['female singer']
ClaraBerryAndWooldog_Boys
    > Stem 6 ['female singer']
ClaraBerryAndWooldog_Stella
    > Stem 7 ['female singer']
ClaraBerryAndWooldog_TheBadGuys
    > S

In [None]:
# Build the HCQT input and the annotation target for a single multitrack.
# As called here, get_input_output_pairs also draws the diagnostic figure.
# Note: this rebinds the notebook-level name `mtrack` used by the loop above.
mtrack = mdb.MultiTrack("MusicDelta_Beatles")
data_input, data_target = get_input_output_pairs(mtrack)