In [None]:
%load_ext autoreload
%autoreload 2

import os, sys
import glob, pickle, yaml

# Put the parent of the kernel's working directory on the import path; the
# project modules imported below are presumably located there.
PROJECT_DIR = os.path.dirname(os.getcwd())
# Guard the append so re-running this cell does not pile duplicate entries onto sys.path.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as signal
import librosa
import librosa.display

from dataloader import make_supervised_dataset
from train_utils import make_supervised_model, make_supervised_dataset_from_config
from preprocessing import F0LoudnessPreprocessor, MidiF0LoudnessPreprocessor
from timbre_transfer import transfer_timbre_from_path

from feature_extraction import *
import utilities

def print_plot_play(x, Fs=16000, text='', normalize=False):
    """Print a short summary of a signal, plot its waveform and embed an audio player.

    Args:
        x: Signal to show; must expose ``shape`` and ``dtype``. Its first axis
            is plotted against the sample index.
        Fs: Sampling rate; printed and passed to the audio widget as ``rate``.
        text: Caption printed before the summary line.
        normalize: Forwarded unchanged to ``IPython.display.Audio``.
    """
    from IPython.display import Audio, display

    print('%s\n' % text)
    summary_fields = (Fs, x.shape, x.dtype)
    print('Fs = %d, x.shape = %s, x.dtype = %s' % summary_fields)

    # Wide, short figure showing the raw waveform.
    fig, ax = plt.subplots(figsize=(8, 2))
    ax.plot(x, color='gray')
    ax.set_xlim([0, x.shape[0]])
    ax.set_xlabel('Time (samples)')
    ax.set_ylabel('Amplitude')
    fig.tight_layout()
    plt.show()

    # Audio player rendered below the plot.
    display(Audio(data=x, rate=Fs, normalize=normalize))

In [None]:
# Read the experiment configuration from YAML and convert it to a plain dict.
# The path is relative, so it resolves against the kernel's working directory.
# NOTE(review): yaml.FullLoader is used here; if this file could ever come from
# an untrusted source, prefer yaml.safe_load — confirm the config only needs
# plain YAML types before switching.
with open('../configs/Supervised_Latent_Violin_Timesteps.yaml', 'r') as file:
    config = dict(yaml.load(file, Loader=yaml.FullLoader))

In [None]:
# Input clip (resolved relative to the kernel's working directory) and the
# sample rate requested from the loader.
input_title = "singing.mp3"
# The file name is already a string, so it is joined directly (no str.format needed).
input_path = os.path.join("../audio_clips", input_title)
Fs = 16000
# Load the clip through utilities.load_track, requesting sample_rate=Fs.
# NOTE(review): pitch_shift=2 is forwarded as-is; its unit (presumably
# semitones) is defined by utilities.load_track — confirm.
track = utilities.load_track(input_path, sample_rate=Fs, pitch_shift=2)

In [None]:
# Run feature extraction on the loaded track, with the MFCC and log-mel flags enabled.
# NOTE(review): audio_length=8 is presumably in seconds — confirm against process_track.
features = process_track(
    track,
    audio_length=8,
    mfcc=True,
    log_mel=True,
    mfcc_nfft=512,
)

In [None]:
# Summarise the extracted features: one line per key with the shape of its value.
for feature_name, feature_value in features.items():
    print(feature_name, feature_value.shape)

In [None]:
# Build the F0/loudness preprocessor applied to `features` in the next cell.
# NOTE(review): timesteps=500 presumably sets the number of output frames — confirm.
preprocessor = F0LoudnessPreprocessor(timesteps=500)

In [None]:
# Merge the preprocessor's outputs into `features` in place; keys that already
# exist are overwritten.
# NOTE(review): because `features` is mutated, re-running this cell may feed
# already-processed values back into the preprocessor — re-run the extraction
# cell first.
features.update(preprocessor(features))

In [None]:
# Re-check every entry's shape now that the preprocessor outputs have been merged in.
for feature_name, feature_value in features.items():
    print(feature_name, feature_value.shape)

In [None]:
# Display the 'loudness_db' entry offset by +3 (the result is shown, not stored).
features['loudness_db'] + 3

In [None]:
# Split the track into frames. The second argument is 4 * Fs, i.e. four seconds'
# worth of samples at Fs — assuming frame_generator takes a length in samples (confirm).
frames = frame_generator(track, 4 * Fs)

In [None]:
# Inspect the shape of the value returned by frame_generator.
frames.shape

In [None]:
# Run feature extraction over `frames`, with the MFCC and log-mel flags enabled.
# NOTE(review): this rebinds `features`, replacing the value built in earlier cells.
features = extract_features_from_frames(frames, mfcc=True, log_mel=True)

In [None]:
# Shapes of the features extracted from the frames, one line per key.
for feature_name, feature_value in features.items():
    print(feature_name, feature_value.shape)

In [None]:
# Run the extractor on the first frame only.
# NOTE(review): `features` is rebound again, so the inspection cells that follow
# refer to this single-frame result.
features = feature_extractor(frames[0], mfcc=True, log_mel=True)

In [None]:
# List the keys of the single-frame feature mapping.
features.keys()

In [None]:
# Shape of the 'audio' entry.
features['audio'].shape

In [None]:
# Shape of the 'f0_hz' entry.
features['f0_hz'].shape

In [None]:
# Shape of the 'loudness_db' entry.
features['loudness_db'].shape

In [None]:
# Shape of the 'mfcc' entry.
features['mfcc'].shape

In [None]:
# Shape of the 'log_mel' entry.
features['log_mel'].shape

In [None]:
# Build the supervised dataset from the '../audio_clips/Violin_short' directory
# (relative to the kernel's working directory), with the mfcc flag enabled.
# The third returned value is discarded (presumably a test split — confirm).
train, val, _ = make_supervised_dataset('../audio_clips/Violin_short', mfcc=True)

In [None]:
# Grab the first batch for inspection. next(iter(...)) raises StopIteration if
# `train` yields nothing, instead of silently leaving `batch` unbound (or stale
# from an earlier run of this cell).
batch = next(iter(train))

In [None]:
# Show each entry of the sampled batch together with its shape.
for field_name, field_value in batch.items():
    print(field_name, field_value.shape)

In [None]:
# Print each batch entry's key and shape.
# NOTE(review): this cell repeats the previous cell's loop verbatim; it is a
# candidate for removal.
for k,v in batch.items():
    print(k, v.shape)