In [None]:
import os
import sys

# Make the local autoencoded-vocal-analysis checkout importable.
# The location can be overridden with the AVA_REPO_DIR environment variable;
# when it is unset (or empty) the original hard-coded path is used.
AVA_REPO_DIR = os.environ.get('AVA_REPO_DIR') or \
    'C:\\Users\\ralph\\code\\autoencoded-vocal-analysis'

# Guard so that re-running this cell does not keep appending duplicates.
if AVA_REPO_DIR not in sys.path:
    sys.path.append(AVA_REPO_DIR)

# Preprocessing: tune parameters, then write syllable spectrograms

In [None]:
from ava.preprocessing.utils import get_spec # makes spectrograms
from ava.models.vae import X_SHAPE # spectrogram dimensions

# Settings handed to the preprocessing routines that turn segmented
# syllables into spectrograms.
preprocess_params = {
    # Spectrogram maker.
    'get_spec': get_spec,

    # Syllable duration and frequency limits.
    'max_dur': 0.3,    # maximum syllable duration
    'min_freq': 500,   # minimum frequency
    'max_freq': 62500, # maximum frequency (half of 'fs' below)

    # Spectrogram size, fixed by the VAE's X_SHAPE rather than chosen here.
    'num_freq_bins': X_SHAPE[0],
    'num_time_bins': X_SHAPE[1],

    # FFT parameters.
    'nperseg': 512,
    'noverlap': 256,

    # Range of log-spectrogram values.
    'spec_min_val': -8, # minimum
    'spec_max_val': -5, # maximum

    # Audio samplerate.
    'fs': 125000,

    # Frequency spacing: mel (True) or linear (False).
    'mel': False,
    # Stretch short syllables?
    'time_stretch': True,
    # Normalize spectrogram values on a spectrogram-by-spectrogram basis?
    'within_syll_normalize': False,

    # Maximum number of syllables per directory.
    'max_num_syllables': None,
    # Syllables per file.
    'sylls_per_file': 100,

    # Names of the tunable parameters, grouped by value type.
    'real_preprocess_params': (
        'min_freq', 'max_freq', 'spec_min_val', 'spec_max_val', 'max_dur',
    ),
    'int_preprocess_params': ('nperseg', 'noverlap'),
    'binary_preprocess_params': (
        'time_stretch', 'mel', 'within_syll_normalize',
    ),
}

In [None]:
from ava.preprocessing.preprocess import tune_syll_preprocessing_params
# Directories containing audio, one per cohort.
audio_dirs = [
    'cohort2_combined_audio',
    'cohort4_combined_audio',
    'cohort5_combined_audio',
]

# Segment directories, listed in the same cohort order as audio_dirs.
seg_dirs = [
    'cohort2_segments',
    'cohort4_segments',
    'cohort5_segments',
]

# Replace the parameter dict with whatever the tuning routine returns.
# NOTE(review): preprocess_params must already exist (it is defined in an
# earlier cell); this cell rebinds the same name.
preprocess_params = tune_syll_preprocessing_params(
    audio_dirs,
    seg_dirs,
    preprocess_params,
)

In [None]:
from itertools import repeat

from joblib import Parallel, delayed

from ava.preprocessing.preprocess import process_sylls

# Spectrogram directories, one per cohort, in the same order as
# audio_dirs and seg_dirs.
spec_dirs = [
    'cohort2_specs',
    'cohort4_specs',
    'cohort5_specs',
]

# One (audio_dir, seg_dir, spec_dir, params) tuple per cohort; every tuple
# shares the same preprocess_params object. zip stops at the shortest list,
# so the three directory lists are expected to have equal length.
gen = zip(audio_dirs, seg_dirs, spec_dirs, repeat(preprocess_params))

# Dispatch one process_sylls call per cohort; n_jobs=-1 is passed to joblib.
tasks = (delayed(process_sylls)(*cohort_args) for cohort_args in gen)
Parallel(n_jobs=-1)(tasks)

# Train the VAE

In [None]:
from ava.models.vae_dataset import get_syllable_partition
from ava.models.vae_dataset import get_syllable_data_loaders
from ava.models.vae import VAE

In [None]:
from ava.models.vae_dataset import get_syllable_data_loaders
from ava.models.vae_dataset import get_syllable_partition

# Directories containing the saved spectrograms (hdf5s).
spec_dirs = [
    'cohort2_specs',
    'cohort4_specs',
    'cohort5_specs',
]

# 80/20 train/test split.
split = 0.8

# Random train/test partition over the spectrogram directories.
partition = get_syllable_partition(spec_dirs, split)

# Dataloaders built from that partition.
loaders = get_syllable_data_loaders(partition)

In [None]:
from ava.models.vae import VAE

# Build the network; save_dir is handed to the model as its save location.
save_dir = 'models'
model = VAE(
    save_dir=save_dir,
    model_precision=40.0,
)

# Run training on the loaders built earlier in the notebook.
# NOTE(review): epochs=51 with save_freq=5 presumably makes the last epoch
# index (50) fall on a save point -- confirm against VAE.train_loop.
model.train_loop(
    loaders,
    epochs=51,
    save_freq=5,
)