In [7]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1]:
import sys, os
# Add the parent directory to the import path so the `datasets` and `models`
# packages imported below can be found — presumably the notebook lives one
# level below the repository root; verify.
sys.path.append('..')

import numpy as np
import librosa as lr
import torch
import IPython.display as ipd
import matplotlib.pyplot as plt
import pytorch_lightning as pl

from scipy.signal.windows import hann
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import LearningRateMonitor, EarlyStopping

from datasets.nsynth_datamodule import NsynthDataModule
from models.cvae_resnet import CvaeResnet
from models.cvae_inception import CvaeInception

# Fix the global random seed up front (Lightning reports "Global seed set to 42").
pl.seed_everything(42)

Global seed set to 42


42

In [30]:
### CONFIGS

# Training-run settings. `trainer_kwargs` is unpacked verbatim into
# `pl.Trainer(...)` further down in the notebook.
train_configs = dict(
    descr='1pitch_1gpu',
    num_workers=16,
    batch_size=28,
    max_epochs=5000,
    patience=100,
    trainer_kwargs=dict(
        gpus='0',
        accelerator=None,
        num_nodes=1,
        precision=32,
    ),
)

# Dataset / feature-extraction settings handed to the data module.
ds_configs = dict(
    dataset_path='/data/riccardo_datasets',
    feature='spec',
    feature_params=dict(
        win_length=256,
        hop_length=64,
        # Stored as a plain list (not an ndarray) so the whole config can be
        # written out with `json.dump`.
        window=hann(256).tolist(),
    ),
    n_fft=510,
    ds_kwargs=dict(
        pitches=[60],
        # instrument_families=[0],  (filter currently disabled)
        sr=16000,
        duration=1.02,
    ),
)

# Hyper-parameters passed to the CvaeInception constructor.
m_configs_incept = dict(
    lr=5e-4,
    lr_scheduler=dict(
        factor=10 ** (-1 / 4),  # ~0.562
        patience=50,
        cooldown=20,
        min_lr=1e-5,
    ),
    c_labels=['pitch'],
    kl_coeff=1e-4,
    db_coeff=1e-4,
    latent_size=32,
    channel_size=2,
    use_inception=True,
    repeat_per_block=1,
    dense_size=256,
)

# Single bundle of all three sections; this is what gets serialized to JSON.
configs = dict(
    train=train_configs,
    dataset=ds_configs,
    model=m_configs_incept,
)

In [31]:
import json

# Persist the assembled configuration as indented JSON.
with open('../configs/test_1gpu.json', 'w') as config_file:
    config_file.write(json.dumps(configs, indent=2))

In [10]:
# Instantiate the model from the 'model' section of the config bundle
# (the same dict object as `m_configs_incept`).
model = CvaeInception(configs['model'])

In [11]:
# logger
# Name the run "<model name>_<run description>". The original call supplied
# only one argument for the two '{}' placeholders, which raises IndexError;
# the run description from the training config fills the second field.
log_name = '{}_{}'.format(CvaeInception.model_name, train_configs['descr'])
logger = TensorBoardLogger('logs', name=log_name)

In [12]:
# init data loader
# Read loader sizing from the training config rather than from bare
# `num_workers` / `batch_size` names, which are not defined anywhere in the
# notebook and fail with NameError on a fresh kernel.
dm = NsynthDataModule(
    ds_configs,
    num_workers=train_configs['num_workers'],
    batch_size=train_configs['batch_size'],
)
dm.setup()

Caching data: /data/riccardo_datasets/nsynth-train/examples.json
Data: (289205, 13)
Caching data: /data/riccardo_datasets/nsynth-valid/examples.json
Data: (12678, 13)
Caching data: /data/riccardo_datasets/nsynth-test/examples.json


  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore


Data: (4096, 13)


In [11]:
# callbacks
# Stop when 'val_loss' has not improved for `patience` epochs, and log the
# learning rate once per epoch. Stopping criteria come from the training
# config; the bare `patience` / `max_epochs` names are not defined anywhere
# in the notebook and fail with NameError on a fresh kernel.
early_stop = EarlyStopping(monitor='val_loss', patience=train_configs['patience'])
lr_monitor = LearningRateMonitor(logging_interval='epoch')

# train!
trainer = pl.Trainer(
    # Optional debugging switches, left disabled:
#    weights_summary='full',
#    overfit_batches=1,
#    terminate_on_nan=False,
#    gradient_clip_val=0.5,
    max_epochs=train_configs['max_epochs'],
    callbacks=[early_stop, lr_monitor],
    logger=logger,
    **train_configs['trainer_kwargs'])

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


In [None]:
# Launch training on the data module prepared above.
trainer.fit(model, datamodule=dm)

Global seed set to 42
