In [1]:
import time
import glob
from IPython.display import Audio
import logging
from concurrent.futures import ThreadPoolExecutor
from joblib import Parallel, delayed
import time
import logging
from IPython.display import Audio

from IDRnD.utils import *
from IDRnD.augmentations import *
from IDRnD.dataset import *
from IDRnD.resnet import resnet50
from IDRnD.focalloss import FocalLoss

import numpy as np
import torch
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR, CyclicLR, ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms

from sklearn.model_selection import StratifiedKFold, train_test_split

%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Seed the random number generators with a fixed value so runs are repeatable.
# NOTE(review): seed_everything arrives via one of the IDRnD star-imports above
# (presumably IDRnD.utils) — confirm which RNGs it actually seeds.
seed_everything(0)
# Route DEBUG-and-above log records to logs/logs.log; filemode "w+" truncates the
# file each time this cell configures logging.
# NOTE(review): the logs/ directory must already exist or opening the file fails.
logging.basicConfig(level=logging.DEBUG, filename="logs/logs.log",
                    filemode="w+")

In [2]:
# Load the full training set; get_train_data comes from the IDRnD star-imports.
# NOTE(review): presumably X holds the samples (or their paths) and y the labels — confirm.
X, y = get_train_data()
# Hold out 20% for validation. The split is stratified on y and uses a fixed
# random_state, so the same partition is produced on every run.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [3]:
# Transform pipeline applied to every dataset item before it is saved to disk.
# Only ToMellSpec(n_mels=128) is active; every other step below is commented out
# and kept as a record of the augmentations / post-processing that were tried.
train_transform = transforms.Compose([
    #RandomParameter(VTLP, [[0.8, 1.2]], p=0.5),
    #MinMaxChunkScaler(),
    #Normalize(),
    #RandomParameter(RandomNoise, [[0.01, 0.1]]),
    #RandomParameter(Shift, [[2000, 32000]]),
    #RandomParameter(TimeStretch, [[0.75, 1.3]]),
    #RandomParameter(PitchShift, [[-8, 8]]),
    #RandomParameter(Distortion, [[-1, -0.3], [.3, 1.]]),
    ToMellSpec(n_mels=128),
    #GetMFCC(),
    #PadOrClip(300),
    #Normalize(),
    #ToTensor(),
    #transforms.ToTensor(),
])

In [4]:
# Wrap both splits in Base_Dataset (from the IDRnD star-imports).
# NOTE(review): the validation split reuses train_transform. That is harmless while
# the pipeline contains only ToMellSpec, but re-enabling any of the random
# augmentations would also augment the validation data.
train_dataset = Base_Dataset(X_train, y_train, train_transform)
val_dataset = Base_Dataset(X_val, y_val, train_transform)

In [6]:
def precalculate_and_save_one_epoch(folder, run, dataset):
    """Serially write element 0 of every dataset item to ``folder`` as a NumPy file.

    Items are visited by index with a tqdm progress bar. Each one is stored via
    ``np.save`` under ``<folder>/<run>_<index>`` (``np.save`` appends ``.npy``).

    Note the argument order here is ``(folder, run, dataset)``, whereas the
    parallel variants in this notebook take ``(run, folder, dataset)``.

    Args:
        folder: Destination directory; it must already exist.
        run: Value used as the file-name prefix.
        dataset: Sized collection supporting integer indexing, where
            ``dataset[i][0]`` is the array to store.
    """
    n_items = len(dataset)
    for sample in tqdm(range(n_items)):
        # Fetch (and thereby transform) the item first, then build its target path.
        features = dataset[sample][0]
        np.save(os.path.join(folder, f"{run}_{sample}"), features)

In [7]:
def save_sample(sample, dataset, run, folder):
    """Write element 0 of ``dataset[sample]`` to ``<folder>/<run>_<sample>``.

    The array is stored with ``np.save``, which appends the ``.npy`` extension.

    Args:
        sample: Integer index of the item inside ``dataset``.
        dataset: Collection supporting integer indexing, where
            ``dataset[i][0]`` is the array to store.
        run: Value used as the file-name prefix.
        folder: Destination directory; it must already exist.
    """
    features = dataset[sample][0]
    np.save(os.path.join(folder, f"{run}_{sample}"), features)

In [8]:
def precalculate_and_save_one_epoch_parallel(run, folder, dataset):
    """Save every dataset item with ``save_sample`` on a pool of 16 threads.

    One task is submitted per index of ``dataset``. After all tasks have
    finished, the elapsed wall-clock time in seconds is printed. Any exception
    raised inside a task is then re-raised, so a failed save is no longer
    silently dropped along with its discarded future.

    Args:
        run: Forwarded to ``save_sample`` (file-name prefix).
        folder: Forwarded to ``save_sample`` (destination directory).
        dataset: Sized collection supporting integer indexing.

    Raises:
        Exception: Whatever ``save_sample`` raised for the earliest failing
            index (in submission order).
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments.
    before = time.perf_counter()
    with ThreadPoolExecutor(max_workers=16) as executor:
        futures = [
            executor.submit(save_sample, sample, dataset, run, folder)
            for sample in range(len(dataset))
        ]
    # Leaving the ``with`` block has already waited for every submitted task.
    print(time.perf_counter() - before)

    # Future.result() re-raises the exception captured in the worker thread;
    # without this call worker failures would go unnoticed.
    for future in futures:
        future.result()

In [9]:
def precalculate_and_save_one_epoch_joblib(run, folder, dataset):
    """Save every dataset item with ``save_sample`` through joblib.

    One delayed ``save_sample`` call is created per index of ``dataset`` and
    executed by a joblib ``Parallel`` runner configured with 16 jobs, the
    ``multiprocessing`` backend and progress verbosity 5.

    Args:
        run: Forwarded to ``save_sample`` (file-name prefix).
        folder: Forwarded to ``save_sample`` (destination directory).
        dataset: Sized collection supporting integer indexing.
    """
    # Lazy generator of tasks; joblib consumes it as it dispatches work.
    tasks = (
        delayed(save_sample)(sample, dataset, run, folder)
        for sample in range(len(dataset))
    )
    runner = Parallel(n_jobs=16, verbose=5, backend="multiprocessing")
    runner(tasks)

In [10]:
# Delete both output directories and everything in them, so files from an
# earlier run cannot linger. -f makes this a no-op if a directory is absent.
!rm -rf ../data/files/raw_mels
!rm -rf ../data/files/raw_mels_val

In [11]:
# Create the output directories for the train and validation arrays.
# -p also creates missing parent directories and does not fail when the
# directory already exists, so this cell can be re-run safely.
!mkdir -p ../data/files/raw_mels
!mkdir -p ../data/files/raw_mels_val

In [12]:
# Precompute and store one pass ("run") over each split. range(0, 1) yields only
# num == 0, so every file is written as 0_<index>.npy. Widening the range would
# store further runs of the same data under different prefixes.
for num in range(0, 1):
    # Training items go to raw_mels, validation items to raw_mels_val.
    precalculate_and_save_one_epoch_joblib(num, "../data/files/raw_mels", train_dataset)
    precalculate_and_save_one_epoch_joblib(num, "../data/files/raw_mels_val", val_dataset)

[Parallel(n_jobs=16)]: Using backend MultiprocessingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done 112 tasks      | elapsed:    0.4s
[Parallel(n_jobs=16)]: Done 1012 tasks      | elapsed:    1.9s
[Parallel(n_jobs=16)]: Done 2272 tasks      | elapsed:    4.2s
[Parallel(n_jobs=16)]: Done 3892 tasks      | elapsed:    7.3s
[Parallel(n_jobs=16)]: Done 5872 tasks      | elapsed:   11.3s
[Parallel(n_jobs=16)]: Done 8212 tasks      | elapsed:   15.3s
[Parallel(n_jobs=16)]: Done 10912 tasks      | elapsed:   19.8s
[Parallel(n_jobs=16)]: Done 13972 tasks      | elapsed:   25.1s
[Parallel(n_jobs=16)]: Done 17392 tasks      | elapsed:   30.9s
[Parallel(n_jobs=16)]: Done 21127 tasks      | elapsed:   42.4s
[Parallel(n_jobs=16)]: Done 23800 tasks      | elapsed:   49.4s
[Parallel(n_jobs=16)]: Done 27400 tasks      | elapsed:   55.7s
[Parallel(n_jobs=16)]: Done 31288 tasks      | elapsed:  1.0min
[Parallel(n_jobs=16)]: Done 35464 tasks      | elapsed:  1.2min
[Parallel(n_jobs=16)]: 