In [8]:
import glob
import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor

from IPython.display import Audio
from joblib import Parallel, delayed

from Freesound.data import *
from Freesound.utils import *
from Freesound.model import *
from Freesound.augmentations import *

import numpy as np
import pandas as pd
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import StratifiedKFold, train_test_split

import torch
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR, CyclicLR
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms

%reload_ext autoreload
%autoreload 2

seed_everything(0)
logging.basicConfig(level=logging.DEBUG, filename="logs/logs.log", filemode="w+")

In [9]:
# Root folder of the dataset files (absolute path inside the workspace).
dataset_dir = "/src/workspace/data/files/"
# Sub-folder of the root that is searched for training recordings.
train_dataset_dir = os.path.join(
    dataset_dir,
    "Training_Data/",
)

In [10]:
# Gather every .wav file below the training folder (recursive search) and
# sort the paths so the sample order is the same on every run.
X = glob.glob(os.path.join(train_dataset_dir, '**/*.wav'), recursive=True)
X.sort()
# Binary label per file: 1 when the substring "human" occurs anywhere in the
# path (directory names included), otherwise 0.
y = np.array([int("human" in wav_path) for wav_path in X])
# From here on X is a single-column DataFrame of file paths.
X = pd.DataFrame(X)

In [11]:
# Transform pipeline handed to the dataset objects. Only the mel-spectrogram
# step (128 mel bins) is active; every other step is kept commented out, in
# its original pipeline position, so it can be switched back on.
train_transform = transforms.Compose(
    [
        # -- disabled steps placed before the spectrogram conversion --
        # RandomParameter(VTLP, [[0.8, 1.2]], p=0.5),
        # MinMaxChunkScaler(),
        # Normalize(),
        # RandomParameter(RandomNoise, [[0.01, 0.1]]),
        # RandomParameter(Shift, [[2000, 32000]]),
        # RandomParameter(TimeStretch, [[0.75, 1.3]]),
        # RandomParameter(PitchShift, [[-8, 8]]),
        # RandomParameter(Distortion, [[-1, -0.3], [.3, 1.]]),
        ToMellSpec(n_mels=128),
        # -- disabled steps placed after the spectrogram conversion --
        # GetMFCC(),
        # PadOrClip(300),
        # Normalize(),
        # ToTensor(),
        # transforms.ToTensor(),
    ]
)

In [12]:
# Hold out 20% of the files for validation. The split is stratified on the
# labels and uses a fixed random_state, so it is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [13]:
# Build the training and validation datasets, in that order.
# NOTE: the validation set reuses train_transform, so any random augmentation
# re-enabled in that pipeline would be applied to validation samples as well.
train_dataset, val_dataset = (
    Dataset_Train(paths, labels, train_transform)
    for paths, labels in ((X_train, y_train), (X_val, y_val))
)

In [14]:
def precalculate_and_save_one_epoch(folder, run, dataset):
    """Sequentially save the first element of every dataset item to ``folder``.

    Items are read by index (``dataset[i][0]``) and written with ``np.save``
    under the name ``<run>_<i>``; ``np.save`` appends the ``.npy`` extension.
    Progress is reported through ``tqdm``.

    Args:
        folder: Existing directory the files are written to.
        run: Identifier used as the file-name prefix.
        dataset: Object supporting ``len()`` and integer indexing.
    """
    total = len(dataset)
    for sample in tqdm(range(total)):
        item = dataset[sample]
        target = os.path.join(folder, f"{run}_{sample}")
        np.save(target, item[0])

In [15]:
def save_sample(sample, dataset, run, folder):
    """Save the first element of ``dataset[sample]`` as ``<folder>/<run>_<sample>.npy``.

    Args:
        sample: Integer index of the item to export.
        dataset: Object supporting integer indexing; each item must itself be
            indexable, and only element 0 is written.
        run: Identifier used as the file-name prefix.
        folder: Existing directory the file is written to.
    """
    first_element = dataset[sample][0]
    # np.save appends the ".npy" suffix to the target name.
    destination = os.path.join(folder, f"{run}_{sample}")
    np.save(destination, first_element)

In [16]:
def precalculate_and_save_one_epoch_parallel(run, folder, dataset):
    """Save every dataset item with ``save_sample`` on a 16-thread pool.

    One ``save_sample`` task is submitted per index in ``range(len(dataset))``.
    The elapsed wall-clock time in seconds is printed once all tasks have
    finished.

    Args:
        run: Identifier used as the file-name prefix.
        folder: Existing directory the files are written to.
        dataset: Object supporting ``len()`` and integer indexing.

    Raises:
        Exception: The first exception (in submission order) raised by any
            ``save_sample`` task. Previously such failures were discarded
            silently, leaving files missing without any error.
    """
    started = time.time()
    with ThreadPoolExecutor(max_workers=16) as executor:
        futures = [
            executor.submit(save_sample, sample, dataset, run, folder)
            for sample in range(len(dataset))
        ]
    # Leaving the ``with`` block waits for every submitted task to complete.
    print(time.time() - started)

    # Future.result() re-raises whatever the task raised; without this call a
    # failed export would go unnoticed.
    for future in futures:
        future.result()

In [17]:
def precalculate_and_save_one_epoch_joblib(run, folder, dataset):
    """Save every dataset item with ``save_sample`` through joblib.

    Runs one ``save_sample`` call per index in ``range(len(dataset))`` using
    16 jobs on joblib's "multiprocessing" backend; ``verbose=5`` makes joblib
    print progress messages.

    Args:
        run: Identifier used as the file-name prefix.
        folder: Existing directory the files are written to.
        dataset: Object supporting ``len()`` and integer indexing.
    """
    tasks = (
        delayed(save_sample)(sample, dataset, run, folder)
        for sample in range(len(dataset))
    )
    runner = Parallel(n_jobs=16, verbose=5, backend="multiprocessing")
    runner(tasks)

In [18]:
!rm -rf ../data/files/raw_mels

In [19]:
!mkdir ../data/files/raw_mels
!mkdir ../data/files/raw_mels_val

In [20]:
# Precompute the features once (a single pass, run id 0) for the training and
# validation sets; each set goes to its own folder.
for num in range(1):
    precalculate_and_save_one_epoch_joblib(
        num, "../data/files/raw_mels", train_dataset
    )
    precalculate_and_save_one_epoch_joblib(
        num, "../data/files/raw_mels_val", val_dataset
    )

[Parallel(n_jobs=16)]: Using backend MultiprocessingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  80 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 620 tasks      | elapsed:    2.9s
[Parallel(n_jobs=16)]: Done 1376 tasks      | elapsed:    5.9s
[Parallel(n_jobs=16)]: Done 2348 tasks      | elapsed:    9.4s
[Parallel(n_jobs=16)]: Done 3536 tasks      | elapsed:   13.9s
[Parallel(n_jobs=16)]: Done 4940 tasks      | elapsed:   19.0s
[Parallel(n_jobs=16)]: Done 6560 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done 8396 tasks      | elapsed:   31.4s
[Parallel(n_jobs=16)]: Done 10448 tasks      | elapsed:   39.1s
[Parallel(n_jobs=16)]: Done 12716 tasks      | elapsed:   47.6s
[Parallel(n_jobs=16)]: Done 15200 tasks      | elapsed:   57.2s
[Parallel(n_jobs=16)]: Done 17900 tasks      | elapsed:  1.1min
[Parallel(n_jobs=16)]: Done 20816 tasks      | elapsed:  1.3min
[Parallel(n_jobs=16)]: Done 23948 tasks      | elapsed:  1.5min
[Parallel(n_jobs=16)]: Don