In [None]:
%load_ext autoreload
%autoreload 2
import datasets
import sys
sys.path.insert(0, '..')
from IPython.display import Audio
from torch.utils.data import DataLoader
from data.datasets import preprocess_dataset, BaseDataset, Collator
from data.tokenizer import Tokenizer
import data.augmentations as augs
from torchaudio import transforms
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
!conda install -y -c conda-forge ipywidgets

In [None]:
def replace_file(sample):
    """Repoint a sample's ``'file'`` entry at the local ``../flac`` directory.

    Only the last ``/``-separated component of the current path is kept; it is
    then prefixed with ``../flac/``.

    Args:
        sample: mapping with a string stored under the ``'file'`` key.

    Returns:
        The same ``sample`` object, modified in place.
    """
    # rpartition yields the text after the last '/', or the whole string when
    # there is no '/' at all.
    file_name = sample['file'].rpartition('/')[2]
    sample['file'] = f'../flac/{file_name}'
    return sample

In [None]:
# Load the dataset previously saved under 'librespeech-1k' and construct the
# tokenizer from it (the meaning of the second positional argument, True, is
# defined by data.tokenizer.Tokenizer — TODO confirm).
dataset = datasets.load_from_disk('librespeech-1k')
tokenizer = Tokenizer(dataset, True)

# Disable the `datasets` cache before the map() calls below.
# `set_caching_enabled` is deprecated in recent `datasets` releases in favour
# of `disable_caching()`; prefer the new entry point, fall back on old installs.
if hasattr(datasets, 'disable_caching'):
    datasets.disable_caching()
else:
    datasets.set_caching_enabled(False)

# Rewrite every sample's audio path to the local ../flac directory.
dataset = dataset.map(replace_file)

# Batched preprocessing with num_proc=2; the listed columns are removed from
# the result.
preprocessed = dataset.map(
    lambda b: preprocess_dataset(b, tokenizer),
    batched=True,
    remove_columns=['chapter_id', 'file', 'speaker_id'],
    num_proc=2,
)

In [None]:
# Waveform-level augmentation pipeline. AddNoise relies on RandomApply's
# default `p`; PitchShift and TimeStretch are each wrapped with p=0.3.
wav_transform = augs.Compose(
    augs.RandomApply(augs.AddNoise()),
    augs.RandomApply(augs.PitchShift(sr=16000), p=0.3),
    augs.RandomApply(augs.TimeStretch(), p=0.3),
)

# Mel-spectrogram settings: sample rate 16000, 1024-point FFT and window,
# hop length 256.
mel_transform = augs.MelTransform(
    sample_rate=16000, n_fft=1024, win_length=1024, hop_length=256
)

# The collator receives both transforms; how it applies them is defined in
# data.datasets.Collator.
collator = Collator(wav_transform, mel_transform)

In [None]:
# Wrap the preprocessed data and serve it in batches of 10, collated by
# `collator`, using 3 loader worker processes and pinned host memory.
ds = BaseDataset(preprocessed)
loader = DataLoader(
    ds,
    batch_size=10,
    collate_fn=collator,
    num_workers=3,
    pin_memory=True,
)

In [None]:
# Pull the first batch from a fresh iterator over the loader.
batch = next(iter(loader))

In [None]:
# List the keys present in the collated batch.
batch.keys()

In [None]:
# Shape of the batch's 'mels' entry.
batch['mels'].shape

In [None]:
# Display the batch's 'mel_len' entry (presumably per-sample mel lengths — TODO confirm).
batch['mel_len']

In [None]:
# Show the log-mel spectrogram of the batch element at index 4.
plt.figure(figsize=(20, 5))
plt.xlabel('Time', size=20)
plt.ylabel('Frequency (Hz)', size=20)
# Clamp to 1e-5 before taking the log so zero-valued bins do not become -inf.
# origin='lower' draws row 0 at the bottom so the vertical axis increases
# upward (assumes rows are frequency bins in ascending order — TODO confirm).
# NOTE(review): the rows are mel bins, so the 'Hz' unit in the label may not
# be accurate — confirm.
# The trailing ';' suppresses the AxesImage repr in the cell output.
plt.imshow(batch['mels'][4].clamp(1e-5).log(), origin='lower');

In [None]:
import librosa

In [12]:
import wandb
from utils.logging import Logger

In [13]:
# Log in to Weights & Biases; the recorded output of this cell is True.
wandb.login()

True

In [15]:
# Create the experiment logger for project 'dla_hw1' with a small test config.
# The recorded cell output echoes these keyword arguments.
logger = Logger(project='dla_hw1', config={'lr': 0.01, 'try': 1})

{'project': 'dla_hw1', 'config': {'lr': 0.01, 'try': 1}}


In [16]:
# Log a single metric value through the logger.
logger.log({'acc': 1})

In [17]:
# Record summary values; they appear in the summary printed when the logger is finished.
logger.set_summary({'best_score': 0.8, 'med': 0.9})

In [19]:
import torch
# Small linear layer (12 input features, 13 output features) used to try out logger.watch.
model = torch.nn.Linear(12, 13)

In [23]:
# Hand the model to the logger's watch hook (presumably forwards to wandb.watch — TODO confirm).
logger.watch(models=model)

In [24]:
# Finish the logging session; the recorded output shows the upload progress
# followed by the run's history and summary tables.
logger.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁

0,1
acc,1.0
best_score,0.8
med,0.9


In [1]:
import datasets

In [None]:
# Load the 'train.360' split of the 'clean' configuration of 'librispeech_asr'.
dataset = datasets.load_dataset(
    'librispeech_asr',
    'clean',
    split='train.360',
)