# The Bare Minimum About Docker

## https://github.com/markovka17/dl-start-pack

# About W&B

In [None]:
import wandb
import torch
from torch import nn

## https://www.wandb.com/

Create a free account and log in.

In [None]:
# Shell command run from the notebook; `<token>` is a placeholder to replace with your own token.
!wandb login <token>

In [None]:
# Start a W&B run.
# `config` holds the hyperparameters attached to the run; 'a' and 'b' are dummy example values.
config = dict(a=10, b=20, batch_size=8)

wandb.init(config=config, project="axial-transformer")

In [None]:
# Toy two-layer MLP (100 -> 100 -> 10) used only to demonstrate W&B model tracking.
model = nn.Sequential(
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
)

# Register the model with the active W&B run.
wandb.watch(model)

In [None]:
# Placeholder showing the call shape only; the following cells pass a dict of metric name -> value.
wandb.log(...)

In [None]:
# Log a single scalar metric under the key 'accuracy'.
wandb.log({'accuracy': 0.9})

In [None]:
# Log a second value for the same 'accuracy' key.
wandb.log({'accuracy': 0.95})

In [None]:
# Log an audio file as a W&B media object, with its sample rate and a caption.
wandb.log({"Audio": wandb.Audio('../week01/audio.wav', sample_rate=22050, caption='Example')})

In [None]:
# Serialize only the model parameters (state dict) to a local file.
torch.save(model.state_dict(), 'model.pt')

In [None]:
# Hand the checkpoint file written above to W&B so it is stored with the run.
wandb.save('model.pt')

# Augmentations

    1) Gaussian Noise
    2) Time Stretching
    3) Pitch Shifting
    4) Volume
    5) Impulse Response
        - https://www.acousticalsurfaces.com/acoustic_IOI/reverberation.htm
        - https://www.sonic-shield.com/echo-vs-reverberation
        - https://en.wikipedia.org/wiki/Convolution_reverb
        - https://danielpovey.com/files/2017_icassp_reverberation.pdf
    6) Noising with a different audio clip
        - https://medium.com/analytics-vidhya/adding-noise-to-audio-clips-5d8cee24ccb8
        - https://arxiv.org/pdf/1808.00563.pdf (3.1)
    7) SpecAug (Time/Freq masking, Cutout)
        - https://arxiv.org/pdf/1904.08779.pdf

In [None]:
import torchaudio

from IPython import display as display_

%pylab inline

In [None]:
# Load the example clip: `wav` is the waveform tensor, `sr` its sample rate.
wav, sr = torchaudio.load('../week01/audio.wav')
# Drop singleton dimensions in place (e.g. a leading channel axis of size 1).
wav.squeeze_()

# Ground Truth 

In [None]:
def viz(wav, rate=22050):
    """Plot a waveform and render an inline audio player for it.

    Relies on the names injected by ``%pylab inline`` (``figsize``, ``plot``,
    ``plt``) and on ``IPython.display`` imported as ``display_``.

    Args:
        wav: 1-D waveform (torch tensor or numpy array) to plot and play.
        rate: playback sample rate in Hz. Defaults to 22050, the rate that was
            previously hard-coded, so existing ``viz(wav)`` calls are unchanged.
    """
    figsize(20, 5)
    plot(wav)
    plt.show()

    # normalize=False plays the samples as-is instead of rescaling to full volume,
    # so loudness differences between augmentations stay audible.
    display_.display(display_.Audio(wav, rate=rate, normalize=False))

# Plot and play the unmodified clip as the reference for the augmentations below.
viz(wav)

# + Gaussian Noise

In [None]:
from torch import distributions

In [None]:
# Zero-mean Gaussian with standard deviation 0.05, used as the additive-noise source.
noiser = distributions.Normal(0, 0.05)

In [None]:
# Draw one noise value per audio sample and add it to the clean signal.
wav_2 = noiser.sample(wav.shape) + wav
# Clip in place to [-1, 1] after adding the noise.
wav_2.clamp_(-1, 1)

In [None]:
# Plot and play the clip with Gaussian noise added.
viz(wav_2)

# + Time Stretching

In [None]:
# torchaudio's TimeStretch transform works on spectrograms only, not on raw waveforms.
torchaudio.transforms.TimeStretch

# Pipeline when using it: audio -> spectrogram -> TimeStretch -> GriffinLim (back to audio)

In [None]:
import librosa

In [None]:
# Time-stretch directly on the waveform with librosa (rate < 1 slows the audio down).
# `rate` is passed by keyword: recent librosa releases make it keyword-only,
# and older releases accept the keyword form as well.
wav_3 = librosa.effects.time_stretch(wav.numpy(), rate=0.7)
# Convert back to a torch tensor for the rest of the notebook.
wav_3 = torch.from_numpy(wav_3)

In [None]:
# Plot and play the time-stretched clip.
viz(wav_3)

# + Pitch Shifting

In [None]:
# Shift the pitch down by 5 steps at a 22050 Hz sample rate.
# `sr` and `n_steps` are passed by keyword: recent librosa releases make them
# keyword-only, and older releases accept the keyword form as well.
wav_4 = librosa.effects.pitch_shift(wav.numpy(), sr=22050, n_steps=-5)

In [None]:
# Plot and play the pitch-shifted clip.
viz(wav_4)

# + Volume

In [None]:
# Volume transform with gain 0.1.
# NOTE(review): presumably an amplitude ratio (torchaudio's default gain_type) - confirm.
voler = torchaudio.transforms.Vol(.1)

In [None]:
# Apply the volume transform to the original clip.
wav_5 = voler(wav)

In [None]:
# Plot and play the volume-adjusted clip.
viz(wav_5)

# + Impulse Response

In [None]:
# Load a room impulse response (RIR) recording.
# NOTE: machine-specific absolute path; point it at your local copy of the RIR dataset.
rir, rir_sr = torchaudio.load('/Users/markaa/MITIR/mitir_16kHz/h001_Bedroom_65txts.wav')

# Keep `sr` bound to the speech clip's sample rate instead of overwriting it with the
# RIR's rate, and resample the RIR to that rate: convolving signals recorded at
# different sample rates distorts the simulated reverberation.
if rir_sr != sr:
    rir = torchaudio.transforms.Resample(rir_sr, sr)(rir)

In [None]:
# Plot the impulse response with singleton dimensions removed.
plot(rir.squeeze())

In [None]:
# Inline player for the impulse response, played back at the rate currently held in `sr`.
display_.Audio(rir, rate=sr)

In [None]:
import torch.nn.functional as F

def simulate(audio: torch.Tensor, rir: torch.Tensor):
    """Apply an impulse response to ``audio`` via full linear convolution.

    Args:
        audio: waveform tensor; after padding it is flattened into one channel.
        rir: impulse response; singleton dimensions are squeezed away and the
            result is used as a 1-D kernel.

    Returns:
        The convolved signal with singleton dimensions squeezed. For a 1-D
        ``audio`` of N samples and a kernel of L samples the length is
        ``N + L - 1``. If the absolute peak exceeds 1, the signal is divided
        by that peak; otherwise it is returned unscaled.
    """
    pad = rir.shape[-1] - 1

    # conv1d computes cross-correlation, so the kernel is reversed to obtain a
    # true convolution.
    kernel = rir.squeeze().flip(0).view(1, 1, -1)

    # Zero-pad (kernel length - 1) samples on both sides so that every partial
    # overlap between signal and kernel contributes to the output.
    padded = F.pad(audio, [pad, pad]).view(1, 1, -1)
    convolved_audio = torch.conv1d(padded, kernel).squeeze()

    # Peak normalization: rescale only when the result leaves [-1, 1].
    peak = convolved_audio.abs().max()
    if peak > 1:
        convolved_audio /= peak

    return convolved_audio

In [None]:
# Convolve the clip with the room impulse response to simulate reverberation.
wav_6 = simulate(wav, rir)

In [None]:
# Plot and play the reverberated clip.
viz(wav_6)

Compare with the original:

In [None]:
# Plot and play the original clip again for a side-by-side comparison.
viz(wav)

# + Noising with a different audio clip

In [None]:
# Fetch librosa's bundled 'trumpet' example recording to use as background noise.
filename = librosa.ex('trumpet')
# NOTE: this rebinds the global `sr` to the rate returned by librosa.load.
y, sr = librosa.load(filename)

noise = y

# Plot and play the noise clip on its own.
viz(noise)

In [None]:
# Target signal-to-noise ratio in dB: the noise is scaled so that
# norm(audio) / norm(alpha * noise) == 10 ** (noize_level / 20).
noize_level = torch.Tensor([1])  # [0, 40]

# Crop both signals to their common length BEFORE measuring energy, so the ratio
# describes exactly the samples that get mixed. This also avoids a shape mismatch
# when the noise clip is longer than the audio.
# (A random sub-segment of the longer signal could be sampled instead; not done here.)
noise_t = torch.from_numpy(noise)
mix_len = min(wav.shape[0], noise_t.shape[0])
wav = wav[:mix_len]  # as before, later cells see the cropped `wav`
noise_t = noise_t[:mix_len]

noize_energy = torch.norm(noise_t)
audio_energy = torch.norm(wav)

alpha = (audio_energy / noize_energy) * torch.pow(10, -noize_level / 20)

wav_7 = wav + alpha * noise_t
# Clip in place to [-1, 1] after mixing.
wav_7.clamp_(-1, 1)

In [None]:
# Plot and play the clip mixed with the background noise.
viz(wav_7)

# + SpecAug (Time/Freq masking, Cutout)

In [None]:
# Mel-spectrogram front end: 1024-point FFT and window, hop of 256 samples,
# 80 mel bands covering 0-8000 Hz, for audio sampled at 22050 Hz.
mel_spectrogramer = torchaudio.transforms.MelSpectrogram(
    sample_rate=22050,
    n_fft=1024,
    win_length=1024,
    hop_length=256,
    f_min=0,
    f_max=8000,
    n_mels=80,
)

mel_spectrogram = mel_spectrogramer(wav)
# Floor the energies before taking the log: zero-energy bins (digital silence)
# would otherwise become -inf and break plotting and later arithmetic.
log_mel = torch.log(mel_spectrogram.clamp(min=1e-5))

In [None]:
# Show the log-mel spectrogram as an image.
imshow(log_mel)

In [None]:
# Frequency-masking transform constructed with a mask parameter of 40.
freq_masker = torchaudio.transforms.FrequencyMasking(40)

In [None]:
mel_spectrogram = mel_spectrogramer(wav)
# Floor the energies before taking the log so zero-energy bins do not become -inf.
log_mel = torch.log(mel_spectrogram.clamp(min=1e-5))
# Show the spectrogram after frequency masking.
imshow(freq_masker(log_mel))

In [None]:
# Time-masking transform with a mask parameter of 100.
# The second argument is now spelled out as `iid_masks` instead of a bare positional True.
# NOTE(review): iid_masks is meant for batched input; confirm it has any effect on this
# single, unbatched spectrogram.
time_masker = torchaudio.transforms.TimeMasking(time_mask_param=100, iid_masks=True)
# Floor the energies before taking the log so zero-energy bins do not become -inf.
log_mel = torch.log(mel_spectrogram.clamp(min=1e-5))
# Show the spectrogram after time masking.
imshow(time_masker(log_mel))