## Update

In [None]:
import cv2
import audioread
import logging
import os
import random
import time
import warnings

import librosa
import librosa.display as display
import numpy as np
import pandas as pd
import soundfile as sf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

from contextlib import contextmanager
from IPython.display import Audio
from pathlib import Path
from typing import Optional, List

from catalyst.dl import SupervisedRunner, State, CallbackOrder, Callback, CheckpointCallback
from fastprogress import progress_bar
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, average_precision_score

In [None]:
pip install audiomentations

In [None]:
pip install acoustics

In [None]:
import acoustics

# Synthesize 120 * 48000 samples each of brown and pink noise; they are
# written out below as 48 kHz WAV files (i.e. two minutes of audio each).
brown_noise = acoustics.generator.brown(120*48000)
pink_noise = acoustics.generator.pink(120*48000)

# define directories
# exist_ok=True so that re-running this cell does not raise FileExistsError.
os.makedirs('../noise', exist_ok=True)
noise_dir = ROOT / 'noise'
# The files are written to `noise_dir`, which only coincides with '../noise'
# when ROOT resolves to the parent directory, so make sure the directory that
# is actually written to exists as well.
# NOTE(review): assumes ROOT is a pathlib.Path defined earlier — confirm.
noise_dir.mkdir(parents=True, exist_ok=True)

sf.write(noise_dir / "brown_noise.wav", brown_noise, samplerate=48000)
sf.write(noise_dir / "pink_noise.wav", pink_noise, samplerate=48000)

In [None]:
# NOTE(review): presumably the clip length in seconds; not used in this cell.
PERIOD = 10

from audiomentations import (
    AddBackgroundNoise,
    AddGaussianNoise,
    AddGaussianSNR,
    AddShortNoises,
    Compose,
    Gain,
)

# Waveform augmentation pipeline. The transforms are listed in the order they
# are handed to Compose; the two noise-file transforms read from `noise_dir`.
augmenter = Compose([
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    AddGaussianSNR(min_SNR=0.001, max_SNR=0.5, p=0.5),
    Gain(min_gain_in_db=-12, max_gain_in_db=12, p=0.5),
    AddBackgroundNoise(
        sounds_path=noise_dir,
        min_snr_in_db=3,
        max_snr_in_db=30,
        p=0.5,
    ),
    AddShortNoises(noise_dir),
])

# modify with noise
class PANNsDataset(data.Dataset):
    """Dataset that loads a recording from disk and returns it augmented.

    Each item is a dict with a single key, ``"waveform"``, holding the
    float32 output of the module-level ``augmenter`` applied to the audio
    read from ``TRAIN_AUDIO_ROOT / <file stem>.flac``.

    Args:
        file_list: One entry per sample, unpacked as
            ``[file_stem, target, t_begin, t_end]``.
        phase: Stored on the instance as ``self.phase``; not read by any
            method shown here.
    """

    def __init__(
            self,
            file_list: List[List[str]],
            phase):
        # list of list: [file_stem, target, t_begin, t_end]
        self.file_list = file_list
        self.phase = phase

    def __len__(self):
        """Return the number of entries in ``file_list``."""
        return len(self.file_list)

    def __getitem__(self, idx: int):
        """Load, augment and return the waveform for entry ``idx``.

        Returns:
            ``{"waveform": y}`` where ``y`` is a float32 array.
        """
        # NOTE(review): target, t_begin and t_end are unpacked but not used
        # yet — presumably the label and the annotated time window; confirm
        # whether cropping and a "targets" entry should be added.
        wav_path, target, t_begin, t_end = self.file_list[idx]
        wav_path = str(TRAIN_AUDIO_ROOT / wav_path) + ".flac"

        # The waveform has to be read before it can be augmented; previously
        # `y` was referenced without ever being assigned.
        y, sr = sf.read(wav_path, dtype="float32")

        # Use the sample rate reported by the file rather than assuming 48 kHz.
        y = augmenter(samples=y, sample_rate=sr).astype(np.float32)

        return {"waveform": y}