In [1]:
import soundfile as sf
import shutil
import os

from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift, TanhDistortion
import numpy as np
import librosa
import pandas as pd
from tqdm.notebook import tqdm_notebook
from sklearn.model_selection import train_test_split

# Split Training and Test Set

In [18]:
# Load the cleaned clip metadata table; later cells read its 'filename' and
# 'um_label' columns.
df = pd.read_csv(filepath_or_buffer='dataset_metadata/28k_cleaned.csv')

In [10]:
# Inputs are the clip filenames; targets are the 'um_label' column converted to
# a float32 column vector of shape (n_samples, 1).
X = df['filename']
y = df['um_label'].to_numpy().astype('float32').reshape(-1, 1)

In [5]:
# Hold out 20% of the clips for testing; the fixed random_state keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=14,
)

# Report the class counts of each split (train first, then test).
for split_labels in (y_train, y_test):
    print(np.unique(split_labels, return_counts=True))

(array([0., 1.], dtype=float32), array([20590,  4892], dtype=int64))
(array([0., 1.], dtype=float32), array([5171, 1200], dtype=int64))


In [20]:
# Copy each clip into the train or test folder, under true/ or false/ according to its label

def transfer_clips(filename, label, is_test):
    """Copy one clip from ``model_dataset/clips`` into its split/label folder.

    The destination is ``model_dataset/<split>/<label_dir>/<filename>`` where
    ``<split>`` is ``test`` when ``is_test`` is truthy and ``train`` otherwise,
    and ``<label_dir>`` is ``true`` when ``label == 1`` and ``false`` for any
    other value. The source file is copied, not moved, so it stays in place.

    Args:
        filename: Name of the clip file inside ``model_dataset/clips``.
        label: Class label, compared against 1. A one-element NumPy array
            (as produced by indexing a ``(n, 1)`` label array) works as well.
        is_test: Whether the clip belongs to the test split.

    Raises:
        FileNotFoundError: If the source clip does not exist.
    """
    path = 'model_dataset'
    split_dir = 'test' if is_test else 'train'
    label_dir = 'true' if label == 1 else 'false'
    destination_dir = f'{path}/{split_dir}/{label_dir}'

    # Create the target folder on demand; shutil.copyfile does not create
    # missing parent directories and would otherwise fail on a fresh dataset.
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copyfile(f'{path}/clips/{filename}', f'{destination_dir}/{filename}')

In [23]:
# Copy every training clip; X_train and y_train come from the same split call,
# so they are walked in lockstep.
for filename, label in zip(X_train, y_train):
    transfer_clips(filename, label, False)

In [22]:
# Copy every test clip; X_test and y_test come from the same split call,
# so they are walked in lockstep.
for filename, label in zip(X_test, y_test):
    transfer_clips(filename, label, True)

# Augment Audio

In [7]:
# Augmentation pipeline used by augment_audio, which calls it as
# augment(samples=..., sample_rate=...). Every transform is configured with
# p=0.5.
# NOTE(review): no random seed is set in the visible cells, so the augmented
# clips presumably differ from run to run — confirm whether that is acceptable.
augment = Compose([
    # Gaussian noise with amplitude drawn from [0.001, 0.015].
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    # Time stretch with a rate in [0.95, 1.10].
    TimeStretch(min_rate=0.95, max_rate=1.10, p=0.5),
    # Pitch shift of up to 4 semitones in either direction.
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    # Shift by a fraction in [-0.5, 0.5] — presumably of the clip length; verify
    # against the installed audiomentations version.
    Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
    # Tanh distortion with the library's default settings.
    TanhDistortion(p=0.5)
])

In [8]:
def augment_audio(filename, n_output, path='dataset_clips', sample_rate=16000):
    """Write ``n_output`` augmented copies of one positive training clip.

    The clip is read from ``<path>/train/true/<filename>``, passed through the
    module-level ``augment`` pipeline ``n_output`` times, and each result is
    written to ``<path>/train/augmented_true/<stem>_<i>.wav`` where ``<stem>``
    is ``filename`` without its extension and ``<i>`` runs from 0.

    NOTE(review): the split cell earlier in this notebook copies clips under
    'model_dataset', while the default here reads from 'dataset_clips' —
    confirm which dataset root is intended.

    Args:
        filename: Name of the clip inside ``<path>/train/true``.
        n_output: Number of augmented files to write for this clip.
        path: Dataset root folder. Defaults to ``'dataset_clips'``.
        sample_rate: Rate used for loading, augmenting and writing the audio.
            Defaults to 16000.
    """
    output_dir = f'{path}/train/augmented_true'
    # sf.write does not create missing folders, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # Strip only the trailing extension; str.replace would also remove a
    # ".wav" occurring in the middle of the name.
    stem, _ = os.path.splitext(filename)

    # Loading with an explicit sr means the audio is at `sample_rate`, so the
    # same value is reused for augmenting and writing.
    audio, _ = librosa.load(f'{path}/train/true/{filename}', sr=sample_rate)
    for i in range(n_output):
        augmented_audio = augment(samples=audio, sample_rate=sample_rate)
        sf.write(f'{output_dir}/{stem}_{i}.wav', augmented_audio, sample_rate)

In [9]:
path = 'dataset_clips'
# os.listdir returns every directory entry in arbitrary order. Sort the names so
# clips are processed in a stable order, and skip anything that is not a .wav
# clip (e.g. hidden or system files) so it is never handed to the audio loader.
for filename in sorted(os.listdir(f'{path}/train/true/')):
    if not filename.lower().endswith('.wav'):
        continue
    # Two augmented variants are generated per positive training clip.
    augment_audio(filename, 2)