# Pipeline

## Imports

In [1]:
import os
import tensorflow as tf

from preprocessing import preprocess
from dataset import make_dataset
from model import CRNN
from util import *
import random

## Paths

In [2]:
# Root directory that every raw-input and derived-output path below hangs off.
DATA_DIR = "../../data/"

# Raw inputs. These are joined onto DATA_DIR by the cells that consume them.
TRAIN_DIR = "train-clean-360/LibriSpeech/train-clean-360/"
VALID_DIR = "dev-clean/LibriSpeech/dev-clean/"
TEST_DIR = "test-clean/LibriSpeech/test-clean/"
NOISE_DIR = "noise/"

# Derived outputs: preprocessed audio and the final per-split dataset folders.
PROCESSED_DIR, DATASET_DIR = (
    os.path.join(DATA_DIR, leaf) for leaf in ("processed/", "dataset/")
)

# Location for model artefacts (kept outside DATA_DIR).
MODEL_DIR = "../models/"

## Parameters

In [3]:
# Master switch: the preprocessing, dataset-creation and noise cells only run
# when this is True.
create_dataset = True

# Audio settings.
sample_rate = 16000  # handed to process_noise() when noise clips are downsampled
initial_s = 10       # handed to preprocess(); presumably a duration in seconds - TODO confirm
s = 5                # handed to the data generators; presumably a duration in seconds - TODO confirm

# Dataset layout.
max_k = 10              # shared by make_dataset(), the data generators and the CRNN
samples = [25, 10, 10]  # per-split sample counts, ordered [train, valid, test]

## Preprocessing

In [4]:
if create_dataset:
    # `input_dirs` and `datasets` are index-aligned (train, valid, test).
    # `datasets` is also read by the later dataset-creation and noise cells,
    # so both names stay defined at notebook scope.
    input_dirs = [TRAIN_DIR, VALID_DIR, TEST_DIR]
    datasets = ["train", "valid", "test"]

    # Walk the two lists in lockstep instead of indexing one by the other.
    for dataset, raw_dir in zip(datasets, input_dirs):
        print(f"Processing {dataset} set")

        input_dir = os.path.join(DATA_DIR, raw_dir)
        output_dir = os.path.join(PROCESSED_DIR, f"{dataset}/")

        # exist_ok=True replaces the separate exists() check: it is race-free
        # and makes re-running this cell a no-op for directory creation.
        os.makedirs(output_dir, exist_ok=True)

        preprocess(input_dir, output_dir, initial_s)

Processing train set


100%|██████████| 40/40 [02:43<00:00,  4.09s/it]


Processing valid set


100%|██████████| 20/20 [01:12<00:00,  3.64s/it]


Processing test set


100%|██████████| 20/20 [01:13<00:00,  3.68s/it]


## Creating Dataset

In [5]:
if create_dataset:
    # `datasets` comes from the preprocessing cell; `samples` is index-aligned
    # with it (train, valid, test), so iterate over both together.
    for dataset, n_samples in zip(datasets, samples):
        print(f"Processing {dataset} set")

        input_dir = os.path.join(PROCESSED_DIR, f"{dataset}/")
        output_dir = os.path.join(DATASET_DIR, f"{dataset}/")

        # exist_ok=True replaces the separate exists() check: it is race-free
        # and makes re-running this cell a no-op for directory creation.
        os.makedirs(output_dir, exist_ok=True)

        make_dataset(input_dir, output_dir, max_k, n_samples)


Processing train set


100%|██████████| 10/10 [00:04<00:00,  2.32it/s]


Processing valid set


100%|██████████| 10/10 [00:02<00:00,  3.63it/s]


Processing test set


100%|██████████| 10/10 [00:02<00:00,  3.67it/s]


## Add Noise for k = 0

In [6]:
if create_dataset:
    noise_files = get_files(os.path.join(DATA_DIR, NOISE_DIR, "audio/"), ".wav")

    # Drop noise recordings from categories listed here before sampling.
    speech_categories = ["cafe/restaurant", "grocery_store", "metro_station"]
    noise_files = remove_speech_noise(noise_files, os.path.join(DATA_DIR, NOISE_DIR, "meta.txt"), speech_categories)

    # Dedicated, seeded generator so the train/valid/test noise split is the
    # same on every Restart & Run All, without touching the global random state.
    NOISE_SEED = 0
    noise_rng = random.Random(NOISE_SEED)

    for dataset, n_samples in zip(datasets, samples):
        print(f"Processing {dataset} set")

        # Fail with a descriptive message instead of random.sample's generic one.
        if n_samples > len(noise_files):
            raise ValueError(
                f"Not enough noise files for {dataset} set: "
                f"need {n_samples}, have {len(noise_files)}"
            )

        # Select same number of noise samples as
        # k speaker samples in each dataset
        noise_samples = noise_rng.sample(noise_files, n_samples)

        # Remove used samples so no noise clip is shared between splits
        for used in noise_samples:
            noise_files.remove(used)

        # Downsample and save noise in dataset directories
        process_noise(DATASET_DIR, dataset, noise_samples, sample_rate)


Processing train set


25it [00:15,  1.64it/s]


Processing valid set


10it [00:04,  2.01it/s]


Processing test set


10it [00:04,  2.16it/s]


## Compute Train Set Mean and Standard Deviation

In [7]:
# Normalisation statistics are computed from the training split only; the same
# values are later handed to both the training and validation generators.
mean, std = get_train_mean_std(os.path.join(DATASET_DIR, "train/"))

# One print call, one line per statistic.
print(f"Dataset mean: {mean}", f"Dataset std: {std}", sep="\n")

100%|██████████| 275/275 [00:34<00:00,  7.90it/s]


Dataset mean: 0.0007686347817070782
Dataset std: 0.002093877410516143


## Train

### Data Generators

In [8]:
# os.listdir() returns entries in arbitrary order. Sorting makes the file lists
# (and therefore the un-shuffled validation order) identical across runs and
# machines.
train_files = sorted(f for f in os.listdir(os.path.join(DATASET_DIR, "train/")) if f.endswith(".wav"))
valid_files = sorted(f for f in os.listdir(os.path.join(DATASET_DIR, "valid/")) if f.endswith(".wav"))

In [9]:
# Both generators share `dim`, `max_k`, `s` and the normalisation statistics
# computed from the training split; they differ only in source directory,
# file list, batch size and shuffling.

# Training: mini-batches of 32, shuffled.
train_generator = CustomDataGenerator(
    os.path.join(DATASET_DIR, "train/"),
    train_files,
    dim=(500, 201),
    max_k=max_k,
    s=s,
    mean=mean,
    std=std,
    batch_size=32,
    shuffle=True,
)

# Validation: one file per batch, fixed order.
valid_generator = CustomDataGenerator(
    os.path.join(DATASET_DIR, "valid/"),
    valid_files,
    dim=(500, 201),
    max_k=max_k,
    s=s,
    mean=mean,
    std=std,
    batch_size=1,
    shuffle=False,
)

### Model

In [10]:
# Build the network. The first two input dimensions match the generators'
# dim=(500, 201), and max_k is the same value the generators receive.
# NOTE(review): the trailing 1 is presumably a single channel axis - confirm
# against model.CRNN.
model = CRNN((500, 201, 1), max_k)
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 498, 199, 64)      640       
                                                                 
 conv2d_1 (Conv2D)           (None, 496, 197, 32)      18464     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 165, 65, 32)      0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 163, 63, 128)      36992     
                                                                 
 conv2d_3 (Conv2D)           (None, 161, 61, 64)       73792     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 53, 20, 64)       0         
 2D)                                                    

### Compile

In [11]:
# Configure training: Adam optimiser, categorical cross-entropy loss and
# categorical accuracy as the reported metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-8),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    # `metrics` is documented as a list; a bare metric object is only
    # tolerated by some Keras versions, so wrap it explicitly.
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

### Train

In [12]:
# Train for 10 epochs, validating after each one. Keep the returned History
# object so per-epoch loss/metric values remain available for later inspection
# instead of being discarded as a bare cell repr.
history = model.fit(train_generator, validation_data=valid_generator, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1a8156e4400>