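# Pipeline

End-to-end pipeline: preprocess the LibriSpeech splits, build the datasets, and train a CRNN model.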

## Imports

In [1]:
import os
import tensorflow as tf

from preprocessing import preprocess
from dataset import make_dataset
from model import CRNN
from util import CustomDataGenerator

## Paths

In [2]:
# Root of the data tree; the raw-split paths below are joined onto it.
DATA_DIR = "../../data/"

# Raw LibriSpeech splits, expressed relative to DATA_DIR.
TRAIN_DIR = "train-clean-360/LibriSpeech/train-clean-360/"
VALID_DIR = "dev-clean/LibriSpeech/dev-clean/"
TEST_DIR = "test-clean/LibriSpeech/test-clean/"
NOISE_DIR = ""  # left empty; not referenced by the cells visible here

# Output locations.
MODEL_DIR = "../models/"
DATASET_DIR = os.path.join(DATA_DIR, "dataset/")
PROCESSED_DIR = os.path.join(DATA_DIR, "processed/")

## Parameters

In [3]:
# Tunable pipeline parameters.
# Forwarded to preprocess(); presumably a clip length in seconds -- TODO confirm.
initial_s: int = 10
# Not referenced by the cells visible here -- TODO confirm where it is consumed.
s: int = 5
# Forwarded to make_dataset() and CRNN(); exact meaning is defined by those helpers.
max_k: int = 10

## Preprocessing

In [None]:
# Raw split directories (relative to DATA_DIR), paired by position with the
# split names used for the processed/ sub-folders.
input_dirs = [TRAIN_DIR, VALID_DIR, TEST_DIR]
datasets = ["train", "valid", "test"]

for dataset, raw_dir in zip(datasets, input_dirs):
    input_dir = os.path.join(DATA_DIR, raw_dir)
    output_dir = os.path.join(PROCESSED_DIR, f"{dataset}/")

    # exist_ok=True replaces the check-then-create pattern, so re-running the
    # cell (or a concurrent run) cannot fail on an already existing folder.
    os.makedirs(output_dir, exist_ok=True)

    preprocess(input_dir, output_dir, initial_s)

## Creating Dataset

In [None]:
# Defined here as well so this cell still runs when the (expensive)
# preprocessing cell above is skipped on a fresh kernel.
datasets = ["train", "valid", "test"]

for dataset in datasets:
    input_dir = os.path.join(PROCESSED_DIR, f"{dataset}/")
    output_dir = os.path.join(DATASET_DIR, f"{dataset}/")

    # exist_ok=True makes the cell safely re-runnable without a separate check.
    os.makedirs(output_dir, exist_ok=True)

    make_dataset(input_dir, output_dir, max_k)

## Train

### Data Generators

In [4]:
# os.listdir() returns entries in arbitrary order; sorting gives the
# generators the same file order on every machine and every run.
train_files = sorted(os.listdir(os.path.join(DATASET_DIR, "train/")))
valid_files = sorted(os.listdir(os.path.join(DATASET_DIR, "valid/")))

In [5]:
# Per-sample dimensions handed to both generators; these are the first two
# entries of the (500, 201, 1) shape the model is built with.
input_dim = (500, 201)

# max_k comes from the parameters cell rather than a repeated literal, so the
# generators cannot drift from the value given to make_dataset() and CRNN().
train_generator = CustomDataGenerator(
    os.path.join(DATASET_DIR, "train/"),
    train_files,
    dim=input_dim,
    max_k=max_k,
    batch_size=32,
    shuffle=True,
)
valid_generator = CustomDataGenerator(
    os.path.join(DATASET_DIR, "valid/"),
    valid_files,
    dim=input_dim,
    max_k=max_k,
    batch_size=1,
    shuffle=False,
)

### Model

In [6]:
# Build the network for (500, 201, 1) inputs and print its layer summary.
input_shape = (500, 201, 1)
model = CRNN(input_shape, max_k)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 498, 199, 64)      640       
                                                                 
 conv2d_1 (Conv2D)           (None, 496, 197, 32)      18464     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 165, 65, 32)      0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 163, 63, 128)      36992     
                                                                 
 conv2d_3 (Conv2D)           (None, 161, 61, 64)       73792     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 53, 20, 64)       0         
 2D)                                                    

### Compile

In [7]:
model.compile(
    # epsilon is set explicitly to 1e-8 (the Keras default is 1e-7).
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-8
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    # `metrics` is documented as a list; newer Keras versions reject a bare
    # metric object, so wrap it even though there is only one.
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

### Fit

In [8]:
# Keep the History object so per-epoch losses/metrics can be inspected or
# plotted afterwards, instead of leaving only its repr as the cell output.
# NOTE(review): MODEL_DIR is defined but the trained model is not saved in the
# cells visible here -- confirm whether a save step exists elsewhere.
history = model.fit(train_generator, validation_data=valid_generator, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x121e0055be0>