# Pipeline

## Imports

In [10]:
import os
import tensorflow as tf
from preprocessing import preprocess
from dataset import make_dataset
from model import CRNN
from util import CustomDataGenerator

## Paths

In [2]:
# Root folder that the raw, processed and dataset directories are joined onto.
DATA_DIR = "../../data/"

# LibriSpeech split folders, relative to DATA_DIR.
TRAIN_DIR, VALID_DIR, TEST_DIR = (
    "train-clean-360/LibriSpeech/train-clean-360/",
    "dev-clean/LibriSpeech/dev-clean/",
    "test-clean/LibriSpeech/test-clean/",
)
# Left empty; no cell visible in this notebook reads it.
NOISE_DIR = ""

# Output locations for the preprocessing and dataset-creation steps.
PROCESSED_DIR, DATASET_DIR = (
    os.path.join(DATA_DIR, subdir) for subdir in ("processed/", "dataset/")
)
MODEL_DIR = "../models/"

## Parameters

In [3]:
# Pipeline-wide settings. Their exact meaning is defined by the helper modules
# that receive them, so the notes below only record where each value goes.
max_k: int = 10      # passed to make_dataset() and CRNN()
initial_s: int = 10  # passed to preprocess(); presumably a length in seconds — TODO confirm
s: int = 5           # not read by any cell visible in this notebook — TODO confirm it is still needed

## Preprocessing

In [4]:
# Raw input folders (relative to DATA_DIR) and the split names they map to,
# paired by position.
input_dirs = [TRAIN_DIR, VALID_DIR, TEST_DIR]
datasets = ["train", "valid", "test"]

# Preprocess every split into its own sub-folder of PROCESSED_DIR.
for dataset, raw_dir in zip(datasets, input_dirs):
    input_dir = os.path.join(DATA_DIR, raw_dir)
    output_dir = os.path.join(PROCESSED_DIR, f"{dataset}/")

    # exist_ok=True replaces the separate exists() check: no check-then-create
    # race, and the cell can be re-run on an already populated folder.
    os.makedirs(output_dir, exist_ok=True)

    preprocess(input_dir, output_dir, initial_s)

## Creating Dataset

In [5]:
# Build the dataset files for every split from its preprocessed folder.
# `datasets` is the list of split names defined in the preprocessing cell.
for dataset in datasets:
    input_dir = os.path.join(PROCESSED_DIR, f"{dataset}/")
    output_dir = os.path.join(DATASET_DIR, f"{dataset}/")

    # exist_ok=True makes directory creation idempotent, so no separate
    # exists() check is needed and re-running the cell is safe.
    os.makedirs(output_dir, exist_ok=True)

    make_dataset(input_dir, output_dir, max_k)



## Train

### Data Generators

In [14]:
# os.listdir() returns entries in arbitrary order; sorting makes the file
# lists identical from run to run, so anything downstream that depends on
# their order is reproducible.
train_files = sorted(os.listdir(os.path.join(DATASET_DIR, "train/")))
valid_files = sorted(os.listdir(os.path.join(DATASET_DIR, "valid/")))

In [17]:
# Settings shared by both generators. max_k is taken from the Parameters cell
# (instead of a repeated literal 10) so the generators cannot drift from the
# value given to make_dataset() and CRNN().
generator_kwargs = dict(dim=(500, 201), max_k=max_k)

# NOTE(review): the recorded training run in this notebook failed with
# "logits_size=[32,11] labels_size=[32,10]": with max_k=10 the model emits
# 11 classes while these generators yield 10-wide labels. Which side is off by
# one cannot be decided from this notebook; reconcile CRNN and
# CustomDataGenerator before training.
train_generator = CustomDataGenerator(
    os.path.join(DATASET_DIR, "train/"),
    train_files,
    batch_size=32,
    shuffle=True,
    **generator_kwargs,
)
valid_generator = CustomDataGenerator(
    os.path.join(DATASET_DIR, "valid/"),
    valid_files,
    batch_size=1,
    shuffle=False,
    **generator_kwargs,
)

### Model

In [8]:
# Shape handed to CRNN as its first argument; the first two entries match the
# dim=(500, 201) used for the data generators, the trailing 1 is presumably a
# single channel axis (TODO confirm against model.py).
input_shape = (500, 201, 1)

model = CRNN(input_shape, max_k)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 498, 199, 64)      640       
                                                                 
 conv2d_1 (Conv2D)           (None, 496, 197, 32)      18464     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 165, 65, 32)      0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 163, 63, 128)      36992     
                                                                 
 conv2d_3 (Conv2D)           (None, 161, 61, 64)       73792     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 53, 20, 64)       0         
 2D)                                                    

### Compile

In [12]:
# Adam with explicitly spelled-out hyper-parameters, categorical cross-entropy
# loss, and categorical accuracy as the tracked metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-8),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    # Keras documents `metrics` as a list; passing a bare metric object only
    # works on versions that silently wrap it.
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

### Train

In [18]:
# Keep the History object returned by fit() so the per-epoch loss/metric
# values can be inspected or plotted after training.
# NOTE(review): the recorded run of this cell stopped in epoch 1 with
# "logits and labels must be broadcastable: logits_size=[32,11]
# labels_size=[32,10]", i.e. the model's output width and the generators'
# label width disagree. That has to be resolved before this cell can succeed.
history = model.fit(train_generator, validation_data=valid_generator, epochs=10)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node 'categorical_crossentropy/softmax_cross_entropy_with_logits' defined at (most recent call last):
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\ipykernel\kernelbase.py", line 471, in dispatch_queue
      await self.process_one()
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\ipykernel\kernelbase.py", line 460, in process_one
      await dispatch(*args)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\ipykernel\kernelbase.py", line 367, in dispatch_shell
      await result
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\ipykernel\kernelbase.py", line 662, in execute_request
      reply_content = await reply_content
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\ipykernel\ipkernel.py", line 360, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\IPython\core\interactiveshell.py", line 2863, in run_cell
      result = self._run_cell(
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\IPython\core\interactiveshell.py", line 2909, in _run_cell
      return runner(coro)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\IPython\core\interactiveshell.py", line 3106, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\IPython\core\interactiveshell.py", line 3309, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\IPython\core\interactiveshell.py", line 3369, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Marlous\AppData\Local\Temp\ipykernel_13628\1481380729.py", line 1, in <cell line: 1>
      model.fit(train_generator, validation_data=valid_generator, epochs=10)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\engine\training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\engine\training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\engine\training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\engine\training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\engine\training.py", line 860, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\engine\training.py", line 918, in compute_loss
      return self.compiled_loss(
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\losses.py", line 141, in __call__
      losses = call_fn(y_true, y_pred)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\losses.py", line 245, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\losses.py", line 1789, in categorical_crossentropy
      return backend.categorical_crossentropy(
    File "C:\Users\Marlous\anaconda3\envs\asr\lib\site-packages\keras\backend.py", line 5109, in categorical_crossentropy
      return tf.nn.softmax_cross_entropy_with_logits(
Node: 'categorical_crossentropy/softmax_cross_entropy_with_logits'
logits and labels must be broadcastable: logits_size=[32,11] labels_size=[32,10]
	 [[{{node categorical_crossentropy/softmax_cross_entropy_with_logits}}]] [Op:__inference_train_function_7889]