# **DeepSpeech with CommonVoice**

## **Compiled Dataset**

In [0]:
from google.colab import files
# Upload "gcp_key.json".
# The trailing semicolon stops the notebook from echoing the returned
# {file name: file contents} mapping, which would otherwise store the
# service-account key in the saved cell output.
files.upload();

In [0]:
import os
# Point the process environment at the uploaded service-account key file.
os.environ.update({"GOOGLE_APPLICATION_CREDENTIALS": "/content/gcp_key.json"})

In [0]:
from google.cloud import storage

# Where the pre-compiled dataset archive lives: the bucket and the folder
# inside it share the same name.
bucketName = bucketFolder = 'cv-dataset'
# Object to fetch from the bucket, and the local file name to store it under.
fileName, saveAs = 'CV_large_no_pad.zip', 'CV.zip'

# Client and bucket handle shared by the download helper below.
# Presumably authenticated through the GOOGLE_APPLICATION_CREDENTIALS variable
# set in an earlier cell - confirm.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucketName)

def download_file(bucketName, bucketFolder, fileName, destination=None, client=None):
    """Download one object from a Google Cloud Storage bucket to a local file.

    Args:
        bucketName: Name of the bucket that holds the object.
        bucketFolder: Folder (object-name prefix) inside the bucket; may be ''.
        fileName: Name of the object inside ``bucketFolder``.
        destination: Local path to write to. When omitted, the notebook-level
            ``saveAs`` value is used.
        client: Storage client to use. When omitted, the notebook-level
            ``storage_client`` is used.

    Returns:
        A message naming the downloaded object and the local path it was
        written to.
    """
    import posixpath  # object names always use '/', whatever the host OS is

    gcs_client = storage_client if client is None else client
    local_path = saveAs if destination is None else destination

    # Honour the bucketName argument instead of relying on a module-level
    # bucket handle, so the helper works for any bucket it is called with.
    blob = gcs_client.bucket(bucketName).blob(posixpath.join(bucketFolder, fileName))
    blob.download_to_filename(local_path)

    downloaded_name = blob.name.split('/')[-1]
    return f'{downloaded_name} downloaded from bucket as {local_path}.'

# Fetch the archive; the returned status message is shown as the cell output.
download_file(
    bucketName=bucketName,
    bucketFolder=bucketFolder,
    fileName=fileName,
)

In [0]:
import os
# Create the project directory; exist_ok keeps the cell re-runnable instead of
# raising FileExistsError when the directory is already there.
os.makedirs('DeepSpeech', exist_ok=True)

In [0]:
!unzip -q CV.zip -d DeepSpeech

Upload model files

In [0]:
import os
# Enter the project directory unless the kernel is already inside it, so that
# re-running this cell does not fail looking for a nested 'DeepSpeech' folder.
if os.path.basename(os.getcwd()) != 'DeepSpeech':
    os.chdir('DeepSpeech')

In [0]:
from google.colab import files
# Upload the model source files into the current directory.
# The trailing semicolon suppresses the echo of the returned
# {file name: file contents} mapping, which would otherwise dump every
# uploaded file into the cell output.
files.upload();

## **Prepare Dataset and Files**

In [0]:
from google.colab import files
# Upload kaggle.json.
# The trailing semicolon stops the notebook from echoing the returned
# {file name: file contents} mapping, which would otherwise store the Kaggle
# API token in the saved cell output.
files.upload();

Download and unzip *(takes approx. 12 minutes)*

In [0]:
!pip install -q kaggle
!mkdir /root/.kaggle
!mv /content/kaggle.json ~/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets download -d mozillaorg/common-voice

!mkdir common-voice
!unzip -q common-voice.zip -d common-voice

Get required files and go to working directory

In [0]:
import shutil
import os

# Target directory for the Common Voice splits; makedirs creates both levels
# and does not fail if they already exist.
cv_dir = os.path.join('DeepSpeech', 'CV')
os.makedirs(cv_dir, exist_ok=True)

# Move each split's audio folder and its CSV index into the target directory.
for split in ('train', 'test', 'dev'):
    split_name = f'cv-valid-{split}'
    shutil.move(os.path.join('common-voice', split_name, split_name), cv_dir)
    shutil.move(os.path.join('common-voice', f'{split_name}.csv'), cv_dir)

# Drop everything that is no longer needed, then enter the working directory.
shutil.rmtree('common-voice')
os.remove('common-voice.zip')
os.chdir('DeepSpeech')

Upload model files

In [0]:
# Upload the model source files into the current directory.
# The trailing semicolon suppresses the echo of the returned
# {file name: file contents} mapping, which would otherwise dump every
# uploaded file into the cell output.
files.upload();

Prepare common voice dataset

In [0]:
# Run the uploaded prepare_cv.py script in a separate Python process.
# Presumably this produces the CV/cv_train.csv and CV/cv_dev.csv files read by
# the training cell - confirm against prepare_cv.py.
!python3 prepare_cv.py

## **Model Training**

In [0]:
from model import create_model, create_optimizer, create_model_checkpoint_cb, create_lr_scheduler_cb
from visualization import plot_accuracy, plot_loss
from BeamSearch import ctcBeamSearch
from dataset import create_dataset
from generator import cv_generator_compiled as generator
from text import decode
import constants as c

from tensorflow.keras.utils import plot_model
from tensorflow.keras import backend as k
from tensorflow.keras.models import Model
import numpy as np

In [0]:
# Build the network (the architecture is defined in model.py).
model = create_model()

# Print the layer-by-layer summary.
model.summary()

# Write a diagram of the model graph to rnn.png.
plot_model(model, to_file='rnn.png')

optimizer = create_optimizer()

# The loss for the output named 'ctc' simply returns y_pred unchanged.
# Presumably the model computes the CTC loss inside the graph and exposes it
# as that output - confirm in model.py.
loss = {'ctc': lambda y_true, y_pred: y_pred}

# NOTE(review): 'accuracy' is evaluated against this pass-through output, so
# the reported value may not be meaningful - confirm.
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

In [0]:
import numpy as np
import pandas as pd

# Load the split indexes; only their row counts are needed in this cell.
train = pd.read_csv(os.path.join('CV', 'cv_train.csv')).to_numpy()
dev = pd.read_csv(os.path.join('CV', 'cv_dev.csv')).to_numpy()

# Number of whole batches per pass over each split.
steps_per_epoch = len(train) // c.batch_size
validation_steps = len(dev) // c.batch_size

# Train for two epochs, validating on the 'dev' generator after each one.
history = model.fit(
    generator(),
    epochs=2,
    steps_per_epoch=steps_per_epoch,
    validation_data=generator('dev'),
    validation_steps=validation_steps,
    callbacks=[create_model_checkpoint_cb(), create_lr_scheduler_cb()],
)

Epoch 1/2
  21/1272 [..............................] - ETA: 2:02:02 - loss: 349.7372 - accuracy: 0.0000e+00

In [0]:
# Plot the training history with the helpers imported from visualization.py.
plot_accuracy(history)
plot_loss(history)

# Load the weights stored at the checkpoint path before exporting, so model.h5
# contains the checkpointed weights rather than whatever the final epoch left
# in memory.
model.load_weights(c.checkpoint_filepath)
model.save('model.h5')

## **Model Evaluation**

In [0]:
# Inference model: maps the input of 'masking_layer' to the output of
# 'output_layer' of the trained model.
sub_model = Model(inputs=model.get_layer('masking_layer').input, outputs=model.get_layer('output_layer').output)

# NOTE(review): X_test and y_test are not defined in any cell visible in this
# notebook; they must be created before this cell runs (presumably with
# create_dataset - confirm).
alphabet = ''.join(c.alphabet)  # loop-invariant, so build it once
num_samples = min(15, len(X_test))  # never index past a short test set

for i in range(num_samples):
    # Wrap the single sample in an array of length 1 to give it a batch axis.
    batch = np.array([X_test[i]])

    # predict() already returns a NumPy array, so no backend conversion is
    # needed before decoding.
    prediction = sub_model.predict(batch)
    path = ctcBeamSearch(prediction[0], alphabet, None)

    print('true:', decode(y_test[i]))
    print('pred:', path)
    print()