### Imports

In [0]:
import os
import pickle
import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import TimeDistributed, Activation, Bidirectional, ConvLSTM2D, Attention, Dense, Flatten, MaxPool3D, MaxPool2D,BatchNormalization, Conv3D, GRU
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras import Model
from tensorflow.keras.backend import ctc_batch_cost, ctc_decode, ctc_label_dense_to_sparse
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Input

#import Levenshtein as Lev
import sys
from string import ascii_uppercase

In [0]:
print(tf.__version__)

2.0.0


To Decide
1. Batch Norm vs Layer Norm or No Norm for attention model
2. Shuffle or No

To Do 
1. Check ss, sr
2. check if shapes match
3. append 0s

In [0]:

def Paths(v):
  if v==1:
    from google.colab import drive
    drive.mount('/content/drive')

    train_path = "/content/drive/My Drive/train_all/train_all/"
    dev_path = "/content/drive/My Drive/dev_all/dev_all/"
    test_path = "/content/drive/My Drive/test_all/test_all/"
    return(train_path, dev_path,test_path)

  else:
    train_path = "./LibriSpeech100/train/train_all/"
    dev_path = "./LibriSpeech100/dev/dev_all/"
    test_path = "./LibriSpeech100/test/test_all/"
    return(train_path, dev_path,test_path)


# Paths: Chaitanya :1, Rushabh 0

In [108]:
train_path, dev_path, test_path = Paths(1)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
class DataGenerator(Sequence):
    def __init__(self, path, to_fit = True):
        self.path = path
        self.list_X, self.list_Y = self.getLists()
        self.to_fit = to_fit
    
    def __len__(self):
        return len(self.list_X)
    
    def __getitem__(self, index):      
        dict_X = self.get_dict_X(index)   
        
#         if self.to_fit:
        dict_Y = self.get_dict_Y(index)
        X, Y = self.generate_XY(dict_X, dict_Y)
            
        return X, Y
    
    def getLists(self):
        list_X = []
        list_Y = []
        for item in sorted(os.listdir(self.path)):
            ext = item.split(".")[-1]
            if ext == 'pkl':
                list_X.append(item)
            elif ext == 'txt':
                list_Y.append(item)
        return list_X, list_Y
    
    def get_dict_X(self, index):
        file_name = self.path + self.list_X[index]
        with open(file_name, 'rb') as pickle_file:
            dict_X = pickle.load(pickle_file)
        return dict_X
    
    def get_dict_Y(self, index):
        filename = self.path + self.list_Y[index]
        file = open(filename)
        dict_Y = {}
        for line in file:
            data = line.split()
            key = data[0]
            value = ' '.join(data[1:])
            dict_Y[key] = value
        return dict_Y

    def generate_XY(self, dict_X, dict_Y):
        X = []
        Y = []
        input_len = []
        label_len = []
        
        max_x = 0
        max_y = 0
        
        for key in dict_X:
            x_temp = dict_X[key]
            y_temp = dict_Y[key]
            if max_x < x_temp.shape[1]:
                max_x = x_temp.shape[1]
            if max_y < len(y_temp):
                max_y = len(y_temp)
        
        for key in dict_X:
            x_temp = dict_X[key]
            y_temp = dict_Y[key]

            input_len.append(x_temp.shape[1])
            label_len.append(len(y_temp))
            
            to_pad_x = ( (0,0), (0, max_x - dict_X[key].shape[1]))
            to_pad_y = (  (0, max_y - len(dict_Y[key])))
            
            x_temp = np.pad(dict_X[key], pad_width = to_pad_x, mode='constant', constant_values=0)
            y_temp = self.generate_Y_array(dict_Y[key], max_y)
            X.append(x_temp.T)
            Y.append(y_temp)
          
        return [np.stack(X), np.stack(input_len), np.stack(label_len)], np.stack(Y)
#         print(type(Y))
#         return np.stack(X), np.stack(Y)

    def generate_Y_array(self, sentence, maxlen):
        space_token = ' '
        end_token = '>'
        blank_token = '%'
        apos_token = '\''
        while len(sentence) != maxlen:
            sentence += blank_token
        sentence += end_token
        
        alphabet = list(ascii_uppercase) + [space_token, apos_token, blank_token, end_token] 
        char_to_index = {}
        for idx, char in enumerate(alphabet):
            char_to_index[char] = idx

        y = []
#         print(type(sentence),"sentence")
        
        for char in sentence:
#             print(char, "char")
            y.append(char_to_index[char])
        
        return np.array(y)

In [0]:
def wer(s1, s2):

    s1 =s1.lower()
    s2 =s2.lower()
    b = set(s1.lower().split() + s2.lower().split())
    
    word2char = dict(zip(b, range(len(b))))


    w1 = [chr(word2char[w]) for w in s1.split()]
    w2 = [chr(word2char[w]) for w in s2.split()]
    return Lev.distance(''.join(w1), ''.join(w2))/float(len(s2.split()))

In [0]:
train_data = DataGenerator(train_path)
val_data = DataGenerator(dev_path)
test_data = DataGenerator(test_path)

In [0]:
x, y = train_data[0]
x, input_len, label_len = x

In [56]:
print(x.shape)
print(input_len.shape)
print(label_len.shape)

(58, 727, 20)
(58,)
(58,)


In [0]:
class BaseModel(Model):
    def __init__(self, op_dim = 28):
        super(BaseModel, self).__init__()

        self.rnn = GRU(op_dim, return_sequences= False)
        self.y_pred = Activation('softmax')
        
    def call(self, inputs):
        x = self.rnn(inputs)
        x = self.y_pred(x)
        return x

In [0]:

class ASRModel(Model):
    def __init__(self):
        super(ASRModel, self).__init__()
        self.base_model = BaseModel()
        self.activation = Activation('softmax')

    def call(self, inputs):
        input_data, input_length, label_length = inputs
        x = self.base_model(input_data)
        x = self.activation(x)   
        return [x, input_length, label_length]
        
        #         x = self.ctc(labels, x, input_length, label_length)

In [0]:
def ctc_loss(y_true, y_pred):
    x, input_length, label_length = y_pred
    return ctc_batch_cost(y_true, x, input_length, label_length)
    #return 1.0

In [97]:
tf.executing_eagerly

<function tensorflow.python.eager.context.executing_eagerly>

In [0]:
# def model(output_dim = 29):
# #     input_data = Input(shape = (None, input_dim))
# #     simple_rnn = GRU(output_dim, return_sequences= True)
# #     y_pred = Activation('softmax')

# #     model = Model(inputs=input_data, outputs = y_pred)
    
    
#     model = Sequential()
#     model.add(GRU(output_dim, return_sequences= True))
#     model.add(Activation('softmax'))
    
#     return model

In [0]:
def cnew_loss(y_true, y_pred):
    print("true", y_true.shape)
    print("pred",y_pred.shape)
    return tf.reduce_mean(tf.cast(y_true, tf.float32)-y_pred)

In [0]:
model = ASRModel()

In [0]:
model.compile(loss=ctc_loss, optimizer=tf.keras.optimizers.Adam())

In [105]:
model.fit_generator(train_data, shuffle=False)

OperatorNotAllowedInGraphError: ignored

In [0]:
model.summary()