### Imports

In [1]:
import os
import pickle
import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import TimeDistributed, Activation, Bidirectional, ConvLSTM2D, Attention, Dense, Flatten, MaxPool3D, MaxPool2D,BatchNormalization, Conv3D, GRU
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras import Model
from tensorflow.keras.backend import ctc_batch_cost, ctc_decode, ctc_label_dense_to_sparse
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Input

import Levenshtein as Lev
import sys
from string import ascii_uppercase

In [2]:
# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.0.0


To Decide
1. Batch norm vs. layer norm vs. no normalization for the attention model
2. Whether to shuffle or not

To Do 
1. Check ss, sr
2. Check that the shapes match
3. Append 0s

### Paths

In [3]:
# Directories holding the train / dev / test splits. Each path keeps its
# trailing slash, so a file name can be appended to it directly.
train_path, dev_path, test_path = (
    "./LibriSpeech100/train/train_all/",
    "./LibriSpeech100/dev/dev_all/",
    "./LibriSpeech100/test/test_all/",
)

In [4]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that serves one feature/transcript file pair per index.

    The directory ``path`` is scanned for ``.pkl`` files (features) and ``.txt``
    files (transcripts). Both lists are sorted by file name and paired by
    position: item ``i`` is built from the i-th ``.pkl`` and the i-th ``.txt``.
    NOTE(review): this presumes the two sorted lists line up one-to-one; verify
    against the dataset layout.

    Each ``.pkl`` is unpickled into a mapping ``utterance key -> 2-D array``;
    each ``.txt`` holds lines of the form ``<key> <transcript words...>``.
    Within one item, features are zero-padded along axis 1 and transcripts are
    padded with the blank symbol so everything can be stacked into two arrays.

    Args:
        path: Directory containing the ``.pkl`` / ``.txt`` files.
        to_fit: Stored on the instance but currently not consulted;
            ``__getitem__`` always returns both features and labels.
    """

    # Symbols that extend A-Z in the label alphabet. Their order fixes the
    # integer ids: A-Z -> 0..25, space -> 26, apostrophe -> 27,
    # blank (padding) -> 28, end -> 29.
    _SPACE_TOKEN = ' '
    _APOS_TOKEN = '\''
    _BLANK_TOKEN = '%'
    _END_TOKEN = '>'
    _ALPHABET = list(ascii_uppercase) + [_SPACE_TOKEN, _APOS_TOKEN, _BLANK_TOKEN, _END_TOKEN]
    # Built once for the class instead of on every generate_Y_array call.
    _CHAR_TO_INDEX = {char: idx for idx, char in enumerate(_ALPHABET)}

    def __init__(self, path, to_fit = True):
        self.path = path
        self.list_X, self.list_Y = self.getLists()
        self.to_fit = to_fit

    def __len__(self):
        """Return the number of items, i.e. the number of ``.pkl`` files found."""
        return len(self.list_X)

    def __getitem__(self, index):
        """Load file pair ``index`` and return it as padded ``(X, Y)`` arrays."""
        dict_X = self.get_dict_X(index)
        dict_Y = self.get_dict_Y(index)
        return self.generate_XY(dict_X, dict_Y)

    def getLists(self):
        """Return the sorted ``.pkl`` and ``.txt`` file names found in ``self.path``."""
        list_X = []
        list_Y = []
        for item in sorted(os.listdir(self.path)):
            if item.endswith('.pkl'):
                list_X.append(item)
            elif item.endswith('.txt'):
                list_Y.append(item)
        return list_X, list_Y

    def get_dict_X(self, index):
        """Unpickle and return the feature mapping stored in the ``index``-th ``.pkl``."""
        # os.path.join also works when self.path has no trailing separator.
        file_name = os.path.join(self.path, self.list_X[index])
        # SECURITY: pickle.load can execute arbitrary code; only point this
        # generator at feature files you produced yourself.
        with open(file_name, 'rb') as pickle_file:
            dict_X = pickle.load(pickle_file)
        return dict_X

    def get_dict_Y(self, index):
        """Parse the ``index``-th ``.txt`` into a ``{key: transcript}`` dict.

        The first whitespace-separated token of each line is the key; the
        remaining tokens, re-joined with single spaces, form the transcript.
        Blank lines are skipped.
        """
        filename = os.path.join(self.path, self.list_Y[index])
        dict_Y = {}
        # Context manager guarantees the file handle is released.
        with open(filename) as transcript_file:
            for line in transcript_file:
                data = line.split()
                if not data:
                    continue
                dict_Y[data[0]] = ' '.join(data[1:])
        return dict_Y

    def generate_XY(self, dict_X, dict_Y):
        """Pad every utterance to the longest one in the item and stack them.

        Args:
            dict_X: Mapping ``key -> 2-D array``; arrays are zero-padded at the
                end of axis 1 up to the largest axis-1 size in the mapping.
            dict_Y: Mapping ``key -> transcript string``; must contain every
                key of ``dict_X``.

        Returns:
            Tuple ``(X, Y)`` of stacked arrays, one row per key of ``dict_X`` in
            iteration order. Each row of ``Y`` has ``max_len + 1`` ids, where
            ``max_len`` is the longest transcript in the item.

        Raises:
            ValueError: If ``dict_X`` is empty.
            KeyError: If a key of ``dict_X`` has no transcript, or a transcript
                contains a character outside the alphabet.
        """
        if not dict_X:
            raise ValueError("cannot build a batch from an empty feature mapping")

        max_x = max(features.shape[1] for features in dict_X.values())
        max_y = max(len(dict_Y[key]) for key in dict_X)

        X = []
        Y = []
        for key, features in dict_X.items():
            to_pad_x = ((0, 0), (0, max_x - features.shape[1]))
            X.append(np.pad(features, pad_width=to_pad_x, mode='constant', constant_values=0))
            Y.append(self.generate_Y_array(dict_Y[key], max_y))

        return np.stack(X), np.stack(Y)

    def generate_Y_array(self, sentence, maxlen):
        """Encode ``sentence`` as integer ids, blank-padded to ``maxlen`` plus an end id.

        The sentence is right-padded with the blank symbol up to ``maxlen``
        characters and the end symbol is appended after the padding. A sentence
        already longer than ``maxlen`` is encoded unpadded and untruncated.

        Raises:
            KeyError: If ``sentence`` contains a character that is not in
                A-Z, space, apostrophe, ``%`` or ``>``.
        """
        padded = sentence.ljust(maxlen, self._BLANK_TOKEN) + self._END_TOKEN
        return np.array([self._CHAR_TO_INDEX[char] for char in padded])

In [5]:
def wer(s1, s2):
    """Return the word error rate between two sentences (case-insensitive).

    Every distinct word is mapped to a single character so that the
    character-level Levenshtein distance of the encoded strings equals the
    word-level edit distance. The distance is divided by the number of words
    in ``s2``, which therefore acts as the reference.

    Args:
        s1: Sentence compared against ``s2``.
        s2: Sentence whose word count normalises the distance.

    Returns:
        Word-level edit distance divided by the word count of ``s2``.
        When ``s2`` has no words, returns ``0.0`` if ``s1`` has none either
        and ``float('inf')`` otherwise, instead of dividing by zero.
    """
    words1 = s1.lower().split()
    words2 = s2.lower().split()

    # An empty reference would divide by zero below.
    if not words2:
        return 0.0 if not words1 else float('inf')

    vocabulary = set(words1 + words2)
    word2char = dict(zip(vocabulary, range(len(vocabulary))))

    w1 = [chr(word2char[w]) for w in words1]
    w2 = [chr(word2char[w]) for w in words2]
    return Lev.distance(''.join(w1), ''.join(w2)) / float(len(words2))

In [6]:
# One generator per dataset split; each scans its directory when constructed.
train_data = DataGenerator(path=train_path)
val_data = DataGenerator(path=dev_path)
test_data = DataGenerator(path=test_path)

In [7]:
# class BaseModel(Model):
#     def __init__(self, op_dim = 28):
#         super(BaseModel, self).__init__()
#         self.rnn = GRU(op_dim, return_sequences= False)
#         self.y_pred = Activation('softmax')
        
#     def call(self, inputs):
#         x = self.rnn(inputs)
#         x = self.y_pred(x)
#         return x

In [8]:
# class ASRModel(Model):
#     def __init__(self):
#         super(ASRModel, self).__init__()
#         self.base_model = BaseModel()
#         self.activation = Activation('softmax')

        
#     def call(self, inputs):
#         input_data, labels = inputs
#         input_length = input_data.shape[1] 
#         label_length = label
#         x = self.base_model(input_data)
#         x = self.activation()   
#         x = ctc_batch_cost(labels, x, input_length, label_length)
#         return x
        

[ 0 13  3 26 19  7  0 19 26  8  5 26 18  7  4 26 13 14 19  8  2  4  3 26
  0 13 24 19  7  8 13  6 26 14  3  3 26 14 17 26 14 20 19 26 14  5 26 15
 11  0  2  4 26 18  7  4 26 22 14 20 11  3 26 13  4 21  4 17 26 17  4 18
 19 26 20 13 19  8 11 26 18  7  4 26  7  0  3 26  5  4 17 17  4 19  4  3
 26 14 20 19 26 19  7  4 26 22  7 24 18 26  0 13  3 26 22  7  4 17  4  5
 14 17  4 18 26 19  7  4 17  4 14  5 26 19  7  4 17  4 26  0 17  4 26 15
 11  4 13 19 24 26 14  5 26 15  4 14 15 11  4 26  8 13 26  0 21 14 13 11
  4  0 26  0 13  3 26 14 20 19 26 14  5 26  8 19 26 22  7 14 26  2  0 13
 26  0 19 19  4 13  3 26  2 11 14 18  4 11 24 26 19 14 26 19  7  4  8 17
 26 13  4  8  6  7  1 14 17 27 18 26  1 20 18  8 13  4 18 18 26  1 24 26
  3  8 13 19 26 14  5 26 13  4  6 11  4  2 19  8 13  6 26 19  7  4  8 17
 26 14 22 13 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28
 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28 28
 28 28 28 28 28 28 29]


In [9]:
def model():
    """Build the baseline network: one 28-unit GRU followed by a softmax.

    The GRU is created with ``return_sequences=False``.
    NOTE(review): the labels built by ``DataGenerator`` are per-character id
    sequences over a 30-symbol alphabet, which does not match a 28-unit
    non-sequence output; the error recorded after ``fit_generator`` at the end
    of this notebook shows that mismatch. Confirm the intended head and loss.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    layers = [
        GRU(28, return_sequences=False),
        Activation('softmax'),
    ]
    return Sequential(layers)

In [10]:
def cnew_loss(x, y):
    """Return the mean of ``x - y`` after casting ``x`` to float32.

    Used as the ``loss`` passed to ``model.compile``.
    NOTE(review): this is a signed mean difference, so it looks like a
    placeholder rather than a real training objective; confirm.
    """
    x_as_float = tf.cast(x, tf.float32)
    difference = x_as_float - y
    return tf.reduce_mean(difference)

In [11]:
# Build the network. NOTE(review): this rebinds the name `model` from the
# factory function to the built network, so this cell cannot be re-run until
# the cell that defines `model()` has been executed again.
model = model()
# Compile with the custom loss and a default-configured Adam optimizer.
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=cnew_loss)

In [12]:
# Train on the batches served by train_data, using fit_generator's default settings.
model.fit_generator(train_data)

InvalidArgumentError: Incompatible shapes: [24,283] vs. [24,28] [Op:Sub] name: loss/output_1_loss/sub/