In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
import cv2
from tensorflow.keras.layers import Dense, Input, Conv2D, Flatten, MaxPool2D, Dropout, BatchNormalization
from tensorflow.keras import Model
from tensorflow.keras.utils import plot_model, to_categorical
from tensorflow.keras.optimizers import Adam

In [16]:
IMG_SIZE = 224

In [17]:
inputs = Input(shape = (IMG_SIZE, IMG_SIZE, 1))

model = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1))(inputs)
model = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = MaxPool2D(pool_size=(2, 2))(model)
model = Conv2D(filters=32, kernel_size=(5, 5), padding='SAME', activation='relu')(model)
model = Dropout(rate=0.3)(model)

model = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = MaxPool2D(pool_size=(2, 2))(model)
model = Conv2D(filters=64, kernel_size=(5, 5), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = Dropout(rate=0.3)(model)

model = Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = MaxPool2D(pool_size=(2, 2))(model)
model = Conv2D(filters=128, kernel_size=(5, 5), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = Dropout(rate=0.3)(model)

model = Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = MaxPool2D(pool_size=(2, 2))(model)
model = Conv2D(filters=256, kernel_size=(5, 5), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = Dropout(rate=0.3)(model)

model = Flatten()(model)
model = Dense(1024, activation = "relu")(model)
model = Dropout(rate=0.3)(model)
dense = Dense(512, activation = "relu")(model)

head_root = Dense(168, activation = 'softmax', name='root')(dense)
head_vowel = Dense(11, activation = 'softmax', name='vowel')(dense)
head_consonant = Dense(7, activation = 'softmax', name='constant')(dense)

model = Model(inputs=inputs, outputs=[head_root, head_vowel, head_consonant])

In [18]:
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,loss='categorical_crossentropy', metrics=['accuracy'])

In [7]:
# plot_model(model=model,to_file='bengali-ai-model.png')

In [8]:
im_width,im_height=137,236

In [10]:
root_dir = '/Users/mrinmoysarkar/Documents/git_repos/data-sets/bengali_ai_dataset/bengaliai-cv19/'
train_label_df = pd.read_csv(root_dir+'train.csv')
train_img_df = None
# for i in range(0,4):
#     if None is train_img_df:
#         train_img_df = pd.read_parquet(root_dir+'train_image_data_'+str(i)+'.parquet')
#     else:
#         train_img_df = pd.concat([train_img_df, pd.read_parquet(root_dir+'train_image_data_'+str(i)+'.parquet')],ignore_index=True)

for i in range(0,1):
    train_img_df = pd.read_parquet(root_dir+'train_image_data_'+str(i)+'.parquet')
    train_df = pd.merge(train_img_df, train_label_df, on='image_id')
#     train_df.to_csv(root_dir+'train_image_data_'+str(i)+'.csv',)

# train_df = train_img_df.join()

In [11]:
train_df.columns

Index(['image_id', '0', '1', '2', '3', '4', '5', '6', '7', '8',
       ...
       '32326', '32327', '32328', '32329', '32330', '32331', 'grapheme_root',
       'vowel_diacritic', 'consonant_diacritic', 'grapheme'],
      dtype='object', length=32337)

In [20]:
def flow():
    batch_size = 32
    IMG_SIZE = 224
    n_channel = 1
    im_width,im_height=137,236
    X_batch = np.zeros(shape=(batch_size,IMG_SIZE,IMG_SIZE,n_channel))
    images = train_df.drop(columns=['image_id', 'grapheme_root','vowel_diacritic', 'consonant_diacritic', 'grapheme']).values
    
    root = to_categorical(train_df['grapheme_root'].values)
    vowel = to_categorical(train_df['vowel_diacritic'].values)
    constant = to_categorical(train_df['consonant_diacritic'].values)
    while True:
        indx = 0
        for i,im in enumerate(images):
            im = im.reshape((im_width,im_height))
            im = cv2.resize(im,dsize=(IMG_SIZE,IMG_SIZE),interpolation=cv2.INTER_NEAREST)
            X_batch[indx] = im.reshape((IMG_SIZE,IMG_SIZE,n_channel))/255.0
            indx += 1
            if indx == batch_size:
                indx = 0
                Y_batch = {}
                Y_batch['root'] = root[i:i+batch_size]
                Y_batch['vowel'] = vowel[i:i+batch_size]
                Y_batch['constant'] = constant[i:i+batch_size]
                yield (X_batch, Y_batch)
        

In [21]:
history = model.fit(flow(), epochs=10, steps_per_epoch=1000, verbose=2)

  {'root': '...', 'vowel': '...', 'constant': '...'}
    to  
  ['...', '...', '...']
Train for 1000 steps
Epoch 1/10
1000/1000 - 18991s - loss: 8.4165 - root_loss: 4.8706 - vowel_loss: 2.2542 - constant_loss: 1.2917 - root_accuracy: 0.0268 - vowel_accuracy: 0.1992 - constant_accuracy: 0.6193
Epoch 2/10


InvalidArgumentError:  Incompatible shapes: [3] vs. [32]
	 [[node metrics/vowel_accuracy/Equal (defined at <ipython-input-21-0aef7a971f40>:1) ]] [Op:__inference_distributed_function_5378]

Function call stack:
distributed_function
