In [5]:
import os
# Silence TensorFlow's C++ logging. Set here, before the cell that imports
# TensorFlow, so the value is already in the environment when the library loads.
# Documented levels run from '0' (show everything) to '3' (hide INFO, WARNING
# and ERROR); the previous value '20' was outside that range.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [6]:
import tensorflow as tf
import numpy as np
import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras import layers

In [7]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up
# front. The setting is applied to every visible GPU (TensorFlow expects memory
# growth to be configured consistently across GPUs), and the cell is a no-op on
# a CPU-only machine instead of failing with IndexError on `[0]`.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Kept for backward compatibility: the first GPU, or None when there is none.
device = gpus[0] if gpus else None

In [8]:
# Directory that is listed to find the training images. The label of each file
# is taken from the part of its name before the first underscore.
train_dir = "mergedData/"

In [9]:
# List the entries of the training directory and derive one label per entry:
# the file-name prefix that precedes the first underscore.
path = os.listdir(train_dir)
lbl = [entry.split('_')[0] for entry in path]

In [10]:
# Alphabet: every distinct character that occurs in any label, in sorted order.
characters = sorted({char for label in lbl for char in label})

# Forward lookup layer: character -> integer id.
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)

# Reverse lookup layer: integer id -> character, built from the same vocabulary.
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    mask_token=None,
    invert=True,
)


In [11]:
# Snapshot of the forward lookup layer's vocabulary, i.e. the return value of
# get_vocabulary(). NOTE(review): no other visible cell reads `dict_`.
dict_ = char_to_num.get_vocabulary()

In [12]:
# Build the pipeline inputs: one file path and one list of integer character
# ids per directory entry.
#
# The ids come from a plain dict built once from the layer's vocabulary rather
# than from calling the StringLookup layer once per character, which costs an
# eager TensorFlow call for every single character. A token's id is its
# position in get_vocabulary(); position 0 is the out-of-vocabulary slot, which
# is also the id the layer returns for an unknown character.
char_index = {token: idx for idx, token in enumerate(char_to_num.get_vocabulary())}

image_paths = []
tokenized_lbls = []
for i in path:
    image_paths.append(os.path.join(train_dir, i))
    # Same label rule as everywhere else: text before the first underscore.
    tokenized_lbls.append([char_index.get(char, 0) for char in i.split('_')[0]])

## Load Data

In [20]:
# Turn on XLA just-in-time compilation for TensorFlow.
tf.config.optimizer.set_jit(True)

# Let tf.data pick parallelism and prefetch sizes at runtime.
# `tf.data.AUTOTUNE` is the stable (non-experimental) name of the constant that
# was previously read from `tf.data.experimental.AUTOTUNE`.
AUTOTUNE = tf.data.AUTOTUNE

# Image size, in pixels, that every image is resized to.
desired_height = 65
desired_width = 256

def normalization(image):
    """Cast `image` to float32 and divide every value by 255.

    Args:
        image: Tensor to convert.

    Returns:
        A float32 tensor holding the input values divided by 255.0.
    """
    return tf.cast(image, tf.float32) / 255.0

def load_image(path):
    """Read a JPEG file and return it as a normalized, resized RGB tensor.

    Args:
        path: Path of the JPEG file to read.

    Returns:
        A float32 tensor of shape (desired_height, desired_width, 3) whose
        values were scaled by `normalization`.
    """
    raw = tf.io.read_file(path)
    # Always decode to three channels. Without `channels` the decoder keeps the
    # channel count stored in the file, so a grayscale JPEG would come back
    # with one channel and no longer match the other images in a batch.
    image = tf.image.decode_jpeg(raw, channels=3)
    image = normalization(image)
    image = tf.image.resize(image, [desired_height, desired_width])
    return image

# def data_augmentation(image):
#     image = tf.image.adjust_contrast(image, 0.4)
#     image = tf.image.adjust_brightness(image, 0.3)
#     image = tf.image.adjust_saturation(image, 0.3)
#     return image




def load_image_label(path, label):
    """Load one training example.

    Args:
        path: Path of the image file, forwarded to `load_image`.
        label: Label value for that image.

    Returns:
        A pair `(image, label)`: the loaded image with its first two axes
        swapped, and the label cast to float32.
    """
    # load_image resizes to (height, width, channels); swapping the first two
    # axes yields (width, height, channels).
    transposed = tf.transpose(load_image(path), perm=[1, 0, 2])
    # transposed = data_augmentation(transposed)
    return transposed, tf.cast(label, tf.float32)
    
SHUFFLE_BUFFER_SIZE = 256
BATCH_SIZE = 32
SPLIT_SEED = 42  # fixes which samples end up in the train / validation sets

# NOTE(review): from_tensor_slices needs rectangular input, so this presumably
# relies on every label having the same length - confirm against the data.
dataset = tf.data.Dataset.from_tensor_slices((image_paths, tokenized_lbls))

validation_split = 0.1
# One element per path, so the size is known without iterating the dataset.
DATASET_SIZE = len(image_paths)
print("Dataset size: ", DATASET_SIZE)
train_size = int((1-validation_split) * DATASET_SIZE)
print("train size: ", train_size)

# Shuffle once, with a fixed seed, before splitting. Otherwise the split just
# follows the directory-listing order, which is arbitrary and may group similar
# files together. The elements are only paths and label ids at this point, so a
# buffer covering the whole dataset is cheap. reshuffle_each_iteration=False
# keeps the order identical on every pass, so take/skip always select the same
# two disjoint sets and no validation sample leaks into training.
dataset = dataset.shuffle(
    max(DATASET_SIZE, 1), seed=SPLIT_SEED, reshuffle_each_iteration=False
)
train_dataset = dataset.take(train_size)
validation_dataset = dataset.skip(train_size)

# Training pipeline: decode -> shuffle (new order every epoch) -> batch -> prefetch.
train_dataset = train_dataset.map(load_image_label, num_parallel_calls=AUTOTUNE)
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE, num_parallel_calls=AUTOTUNE)
train_dataset = train_dataset.prefetch(AUTOTUNE)

# Validation pipeline: same steps without the per-epoch shuffle.
validation_dataset = validation_dataset.map(load_image_label, num_parallel_calls=AUTOTUNE)
validation_dataset = validation_dataset.batch(BATCH_SIZE, num_parallel_calls=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(AUTOTUNE)


Dataset size:  8672
train size:  7804


### Define CNN Model

In [21]:
from keras.layers import GlobalAveragePooling2D, MaxPool1D, Conv1D, Reshape, Input, Dense, Conv2D, Dropout, MaxPooling2D, BatchNormalization
from keras.models import Model
from keras.losses import Loss
from keras.optimizers import Optimizer

In [22]:
# `width` and `height` are not assigned in any earlier cell, so this cell raised
# NameError on a fresh kernel. Bind them to the image-size constants of the
# data-loading cell; images are transposed to (width, height, channels) there.
width, height = desired_width, desired_height

# Target shape for flattening the CNN feature map into a sequence:
# four 2x2 max-pooling stages divide each spatial dimension by 16, and the last
# convolution has 256 filters, giving (width // 16) steps of
# (height // 16) * 256 features each.
new_shape = (width // 16, (height // 16) * 256)

# One output class per entry of the lookup vocabulary (per the Keras docs this
# includes the out-of-vocabulary token).
nclasses = len(char_to_num.get_vocabulary())

In [26]:
# CNN that maps a (width, height, 3) image to a sequence of per-step class
# distributions. The input shape uses desired_width / desired_height directly:
# the names `width` and `height` used here before are not assigned in any
# earlier cell. Shape comments below assume the configured 256 x 65 input.
input_layer = Input(shape=(desired_width, desired_height, 3))

# Four conv + 2x2 max-pool stages; each pool halves both spatial dimensions.
conv2d_1 = Conv2D(32,(3, 3), activation="relu", padding="same")(input_layer)
maxpool_1 = MaxPooling2D((2, 2))(conv2d_1)   # -> (128, 32, 32)

conv2d_2 = Conv2D(64,(3, 3), activation="relu", padding="same")(maxpool_1)
maxpool_2 = MaxPooling2D((2, 2))(conv2d_2)   # -> (64, 16, 64)

conv2d_3 = Conv2D(128,(3, 3), activation="relu", padding="same")(maxpool_2)
maxpool_3 = MaxPooling2D((2, 2))(conv2d_3)   # -> (32, 8, 128)

conv2d_4 = Conv2D(256,(3, 3), activation="relu", padding="same")(maxpool_3)
maxpool_4 = MaxPooling2D((2, 2))(conv2d_4)   # -> (16, 4, 256)

# Fold the height and channel axes together so the width axis becomes a
# sequence axis: (16, 4, 256) -> new_shape.
reshape = Reshape(target_shape=new_shape)(maxpool_4)
dense_1 = Dense(128, activation="relu")(reshape)
dropout1 = Dropout(0.2)(dense_1)

# 1-D convolution along the sequence axis, then halve the number of steps.
conv1d = Conv1D(64, 3, activation="relu", padding="same")(dropout1)

maxpool_5 = MaxPool1D(2)(conv1d)

dropout2 = Dropout(0.5)(maxpool_5)

# One softmax over the character classes per remaining sequence step.
# NOTE(review): the number of steps (8 for a 256-wide input) presumably has to
# equal the label length - confirm against the data.
output_layer = Dense(nclasses, activation="softmax")(dropout2)
model = Model(inputs=input_layer, outputs=output_layer)
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 256, 65, 3)]      0         
                                                                 
 conv2d_8 (Conv2D)           (None, 256, 65, 32)       896       
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 128, 32, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_9 (Conv2D)           (None, 128, 32, 64)       18496     
                                                                 
 max_pooling2d_9 (MaxPoolin  (None, 64, 16, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_10 (Conv2D)          (None, 64, 16, 128)       7385

In [27]:
# Optimizer, loss and metric are created once and handed to compile().
# Before, these three objects were built but never used: compile() received
# fresh instances, so changing e.g. the learning rate here had no effect.
opt = keras.optimizers.Adam(learning_rate=1e-3)
loss_obj = keras.losses.SparseCategoricalCrossentropy()
# The labels are integer class ids and the model outputs class probabilities,
# so the sparse-categorical accuracy is the matching metric.
# keras.metrics.Accuracy compares y_true and y_pred for equality and does not
# fit that setup. The explicit name keeps the "accuracy" / "val_accuracy" keys.
accuracy = keras.metrics.SparseCategoricalAccuracy(name="accuracy")

model.compile(
    optimizer=opt,
    loss=loss_obj,
    metrics=[accuracy]
)

# Stop when val_loss has not improved for 15 epochs and roll back to the best
# weights seen.
early_stopping_patience = 15
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
)
# Multiply the learning rate by 0.7 after 3 epochs without val_loss
# improvement, never going below 1e-4.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.7,
                              patience=3, min_lr=0.0001)

callbacks = [keras.callbacks.TensorBoard(), early_stopping, reduce_lr]

In [28]:
# Train for at most 50 epochs, validating after each one; the callbacks
# configured above are passed in.
model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=50,
    callbacks=callbacks,
)

Epoch 1/50


I0000 00:00:1719576282.325001    4011 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1719576288.310007    4010 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


Epoch 2/50

W0000 00:00:1719576316.387808    4008 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50


<keras.src.callbacks.History at 0x7fa84c48fe90>

In [29]:
# Persist the trained model under 'ModelCNN.tf'. The recorded cell output shows
# assets being written to ModelCNN.tf/assets, i.e. the result is a directory.
model.save('ModelCNN.tf')

INFO:tensorflow:Assets written to: ModelCNN.tf/assets


INFO:tensorflow:Assets written to: ModelCNN.tf/assets


In [None]:
# best_model = tf.keras.models.load_model('./ModelCNN.tf/')