In [1]:
import os
# Raise TensorFlow's native (C++) log threshold. This runs in its own cell,
# ahead of the cell that imports tensorflow.
# NOTE(review): the documented levels are 0-3; confirm that '20' is intended
# rather than '2' or '3'.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '20'

In [2]:
import tensorflow as tf
import numpy as np
import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras import layers

In [None]:
# Switch every visible GPU to on-demand memory allocation.
# The previous version indexed `[0]` unconditionally, which raised IndexError on
# CPU-only machines, and configured only the first GPU on multi-GPU hosts.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# `device` stays defined as before (the first GPU); it is None when no GPU exists.
device = gpus[0] if gpus else None

In [None]:
# Directory that is listed for training files; each file name is expected to
# start with its label followed by '_' (the label is taken as the text before
# the first underscore in the cells below).
train_dir = "mergedData/"

In [4]:
# File names found in the training directory (names only, no directory prefix).
path = os.listdir(train_dir)

# The label of a file is everything before the first underscore of its name.
lbl = [file_name.split('_')[0] for file_name in path]

In [5]:
# Character vocabulary: every distinct character that occurs in any label,
# sorted so the resulting index assignment does not depend on set ordering.
characters = sorted({char for label in lbl for char in label})

# Forward lookup layer: character -> integer id (no mask token reserved).
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)

# Reverse lookup layer, built from the forward layer's vocabulary:
# integer id -> character.
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    mask_token=None,
    invert=True,
)


In [None]:
# Snapshot of the vocabulary held by the forward lookup layer.
# NOTE(review): despite the name, this is presumably a list of tokens rather
# than a dict -- confirm before indexing it by character.
dict_ = char_to_num.get_vocabulary()

In [None]:
# Full path of every training file, in the same order as `path`.
image_paths = [os.path.join(train_dir, file_name) for file_name in path]

# For every file, the label (text before the first '_') encoded as a list of
# integer ids, one id per character, using the forward lookup layer.
tokenized_lbls = [
    [int(char_to_num(symbol)) for symbol in file_name.split('_')[0]]
    for file_name in path
]

## Load Data

In [6]:
# Input-pipeline and image-size settings shared by the cells below.

# `tf.data.AUTOTUNE` is the stable name of the deprecated
# `tf.data.experimental.AUTOTUNE` alias; both denote the same sentinel value.
AUTOTUNE = tf.data.AUTOTUNE

# Enable XLA JIT compilation globally (the training log shows
# "Compiled cluster using XLA!").
tf.config.optimizer.set_jit(True)

SHUFFLE_BUFFER_SIZE = 256  # elements held in the shuffle buffer of the training set
BATCH_SIZE = 32
height = 65   # image height in pixels after resizing
width = 256   # image width in pixels after resizing

def load_label(path):
    """Derive the encoded label from a file path.

    The file name is the last '/'-separated component of `path`; the label is
    the part of that name before the first '_'. The label is split into
    UTF-8 characters and each character is mapped to its integer id with
    `char_to_num`.

    Args:
        path: scalar string tensor holding the file path.

    Returns:
        The result of `char_to_num` applied to the label's characters.
    """
    file_name = tf.strings.split(path, '/')[-1]
    label_text = tf.strings.split(file_name, '_')[0]
    label_chars = tf.strings.unicode_split(label_text, input_encoding="UTF-8")
    return char_to_num(label_chars)

def load_image(path):
    """Read the file at `path` and return its raw, still-encoded contents."""
    return tf.io.read_file(path)

def preprocessing_image(image):
    """Decode an encoded JPEG and bring it into the layout the model expects.

    Steps: decode to a single channel, convert to float32, resize to
    `[height, width]`, then swap the first two axes so the result is laid out
    as (width, height, channel).

    Args:
        image: encoded JPEG contents, as returned by `load_image`.

    Returns:
        The transposed float32 image tensor.
    """
    decoded = tf.image.decode_jpeg(image, channels=1)
    as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    resized = tf.image.resize(as_float, [height, width])
    # Swap rows and columns; the channel axis stays last.
    return tf.transpose(resized, perm=[1, 0, 2])

def get_image_with_label(path):
    """Build an (image, label) pair from a single file path.

    The label is parsed from the file name by `load_label`; the image is read
    with `load_image` and prepared with `preprocessing_image`.
    """
    label = load_label(path)
    return preprocessing_image(load_image(path)), label








def load_image_label(path, label):
    """Turn one (file path, token ids) element into an (image, token ids) pair.

    The dataset below is built from two parallel lists, so `Dataset.map` calls
    its function with two arguments. This mapper was referenced by both `map`
    calls but never defined, which raised NameError on a fresh kernel.

    Args:
        path: scalar string tensor with the image file path.
        label: the already tokenized label for that file; passed through as is.

    Returns:
        Tuple of (preprocessed image tensor, label).
    """
    return preprocessing_image(load_image(path)), label


dataset = tf.data.Dataset.from_tensor_slices((image_paths, tokenized_lbls))

validation_split = 0.1
# One dataset element is produced per path, so the size is known without
# iterating over the whole dataset.
DATASET_SIZE = len(image_paths)
print("Dataset size: ", DATASET_SIZE)
train_size = int((1-validation_split) * DATASET_SIZE)
print("train size: ", train_size)

# NOTE(review): the split follows the order of `image_paths` (directory listing
# order); consider a seeded shuffle before splitting if that order is not random.
train_dataset = dataset.take(train_size)
validation_dataset = dataset.skip(train_size)

# Training pipeline: decode -> shuffle -> batch -> prefetch.
train_dataset = train_dataset.map(load_image_label, num_parallel_calls=AUTOTUNE)
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE, num_parallel_calls=AUTOTUNE)
train_dataset = train_dataset.prefetch(AUTOTUNE)

# Validation pipeline: same as training but without shuffling.
validation_dataset = validation_dataset.map(load_image_label, num_parallel_calls=AUTOTUNE)
validation_dataset = validation_dataset.batch(BATCH_SIZE, num_parallel_calls=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(AUTOTUNE)


### Define CNN Model

In [7]:
from keras.layers import GlobalAveragePooling2D, MaxPool1D, Conv1D, Reshape, Input, Dense, Conv2D, Dropout, MaxPooling2D, BatchNormalization
from keras.models import Model
from keras.losses import Loss
from keras.optimizers import Optimizer

In [8]:
# Target shape for flattening the convolutional feature map into a sequence.
# The model applies four 2x2 max-pooling stages (2**4 = 16) and its last
# Conv2D layer has 256 filters.
downsample_factor = 16
last_conv_filters = 256
sequence_length = width // downsample_factor
features_per_step = (height // downsample_factor) * last_conv_filters
new_shape = (sequence_length, features_per_step)

# Number of output classes = size of the lookup layer's vocabulary.
nclasses = len(char_to_num.get_vocabulary())

In [9]:
# Images arrive transposed, i.e. laid out as (width, height, channel).
input_layer = Input(shape=(width, height, 1))

# Convolutional feature extractor: four Conv2D + 2x2 max-pooling stages with
# 32, 64, 128 and 256 filters, created in that order.
features = input_layer
for filter_count in (32, 64, 128, 256):
    features = Conv2D(filter_count, (3, 3), activation="relu", padding="same")(features)
    features = MaxPooling2D((2, 2))(features)

# Collapse the feature map into a sequence and process it along that axis.
sequence = Reshape(target_shape=new_shape)(features)
sequence = Dense(128, activation="relu")(sequence)
sequence = Dropout(0.2)(sequence)
sequence = Conv1D(64, 3, activation="relu", padding="same")(sequence)
sequence = MaxPool1D(2)(sequence)
sequence = Dropout(0.5)(sequence)

# Per-step class distribution over the vocabulary.
output_layer = Dense(nclasses, activation="softmax")(sequence)
model = Model(inputs=input_layer, outputs=output_layer)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 65, 1)]      0         
                                                                 
 conv2d (Conv2D)             (None, 256, 65, 32)       320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 128, 32, 32)       0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 32, 64)       18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 64, 16, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 64, 16, 128)       73856 

In [10]:
# Optimizer and loss are created once and handed to compile(). Previously these
# objects were built and then ignored, because compile() constructed fresh
# duplicates. They are also taken from the same `keras` namespace as the model's
# layers instead of mixing `tf.keras` and `keras`.
opt = keras.optimizers.Adam(learning_rate=1e-3)
loss_obj = keras.losses.SparseCategoricalCrossentropy()
# NOTE(review): kept only so the name stays defined; it is not passed to
# compile(). The 'accuracy' string below lets Keras choose the metric that
# matches the loss -- confirm whether this object is still needed.
accuracy = keras.metrics.Accuracy()

model.compile(
    optimizer=opt,
    loss=loss_obj,
    metrics=['accuracy']
)

# Stop when val_loss has not improved for this many epochs, restoring the best weights.
early_stopping_patience = 15
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
)
# Multiply the learning rate by 0.7 after 3 epochs without val_loss improvement,
# never going below 1e-4.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.7,
                              patience=3, min_lr=0.0001)

callbacks = [keras.callbacks.TensorBoard(), early_stopping, reduce_lr]

In [11]:
# Train with validation after every epoch; the callbacks handle TensorBoard
# logging, early stopping and learning-rate reduction.
# NOTE(review): the saved output below reports "Epoch 1/20", so it was produced
# by an earlier run with a different `epochs` value -- re-run to refresh it.
model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=50,
    callbacks=callbacks,
)

Epoch 1/20


I0000 00:00:1718385888.541237   82804 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1718385894.424228   82805 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7fa4cbf20850>

In [18]:
# Persist the trained model under the name 'ModelCNN.tf' in the working directory.
model.save('ModelCNN.tf')

In [None]:
# best_model = tf.keras.models.load_model('./ModelCNN.tf/')