In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from PIL import Image

import random
# Seed Python's RNG so the shuffle used for the train/validation split is repeatable.
# (Assigning `random.seed = 69` would only overwrite the function and seed nothing.)
random.seed(69)

In [2]:
# Side length (pixels) that every loaded image and mask is resized to.
input_size = 128

# Dataset root and the two sub-folders read by the loaders below.
root_dir = "/home/ubuntu/Arrun/Combined_AnnotatedOCRText/"
img_dir, segmap_img_dir = (
    os.path.join(root_dir, subfolder) for subfolder in ("images_SR", "segmaps")
)

In [3]:
# defining other metrics:
def psnr(y_true,y_pred):
    """Return ``tf.image.psnr`` of the two inputs, with the maximum value fixed at 1.0.

    ``tf`` is resolved at call time, so TensorFlow must have been imported as
    ``tf`` before this function is first called.
    """
    peak_value = 1.0
    return tf.image.psnr(y_true, y_pred, max_val=peak_value)
def ssim(y_true,y_pred):
    """Return ``tf.image.ssim`` of the two inputs, with the maximum value fixed at 1.0.

    ``tf`` is resolved at call time, so TensorFlow must have been imported as
    ``tf`` before this function is first called.
    """
    peak_value = 1.0
    return tf.image.ssim(y_true, y_pred, max_val=peak_value)

In [4]:
from tensorflow.keras.utils import to_categorical

2023-09-05 15:37:36.976315: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-05 15:37:37.029107: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
def get_randomized_filenames(directory_path):
    """Return the entry names of ``directory_path`` in shuffled order.

    Args:
        directory_path: Directory whose entries are listed (names only, not
            full paths; no filtering by extension is done here).

    Returns:
        A new list of entry names, shuffled with the global ``random`` module.

    The listing is sorted before shuffling because ``os.listdir`` returns
    entries in arbitrary order; without the sort, the same RNG state could
    still yield a different permutation on another run or machine.
    """
    filenames = sorted(os.listdir(directory_path))
    random.shuffle(filenames)
    return filenames

# Shuffle the image file names once, then split them 90% / 10% into the
# training and validation lists used by the rest of the notebook.
# Reuse img_dir instead of rebuilding the path by string concatenation
# (root_dir already ends with "/", so root_dir + "/images_SR" produced "//").
directory_path = img_dir
randomized_filenames = get_randomized_filenames(directory_path)

split_index = len(randomized_filenames)*9//10
train_filenames, val_filenames = randomized_filenames[:split_index],randomized_filenames[split_index:]

In [6]:
# load and prepare one batch of input images (used for training and validation)
def load_images(filenames, batch_size, batch_number):
    """Load one slice of ``filenames`` from ``img_dir`` as a float32 array.

    Args:
        filenames: Sequence of file names (relative to ``img_dir``).
        batch_size: Number of names per slice.
        batch_number: Zero-based slice index; the slice is
            ``filenames[batch_size*batch_number : batch_size*(batch_number+1)]``.

    Returns:
        ``np.ndarray`` of dtype float32 holding, for every ``.png`` name in the
        slice, the image resized to ``(input_size, input_size)``, with the last
        axis reversed and values divided by 255. Names not ending in ``.png``
        are skipped, so the result may hold fewer than ``batch_size`` items.

    Raises:
        OSError: If OpenCV cannot read one of the ``.png`` files.
    """
    batch_names = filenames[batch_size*batch_number:batch_size*(batch_number+1)]
    in_img = []

    for name in batch_names:
        if not name.endswith('.png'):
            continue
        # Build the full path instead of calling os.chdir(): the loader no
        # longer changes the process-wide working directory as a side effect.
        path = os.path.join(img_dir, name)
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread signals failure by returning None; fail with the
            # offending path rather than an opaque error inside cv2.resize.
            raise OSError(f"Could not read image: {path}")
        resized = cv2.resize(bgr, (input_size, input_size))
        # Reverse the channel axis (OpenCV loads BGR) and scale to [0, 1].
        in_img.append(resized[:, :, ::-1] / 255)

    return np.array(in_img, dtype='float32')

In [7]:
# load and prepare the segmentation masks that belong to one batch of images
def load_segmasks(filenames,batch_size,batch_number):
    """Load the one-hot segmentation masks for one slice of ``filenames``.

    For every image name in the slice, the mask name is derived as
    ``segmap_<second "_"-separated field of the image name>`` and read from
    ``segmap_img_dir`` in grayscale.

    Args:
        filenames: Sequence of *image* file names (the same list given to
            ``load_images``).
        batch_size: Number of names per slice.
        batch_number: Zero-based slice index.

    Returns:
        float32 array produced by ``to_categorical(..., num_classes=2)`` from
        the binarized masks resized to ``(input_size, input_size)``.

    Raises:
        OSError: If OpenCV cannot read one of the derived mask files.
        IndexError: If an image name contains no ``"_"`` (from the name
            derivation).

    NOTE(review): only derived names ending in ``.png`` are loaded; an image
    name with more than one ``"_"`` would lose its extension here and be
    skipped, which would misalign masks and images - confirm the naming scheme.
    """
    batch_names = filenames[batch_size*batch_number:batch_size*(batch_number+1)]
    new_filenames = [f'segmap_{i.split("_")[1]}' for i in batch_names]
    segmasks = []

    for name in new_filenames:
        if not name.endswith('.png'):
            continue
        # Full path instead of os.chdir(): no hidden change of working directory.
        path = os.path.join(segmap_img_dir, name)
        gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            raise OSError(f"Could not read segmentation map: {path}")
        resized = cv2.resize(gray, (input_size, input_size))
        # Resizing interpolates, so mask borders hold intermediate grey levels.
        # Dividing by 255 and letting to_categorical cast to int truncated every
        # value below 255 to class 0; threshold at mid-grey instead.
        segmasks.append((resized > 127).astype('uint8'))

    return np.array(to_categorical(segmasks,num_classes = 2), dtype='float32')

In [None]:
# Visual sanity check: show each image of one training slice next to its mask.
batch_number = 3

batch_size = 32
train_dataset = [load_images(train_filenames,batch_size,batch_number),load_segmasks(train_filenames,batch_size,batch_number)]
# Iterate over what was actually loaded: the loaders skip non-.png names, so a
# slice can hold fewer than batch_size samples and range(batch_size) would
# then index past the end of the arrays.
for image, onehot_mask in zip(train_dataset[0], train_dataset[1]):
    plt.figure(figsize = (18,12))
    plt.subplot(1,3,1).imshow(image)
    plt.axis('off')
    # Collapse the one-hot channel axis back to a class-index map for display.
    plt.subplot(1,3,2).imshow(np.argmax(onehot_mask,axis = 2),cmap = 'gray')
    plt.axis('off')
    plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *

In [None]:
def unet(input_shape, num_classes):
    """Build a three-level U-Net style encoder/decoder as a Keras ``Model``.

    Args:
        input_shape: Shape passed to ``Input`` (batch dimension excluded).
        num_classes: Number of channels of the final 1x1 softmax convolution.

    Returns:
        An uncompiled ``Model`` mapping ``inputs`` to the softmax output.

    Layout: three encoder stages (64, 128, 256 filters), each made of two 3x3
    ReLU convolutions followed by 2x2 max pooling; a 512-filter middle stage;
    three decoder stages (256, 128, 64 filters), each 2x2 upsampling, a 2x2
    convolution, concatenation with the matching encoder output on axis 3,
    and two 3x3 ReLU convolutions.
    """
    def double_conv(tensor, filters):
        # Two stacked 3x3 'same'-padded ReLU convolutions with `filters` channels.
        for _ in range(2):
            tensor = Conv2D(filters, 3, activation='relu', padding='same')(tensor)
        return tensor

    inputs = Input(input_shape)

    # Encoder: remember each stage's pre-pooling output for the skip connections.
    skips = []
    x = inputs
    for filters in (64, 128, 256):
        x = double_conv(x, filters)
        skips.append(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # Middle
    x = double_conv(x, 512)

    # Decoder: walk the skips from deepest to shallowest.
    for filters, skip in zip((256, 128, 64), reversed(skips)):
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(filters, 2, activation='relu', padding='same')(x)
        x = concatenate([skip, x], axis=3)  # encoder features first, upsampled second
        x = double_conv(x, filters)

    outputs = Conv2D(num_classes, 1, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)

# Network configuration: square 3-channel inputs of side input_size, two output classes.
num_classes = 2
input_shape = (input_size, input_size, 3)

# Build the U-Net and compile it with the Adamax optimizer, binary
# cross-entropy loss and the accuracy metric.
model = unet(input_shape=input_shape, num_classes=num_classes)
model.compile(
    loss='binary_crossentropy',
    optimizer='adamax',
    metrics=['accuracy'],
)

In [None]:
def dice_coefficient(y_true, y_pred):
    """Mean Dice score over the entries of the axis left after reducing axes 0-2.

    Both inputs are summed over axes ``(0, 1, 2)``; presumably they are
    ``(batch, height, width, classes)`` tensors, making the result a per-class
    score - confirm against callers. A 1e-7 term is added to numerator and
    denominator. Only the first ``num_classes`` entries (module-level
    ``num_classes``) are averaged.
    """
    reduce_axes = (0, 1, 2)
    eps = 1e-7

    overlap = tf.reduce_sum(y_true * y_pred, axis=reduce_axes)
    total = tf.reduce_sum(y_true, axis=reduce_axes) + tf.reduce_sum(y_pred, axis=reduce_axes)
    per_class = (2.0 * overlap + eps) / (total + eps)

    return tf.reduce_mean(per_class[:num_classes])

In [None]:
def compute_iou(mask1, mask2):
    """Intersection-over-union of two masks, treating non-zero entries as set.

    Returns ``|mask1 AND mask2| / (|mask1 OR mask2| + 1e-8)``; the small
    constant keeps the division defined when both masks are empty (result 0).
    """
    overlap = np.logical_and(mask1, mask2).sum()
    combined = np.logical_or(mask1, mask2).sum()
    return overlap / (combined + 1e-8)

In [13]:
# Dataset sizes after the train/validation split.
count_train_images, count_val_images = len(train_filenames), len(val_filenames)

# Validation is evaluated in `factor` chunks of val_batch_size files each.
factor = 2
val_batch_size = count_val_images//factor

# Training schedule: slices of batch_size files are visited in turn.
batch_size = 100
num_visits = batch_size*50
max_batch_number = count_train_images//batch_size - 1
total_train_epochs = max_batch_number*num_visits

# NOTE(review): the cycle starts at slice 1, and the training loop also wraps
# back to 1, so slice 0 of train_filenames is never trained on - confirm intended.
batch_number = 1

print(f"Total Train Images = {count_train_images}, \nTotal Val Images = {count_val_images}, \nVal_batch_size : {val_batch_size}, \nBatch_size : {batch_size},\nNum_visits : {num_visits}\nTotal_train_epochs : {total_train_epochs},\nStart_batch_number : {batch_number},\nMax_batch_number : {max_batch_number}")

Total Train Images = 1513, 
Total Val Images = 169, 
Val_batch_size : 84, 
Batch_size : 100,
Num_visits : 5000
Total_train_epochs : 70000,
Start_batch_number : 1,
Max_batch_number : 14


In [14]:
import gc

In [29]:
# Directory holding the held-out test images; its .png files are loaded into test_img.
test_imgdir = "/home/ubuntu/Arrun/OCRTestData/"

In [41]:
# Load every .png in test_imgdir, resized and scaled the same way as load_images.
# Paths are built explicitly instead of os.chdir(), so the notebook's working
# directory is left untouched; the listing is sorted because os.listdir gives
# no ordering guarantee.
test_filenames = sorted(os.listdir(test_imgdir))
test_img = []
for name in test_filenames:
    if not name.endswith('.png'):
        continue
    path = os.path.join(test_imgdir, name)
    bgr = cv2.imread(path)
    if bgr is None:
        # cv2.imread returns None on failure; report which file was unreadable.
        raise OSError(f"Could not read test image: {path}")
    # Reverse the channel axis (OpenCV loads BGR) and scale to [0, 1].
    test_img.append(cv2.resize(bgr,(input_size,input_size))[:,:,::-1]/255)

test_img = np.array(test_img, dtype='float32')

In [None]:
# Training driver. Each iteration loads one slice of train_filenames, fits the
# model on it for a single Keras epoch, and advances batch_number cyclically.
# Every max_batch_number iterations the model is scored on the validation files
# and saved whenever the mean Dice value exceeds the best value seen so far.
max_val_dice = 0

for i in range(total_train_epochs):
    print(f"Iteration {i}")
    # Load only the current slice from disk; it is deleted again after fitting.
    train_dataset = [load_images(train_filenames,batch_size,batch_number),load_segmasks(train_filenames,batch_size,batch_number)]
    print(f"Training on Batch {batch_number}")
    print("Loaded", np.array(train_dataset[0]).shape, np.array(train_dataset[1]).shape)
    # batch_size = 1 here is the Keras mini-batch size, not the slice size above.
    model.fit(np.array(train_dataset[0]),np.array(train_dataset[1]),batch_size = 1, epochs = 1)
    
    del train_dataset
    gc.collect()
    
    # Advance to the next slice, wrapping back to slice 1 after max_batch_number.
    # NOTE(review): batch_number is initialised to 1 and wraps to 1, so slice 0
    # is never selected by this loop - confirm that is intended.
    if(batch_number == max_batch_number):
        batch_number = 1
    else:
        batch_number += 1
        
        
    # Validate every max_batch_number iterations, skipping iteration 0.
    if(i % max_batch_number == 0 and i!= 0):
        print(f"{i} epochs complete")
        val_iou = []
        #val_class_iou = []
        mean_val_dice = 0
        # The validation files are processed in `factor` chunks of val_batch_size.
        for v in range(factor):
            validation_dataset = [load_images(val_filenames,val_batch_size,v),load_segmasks(val_filenames,val_batch_size,v)]
            val_preds = model.predict(validation_dataset[0],batch_size = 1)
            for j in range(len(val_preds)):
                # argmax over axis 2 turns each per-sample map into class indices;
                # compute_iou then treats non-zero indices as the set region.
                val_iou.append(compute_iou(np.argmax(val_preds[j],axis = 2),np.argmax(validation_dataset[1][j],axis = 2)))
                #val_class_iou.append(class_wise_iou(to_categorical(np.argmax(val_preds[j],axis = 2),num_classes = num_classes),validation_dataset[1][j]))
            # Running mean of the per-chunk Dice values. Predictions are passed in
            # the y_true position; the formula in dice_coefficient is symmetric in
            # its two arguments.
            # NOTE(review): relies on the returned tensor exposing .cpu(); .numpy()
            # may be the intended call - confirm against the installed TF version.
            mean_val_dice = (v*mean_val_dice + np.array(dice_coefficient(val_preds, validation_dataset[1]).cpu()))/(v+1)
            del val_preds
            gc.collect()    

        # Mean IoU over all validation samples, with any NaN replaced by 0 first.
        mean_val_iou = np.mean(np.array(np.nan_to_num(val_iou, nan=0)))
        #mean_val_class_iou = np.mean(np.array(np.nan_to_num(val_class_iou, nan=0)))
        
        print(f"Val IoU = {mean_val_iou}")
        #print(f"Val Class_IoU = {mean_val_class_iou}")
        print(f"Val DICE = {mean_val_dice}")
        # Checkpoint only on a strict improvement of the validation Dice value.
        if(mean_val_dice>max_val_dice):
            max_val_dice = mean_val_dice
            print("New Max DICE coeff created!")
            model.save('/home/ubuntu/Arrun/UNet_CombData_SDSR_OCR_Binarization_lossBCE_valDICE.h5')
        else:
            print(f"Earlier max dice was {max_val_dice}")
   
        del validation_dataset, val_iou
        gc.collect()