# Handwritten Line Detection with CNN

Prepare Environment
- Git clone etc.
- Download the raw data
- Preprocess the data

In [3]:
%cd drive/MyDrive/PixelwiseCNN/

/content/drive/MyDrive/PixelwiseCNN


In [5]:
%ls

[0m[01;34mdataset[0m/  [01;34mmodels[0m/  [01;34mshrankedDB_output[0m/  [01;34mshrankedDB_output_clipped[0m/  [01;34mweight[0m/


## Common Imports

In [6]:
import os
import re
import cv2
import glob
import random
import numpy as np
import matplotlib.pyplot as plt
from timeit import default_timer as timer
from PIL import Image
from tqdm import tqdm

import torch 
import torch.nn.functional as F
from torch import nn 
from torch import optim 
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

import torchvision 
import torchvision.transforms.functional as TF
from torchvision import transforms
from torchsummary import summary

from models.Unet_model import UnetModel
# from models.Unet_model_clipped import UnetModelClipped

# from torchvision.models.segmentation import deeplabv3_resnet50 as resnet50

Check Cuda Before Computation

In [7]:
print(f'Cuda Available: {torch.cuda.is_available()}')
print(f'{"Cuda Device Name: " + torch.cuda.get_device_name(torch.cuda.current_device()) if torch.cuda.is_available() else "No Cuda Device Found"}')

# CUDA for PyTorch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True

Cuda Available: True
Cuda Device Name: Tesla P100-PCIE-16GB


## Hyperparameters and Data Paths

### Hyperparameters

In [8]:
# Hyperparameters
epochs = 40               # 4 predicts well, might be 2. 8 doesn't affect much ~0.5%
batch_size = 16           # 4 is OK, might be 8 (exceed mem.)
batch_extender = False    # Extends the batch so that training process done once in twice -> gives better result
learning_rate = 1e-2      # 1e-3 is OK., 5e-4 also OK. (0.01 -> 0.001 -> 0.0005) LR Scheduler!
dropout_rate = 0.0        # 0.2 is nice with big train data
loss_print_per_epoch = 1  # desired # loss data print per epoch
number_of_classes = 2     # OK.
validation_on = True
scheduler_on = True
sample_view = False
is_saving_output = True

# Hyperparameter Print
print(f'Epoch: {epochs}')
print(f'Dropout Rate: {dropout_rate}')
print(f'Learning Rate: {learning_rate}')
print(f'Batch Size: {batch_size*2 if batch_extender else batch_size} {"(Artifical Batch)" if batch_extender else ""}')

Epoch: 40
Dropout Rate: 0.0
Learning Rate: 0.01
Batch Size: 16 


### Paths

In [9]:
# Image Paths
data_dir = 'dataset'
train_path = data_dir + '/2_train'
test_path = data_dir + '/2_test'
validation_path = data_dir + '/2_validation'

# Trained Model Path
trained_model_path = 'weight/model_check.pt'
os.makedirs(os.path.join(os.getcwd(), trained_model_path.split("/")[0]), exist_ok=True)

## Boilerplate Functions

In [10]:
# Plots the given batch in 3 rows; Raw, Mask, Bitwise_Anded
def plt_images(images, masks):
    fig, axs = plt.subplots(3, batch_size, figsize=(images[0].shape))

    for i in range(len(images)):
        axs[0][i].imshow(images[i], cmap='gray')
        axs[1][i].imshow(masks[i], cmap='gray')
        axs[2][i].imshow(images[i] & masks[i], cmap='gray')
    fig.suptitle("Top Row: raw images, Middle Row: masks, Bottom Row: bitwise_and masks")
    plt.show()


# Returns the images and masks in the original format
def undo_preprocess(images, predicts):
    x = []
    y = []

    images = images.cpu().numpy()
    predicts = predicts.cpu().numpy()

    for index in range(images.shape[0]):
        image = images[index]
        # Needed to convert c,h,w -> h,w,c
        image = np.transpose(image, (1, 2, 0))
        # make every pixel 0-1 range than mul. 255 to scale the value
        image = np.squeeze(image) * 255
        x.append(image.astype(np.uint8))

        predict = predicts[index]
        # Needed to convert c,h,w -> h,w,c
        mask_array = np.transpose(predict, (1, 2, 0))
        # Every pixel has two class grad, so we pick the highest
        mask_array = np.argmax(mask_array, axis=2) * 255
        mask_array = mask_array.astype(np.uint8)
        y.append(mask_array)
    return np.array(x), np.array(y)


# Saves the given batch in directory
def save_output_batch(images, outputs):
    path = os.path.join(os.getcwd(), 'output_batch/')
    os.makedirs(path, exist_ok=True)
    print(f'You can find samples in \'{path}\'')

    for index in range(len(images)):
        image = images[index]
        save_image = Image.fromarray(image)
        save_image.save(path + str(index) + '_input.png')

        mask = outputs[index]
        save_mask = Image.fromarray(mask)
        save_mask.save(path + str(index) + '_output.png')


# Saves the given batch in directory
def save_predictions(images, predictions):
    path = os.path.join(os.getcwd(), 'output/')
    path = os.path.join(path, 'prediction/')
    os.makedirs(path, exist_ok=True)
    for index, prediction in enumerate(predictions):
        save_prediction = Image.fromarray(prediction)
        save_prediction.save(path + str(index) + '_output.png')
    
    for index in range(len(images)):
        save_image = Image.fromarray(images[index])
        save_image.save(path + str(index) + '_input.png')

        save_prediction = Image.fromarray(predictions[index])
        save_prediction.save(path + str(index) + '_output.png')
    print(f'You can find predictions in \'{path}\'')

## Loading Data

In [11]:
# Loads the data from the given path
def load_data(dataset_path):
    forms = []
    masks = []

    form_names = glob.glob(dataset_path + '/form' + '/*.png') # sample path -> './dataset/train/form/*.png'
    form_names.sort(key=lambda f: int(re.sub('\D', '', f))) # Sorts them as 0,1,2..

    mask_names = glob.glob(dataset_path + '/mask' + '/*.png') # sample path -> './dataset/train/mask/*.png'
    mask_names.sort(key=lambda f: int(re.sub('\D', '', f))) # Sorts them as 0,1,2..

    for image_file_name, mask_file_name in tqdm(zip(form_names, mask_names)):
        image = np.asarray(Image.open(image_file_name))
        mask = np.asarray(Image.open(mask_file_name))

        forms.append(image)
        masks.append(mask)
    
    return np.array(forms), np.array(masks)


# Load data from paths
train_images, train_masks = load_data(train_path) 
test_images, test_masks = load_data(test_path) 
validation_images, validation_masks = load_data(validation_path) 

984it [19:29,  1.19s/it]
308it [05:51,  1.14s/it]
247it [04:48,  1.17s/it]


## Handwriting Form Dataset

In [12]:
# DATASET CLASS
class FormDS(Dataset):
    def __init__(self, images, masks, number_of_classes: int, augmentation=False):
        self.images = images
        self.masks = masks
        self.number_of_classes = number_of_classes
        self.length = len(images)
        self.augmentation = augmentation

    # Converts the image, a PIL image, into a PyTorch Tensor
    def transform(self, image, mask):
        # needed to apply transforms
        image = transforms.ToPILImage()(image)
        mask = transforms.ToPILImage()(mask)

        if self.augmentation:
            # Random horizontal flipping
            if random.random() > 0.5:
                image = TF.hflip(image)
                mask = TF.hflip(mask)

            # Random vertical flipping
            if random.random() > 0.5:
                image = TF.vflip(image)
                mask = TF.vflip(mask)
            
        img = TF.to_tensor(np.array(image))
        msk = TF.to_tensor(np.array(mask))
        return img, msk

    def __getitem__(self, idx):
        image = self.images[idx]
        image = image.astype(np.float32)
        image = image / 255  # make pixel values between 0-1

        mask = self.masks[idx]
        mask = mask.astype(np.float32)
        mask = mask / 255   # make pixel values 0-1

        # make each pixel to have either 0 or 1  -> will be deleted because we used Nearest while scaling
        mask[mask > .7] = 1
        mask[mask <= .7] = 0

        image, mask = self.transform(image, mask)

        return image, mask

    def __len__(self):
        return self.length


### Dataloader

In [13]:
train_size = 984

# Train Dataset Loaded to Torch Here
train_dataset = FormDS(train_images[:train_size], train_masks[:train_size], number_of_classes, augmentation=True)
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
print(f'Train DS: Image Size: {len(train_dataset)} with {len(train_data_loader)} total batches')

# Test Dataset Loaded to Torch Here
test_dataset = FormDS(test_images, test_masks, number_of_classes, augmentation=True)
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
print(f'Test  DS: Image Size: {len(test_dataset)} with {len(test_data_loader)} total batches')

# Validation Dataset Loaded to Torch Here
validation_dataset = FormDS(validation_images, validation_masks, number_of_classes, augmentation=True)
validation_data_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
print(f'Valid DS: Image Size: {len(validation_dataset)} with {len(validation_data_loader)} total batches\n')

Train DS: Image Size: 984 with 62 total batches
Test  DS: Image Size: 308 with 20 total batches
Valid DS: Image Size: 247 with 16 total batches



## Validation Function

In [14]:
# Validation Method for the model
def validation(validation_data_loader, device, criterion, model):
    val_loss = 0
    correct_pixel = 0
    total_pixel = 0

    for images, masks in validation_data_loader:
        images = images.to(device)
        masks = masks.type(torch.LongTensor)
        masks = masks.reshape(masks.shape[0], masks.shape[2], masks.shape[3])
        masks = masks.to(device)
        
        outputs = model(images)
        val_loss += criterion(outputs, masks).item()

        _, predicted = torch.max(outputs.data, 1)
        correct_pixel += (predicted == masks).sum().item()
        
        b, h, w = masks.shape
        batch_total_pixel = b * h * w
        
        total_pixel += batch_total_pixel

    acc = correct_pixel/total_pixel
    return val_loss, acc

## TRAIN

### Network Model

In [15]:
model = UnetModel(number_of_classes, dropout_rate).to(device)

In [16]:
summary(model, input_size=(1, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             320
       BatchNorm2d-2         [-1, 32, 256, 256]              64
              ReLU-3         [-1, 32, 256, 256]               0
         Dropout2d-4         [-1, 32, 256, 256]               0
            Conv2d-5         [-1, 32, 256, 256]           9,248
       BatchNorm2d-6         [-1, 32, 256, 256]              64
              ReLU-7         [-1, 32, 256, 256]               0
         MaxPool2d-8         [-1, 32, 128, 128]               0
            Conv2d-9         [-1, 64, 128, 128]          18,496
      BatchNorm2d-10         [-1, 64, 128, 128]             128
             ReLU-11         [-1, 64, 128, 128]               0
        Dropout2d-12         [-1, 64, 128, 128]               0
           Conv2d-13         [-1, 64, 128, 128]          36,928
      BatchNorm2d-14         [-1, 64, 1

### Loss and Optimizer of the Model

In [17]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### Learning Rate Scheduler

In [18]:
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.2, patience=6, verbose=True) # Works better and easier
# scheduler = StepLR(optimizer, step_size=2, gamma=0.2)

### **Code**

In [19]:
# Training of the Model
total_steps = len(train_data_loader)
print(f"{epochs} Epochs & {total_steps} Total Steps per Epoch")

start_time = timer()
print(f'Training Started in {start_time} sec.')
model.train()

batch_step = 0
for epoch in range(epochs):
    for i, (images, masks) in enumerate(train_data_loader, 1):
        images = images.to(device)  # Sends to GPU
        masks = masks.type(torch.LongTensor)
        masks = masks.reshape(masks.shape[0], masks.shape[2], masks.shape[3])
        masks = masks.to(device)    # Sends to GPU

        # Forward pass
        predicts = model(images)
        loss = criterion(predicts, masks)

        # This doubles our batch size
        if batch_extender:
            if batch_step == 0:
                optimizer.zero_grad()
                loss.backward()
                batch_step = 1
            elif batch_step == 1:
                loss.backward()
                optimizer.step()
                batch_step = 0
        else:
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if i % int(total_steps/loss_print_per_epoch) == 0:
            if validation_on:
                acc = 0
                # Validation Part
                model.eval()
                with torch.no_grad():
                    validation_loss, validation_accuracy = validation(validation_data_loader, device, criterion, model)
                model.train()
            valstr = f'\tValid. Loss: {(validation_loss/len(validation_data_loader)):.4f}\tValid. Acc.: {validation_accuracy * 100:.3f}%' if validation_on else ''
            print(f'Epoch: {epoch + 1}/{epochs}\tSt: {i}/{total_steps}\tLast.Loss: {loss.item():4f}{valstr}')
    if scheduler_on:
        scheduler.step(acc) # -> ReduceLROnPlateau

print('Execution time:', '{:5.2f}'.format(timer() - start_time), 'seconds')

40 Epochs & 62 Total Steps per Epoch
Training Started in 2415.564549072 sec.
Epoch: 1/40	St: 62/62	Last.Loss: 0.239379	Valid. Loss: 0.2271	Valid. Acc.: 93.445%
Epoch: 2/40	St: 62/62	Last.Loss: 0.179410	Valid. Loss: 0.3607	Valid. Acc.: 86.899%
Epoch: 3/40	St: 62/62	Last.Loss: 0.135170	Valid. Loss: 0.1483	Valid. Acc.: 94.071%
Epoch: 4/40	St: 62/62	Last.Loss: 0.128391	Valid. Loss: 0.1702	Valid. Acc.: 93.152%
Epoch: 5/40	St: 62/62	Last.Loss: 0.106309	Valid. Loss: 0.1238	Valid. Acc.: 95.064%
Epoch: 6/40	St: 62/62	Last.Loss: 0.110958	Valid. Loss: 0.1233	Valid. Acc.: 95.125%
Epoch: 7/40	St: 62/62	Last.Loss: 0.101182	Valid. Loss: 0.1248	Valid. Acc.: 94.673%
Epoch: 8/40	St: 62/62	Last.Loss: 0.098810	Valid. Loss: 0.1442	Valid. Acc.: 94.191%
Epoch     8: reducing learning rate of group 0 to 2.0000e-03.
Epoch: 9/40	St: 62/62	Last.Loss: 0.080561	Valid. Loss: 0.1199	Valid. Acc.: 95.079%
Epoch: 10/40	St: 62/62	Last.Loss: 0.114220	Valid. Loss: 0.1022	Valid. Acc.: 95.833%
Epoch: 11/40	St: 62/62	Last.Lo

In [20]:
# Save the model
torch.save(model.state_dict(), trained_model_path)

In [21]:
# Restore the model from "model_check.pt"
model = UnetModel(number_of_classes, dropout_rate).to(device)

# Load to CPU. Later it can be moved to GPU as needed
model.load_state_dict(torch.load(trained_model_path, map_location=torch.device('cpu')))

<All keys matched successfully>

## TEST

In [22]:
all_forms = []
all_predictions = []
view_count = 0
# Test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():  # used for dropout layers
    correct_pixel = 0
    total_pixel = 0
    for images, masks in test_data_loader:
        images = images.to(device)
        masks = masks.type(torch.LongTensor)
        # delete color channel to compare directly with prediction
        masks = masks.reshape(masks.shape[0], masks.shape[2], masks.shape[3])
        masks = masks.to(device)

        predicts = model(images)
        _, predicted = torch.max(predicts.data, 1)
        correct_pixel += (predicted == masks).sum().item()

        b, h, w = masks.shape
        batch_total_pixel = b * h * w
        total_pixel += batch_total_pixel
        

        # if pre-set addes images to list
        if is_saving_output:
            af, ap = undo_preprocess(images, predicts)
            all_forms.extend(af)
            all_predictions.extend(ap)


        # To observe random batch prediction uncomment!
        if sample_view and view_count < 10 and random.random() > 0.5:
            view_count += 1
            images, masks = undo_preprocess(images, predicts)
            plt_images(images, masks)

    print(f"{correct_pixel} / {total_pixel}")
    print(f"Test Accuracy on the model with {len(test_data_loader) * batch_size} images: {100 * correct_pixel / total_pixel:.4f}%")


19571625 / 20185088
Test Accuracy on the model with 320 images: 96.9608%


## View

In [23]:
# Saves the output
if is_saving_output:
    save_predictions(np.array(all_forms), np.array(all_predictions))

# # Gets the images and their predicted masks in normalized
# images, masks = undo_preprocess(images, predicts)

# # Showing last batch as sample
# plt_images(images, masks)

# # Saves the last batch as sample as input and output images
# # save_output_batch(images, masks)

print("Program Finished!")

You can find predictions in '/content/drive/My Drive/PixelwiseCNN/output/prediction/'
Program Finished!


## Post Process

In [24]:
# %shell python boundingbox.py

'Boundingbox.py' Started!
100% 308/308 [00:02<00:00, 150.25it/s]
Program Finished!


