In [None]:
!python --version
!pip --version

In [None]:
import cv2
import random
import numpy as np
import pandas as pd
from PIL import Image
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

import torch
import torchvision
import torchmetrics
import torch.nn as nn
import torch.nn.functional as F
from torchvision.utils import make_grid
import torchvision.transforms as transforms
from torch.utils.data import random_split, dataloader, TensorDataset

In [None]:
print(torch.__version__)
print(torchvision.__version__)

In [None]:
def get_default_device():
    """Return the preferred torch device: MPS if available, else CUDA, else CPU."""
    # Backends are probed in priority order; the first available one wins.
    if torch.backends.mps.is_available():
        name = 'mps'
    elif torch.cuda.is_available():
        name = 'cuda'
    else:
        name = 'cpu'
    return torch.device(name)

In [None]:
# Seed every RNG this notebook draws from so a fresh "Restart & Run All" is
# repeatable: torch, Python's `random` (used by the sample pickers) and NumPy
# (pandas' DataFrame.sample falls back to NumPy's global RNG when no
# random_state is given).
SEED = 47
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)
device = get_default_device()
device

In [None]:
# Notebook-wide configuration constants.
DATASET_PATH = 'ocr_data/data.csv'  # CSV loaded into `df` with pd.read_csv
INPUT_SIZE = 1024  # flattened image length; rows are later reshaped to 32 x 32 x 1
OUTPUT_SIZE = 46  # number of classes handed to OcrModel
EPOCHS = 60
# The checkpoint name embeds today's date, so it changes from one day to the next.
MODEL_PATH = f'ocr_trained_model_{datetime.now().date()}.pt'
LR = 0.0015
BATCH_SIZE = 128
# NOTE(review): this only creates a Python variable; it does not set the
# PYTORCH_ENABLE_MPS_FALLBACK environment variable. If the MPS CPU fallback is
# wanted, it presumably has to be set in the process environment before torch
# is imported — confirm.
PYTORCH_ENABLE_MPS_FALLBACK = 1
print(MODEL_PATH)

In [None]:
df = pd.read_csv(DATASET_PATH)
df.head(10)

In [None]:
df.shape

In [None]:
df.character.value_counts()

In [None]:
# Shuffle the rows with a fixed seed: the train/test split below is purely
# positional (first 80000 rows vs. the rest), so an unseeded shuffle would put
# different rows in each split on every fresh run. torch.manual_seed does not
# affect pandas.
df = df.sample(frac=1, random_state=47)
df.tail()

In [None]:
# class_names = np.unique(df.iloc[:, -1].values)
# class_names

In [None]:
# The first 80000 (shuffled) rows are the labelled pool: every column except
# the last is a feature, the last column is the label.
X_df, Y_df = df.iloc[:80000, :-1], df.iloc[:80000, -1]
print(f'X_df shape => {X_df.shape} | Y_df shape => {Y_df.shape}')

In [None]:
X_df.values[0].shape

In [None]:
# Turn each flat feature row into a 32 x 32 single-channel image: (N, 32, 32, 1).
X_numpy = X_df.to_numpy().reshape((len(X_df), 32, 32, 1))
X_numpy.shape

In [None]:
plt.imshow(X_numpy[10].squeeze(), cmap='gray')
plt.axis('off')

In [None]:
def show_examples(data):
    """Plot a 5 x 5 grid of images picked at random from ``data``.

    Parameters
    ----------
    data : indexable sequence of 2-D images
        ``data[i]`` is passed straight to ``imshow``; ``len(data)`` bounds the
        random index.
    """
    fig = plt.figure(figsize=(20, 10))
    for i in range(25):
        ax = fig.add_subplot(5, 5, i+1)
        # randrange excludes its upper bound and is sized from `data`, so the
        # index is always valid. The previous randint(0, 80000) was inclusive
        # and hard-coded, and could index one past the end.
        ax.imshow(data[random.randrange(len(data))], cmap='gray')
        ax.axis('off')

In [None]:
show_examples(X_numpy.squeeze())

In [None]:
label_encoder = LabelEncoder()
Y_numpy = label_encoder.fit_transform(Y_df)
class_names = list(label_encoder.classes_)
class_names, len(class_names)

In [None]:
np.unique(Y_numpy)

In [None]:
X_torch = torch.tensor(X_numpy, dtype=torch.float32)
Y_torch = torch.LongTensor(Y_numpy)

In [None]:
X_torch.shape, Y_torch.shape

In [None]:
dataset = TensorDataset(X_torch, Y_torch)
len(dataset)

In [None]:
val_size = 10000
train_size = len(dataset) - val_size

train_ds, val_ds = random_split(dataset, [train_size, val_size])
len(train_ds), len(val_ds)

In [None]:
# Rows from position 80000 onward are the hold-out set; the label column is
# dropped and each row becomes a 32 x 32 x 1 float32 image tensor.
test_df = df.iloc[80000:, :-1]
test_numpy = test_df.to_numpy().reshape((len(test_df), 32, 32, 1))
test_torch = torch.tensor(test_numpy, dtype=torch.float32)
test_torch.shape

In [None]:
# Training batches are reshuffled every epoch.
train_dl = dataloader.DataLoader(
    train_ds,
    BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

# Validation is evaluated under no_grad, so a larger batch is used. The order
# is fixed: shuffling gains nothing for evaluation and made the "first batch"
# used for the confusion matrix change from run to run.
val_dl = dataloader.DataLoader(
    val_ds,
    BATCH_SIZE*2,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# Hold-out images without labels. The order is preserved so that predictions
# line up with the rows of `test_torch`.
test_dl = dataloader.DataLoader(
    test_torch,
    BATCH_SIZE*4,
    shuffle=False,
    num_workers=2,
)

In [None]:
for images, labels in train_dl:
    print(f'Shape of Images: {images.shape}')
    print(f'Shape of Labels: {labels.shape}')
    break

In [None]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose arg-max over dim 1 equals ``labels``.

    The result is a 0-d tensor built from a Python float.
    """
    predicted = torch.max(outputs, dim=1).indices
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))

In [None]:
class OcrModel(nn.Module):
    """Fully connected classifier over flattened images.

    Layers: input_size -> 512 -> 256 -> 128 -> 64 -> output_size, with ReLU and
    dropout (p=0.1) after every hidden layer. ``forward`` returns
    log-probabilities (log-softmax over dim 1).
    """

    def __init__(self, input_size, output_size):
        """Build the layers.

        input_size: number of features per sample after flattening.
        output_size: number of classes (width of the final layer).
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, 512)
        self.linear2 = nn.Linear(512, 256)
        self.linear3 = nn.Linear(256, 128)
        self.linear4 = nn.Linear(128, 64)
        # The head is sized from the argument; it used to be hard-coded to 46,
        # silently ignoring `output_size`.
        self.linear5 = nn.Linear(64, output_size)
        self.dropout = nn.Dropout(0.1)
        self.log_softmax = F.log_softmax
        # forward() already emits log-probabilities, so NLLLoss is the matching
        # criterion. CrossEntropyLoss would apply log-softmax a second time
        # (mathematically a no-op on log-probabilities, but redundant).
        self.loss = nn.NLLLoss()

    def forward(self, xb):
        """Flatten each sample and return per-class log-probabilities."""
        xb = xb.view(xb.size(0), -1)
        x = self.dropout(F.relu(self.linear1(xb)))
        x = self.dropout(F.relu(self.linear2(x)))
        x = self.dropout(F.relu(self.linear3(x)))
        x = self.dropout(F.relu(self.linear4(x)))
        return self.log_softmax(self.linear5(x), dim=1)

    def training_step(self, batch):
        """Switch to train mode and return the loss for one (images, labels) batch."""
        self.train()
        images, labels = batch
        out = self(images)
        return self.loss(out, labels)

    def validation_step(self, batch):
        """Return loss and accuracy for one batch, in eval mode and without gradients."""
        with torch.no_grad():
            self.eval()
            images, labels = batch
            out = self(images)
            # Same criterion as training_step so the two losses are comparable.
            loss = self.loss(out, labels)
            acc = accuracy(out, labels)
            return {'valid_loss': loss, 'valid_accuracy': acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses and accuracies into plain floats."""
        batch_losses = [x['valid_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['valid_accuracy'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()
        return {
            'val_loss': epoch_loss.item(),
            'val_acc': epoch_acc.item()
        }

    def evaluate(self, val_loader):
        """Run validation_step over every batch of ``val_loader`` and aggregate."""
        outputs = [
            self.validation_step(batch) for batch in val_loader
        ]
        return self.validation_epoch_end(outputs)

    def epoch_end(self, epoch, result):
        """Print the validation metrics for ``epoch``."""
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))

In [None]:
model = OcrModel(INPUT_SIZE, OUTPUT_SIZE)
model

In [None]:
for param in model.parameters():
    print(param.shape)

In [None]:
def to_device(data, device):
    """Move a tensor, or every element of a (possibly nested) list/tuple, to ``device``.

    Containers are rebuilt as lists, whether they arrived as lists or tuples.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [None]:
class DeviceDataLoader():
    """Wrap a data loader so that every batch is passed through ``to_device`` when iterated."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches reported by the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield the wrapped loader's batches, each moved to ``self.device``."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [None]:
train_device_loader = DeviceDataLoader(train_dl, device)
val_device_loader = DeviceDataLoader(val_dl, device)
test_device_loader = DeviceDataLoader(test_dl, device)

In [None]:
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.Adam):
    """Train ``model`` and validate it after every epoch.

    Parameters
    ----------
    epochs : number of passes over ``train_loader``.
    lr : learning rate handed to ``opt_func``.
    model : object providing ``parameters()``, ``training_step(batch)``,
        ``evaluate(loader)`` and ``epoch_end(epoch, result)``.
    train_loader : iterable of training batches.
    val_loader : iterable of validation batches, evaluated once per epoch.
    opt_func : optimizer constructor called as ``opt_func(params, lr)``.

    Returns
    -------
    list with one ``model.evaluate`` result per epoch.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            # Clear gradients so they do not accumulate into the next batch.
            optimizer.zero_grad()
        # Validate on the loader that was passed in; this previously read the
        # global `val_device_loader` and ignored the `val_loader` argument.
        result = model.evaluate(val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [None]:
model = OcrModel(INPUT_SIZE, OUTPUT_SIZE)
model.to(device, non_blocking=True)

In [None]:
history = model.evaluate(val_device_loader)
print(history)

In [None]:
history = fit(EPOCHS, LR, model, train_device_loader, val_device_loader)

In [None]:
torch.save(model.state_dict(), MODEL_PATH)

In [None]:
# Rebuild the architecture and restore the saved weights. map_location='cpu'
# lets the checkpoint load even if it was written from a device that is not
# available in this session; .to(device) then places the model where it runs.
trained_model = OcrModel(INPUT_SIZE, OUTPUT_SIZE)
trained_model.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))
trained_model.to(device)

In [None]:
def test(image, model, class_names):
    """Classify one image, print the predicted label and plot the image.

    Parameters
    ----------
    image : tensor holding a single sample (called here with shape (1, 32, 32, 1)).
    model : module whose output has one score per class along dim 1.
    class_names : sequence mapping a class index to its label.
    """
    model.eval()
    # Run on the device the weights live on; feeding a CPU tensor to a model
    # on an accelerator raises a device-mismatch error.
    model_device = next(model.parameters()).device
    with torch.no_grad():
        outputs = model(image.to(model_device))
    pred_idx = torch.max(outputs, dim=1).indices
    label = class_names[pred_idx.item()]
    print(f'predicted class label is: {label}')
    # Plot the image that was actually classified; this previously read the
    # globals `test_numpy[rand_idx]` instead of the argument.
    plt.imshow(image.detach().cpu().numpy().squeeze(), cmap='gray')
    plt.title(label)
    plt.axis('off')

In [None]:
# randrange excludes its upper bound and is sized from the tensor itself, so
# the index is always valid. randint(0, 12000) was inclusive and hard-coded.
rand_idx = random.randrange(len(test_torch))
test(test_torch[rand_idx].view(1, 32, 32, 1), trained_model, class_names)

In [None]:
def test_scores(model, val_dl):
    preds = true_labels = None
    for images, labels in val_dl:
        with torch.no_grad():
            model.eval()
            preds = model(images)
            true_labels = labels
            break
    return preds, true_labels

In [None]:
# Confusion matrix over the first validation batch (test_scores returns one
# batch only). The class count is read from the width of the predictions
# rather than repeating the literal 46.
preds, true_labels = test_scores(trained_model, val_device_loader)
conf_mat = torchmetrics.ConfusionMatrix(task='multiclass', num_classes=preds.shape[1])
# Raise the print threshold so the whole matrix is shown instead of a summary.
torch.set_printoptions(threshold=10_000)
conf_mat(preds.cpu(), true_labels.cpu())

In [None]:
def load_test_image(img_path, model, class_names):
    """Read an image file, classify it with ``model`` and plot it with the prediction.

    The image is converted to grayscale and resized to 32 x 32 before being
    fed to the model as a (1, 32, 32, 1) float32 tensor.

    Parameters
    ----------
    img_path : path of the image file to read.
    model : module whose output has one score per class along dim 1.
    class_names : sequence mapping a class index to its label.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``img_path``.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with a clear message.
        raise FileNotFoundError(f'could not read image: {img_path!r}')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_res = cv2.resize(gray, dsize=(32, 32), interpolation=cv2.INTER_CUBIC)
    gray_res = gray_res.reshape(1, 32, 32, 1)
    model.eval()
    # Create the input on the model's device to avoid a device mismatch.
    model_device = next(model.parameters()).device
    batch = torch.tensor(gray_res, dtype=torch.float32, device=model_device)
    with torch.no_grad():
        outputs = model(batch)
    pred_idx = torch.max(outputs, dim=1).indices
    label = class_names[pred_idx.item()]
    print(f'predicted class label is: {label}')
    plt.imshow(gray_res.reshape(32, 32), cmap='gray')
    plt.title(label)
    plt.axis('off')

In [None]:
# load_test_image('test_images/', trained_model, class_names)