In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.ndimage import rotate
from numba import njit
from torch import optim

In [None]:
# Prefer the GPU when one is usable, otherwise fall back to the CPU so the
# notebook still runs end-to-end on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
# Locations of the extra "ENGLISH_CHARACTERS" dataset on the local D: drive:
# one CSV of metadata and one image folder per split.
ENGLISH_CHARACTERS_ROOT = 'D://ENGLISH_CHARACTERS'

train_csv_path = ENGLISH_CHARACTERS_ROOT + '//train_info.csv'
test_csv_path = ENGLISH_CHARACTERS_ROOT + '//test_info.csv'
train_images_folder = ENGLISH_CHARACTERS_ROOT + '//train'
test_images_folder = ENGLISH_CHARACTERS_ROOT + '//test'

In [None]:
EMNIST_ROOT = 'D://EMNIST'


def _sample_emnist(csv_name, n_rows, label_offset=0):
    """Read one header-less EMNIST CSV as uint8 and keep a random sample of rows.

    Column 0 holds the label; `label_offset` (when non-zero) is added to it.
    """
    frame = pd.read_csv(EMNIST_ROOT + '//' + csv_name, header=None, dtype=np.uint8).sample(n_rows)
    if label_offset:
        frame[0] += label_offset
    return frame


# Digits keep their labels; letter labels are shifted by 9.
# NOTE(review): presumably the letters CSV labels start at 1, so +9 places
# 'A' at 10 as in char_to_num -- confirm against the dataset.
digits_train_df = _sample_emnist('emnist-digits-train.csv', 30_000)
digits_test_df = _sample_emnist('emnist-digits-test.csv', 5_000)
letters_train_df = _sample_emnist('emnist-letters-train.csv', 30_000, label_offset=9)
letters_test_df = _sample_emnist('emnist-letters-test.csv', 5_000, label_offset=9)

In [None]:
# Class alphabet in label order: the ten digits followed by A-Z (36 classes).
_CLASS_ALPHABET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# character -> class index
char_to_num = {char: index for index, char in enumerate(_CLASS_ALPHABET)}

# class index -> character (inverse of char_to_num)
num_to_char = dict(enumerate(_CLASS_ALPHABET))

In [None]:
# Shape of one network input: (channels, height, width)
INPUT_SHAPE = (1, 28, 28)
# Number of output classes: 10 digits + 26 letters
num_classes = 36

In [None]:
# Concatenate train DataFrames.
# `extra_train_df` is not defined by any visible cell, so referencing it
# unconditionally raises NameError on a fresh kernel. It is still included
# when it exists (e.g. created by a separate cell) to keep prior behaviour.
train_frames = [digits_train_df, letters_train_df]
if 'extra_train_df' in globals():
    train_frames.append(extra_train_df)
train_df = pd.concat(train_frames, axis=0, ignore_index=True)

# Concatenate test DataFrames
test_df = pd.concat([digits_test_df, letters_test_df], axis=0, ignore_index=True)

In [None]:
def _images_and_labels(df):
    """Split a label+pixels frame into normalised images and labels.

    Column 0 is the label; the remaining columns are the flattened pixels of
    one image per row. Each image is standardised (zero mean, unit std),
    reshaped to (1, 28, 28) and has its two spatial axes swapped.

    Returns:
        (images, labels): float32 array of shape (N, 1, 28, 28) and uint8
        array of shape (N,).
    """
    pixels = df.iloc[:, 1:].to_numpy().astype('float32')
    mean = pixels.mean(axis=1)
    std = pixels.std(axis=1)
    # A constant image has std == 0; dividing would give 0/0 = NaN for every
    # pixel. Using 1 instead leaves such an image as all zeros.
    std[std == 0] = 1
    pixels = ((pixels.T - mean) / std).T
    # NOTE(review): the axis swap presumably undoes the transposed storage of
    # the EMNIST CSV images -- confirm against the data.
    images = pixels.reshape(pixels.shape[0], 1, 28, 28).transpose(0, 1, 3, 2)
    labels = df.iloc[:, 0].to_numpy().astype('uint8')
    return images, labels


# Process training data
x_train, y_train = _images_and_labels(train_df)

# Process test data
x_test, y_test = _images_and_labels(test_df)

In [None]:
@njit
def scale_matrix(matrix):
    """Min-max rescale `matrix` so its values span [0, 1].

    A constant input (max == min) is returned as an all-zero array of the
    same shape instead of dividing by zero.
    """
    lowest = np.min(matrix)
    span = np.max(matrix) - lowest

    if span == 0:
        return np.zeros_like(matrix)

    return (matrix - lowest) / span

In [None]:
# Rescale every image of both splits to [0, 1], writing the result back in place.
for images in (x_train, x_test):
    for i in range(len(images)):
        images[i] = scale_matrix(images[i])

In [None]:
@njit
def random_switch_pixels(matrix,p=0.02):
    """Return a copy of `matrix` with a fraction `p` of its pixels randomised.

    int(p * matrix.size) distinct positions are drawn without replacement and
    overwritten with values from np.random.rand. The input is not modified;
    the result has the same shape as the input.
    """
    pixel_count = matrix.size
    noisy_count = int(p * pixel_count)
    noisy_positions = np.random.choice(pixel_count, noisy_count, replace=False)
    # flatten() copies, so the caller's array stays untouched
    noisy = matrix.flatten()
    noisy[noisy_positions] = np.random.rand(noisy_count)
    return noisy.reshape(matrix.shape)

In [None]:
@njit
def one_hot_encode_array(indices, num_classes=36):
    """One-hot encode a sequence of class indices.

    Returns an int32 array of shape (num_classes, len(indices)): column k has
    a single 1 in row indices[k]. Callers that want one row per sample must
    transpose the result.
    """
    encoded = np.zeros((num_classes, len(indices)), dtype=np.int32)
    for column, class_index in enumerate(indices):
        encoded[class_index, column] = 1
    return encoded

In [None]:
@njit
def one_hot_decode(one_hot_vector):
    """Return the position of the largest entry of `one_hot_vector`."""
    return np.argmax(one_hot_vector)

In [None]:
def random_rotate_image(matrix, max_degrees):
    """Rotate a (1, n, n) image about its centre by a random angle.

    The angle is drawn uniformly from [-max_degrees, max_degrees] (one call to
    np.random.uniform). Every output pixel is mapped back through the rotation
    to a source position and filled by bilinear interpolation; output pixels
    whose source position falls outside the image are left at 0.

    Source positions lying exactly on the last row/column are valid samples
    (their interpolation weight towards the missing neighbour is 0) and are
    kept, so a 0-degree rotation returns the image unchanged.

    Args:
        matrix: array of shape (1, n, n).
        max_degrees: maximum absolute rotation angle in degrees.

    Returns:
        New array with the same shape and dtype as `matrix`.

    Raises:
        ValueError: if `matrix` does not have shape (1, n, n).
    """
    # Ensure the input matrix has the correct shape (1,n,n)
    if len(matrix.shape) != 3 or matrix.shape[0] != 1:
        raise ValueError("Input matrix must have shape (1,n,n)")

    # Random angle in radians
    angle = np.random.uniform(-max_degrees, max_degrees) * np.pi / 180
    cos_theta = np.cos(angle)
    sin_theta = np.sin(angle)

    n = matrix.shape[1]
    centre = (n - 1) / 2

    # Coordinates of every output pixel relative to the image centre
    # (x to the right, y upwards), computed for the whole grid at once.
    rows, cols = np.meshgrid(np.arange(n), np.arange(n), indexing='ij')
    x = cols - centre
    y = centre - rows

    # Rotate and translate back to (row, col) source coordinates
    x_rot = cos_theta * x - sin_theta * y
    y_rot = sin_theta * x + cos_theta * y
    j_rot = x_rot + centre
    i_rot = centre - y_rot

    # Inclusive upper bound: a source exactly on the last row/column is valid
    inside = (i_rot >= 0) & (i_rot <= n - 1) & (j_rot >= 0) & (j_rot <= n - 1)

    # Clip so that indexing is safe even for positions masked out below
    i_low = np.clip(np.floor(i_rot).astype(np.intp), 0, n - 1)
    j_low = np.clip(np.floor(j_rot).astype(np.intp), 0, n - 1)
    # On the last row/column the "high" neighbour does not exist; its weight
    # is 0 there, so reusing the last index does not change the result.
    i_high = np.minimum(i_low + 1, n - 1)
    j_high = np.minimum(j_low + 1, n - 1)

    wi = i_rot - i_low
    wj = j_rot - j_low

    image = matrix[0]
    blended = (
        (1 - wi) * (1 - wj) * image[i_low, j_low] + wi * (1 - wj) * image[i_high, j_low] +
        (1 - wi) * wj * image[i_low, j_high] + wi * wj * image[i_high, j_high]
    )

    rotated_matrix = np.zeros_like(matrix)
    rotated_matrix[0] = np.where(inside, blended, 0)
    return rotated_matrix

In [None]:
def get_closest_gray(value,gray_shades):
    """Return the entry of `gray_shades` nearest to `value` (first one on ties)."""
    nearest = np.argmin(np.abs(gray_shades - value))
    return gray_shades[nearest]


def clamp_grayscale_image(arr,gray_shades):
    """Snap every pixel of `arr` to the nearest value in `gray_shades`.

    Equivalent to looking up the closest shade pixel by pixel (the first shade
    wins on ties), but done with one broadcast instead of a Python loop.

    Args:
        arr: image array (2-D in the original use; any shape is accepted).
        gray_shades: 1-D array of allowed gray values.

    Returns:
        float32 array with the same shape as `arr`.
    """
    arr = np.asarray(arr)
    gray_shades = np.asarray(gray_shades)
    # distance of every pixel to every shade -> index of the closest shade
    nearest = np.abs(arr[..., np.newaxis] - gray_shades).argmin(axis=-1)
    return gray_shades[nearest].astype(np.float32)

def simplify_grayscale_image(matrix, num_levels=4, remove_outlier_percentage = 0.1, unique = False):
    """Quantise `matrix` to `num_levels` gray values taken from its own pixels.

    The pixel values are sorted, the lowest and highest
    `remove_outlier_percentage` fraction is dropped (so outlier pixels do not
    dictate the levels), optionally de-duplicated, and `num_levels` values are
    picked at evenly spaced positions of what remains. Every pixel is then
    snapped to the closest of those values via clamp_grayscale_image.

    Args:
        matrix: image array.
        num_levels: number of gray values to pick.
        remove_outlier_percentage: fraction (0..1) trimmed from each end of
            the sorted values; 0 or less disables trimming.
        unique: if True, pick levels among distinct values only.

    Raises:
        IndexError: if trimming leaves no values to pick levels from.
    """
    sorted_values = np.sort(matrix.flatten())
    if remove_outlier_percentage > 0:
        min_index = int(len(sorted_values) * remove_outlier_percentage)
        max_index = int(len(sorted_values) * (1-remove_outlier_percentage))
        sorted_values = sorted_values[min_index:max_index]
    if unique:
        sorted_values = np.unique(sorted_values)
    if len(sorted_values) == 0:
        # Same exception type as the out-of-bounds lookup this used to hit,
        # but with a message that names the cause.
        raise IndexError(
            "no pixel values left to choose gray levels from; "
            "lower remove_outlier_percentage or pass a non-empty matrix"
        )
    gray_indices = np.linspace(0,len(sorted_values)-1,num_levels,dtype=int)
    grays = sorted_values[gray_indices]
    return clamp_grayscale_image(matrix,grays)

def keep_highest(matrix):
    """Zero every pixel except those equal to the maximum, then rescale with scale_matrix."""
    peak_mask = matrix == np.max(matrix)
    return scale_matrix(matrix * peak_mask)

In [None]:
@njit
def random_shift_matrix(matrix, max_pixels):
    """Translate a (1, n, n) image by a random whole-pixel offset.

    dx and dy are drawn independently from [-max_pixels, max_pixels]. Output
    pixel (i, j) takes the value of input pixel (i + dy, j + dx); positions
    whose source lies outside the image stay 0.

    Raises:
        ValueError: if `matrix` does not have shape (1, n, n).
    """
    if len(matrix.shape) != 3 or matrix.shape[0] != 1:
        raise ValueError("Input matrix must have shape (1,n,n)")

    # Horizontal offset is drawn first, then the vertical one
    dx = np.random.randint(-max_pixels, max_pixels + 1)
    dy = np.random.randint(-max_pixels, max_pixels + 1)

    size = matrix.shape[1]
    # Starts as all zeros, so out-of-bounds targets need no explicit fill
    shifted = np.zeros_like(matrix)

    for row in range(size):
        src_row = row + dy
        if src_row < 0 or src_row >= size:
            continue
        for col in range(size):
            src_col = col + dx
            if 0 <= src_col < size:
                shifted[0, row, col] = matrix[0, src_row, src_col]

    return shifted

In [None]:
def distort_image(image):
    """Return a randomly augmented copy of a (1, n, n) image.

    Applied in order: randomise 5% of the pixels, rotate by up to 45 degrees,
    rescale with scale_matrix, shift by up to 3 pixels. The input is not
    modified.
    """
    noisy = random_switch_pixels(image.copy(), 0.05)
    rotated = random_rotate_image(noisy, 45)
    rescaled = scale_matrix(rotated)
    return random_shift_matrix(rescaled, 3)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Three stride-2 convolutions followed by a single linear classifier.

    Args:
        n_classes: number of output logits.
        input_shape: (channels, height, width) of one input image. Used for
            the first convolution's input channels and to size the linear
            layer. Defaults to the 1x28x28 images used in this notebook.

    Attributes:
        fc_input_size: number of flattened features fed into `fc`.
    """

    def __init__(self, n_classes=36, input_shape=(1, 28, 28)):
        super().__init__()
        self.input_shape = tuple(input_shape)
        self.conv1 = nn.Conv2d(in_channels=self.input_shape[0], out_channels=32, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1)
        self.flatten = nn.Flatten()

        # Both are filled in by _initialize_fc once the conv stack exists
        self.fc_input_size = None
        self.fc = None
        self._initialize_fc(n_classes)

    def _features(self, x):
        """Run the convolutional stack and flatten to (batch, features)."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return self.flatten(x)

    def _initialize_fc(self, n_classes):
        """Size and create the linear layer from a dummy forward pass."""
        # Only the output shape is needed, so skip building an autograd graph
        with torch.no_grad():
            sample_input = torch.zeros(1, *self.input_shape)
            self.fc_input_size = self._features(sample_input).shape[1]
        self.fc = nn.Linear(self.fc_input_size, n_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, n_classes)."""
        return self.fc(self._features(x))

In [None]:
# Build the network, then move its parameters to the selected device
model = CNN()
model = model.to(device)
# Uncomment to start from the checkpoint written by the training loop:
#model.load_state_dict(torch.load('./EMNIST_MODEL.pth',weights_only=True))

In [None]:
# Cross-entropy over the raw logits returned by the model
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, learning rate 0.001
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Loop-invariant settings, computed once instead of once per epoch
batch_size = 128
loops = len(x_train)//batch_size          # full mini-batches per epoch
# Log/checkpoint roughly 200 times per epoch. max(1, ...) keeps the modulo
# below from dividing by zero when there are fewer than 200 batches.
print_every = max(1, loops//200)

for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    # One shuffle per epoch; consecutive slices give disjoint mini-batches, so
    # each sample is used at most once per epoch without rebuilding an index
    # list on every step.
    shuffled_ids = np.random.permutation(len(x_train))
    print('running for ', loops, ' rounds')
    for i in range(loops):
        ids = shuffled_ids[i*batch_size:(i+1)*batch_size]
        inputs, labels = x_train[ids],y_train[ids]
        # Augment each image with probability 0.4
        inputs = np.array([distort_image(im) if np.random.random() < 0.4 else im for im in inputs])
        # one_hot_encode_array returns (classes, batch); the loss needs (batch, classes)
        labels = one_hot_encode_array(labels).transpose()
        inputs = torch.as_tensor(inputs, dtype=torch.float32).to(device)
        labels = torch.as_tensor(labels, dtype=torch.float32).to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if (i+1) % (print_every) == 0:    # every `print_every` mini-batches
            torch.save(model.state_dict(), './EMNIST_MODEL.pth')
            print(f'epoch : {epoch + 1}, mini_batch : [{i+1}/{loops}] ,  loss: {running_loss/print_every:.3f}')
            running_loss = 0.0

In [None]:
# 3 x 5 grid: 15 randomly chosen test images, each titled with the model's prediction
fig, axs = plt.subplots(3, 5, figsize=(15, 9))

for ax in axs.flat:  # row-major order over the grid
    idx = np.random.choice(range(len(x_test)))
    im = x_test[idx]
    label = y_test[idx]  # ground-truth class; fetched but not displayed

    # Show the single channel of the image without axes
    ax.imshow(im[0], cmap='gray')
    ax.axis('off')

    with torch.no_grad():
        # im already carries the channel axis; only the batch axis is added
        batch = torch.Tensor(im).to(device).unsqueeze(0)
        logits = model(batch).cpu().detach().numpy()

    # Highest logit -> class index -> character
    predicted_char = num_to_char[one_hot_decode(logits[0])]
    ax.set_title("pred : " + str(predicted_char))

# Keep titles and images from overlapping
plt.tight_layout()
plt.show()