In [1]:
import numpy as np
import pandas as pd
import os
from PIL import Image, ImageEnhance
import random

In [2]:
# Image dimensions
img_height, img_width = 45, 60  # Increase resolution

In [3]:
def augment_image(img):
    if random.random() < 0.5:
        img = img.rotate(random.uniform(-15, 15))  # Random rotation  
    if random.random() < 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)  # Horizontal flip  
    if random.random() < 0.5:
        enhancer = ImageEnhance.Contrast(img)
        img = enhancer.enhance(random.uniform(0.8, 1.2))  # Random contrast  
    if random.random() < 0.5:
        img = img.resize((int(img.width * random.uniform(0.9, 1.1)), 
                          int(img.height * random.uniform(0.9, 1.1))))
    if random.random() < 0.5:
        img = img.transform(img.size, Image.AFFINE, (1, 0, random.uniform(-2, 2), 0, 1, random.uniform(-2, 2)))  
    return img  

In [4]:
def load_csv(csv_path):
    df = pd.read_csv(csv_path)
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)  # Shuffle
    split = int(len(df) * 0.8)
    return df[:split], df[split:]

def load_images(dataframe, img_folder, img_height, img_width, augment=False):
    X_data, Y_data = [], []
    
    for _, row in dataframe.iterrows():
        img_path = os.path.join(img_folder, row["image"])
        try:
            img = Image.open(img_path).convert("L").resize((img_width, img_height), Image.LANCZOS)
            if augment:
                img = augment_image(img)
            
            img_array = np.array(img).flatten()
            
            if img_array.shape[0] != img_height * img_width:  # Check if resizing failed
                print(f"Skipping {img_path}: Unexpected shape {img_array.shape}")
                continue
            
            X_data.append(img_array)
            Y_data.append(row["label"])
        except Exception as e:
            print(f"Skipping {img_path} due to error: {e}")

    print(f"Final dataset size: {len(X_data)} images")  # Debugging

    X_data = np.array(X_data, dtype=np.float32)  # Ensure uniform dtype
    return X_data.T / 255.0, np.array(Y_data).reshape(1, -1)



In [5]:

def one_hot_encode(labels, num_classes):
    labels = np.array(labels, dtype=int)  # Ensure int type
    one_hot = np.zeros((num_classes, len(labels)))
    one_hot[labels, np.arange(len(labels))] = 1
    return one_hot


In [6]:
def initialize_parameters(layer_dims):
    np.random.seed(42)
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        parameters[f"W{l}"] = np.random.randn(layer_dims[l], layer_dims[l-1]) * np.sqrt(2 / layer_dims[l-1])
        parameters[f"b{l}"] = np.zeros((layer_dims[l], 1))
        parameters[f"gamma{l}"] = np.ones((layer_dims[l], 1))
        parameters[f"beta{l}"] = np.zeros((layer_dims[l], 1))
    return parameters


In [7]:
def softmax(Z):
    Z_shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_Z = np.exp(Z_shifted)
    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

def relu(Z):
    return np.maximum(0, Z)

def batch_norm(Z, gamma, beta):
    mu = np.mean(Z, axis=1, keepdims=True)
    sigma2 = np.var(Z, axis=1, keepdims=True) + 1e-8
    Z_norm = (Z - mu) / np.sqrt(sigma2)
    return gamma * Z_norm + beta, Z_norm  # Return normalized Z

def dropout(A, keep_prob):
    D = (np.random.rand(*A.shape) < keep_prob).astype(np.float32)
    A = A * D / keep_prob
    return A, D

def forward_propagation(X, parameters, keep_prob=0.8):
    caches = {"A0": X}
    L = len(parameters) // 4
    for l in range(1, L):  # Stops before the last layer
        Z = np.dot(parameters[f"W{l}"], caches[f"A{l-1}"]) + parameters[f"b{l}"]
        A = relu(Z)
        Z_bn, Z_norm = batch_norm(A, parameters[f"gamma{l}"], parameters[f"beta{l}"])
        A_dropout, D = dropout(Z_bn, keep_prob)  # Apply dropout  
        caches[f"Z{l}"], caches[f"A{l}"], caches[f"Z_norm{l}"], caches[f"D{l}"] = Z_bn, A_dropout, Z_norm, D  

    ZL = np.dot(parameters[f"W{L}"], caches[f"A{L-1}"]) + parameters[f"b{L}"]
    AL = softmax(ZL)
    caches[f"Z{L}"], caches[f"A{L}"] = ZL, AL
    return AL, caches


In [8]:
def compute_loss(A, Y, parameters, lambd=0.001):
    m = Y.shape[1]
    L = len(parameters) // 4
    cross_entropy_loss = -np.sum(Y * np.log(A + 1e-8)) / m
    L2_reg = lambd / (2 * m) * sum(np.sum(np.square(parameters[f"W{l}"])) for l in range(1, L+1))
    return cross_entropy_loss + L2_reg


In [9]:
def backward_propagation(Y, parameters, caches):
    grads = {}
    L = len(parameters) // 4
    m = Y.shape[1]
    
    dZL = caches[f"A{L}"] - Y
    grads[f"dW{L}"] = np.dot(dZL, caches[f"A{L-1}"].T) / m
    grads[f"db{L}"] = np.sum(dZL, axis=1, keepdims=True) / m

    for l in reversed(range(1, L)):  
        dA = np.dot(parameters[f"W{l+1}"].T, dZL)
        dZ = dA * (caches[f"Z{l}"] > 0)
        
        Z_norm = caches[f"Z_norm{l}"]
        grads[f"dgamma{l}"] = np.sum(dZ * Z_norm, axis=1, keepdims=True) / m
        grads[f"dbeta{l}"] = np.sum(dZ, axis=1, keepdims=True) / m

        grads[f"dW{l}"] = np.dot(dZ, caches[f"A{l-1}"].T) / m
        grads[f"db{l}"] = np.sum(dZ, axis=1, keepdims=True) / m

        dZL = dZ

    return grads



In [10]:
def update_parameters_adam(parameters, grads, v, s, t, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8, decay=0.01):
    L = len(parameters) // 4
    lr_t = lr / (1 + decay * t)
    
    for l in range(1, L + 1):
        for param in [f"W{l}", f"b{l}"]:
            v[param] = beta1 * v[param] + (1 - beta1) * grads[f"d{param}"]
            s[param] = beta2 * s[param] + (1 - beta2) * (grads[f"d{param}"] ** 2)
            v_corrected = v[param] / (1 - beta1 ** t)
            s_corrected = s[param] / (1 - beta2 ** t)
            parameters[param] -= lr_t * v_corrected / (np.sqrt(s_corrected) + epsilon)

    for l in range(1, L):  # Exclude last layer from batch norm updates
        for param in [f"gamma{l}", f"beta{l}"]:
            v[param] = beta1 * v[param] + (1 - beta1) * grads[f"d{param}"]
            s[param] = beta2 * s[param] + (1 - beta2) * (grads[f"d{param}"] ** 2)
            v_corrected = v[param] / (1 - beta1 ** t)
            s_corrected = s[param] / (1 - beta2 ** t)
            parameters[param] -= lr_t * v_corrected / (np.sqrt(s_corrected) + epsilon)

    return parameters, v, s



In [11]:
def create_minibatches(X, Y, batch_size):
    num_samples = X.shape[1]
    indices = np.random.permutation(num_samples)
    X_shuffled, Y_shuffled = X[:, indices], Y[:, indices]
    return [(X_shuffled[:, i:i+batch_size], Y_shuffled[:, i:i+batch_size]) for i in range(0, num_samples, batch_size)]



In [12]:
def train_model(X_train, Y_train, layer_dims, batch_size, epochs, lr=0.01):
    parameters = initialize_parameters(layer_dims)
    v = {key: np.zeros_like(val) for key, val in parameters.items()}
    s = {key: np.zeros_like(val) for key, val in parameters.items()}
    t = 0

    for epoch in range(epochs):
        minibatches = create_minibatches(X_train, Y_train, batch_size)
        total_loss = 0

        for X_batch, Y_batch in minibatches:
            t += 1
            AL, caches = forward_propagation(X_batch, parameters)
            loss = compute_loss(AL, Y_batch, parameters)
            grads = backward_propagation(Y_batch, parameters, caches)
            parameters, v, s = update_parameters_adam(parameters, grads, v, s, t, lr)

            total_loss += loss

        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss / len(minibatches):.4f}")

    return parameters


In [13]:
def predict(parameters, X):
    AL, _ = forward_propagation(X, parameters)
    return np.argmax(AL, axis=0)

def compute_accuracy(parameters, X, Y):
    predictions = predict(parameters, X)
    true_labels = np.argmax(Y, axis=0)
    return np.mean(predictions == true_labels) * 100


In [14]:
csv_path = "C:/Users/Bhanu Agrawal/Documents/GitHub/Handwriting-Recognition/English Handwritten Characters - Dhruvil Dave/english.csv"
img_folder = "C:/Users/Bhanu Agrawal/Documents/GitHub/Handwriting-Recognition/English Handwritten Characters - Dhruvil Dave/"


train_df, test_df = load_csv(csv_path)
train_X, train_Y_labels = load_images(train_df, img_folder, img_height, img_width, augment=False)
test_X, test_Y_labels = load_images(test_df, img_folder, img_height, img_width, augment=False)


Final dataset size: 2728 images
Final dataset size: 682 images


In [15]:
# Create label-to-index mapping
unique_labels = sorted(train_df["label"].unique())
label_to_index = {label: idx for idx, label in enumerate(unique_labels)}

# Convert labels
train_Y_labels = np.asarray([label_to_index[label] for label in train_df["label"]])
test_Y_labels = np.asarray([label_to_index[label] for label in test_df["label"]])

# One-hot encode
num_classes = len(unique_labels)
train_Y = one_hot_encode(train_Y_labels.flatten(), num_classes)
test_Y = one_hot_encode(test_Y_labels.flatten(), num_classes)

In [16]:
layer_dims = [train_X.shape[0], 64, 32, num_classes]
parameters = train_model(train_X, train_Y, layer_dims, batch_size=64, epochs=1000, lr=0.0001)
print(f"Test Accuracy: {compute_accuracy(parameters, test_X, test_Y):.2f}%")

Epoch 1/1000 - Loss: 5.2801
Epoch 2/1000 - Loss: 4.4394
Epoch 3/1000 - Loss: 4.1404
Epoch 4/1000 - Loss: 4.1343
Epoch 5/1000 - Loss: 4.1329
Epoch 6/1000 - Loss: 4.1368
Epoch 7/1000 - Loss: 4.1336
Epoch 8/1000 - Loss: 4.1325
Epoch 9/1000 - Loss: 4.1328
Epoch 10/1000 - Loss: 4.1319
Epoch 11/1000 - Loss: 4.1315
Epoch 12/1000 - Loss: 4.1307
Epoch 13/1000 - Loss: 4.1304
Epoch 14/1000 - Loss: 4.1338
Epoch 15/1000 - Loss: 4.1320
Epoch 16/1000 - Loss: 4.1293
Epoch 17/1000 - Loss: 4.1291
Epoch 18/1000 - Loss: 4.1298
Epoch 19/1000 - Loss: 4.1310
Epoch 20/1000 - Loss: 4.1312
Epoch 21/1000 - Loss: 4.1302
Epoch 22/1000 - Loss: 4.1306
Epoch 23/1000 - Loss: 4.1286
Epoch 24/1000 - Loss: 4.1305
Epoch 25/1000 - Loss: 4.1295
Epoch 26/1000 - Loss: 4.1308
Epoch 27/1000 - Loss: 4.1299
Epoch 28/1000 - Loss: 4.1290
Epoch 29/1000 - Loss: 4.1298
Epoch 30/1000 - Loss: 4.1300
Epoch 31/1000 - Loss: 4.1293
Epoch 32/1000 - Loss: 4.1306
Epoch 33/1000 - Loss: 4.1298
Epoch 34/1000 - Loss: 4.1306
Epoch 35/1000 - Loss: 4