# Handwritten Equation solver using CNN
### **Nitish M. Satheesh**

`Python version 3.9.0`

### Imports & stuff

In [7]:
import json
import os
import cv2
import numpy as np
from tensorflow.keras.applications.mobilenet import MobileNet, preprocess_input # type: ignore
from tensorflow.keras.models import Model # type: ignore
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from tensorflow.keras.utils import to_categorical # type: ignore
from tqdm import tqdm


In [8]:
# Path to dataset
BASE_PATH = './aida'


### Data Preprocessing
**Load and Parse JSON Annotations to extract the annotations**

In [9]:
def load_annotations(batch_num):
    json_path = os.path.join(BASE_PATH, f'batch_{batch_num}', 'JSON', f'kaggle_data_{batch_num}.json')
    with open(json_path, 'r') as file:
        data = json.load(file)
    print(f"  Loaded annotations 'JSON\\kaggle_data_{batch_num}.json'")
    return data


**Load Images**

Next, load the images from the `background_images` folder.

In [10]:
def load_images(batch_num):
    image_folder = os.path.join(BASE_PATH, f'batch_{batch_num}', 'background_images')
    images = []
    image_paths = []
    print(f"\n  Loading images from 'batch_{batch_num}\\background_images'", end="\n  ")
    for filename in tqdm(os.listdir(image_folder)):
        img_path = os.path.join(image_folder, filename)
        img = cv2.imread(img_path)
        if img is not None:
            images.append(img)
            image_paths.append(img_path)
    print(f"  Loaded {len(image_paths)} images.")
    return images, image_paths


**Preprocess Images**

Resize and normalize the images to fit the input requirements of MobileNet.

In [11]:
def preprocess_images(images, target_size=(224, 224)):
    processed_images = []
    print("\n  Preprocessing images", end="\n  ")
    for img in tqdm(images):
        img_resized = cv2.resize(img, target_size)
        img_preprocessed = preprocess_input(img_resized)
        processed_images.append(img_preprocessed)
    print("  Preprocessing images done.")
    return np.array(processed_images)


### Data Preparation

In [15]:
# def prepare_data(batch_nums):
#     all_images = []
#     all_labels = []
#     all_image_paths = []

#     for batch_num in batch_nums:
#         annotations = load_annotations(batch_num)
#         images, image_paths = load_images(batch_num)
        
#         for img, img_path in zip(images, image_paths):
#             img_filename = os.path.basename(img_path)
#             if img_filename in annotations:
#                 all_images.append(img)
#                 all_labels.append(annotations[img_filename]['label'])
#                 all_image_paths.append(img_path)

#     processed_images = preprocess_images(all_images)
#     return processed_images, all_labels, all_image_paths

def prepare_data(batch_nums):
    all_images = []
    all_labels = []
    all_image_paths = []

    for batch_num in batch_nums:
        print(f"\n[~] Processing batch {batch_num}")
        annotations = load_annotations(batch_num)
        images, image_paths = load_images(batch_num)
        
        print(f" [*] Found {len(images)} images and {len(annotations)} annotations")
        
        # Create a dictionary mapping filenames to annotations for faster lookup
        annotation_dict = {item['filename']: item for item in annotations}
        
        # Print first 5 image filenames and first 3 annotation filenames
        print(f"  Sample image filenames: {[os.path.basename(path) for path in image_paths[:3]]}")
        print(f"  Sample annotation filenames: {[item['filename'] for item in annotations[:3]]}")
        
        matched_count = 0
        unmatched_count = 0
        for img, img_path in tqdm(zip(images, image_paths)):
            img_filename = os.path.basename(img_path)
            if img_filename in annotation_dict:
                latex = annotation_dict[img_filename].get('latex')
                if latex:  # Check if latex is not None or empty string
                    all_images.append(img)
                    all_labels.append(latex)
                    all_image_paths.append(img_path)
                    matched_count += 1
                else:
                    unmatched_count += 1
                    print(f"[!] Image {img_filename} found in annotations but has no latex")
            else:
                unmatched_count += 1
                if unmatched_count <= 5:  # Print only first 5 unmatched files to avoid cluttering the output
                    print(f"[!] Image {img_filename} not found in annotations")
        
        print(f"  Matched {matched_count} images with annotations")
        print(f"  Unmatched {unmatched_count} images")

    print(f"[*] Total processed images: {len(all_images)}")
    print(f"[*] Total labels: {len(all_labels)}")
    print(f"[*] Sample labels: {all_labels[:5]}")  # Print first 5 labels for inspection

    if not all_labels:
        raise ValueError("[!] No valid labels found in the dataset")

    processed_images = preprocess_images(all_images)
    return processed_images, all_labels, all_image_paths


In [13]:
# FIXME: Change range to (1, 11). Using only batch 1 for now
BATCH_NUMS = range(1, 2)

# Prepare data
X, y, image_paths = prepare_data(BATCH_NUMS)

print()
print(f"[INFO] Shape of X: {X.shape}")
print(f"[INFO] Length of y: {len(y)}")

# Get unique labels and create a label-to-index mapping
unique_labels = list(set(y))
print(f"[INFO] Number of unique labels: {len(unique_labels)}")
print(f"[INFO] Sample unique labels: {unique_labels[:5]}")  # Print first 5 unique labels

label_to_index = {label: index for index, label in enumerate(unique_labels)}

# Convert string labels to indices
y_indices = [label_to_index[label] for label in y]

print(f"[INFO] Sample y_indices: {y_indices[:5]}")  # Print first 5 indices

# Convert to one-hot encoding
num_classes = len(unique_labels)
y_one_hot = to_categorical(y_indices, num_classes=num_classes)

[---] Processing batch 1
 [*] Loaded annotations 'JSON\kaggle_data_1.json'
 [---] Loading images from 'batch_1\background_images'


100%|██████████| 10000/10000 [01:36<00:00, 103.52it/s]


 [*] Loaded 10000 images.
 [*] Found 10000 images and 10000 annotations
 [*] Sample image filenames: ['00063690-954d-42e7-86eb-434d9416ead3.jpg', '00095b3c-cd87-4326-addc-9225a6042407.jpg', '000f3f60-c994-4df4-81ca-0c10e3feb67c.jpg']
 [*] Sample annotation filenames: ['bd85ee85-2549-4539-9cd9-122d0ea3dca2.jpg', 'e97b0b1f-08bf-4c2b-86cb-f264af2160df.jpg', '3c72e1a1-c1de-4d67-a0ad-9f7afb6cee01.jpg']


10000it [00:00, 90183.41it/s]


  [>] Matched 10000 images with annotations
  [>] Unmatched 0 images
[*] Total processed images: 10000
[*] Total labels: 10000
[*] Sample labels: ['\\lim_{v\\to3}\\frac{\\frac{d}{dv}\\left(e^{v}+-2\\sin{v}-9\\right)}{\\frac{d}{dv}\\left(v^{1}+1v^{9}+98v^{4}\\right)}', '\\lim_{c\\to\\frac{\\pi}{3}}\\frac{\\cos^{9}{c}+\\tan^{3}{c}}{3}', '\\lim_{a\\to\\infty}\\frac{9}{a^{-2}}\\left(\\frac{9}{a}+-9\\sin{\\frac{5}{a}}\\right)', '\\lim_{\\theta\\to\\pi/3}\\frac{9\\cos^{8}{\\theta}+8\\cos^{4}{\\theta}}{9}', '\\ln{w}=\\lim_{v\\to2^{+}}v\\left|\\ln{v}\\right|']
[---] Preprocessing images


100%|██████████| 10000/10000 [02:01<00:00, 81.97it/s]


[*] Preprocessing images done.
[INFO] Shape of X: (10000, 224, 224, 3)
[INFO] Length of y: 10000
[INFO] Number of unique labels: 9997
[INFO] Sample unique labels: ['\\lim_{t\\to7^{+}}\\frac{\\frac{d}{dt}\\left(0+-\\cos^{4}{t}\\right)}{\\frac{d}{dt}\\left(\\sin{t}+t\\cos^{6}{t}\\right)}', '\\lim_{a\\to3^{+}}\\frac{2}{a^{4}+-7a}', '\\lim_{s\\to8}\\frac{\\left(s-5\\right)\\left(s-4\\right)}{s-8}', '\\lim_{\\theta\\to0}\\frac{2}{\\pi^{9}\\left(\\sin^{8}{\\left(\\pi\\theta\\right)}+9\\tan^{2}{\\left(\\pi\\theta\\right)}\\right)}', '\\lim_{x\\to\\pi/6^{-}}5/5\\cos^{5}{x}\\left(2x+\\left(-5\\pi\\right)^{9}\\right)']
[INFO] Sample y_indices: [6651, 1569, 8530, 2843, 5141]


### Training MobileNet model


**Methods for defining, compiling and training the model**

In [None]:
def create_model(num_classes):
    base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

def train_model(model, X_train, y_train, epochs=10, batch_size=32):
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2)


**Implementation**

In [16]:
# Create and train the model
model = create_model(num_classes)
train_model(model, X, y_one_hot)

# Save the model
model.save('models/aida_mobilenet_model.keras')

# Save the label mapping
with open('label_mapping.json', 'w') as f:
    json.dump(label_to_index, f)

Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 2s/step - accuracy: 0.0000e+00 - loss: 9.3044 - val_accuracy: 0.0000e+00 - val_loss: 9.2343
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m484s[0m 2s/step - accuracy: 0.0306 - loss: 8.7639 - val_accuracy: 0.0000e+00 - val_loss: 10.5616
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m477s[0m 2s/step - accuracy: 0.4056 - loss: 4.5297 - val_accuracy: 0.0000e+00 - val_loss: 13.4030
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m475s[0m 2s/step - accuracy: 0.9510 - loss: 0.6906 - val_accuracy: 0.0000e+00 - val_loss: 15.1821
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m476s[0m 2s/step - accuracy: 0.9999 - loss: 0.0549 - val_accuracy: 0.0000e+00 - val_loss: 15.0671
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m490s[0m 2s/step - accuracy: 1.0000 - loss: 0.0180 - val_accuracy: 0.0000e+00 - val_los



### Evaluation

In [27]:
# FIXME: Change range to load the test data
BATCH_NUMS = range(2, 3)

# Prepare data
X_eval, y_eval, image_paths = prepare_data(BATCH_NUMS)

print()
print(f"[INFO] Shape of X: {X_eval.shape}")
print(f"[INFO] Length of y: {len(y_eval)}")

# Get unique labels and create a label-to-index mapping
unique_labels = list(set(y_eval))
print(f"[INFO] Number of unique labels: {len(unique_labels)}")
print(f"[INFO] Sample unique labels: {unique_labels[:5]}")  # Print first 5 unique labels

label_to_index = {label: index for index, label in enumerate(unique_labels)}

# Convert string labels to indices
y_eval_indices = [label_to_index[label] for label in y_eval]

print(f"[INFO] Sample y_indices: {y_eval_indices[:5]}")  # Print first 5 indices

# Convert to one-hot encoding
num_classes = len(unique_labels)
y_eval_one_hot = to_categorical(y_eval_indices, num_classes=num_classes)

[---] Processing batch 2
 [*] Loaded annotations 'JSON\kaggle_data_2.json'
 [---] Loading images from 'batch_2\background_images'


100%|██████████| 10000/10000 [02:31<00:00, 65.98it/s]


 [*] Loaded 10000 images.
 [*] Found 10000 images and 10000 annotations
 [*] Sample image filenames: ['00110449-640f-4e28-a238-f05e36278690.jpg', '001b0b6b-42c1-4516-86e6-367c4de17d6e.jpg', '002588d9-5f86-4b33-9038-4422e57a1c2a.jpg']
 [*] Sample annotation filenames: ['9f6d0918-cc25-4796-b01d-fbd71baf5fdf.jpg', '20cb4215-8c1b-4a7a-bbe9-c71f6fe73fa5.jpg', '51df991b-6a4e-444d-a460-874c68ece435.jpg']


10000it [00:00, 12078.37it/s]


  [>] Matched 10000 images with annotations
  [>] Unmatched 0 images
[*] Total processed images: 10000
[*] Total labels: 10000
[*] Sample labels: ['\\lim_{c\\to1^{+}}c^{\\sin{c}}', '\\lim_{t\\to\\infty}\\frac{\\log_{43}{t}}{\\log_{48}{8}}\\frac{\\log_{10}{7}}{\\log_{84}{t}}', '\\lim_{b\\to5^{-}}\\frac{b\\left(b-5\\right)}{\\left|b\\right|}', 'u\\lim_{h\\to1}\\frac{h^{1}+-5h}{h^{2}+-h+9}', '\\lim_{w\\to\\pi/2^{-}}\\frac{\\sin^{8}{w}\\left(2w+\\left(-7\\pi\\right)^{6}\\right)}{-2}']
[---] Preprocessing images


100%|██████████| 10000/10000 [02:32<00:00, 65.56it/s]


[*] Preprocessing images done.
[INFO] Shape of X: (10000, 224, 224, 3)
[INFO] Length of y: 10000
[INFO] Number of unique labels: 9998
[INFO] Sample unique labels: ['\\lim_{x\\to\\infty}\\frac{8}{e^{x}}', '\\lim_{v\\to\\infty}\\frac{\\log_{72}{v}}{\\log_{13}{6}}\\frac{\\log_{79}{2}}{\\log_{21}{v}}', '\\lim_{z\\to\\frac{\\pi}{3}^{-}}\\frac{\\sin^{2}{z}\\left(9z+\\left(-4\\pi\\right)^{3}\\right)}{-2}', '\\lim_{w\\to5^{+}}\\frac{\\frac{9}{w}}{-8\\cot{w}\\sin{w}}', '\\lim_{p\\to3}\\frac{e^{p}+-5\\sin{p}-2}{p^{3}+7p^{9}+11p^{6}}']
[INFO] Sample y_indices: [2612, 5852, 3077, 4481, 5768]


In [28]:
from sklearn.metrics import classification_report, accuracy_score

# Evaluate on the evaluation set
def evaluate_model(model, X_test, y_test, label_to_index):
    predictions = model.predict(X_test)
    predicted_indices = np.argmax(predictions, axis=1)
    true_indices = np.argmax(y_test, axis=1)
    
    # Convert indices back to labels
    index_to_label = {index: label for label, index in label_to_index.items()}
    predicted_labels = [index_to_label[idx] for idx in predicted_indices]
    true_labels = [index_to_label[idx] for idx in true_indices]
    
    # Accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)
    print(f"Accuracy: {accuracy * 100:.2f}%")
    
    # Classification report
    print(classification_report(true_labels, predicted_labels))


In [29]:
from sklearn.model_selection import train_test_split

# Split the data (e.g., 80% for training, 20% for evaluation)
# _, X_eval, _, y_eval = train_test_split(X, y_one_hot, test_size=0.05, random_state=0)

# Evaluate the model
evaluate_model(model, X_eval, y_eval_one_hot, label_to_index)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 385ms/step
Accuracy: 0.02%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                                                                                                                                                                                            precision    recall  f1-score   support

                                                                                                                                                             -2\lim_{c\to-\infty}\ln{\left|c^{9}+6\right|}       0.00      0.00      0.00         1
                                                                                                                                                             -2\lim_{t\to-\infty}\ln{\left|t^{2}+0\right|}       0.00      0.00      0.00         1
                                                                                                                                                             -2\lim_{w\to-\infty}\ln{\left|w^{0}+0\right|}       0.00      0.00      0.00         1
                       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


---

**Compile the Model**

In [None]:
# from tensorflow.keras.optimizers import Adam
# model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])


### Training the Model

**Prepare Data for Training**

Convert annotations to a format suitable for training (e.g., one-hot encoding for classification).

In [None]:
# from tensorflow.keras.utils import to_categorical

# # Assuming annotations contain class labels
# labels = [annotation['label'] for annotation in annotations]
# labels_one_hot = to_categorical(labels, num_classes=num_classes)


In [None]:
# train the model
# model.fit(processed_images, labels_one_hot, epochs=10, batch_size=32, validation_split=0.2)

In [None]:
# evaluate model
# model.evaluate(validation_images, validation_labels)