In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from tensorflow.keras import Input, Model
import os

2024-02-11 19:20:08.206102: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-11 19:20:08.257563: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [123]:
import cv2
from matplotlib import pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [65]:
def get_character_bounding_boxes(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255,
        cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    output = cv2.connectedComponentsWithStats(
        thresh, 8, cv2.CV_32S)
    (numLabels, labels, stats, centroids) = output
    new_stats = []
    added = [False for i in range(len(stats))]
    threshhold = .5
    for i in range(1,len(stats)):
        if added[i]:
            continue

        (xi, yi, wi, hi, ai) = stats[i]
        for j  in range(i+1,len(stats)):
            if added[j]:
                continue

            (xj, yj, wj, hj, aj) = stats[j]
            if xi<=xj:
                if xi+wi>=xj+wj:
                    hi = max(yi+hi,yj+hj)
                    yi = min(yi,yj)
                    hi = hi-yi
                    ai += aj
                    added[j] = True
                elif xi+wi>=xj:
                    o = xi+wi-xj
                    if o/wi>threshhold or o/wj>threshhold:
                        wi = xj+wj-xi
                        hi = max(yi+hi,yj+hj)
                        yi = min(yi,yj)
                        hi = hi-yi
                        ai += aj
                        added[j] = True

            if xj<xi:
                if xj+wj>=xi+wi:
                    xi = xj
                    wi = wj
                    hi = max(yi+hi,yj+hj)
                    yi = min(yi,yj)
                    hi = hi-yi
                    ai += aj
                    added[j] = True
                elif xj+wj>=xi:
                    o = xj+wj-xi
                    if o/wi>threshhold or o/wj>threshhold:
                        wi = xj+wj-xi
                        hi = max(yi+hi,yj+hj)
                        yi = min(yi,yj)
                        hi = hi-yi
                        ai += aj
                        added[j] = True

        new_stats.append([xi, yi, wi, hi, ai])
        added[i] = True

    new_stats = sorted(new_stats, key=lambda x: x[0])
    return new_stats

def save_symbol_as_image(location, name, img, bounding_box):
    (x, y, w, h, a) = bounding_box
    cropped = img[y:y+h, x:x+w]
    cropped = cv2.resize(cropped, (28, 28))
    cv2.imwrite(location + '/' + name + '.png', cropped)
    return cropped

In [79]:
def binarize_image(img, threshold=127):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Apply thresholding with background-foreground inversion
    binary_img = cv2.threshold(img, threshold, 1, cv2.THRESH_BINARY_INV)[1]

    return binary_img

In [167]:
def extract_and_split(dir_path):
    Path("./data/fine-tune processed").mkdir(parents=True, exist_ok=True)
    x_train = []
    y_train = []
    x_test = []
    y_test = []
    
    for root, directories, files in os.walk(dir_path):
        for subfolder in directories:
            class_name = subfolder
            if '.ipynb_checkpoints' in class_name:
                continue
            class_folder_location = f"./data/fine-tune processed/{class_name}"
            Path(class_folder_location).mkdir(parents=True, exist_ok=True)
            count = 0
            processed_img = []
            
            for img_file in os.listdir(os.path.join(root, subfolder)):
                if '.ipynb_checkpoints' in img_file or 'image-checkpoint' in img_file:
                    continue
                img_path = os.path.join(root, subfolder, img_file)
                im = cv2.imread(img_path)
                print(img_path)
                boxes = get_character_bounding_boxes(im)
                
                for box in boxes:
                    symbol_img = save_symbol_as_image(class_folder_location, class_name + '_' + str(count), im, box)
                    symbol_img = binarize_image(symbol_img)
                    processed_img.append(symbol_img)
                    count += 1

            train_imgs, test_imgs = train_test_split(processed_img, train_size=0.8)
            x_train += train_imgs
            y_train += [class_name for _ in range(len(train_imgs))]
            x_test += test_imgs
            y_test += [class_name for _ in range(len(test_imgs))]
                    
    return np.array(x_train), np.array(y_train), np.array(x_test), np.array(y_test)

In [168]:
x_train, y_train, x_test, y_test = extract_and_split('./fine-tune-dataset')

./data/fine-tune unprocessed/6/image5.jpg
./data/fine-tune unprocessed/6/image6.jpg
./data/fine-tune unprocessed/6/image3.jpg
./data/fine-tune unprocessed/6/image2.jpg
./data/fine-tune unprocessed/6/image0.jpg
./data/fine-tune unprocessed/6/image7.jpg
./data/fine-tune unprocessed/6/image4.jpg
./data/fine-tune unprocessed/6/image9.jpg
./data/fine-tune unprocessed/6/image1.jpg
./data/fine-tune unprocessed/6/image8.jpg
./data/fine-tune unprocessed/+/image5.jpg
./data/fine-tune unprocessed/+/image6.jpg
./data/fine-tune unprocessed/+/image3.jpg
./data/fine-tune unprocessed/+/image2.jpg
./data/fine-tune unprocessed/+/image0.jpg
./data/fine-tune unprocessed/+/image7.jpg
./data/fine-tune unprocessed/+/image4.jpg
./data/fine-tune unprocessed/+/image9.jpg
./data/fine-tune unprocessed/+/image1.jpg
./data/fine-tune unprocessed/+/image8.jpg
./data/fine-tune unprocessed/-/image5.jpg
./data/fine-tune unprocessed/-/image6.jpg
./data/fine-tune unprocessed/-/image3.jpg
./data/fine-tune unprocessed/-/ima

In [169]:
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

(2560, 28, 28, 1) (642, 28, 28, 1)
(2560,) (642,)


In [191]:
mapping = {'%': 0, '*': 1, '+': 2, '-': 3, '0': 4, '1': 5, '2': 6, '3': 7, '4': 8, '5': 9, '6': 10, '7': 11, '8': 12, '9': 13, '[': 14, ']': 15}
reverse_mapping = {0: '%', 1: '*', 2: '+', 3: '-', 4: '0', 5: '1', 6: '2', 7: '3', 8: '4', 9: '5', 10: '6', 11: '7', 12: '8', 13: '9', 14: '[', 15: ']'}

In [171]:
def custom_transform(x):
    transformed = [mapping[y] for y in x]
    return np.array(transformed)

In [172]:
y_train = custom_transform(y_train)
y_test = custom_transform(y_test)

In [173]:
y_train.shape

(2560,)

In [174]:
MATH_best = load_model('MATH.h5')

In [175]:
MATH_best.trainable = True

In [176]:
print("Number of layers in the base model: ", len(MATH_best.layers))

Number of layers in the base model:  9


In [177]:
fine_tune_at = 4
for layer in MATH_best.layers[:fine_tune_at]:
    layer.trainable = False

In [178]:
MATH_best.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
MATH_best.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 20)        520       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 20)        10020     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 20)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 14, 14, 50)        25050     
                                                                 
 conv2d_3 (Conv2D)           (None, 14, 14, 50)        62550     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 50)         0         
 2D)                                                    

In [179]:
hist = MATH_best.fit(x_train, y_train, epochs=20, validation_split=0.15, verbose=2, batch_size=256)

Epoch 1/20
9/9 - 2s - loss: 0.7839 - accuracy: 0.9347 - val_loss: 13.3131 - val_accuracy: 0.2786 - 2s/epoch - 246ms/step
Epoch 2/20
9/9 - 1s - loss: 0.0248 - accuracy: 0.9954 - val_loss: 16.8011 - val_accuracy: 0.1719 - 1s/epoch - 147ms/step
Epoch 3/20
9/9 - 2s - loss: 0.0059 - accuracy: 0.9982 - val_loss: 17.6239 - val_accuracy: 0.1719 - 2s/epoch - 168ms/step
Epoch 4/20
9/9 - 1s - loss: 0.0019 - accuracy: 0.9991 - val_loss: 17.7358 - val_accuracy: 0.1589 - 1s/epoch - 152ms/step
Epoch 5/20
9/9 - 1s - loss: 2.0818e-04 - accuracy: 1.0000 - val_loss: 17.8214 - val_accuracy: 0.1641 - 1s/epoch - 140ms/step
Epoch 6/20
9/9 - 1s - loss: 1.2476e-04 - accuracy: 1.0000 - val_loss: 17.8524 - val_accuracy: 0.1667 - 1s/epoch - 141ms/step
Epoch 7/20
9/9 - 1s - loss: 7.8355e-05 - accuracy: 1.0000 - val_loss: 17.8633 - val_accuracy: 0.1667 - 1s/epoch - 146ms/step
Epoch 8/20
9/9 - 1s - loss: 5.7267e-05 - accuracy: 1.0000 - val_loss: 17.8536 - val_accuracy: 0.1667 - 1s/epoch - 150ms/step
Epoch 9/20
9/9 -

In [180]:
loss_train,train_acc = MATH_best.evaluate(x_train, y_train)
loss_test,test_acc   = MATH_best.evaluate(x_test, y_test)
print('train acc', train_acc)
print('test acc', test_acc)

train acc 0.8746093511581421
test acc 0.8722741603851318


In [181]:
before_fine_tuning = load_model('MATH.h5')

In [182]:
loss_train,train_acc = before_fine_tuning.evaluate(x_train, y_train)
loss_test,test_acc   = before_fine_tuning.evaluate(x_test, y_test)
print('train acc', train_acc)
print('test acc', test_acc)

train acc 0.6968749761581421
test acc 0.6853582262992859


In [183]:
MATH_best.save("fine_tuned_model.keras")

In [184]:
Path('./data/fine-tune-complete').mkdir(parents=True, exist_ok=True)
with open('./data/fine-tune-complete/x_train.npy', 'wb+') as f:
    np.save(f, x_train)
with open('./data/fine-tune-complete/y_train.npy', 'wb+') as f:
    np.save(f, y_train)
with open('./data/fine-tune-complete/x_test.npy', 'wb+') as f:
    np.save(f, x_test)
with open('./data/fine-tune-complete/y_test.npy', 'wb+') as f:
    np.save(f, y_test)

In [201]:
MATH_best = load_model('./fine_tuned_model.keras')

In [202]:
images_dir = 'handwritten-full-test'
images = [f.path for f in os.scandir(images_dir)]
images

['handwritten-full-test/.ipynb_checkpoints',
 'handwritten-full-test/x+9.jpg',
 'handwritten-full-test/6_div_3.jpg',
 'handwritten-full-test/1+1=.jpg',
 'handwritten-full-test/8+4.jpg',
 'handwritten-full-test/1+1.jpg',
 'handwritten-full-test/3.3.jpg',
 'handwritten-full-test/2x2.jpg']

In [212]:
img = images[5]
img = cv2.imread(img)
bbs = get_character_bounding_boxes(img)

In [213]:
def format_input_img(img, bb):
    (x, y, w, h, a) = bb
    cropped = img[y:y+h, x:x+w]
    cropped = cv2.resize(cropped, (28, 28))
    return binarize_image(cropped)

In [214]:
count = 0
for bb in bbs:
    char = format_input_img(img, bb)
    char = char.reshape(-1, 28, 28, 1) 
    yh_test = MATH_best.predict(char).argmax(axis=1)
    print(reverse_mapping[yh_test[0]])

1
+
1
