In [30]:
import tensorflow as tf
import cv2
import numpy as np
import pytesseract
import os
import pickle
from PIL import Image

In [2]:
img_height = 135
img_width = 155
batch_size = 32

In [81]:
train_ds = tf.keras.utils.image_dataset_from_directory(
  'data/train',
  image_size=(img_height, img_width),
  batch_size=batch_size)
class_names = train_ds.class_names
print(class_names)

Found 7557 files belonging to 16 classes.
['decimal', 'div', 'eight', 'equal', 'five', 'four', 'minus', 'multiply', 'nine', 'one', 'plus', 'seven', 'six', 'three', 'two', 'zero']


In [4]:
val_ds = tf.keras.utils.image_dataset_from_directory(
  'data/eval', 
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 1010 files belonging to 16 classes.


In [5]:
from tensorflow import keras
from tensorflow.keras import layers

In [6]:
num_classes = 16

In [7]:
model = keras.Sequential([
  keras.Input(shape=(img_height, img_width, 3)), 
  
  layers.Rescaling(1./255),

  # First convolutional block
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),

  # Second convolutional block
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),

  # Third convolutional block
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),

  # Flatten the results to feed into a Dense layer
  layers.Flatten(),

  # A standard fully-connected layer
  layers.Dense(128, activation='relu'),

  # Output layer
  layers.Dense(num_classes)
])

In [8]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [9]:
epochs = 10

history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs
)

Epoch 1/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 157ms/step - accuracy: 0.6496 - loss: 1.1734 - val_accuracy: 0.8327 - val_loss: 0.5932
Epoch 2/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 154ms/step - accuracy: 0.9067 - loss: 0.3114 - val_accuracy: 0.8960 - val_loss: 0.3595
Epoch 3/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 151ms/step - accuracy: 0.9559 - loss: 0.1502 - val_accuracy: 0.8980 - val_loss: 0.3864
Epoch 4/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 153ms/step - accuracy: 0.9735 - loss: 0.0784 - val_accuracy: 0.9149 - val_loss: 0.3298
Epoch 5/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 156ms/step - accuracy: 0.9845 - loss: 0.0527 - val_accuracy: 0.9059 - val_loss: 0.3854
Epoch 6/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 153ms/step - accuracy: 0.9847 - loss: 0.0435 - val_accuracy: 0.9228 - val_loss: 0.4410
Epoch 7/10

In [10]:
model.save('model.h5')



In [192]:
# -----------------------------
image_path = "input_sample.png"
output_folder = "characters"
os.makedirs(output_folder, exist_ok=True)
resize_dim = (img_width, img_height)

# -----------------------------
# Step 1: Load & preprocess
# -----------------------------
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if img is None:
    raise ValueError("Image not found. Check path.")

_, thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Remove small noise
kernel = np.ones((2,2), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# -----------------------------
# Step 2: Horizontal dilation
# -----------------------------
# Connect vertically separated parts like ÷ or +, so they don't split
hor_kernel = np.ones((1, 5), np.uint8)
thresh_hor = cv2.dilate(thresh, hor_kernel, iterations=1)

# -----------------------------
# Step 3: Find vertical cuts (only between digits)
# -----------------------------
vertical_sum = np.sum(thresh_hor, axis=0)  # sum along rows
threshold = 5  # adjust depending on image

cuts = []
start = None
for i, val in enumerate(vertical_sum):
    if val > threshold and start is None:
        start = i
    elif val <= threshold and start is not None:
        end = i
        if end - start > 2:
            cuts.append((start, end))
        start = None
if start is not None:
    cuts.append((start, len(vertical_sum)))

# -----------------------------
# Step 4: Crop & save each character
# -----------------------------
char_count = 0
for (x_start, x_end) in cuts:
    char_img = thresh[:, x_start:x_end]
    char_img = cv2.bitwise_not(char_img)
    
    # Maintain aspect ratio with padding
    h, w = char_img.shape
    
    # Create a square canvas with padding
    max_dim = max(h, w)
    square_img = np.ones((max_dim, max_dim), dtype=np.uint8) * 255  # white background
    
    # Center the character
    y_offset = (max_dim - h) // 2
    x_offset = (max_dim - w) // 2
    square_img[y_offset:y_offset+h, x_offset:x_offset+w] = char_img
    
    # Now resize the square to model dimensions
    char_img_resized = cv2.resize(square_img, (img_width, img_height))
    char_img_rgb = cv2.cvtColor(char_img_resized, cv2.COLOR_GRAY2RGB)
    
    char_count += 1
    cv2.imwrite(f"{output_folder}/char_{char_count}.png", char_img_rgb)

In [194]:
# Load the trained model
model = tf.keras.models.load_model('model.h5')

# Process each saved character
predictions = []
for i in range(1, char_count + 1):
    # Load image
    img = tf.keras.utils.load_img(
        f"{output_folder}/char_{i}.png",
        target_size=(img_height, img_width),
        color_mode='rgb'
    )
    img_array = tf.keras.utils.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # Create batch
    
    # Predict
    prediction = model.predict(img_array)
    predicted_class = class_names[np.argmax(prediction)]
    confidence = tf.nn.softmax(prediction[0])[np.argmax(prediction)]
    
    predictions.append(predicted_class)
    print(f"Character {i}: {predicted_class} (confidence: {confidence:.2f})")

symbol_map = {
    'five': '5', 'div': '÷', 'three': '3', 
    'plus': '+', 'minus': '-', 'multiply': '×',
    'equal': '=', 'zero': '0', 'one': '1',
    'two': '2', 'four': '4', 'six': '6',
    'seven': '7', 'eight': '8', 'nine': '9',
    'decimal': '.'
}

result = ' '.join([symbol_map.get(p, p) for p in predictions])
print(f"\nExpression: {result}")  



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
Character 1: nine (confidence: 1.00)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Character 2: multiply (confidence: 1.00)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
Character 3: eight (confidence: 1.00)

Expression: 9 × 8
