# Handwritten Number Recognition Model (CNN)

## Importing Libraries

In [247]:
import numpy as np
import os
import cv2
import csv
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import easyocr
reader = easyocr.Reader(['en'])

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


## Importing Dataset

In [3]:
(x_train, y_train) , (x_test, y_test) = mnist.load_data()

In [4]:
print(np.shape(x_train), np.shape(y_train))
print(np.shape(x_test), np.shape(y_test))

(60000, 28, 28) (60000,)
(10000, 28, 28) (10000,)


60000 samples of 28x28 px in train \
10000 samples of 28x28 px in test

## Reshaping

We reshape it to 28,28,1 because Conv2D expects color value

In [5]:
x_train, y_train = x_train.reshape(60000, 28,28,1) , y_train.reshape(60000, 1)
x_test, y_test = x_test.reshape(10000, 28,28,1) , y_test.reshape(10000, 1)

In [6]:
print(np.shape(x_train), np.shape(y_train))
print(np.shape(x_test), np.shape(y_test))

(60000, 28, 28, 1) (60000, 1)
(10000, 28, 28, 1) (10000, 1)


## Normalization

In [7]:
x_train, x_test = x_train/255.0 , x_test/255.0

## Building Model

In [11]:
model = Sequential()

In [12]:
model.add(Conv2D(32, (3,3), activation = 'relu', input_shape = (28,28,1)))
model.add(MaxPooling2D(2,2))

model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(MaxPooling2D(2,2))

model.add(Flatten())
model.add(Dense(64, activation = 'relu'))
model.add(Dense(10, activation = 'softmax'))

In [16]:
model.compile(loss = 'sparse_categorical_crossentropy' , optimizer = 'adam' , metrics = ['accuracy'])

## Training

In [17]:
model.fit(x = x_train , y = y_train, validation_split = 0.2, epochs = 5, batch_size = 10000)

Epoch 1/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.9969 - loss: 0.0103 - val_accuracy: 0.9915 - val_loss: 0.0336
Epoch 2/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.9985 - loss: 0.0062 - val_accuracy: 0.9919 - val_loss: 0.0321
Epoch 3/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.9988 - loss: 0.0045 - val_accuracy: 0.9921 - val_loss: 0.0330
Epoch 4/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.9991 - loss: 0.0038 - val_accuracy: 0.9924 - val_loss: 0.0337
Epoch 5/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.9993 - loss: 0.0028 - val_accuracy: 0.9916 - val_loss: 0.0349


<keras.src.callbacks.history.History at 0x23229683f20>

In [18]:
model.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9903 - loss: 0.0323


[0.02434762381017208, 0.9927999973297119]

# OCR/ CNN function

## CNN

In [19]:
def hand_prediction(img_obj):
    image = img_obj
    def remove_borders(image, border_size=5):
        # Crop out the borders by a fixed size
        return image[border_size:-border_size, border_size:-border_size]
    # Remove borders from the entire image
    image = remove_borders(image, border_size=5)

    image = cv2. cvtColor(image, cv2. COLOR_BGR2GRAY)

    # Apply binary thresholding to the image
    _, thresh = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)
    
    # Find contours in the thresholded image
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Sort contours by their x position
    contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])
    
    digits = []

    global num
    num = 0

    for ctr in contours:
        num = num*10
        # Get bounding box for each contour
        x, y, w, h = cv2.boundingRect(ctr)
        
        # Add padding to the bounding box
        padding = 5  # Adjust padding as needed
        x = max(x - padding, 0)
        y = max(y - padding, 0)
        w = min(w + 2 * padding, image.shape[1] - x)
        h = min(h + 2 * padding, image.shape[0] - y)
        
        # Extract the digit using the bounding box
        digit = thresh[y:y+h, x:x+w]
        
        # Resize the digit to 28x28 pixels
        resized_digit = cv2.resize(digit, (28, 28), interpolation=cv2.INTER_AREA)
        
        # Normalize the digit
        digit = resized_digit / 255.0
        num = num + np.argmax(model.predict(digit.reshape(1,28,28,1)))
    return num

## OCR

In [187]:
def ocr_prediction(img_obj):
    try:
        results = reader.readtext(img_obj)
        result_set = set()
        if len(results) > 1:
            for x in results[0][1]:
                result_set.add(x)
            for x in results[1][1]:
                result_set.add(x)
            attention = False
            if  len( (results[0][1])+(results[1][1]) ) - len(result_set) > 5:
                attention = True
        if results:
            return results
        else:
            return 'ERR'
    except Exception as e:
        return 'ERR'

# Extracting Data

In [21]:
#!pip install pymupdf 
import pymupdf
import io
import os
from PIL import Image as PILImage
from matplotlib import pyplot as plt

In [292]:
# from google.colab.patches import cv2_imshow

In [242]:
def read_pdf(pdf_path):
    pdf_document = pymupdf.open(pdf_path)
    global rows_written
    rows_written = 0

    if not os.path.exists('outputs'):
        os.makedirs('outputs')
    
    
    #Clearing any past file
    with open('outputs/output.csv', mode='w') as csv_file:
        csv_writer = csv.writer(csv_file)
    
    with open('outputs/attention.txt', mode='w') as txt_file:
        txt_write = txt_file.write('')
   
    for page_number in range(len(pdf_document)):
        page = pdf_document.load_page(page_number)
        # To image (using matrix transformation)
        zoom = 2  # Adjust zoom level as needed
        mat = pymupdf.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat, alpha=False)

        # pixmap to PIL Image
        img_bytes = pix.tobytes("ppm")
        image = PILImage.open(io.BytesIO(img_bytes))

        image = np.array(image)
        get_table(image, page_number)

In [94]:
def get_table(image_name, page_no):
    image = image_name
    
    ## Filters
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)
    
    
    ## Contour detection
    contours, _ = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:1]
    table_contour = None
    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
    
        if len(approx) == 4:
            table_contour = approx
            break
    
    if table_contour is not None:
        # Get bounding box coordinates
        x, y, w, h = cv2.boundingRect(table_contour)
    
        # Crop the table region from the original image
        table_image = image[y:y+h, x:x+w]
        num_rows = 0
        extract_cells_to_csv(table_image, page_no)
    else:
        print("error")


In [240]:
def extract_cells_to_csv(table_image,  page_no, csv_filename='outputs/output.csv'):
    image = table_image
    
    # Determine number of rows using page height
    # height = 181 + 59*rows
    height = len(image)
    num_rows = round((height - 181) / 59)
    global rows_written
    
    # Parameters (adjust these values as needed)
    title_offset = 106 + (num_rows * 2)  # Offset to skip the title
    header_row_height = 75  # Height of the header row
    row_height = 57  # Height of each subsequent row
    
    # Column widths (adjust these values according to your table)
    col_names = ["SNo.","ID","NAME","DOB","INTERVIEW MARKS"]
    column_widths = [67, 73, 392 + (num_rows * 2), 175, 380]
    
    # Function to get the column boundaries
    def get_column_boundaries(column_widths):
        boundaries = [0]
        for width in column_widths:
            boundaries.append(boundaries[-1] + width)
        return boundaries
    
    column_boundaries = get_column_boundaries(column_widths)
    
    # Open a CSV file to write the results
    with open(csv_filename, mode='a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        
        # Extract header row
        header_row = image[title_offset:title_offset + header_row_height, :]
        header_cells = []
        
        # Process each cell of the header row
        if page_no == 0:
            for col in range(0,5):
                cell = header_row[:, column_boundaries[col]:column_boundaries[col + 1]]
                cell_image = np.array(cell)
                header_cells.append(ocr_prediction(cell_image)[0][1])
            
            # Write the header row to the CSV
            csv_writer.writerow(header_cells)
        
        # Extract subsequent rows
        for row in range(1, num_rows + 1):
            row_start = title_offset + header_row_height + (row - 1) * row_height
            row_end = row_start + row_height
            table_row = image[row_start:row_end, :]
            row_cells = []
            
            # Process each cell of the row
            row_cells.append(f'{row + rows_written}')
            for col in range(1,5):
                cell = table_row[:, column_boundaries[col]:column_boundaries[col + 1]]
                cell_image = np.array(cell)
                if col < 4:
                   row_cells.append(ocr_prediction(cell_image)[0][1])
                   if len(ocr_prediction(cell_image)) > 1:
                       txt_file = open("outputs/attention.txt", "a", newline="\n")
                       txt_file.write(f"Attention needed at: SNo. {rows_written + row} , {col_names[col]} \n")
                if col == 4:
                    row_cells.append(hand_prediction(cell_image))
            
            # Write the row to the CSV
            csv_writer.writerow(row_cells)
            
    print(f"Written {num_rows} row(s) to '{csv_filename}'.")
    rows_written += num_rows

In [248]:
read_pdf("demo1.pdf")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Written 6 row(s) to 'outputs/output.csv'.
[1m1/1[0m [32m━━━━━