<a href="https://colab.research.google.com/github/mayanksreal/marks-evaluation/blob/main/copy_of_notebook_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Handwritten Number Recognition Model (CNN)

## Importing Libraries

In [None]:
!pip install easyocr
!pip install pymupdf
import io
import PIL
import numpy as np
import os
import cv2
import csv

from tensorflow.keras.preprocessing import image #type:ignore
from tensorflow.keras.preprocessing.image import ImageDataGenerator #type:ignore
from tensorflow.keras.datasets import mnist #type:ignore
from tensorflow.keras.models import Sequential #type:ignore
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input #type:ignore
from tensorflow.keras.callbacks import EarlyStopping #type:ignore
from tensorflow.keras.models import load_model #type:ignore

from google.colab.patches import cv2_imshow #type:ignore
import easyocr
reader = easyocr.Reader(['en'])
import pymupdf

Collecting easyocr
  Downloading easyocr-1.7.1-py3-none-any.whl.metadata (11 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.6 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->easyocr)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->easyocr)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->easyocr)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB



Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |██████████████████████████████████████████████████| 100.0% Complete

## Importing Dataset

In [None]:
# Load the MNIST digit images and labels, already split into train and test sets
# (Keras downloads the archive on first use).
(x_train, y_train) , (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Sanity check: array shapes of the train and test splits as loaded.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(60000, 28, 28) (60000,)
(10000, 28, 28) (10000,)


60000 samples of 28x28 px in train \
10000 samples of 28x28 px in test

## Reshaping

We reshape each image to `(28, 28, 1)` because `Conv2D` expects an explicit channel dimension; grayscale images have a single channel.

In [None]:
# Add a trailing channel axis to the images and make the labels column vectors.
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000,
# so the cell also works on a subset of the data.
x_train, y_train = x_train.reshape(-1, 28, 28, 1), y_train.reshape(-1, 1)
x_test, y_test = x_test.reshape(-1, 28, 28, 1), y_test.reshape(-1, 1)

In [None]:
# Sanity check: shapes after adding the channel axis / label column.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(60000, 28, 28, 1) (60000, 1)
(10000, 28, 28, 1) (10000, 1)


## Normalization

In [None]:
# Divide the pixel values by 255 so the network sees small floating-point inputs.
# NOTE: the arrays are overwritten, so re-running this cell scales them again.
x_train = x_train / 255.0
x_test = x_test / 255.0

## Building Model / Train

In [None]:
# Reuse the saved model when it can be loaded; otherwise build, train and save a new CNN.
try:
    model = load_model('models/model.keras')
except Exception as load_error:
    # `except Exception` (not a bare `except`) so a kernel interrupt during
    # loading is not mistaken for "no saved model" and does not start training.
    print(f"Could not load 'models/model.keras' ({load_error}); training a new model.")

    model = Sequential()

    # Two convolution + max-pooling stages on the 28x28x1 input ...
    model.add(Input(shape=(28,28,1)))
    model.add(Conv2D(32, (3,3), activation = 'relu'))
    model.add(MaxPooling2D(2,2))

    model.add(Conv2D(64, (3,3), activation = 'relu'))
    model.add(MaxPooling2D(2,2))

    # ... followed by a small dense classifier over the 10 digit classes.
    model.add(Flatten())
    model.add(Dense(64, activation = 'relu'))
    model.add(Dense(10, activation = 'softmax'))

    # Labels are integer class ids, hence the sparse cross-entropy loss.
    model.compile(loss = 'sparse_categorical_crossentropy' , optimizer = 'adam' , metrics = ['accuracy'])
    # Stop once validation loss has not improved for 3 epochs and keep the best weights.
    early = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    model.fit(x = x_train , y = y_train, validation_split = 0.2, epochs = 20, batch_size = 1000, callbacks = [early])

    # The target directory must exist before saving, otherwise the save fails.
    os.makedirs('models', exist_ok=True)
    model.save('models/model.keras')

Epoch 1/20
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 1s/step - accuracy: 0.5985 - loss: 1.4306 - val_accuracy: 0.9299 - val_loss: 0.2481
Epoch 2/20
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 866ms/step - accuracy: 0.9368 - loss: 0.2213 - val_accuracy: 0.9648 - val_loss: 0.1258
Epoch 3/20
[1m39/48[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m7s[0m 814ms/step - accuracy: 0.9640 - loss: 0.1235

KeyboardInterrupt: 

In [None]:
# Evaluate the model on the test split; the cell displays what `evaluate` returns.
model.evaluate(x=x_test, y=y_test)

# OCR / CNN functions

## CNN

In [None]:
def hand_prediction(img_obj):
    """Read a handwritten number from a cell image, digit by digit, with the CNN.

    The cell is cropped by a fixed 5 px margin, converted to grayscale and
    inverse-thresholded at 128. Every external contour, taken left to right,
    is treated as one digit: it is cut out with 5 px of padding, resized to
    28x28, scaled to [0, 1] and classified by the module-level ``model``.

    When the highest class probability is below 0.95 the digit is displayed
    with ``cv2_imshow``, the user is asked for the correct digit, and ``model``
    is fitted on that single sample and re-saved to 'models/model.keras'.

    Args:
        img_obj: Cell image as a 3-channel array accepted by
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        A one-element list holding the decoded number as a Python ``int``
        (0 when no contour is found).
    """
    border_size = 5       # margin cropped from every side of the cell
    padding = 5           # extra context kept around each digit's bounding box
    min_confidence = 0.95  # below this the user is asked to confirm the digit

    # Crop out the borders by a fixed size, then binarise (ink becomes white).
    image = img_obj[border_size:-border_size, border_size:-border_size]
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)

    # One external contour per candidate digit, ordered by x position.
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

    img_height, img_width = image.shape[:2]
    number = 0

    for ctr in contours:
        x, y, w, h = cv2.boundingRect(ctr)

        # Pad the box on every side and clamp each edge to the image separately,
        # so a box touching the left/top edge does not grow extra on the other side.
        left = max(x - padding, 0)
        top = max(y - padding, 0)
        right = min(x + w + padding, img_width)
        bottom = min(y + h + padding, img_height)

        # Resize to the model's input size and scale the 0/255 mask to [0, 1].
        crop = thresh[top:bottom, left:right]
        sample = cv2.resize(crop, (28, 28), interpolation=cv2.INTER_AREA) / 255.0
        sample = sample.reshape(1, 28, 28, 1)

        probabilities = model.predict(sample)
        predicted = int(np.argmax(probabilities))
        confidence = np.max(probabilities)

        if confidence < min_confidence:
            cv2_imshow(np.reshape(sample * 255, (28, 28, 1)))
            print(f"Predicted:{predicted} ({round(confidence*100,2)})")
            print("Actual:")
            answer = input().strip()
            # Re-prompt instead of crashing on a typo; labels outside 0-9 are
            # invalid for the 10-class softmax the model was compiled with.
            while not (len(answer) == 1 and answer in "0123456789"):
                print("Please enter a single digit (0-9):")
                answer = input().strip()
            digit_value = int(answer)

            # Fine-tune on the corrected sample and persist the updated model.
            model.fit(sample, np.array([[digit_value]]))
            os.makedirs('models', exist_ok=True)
            model.save('models/model.keras')
        else:
            digit_value = predicted
            print(confidence)

        # Append this digit as the next least-significant decimal place.
        number = number * 10 + digit_value

    return [number]

## OCR

In [None]:
def ocr_prediction(img_obj):
    """Run the shared EasyOCR ``reader`` on an image and return its detections.

    Args:
        img_obj: Image passed unchanged to ``reader.readtext``.

    Returns:
        The non-empty list returned by ``reader.readtext`` (callers in this
        notebook read the recognised text as ``result[i][1]``), or the string
        ``'ERR'`` when nothing was detected or OCR raised an exception.
    """
    try:
        results = reader.readtext(img_obj)
    except Exception as e:
        # Keep the best-effort contract, but make the failure visible.
        print(f"OCR failed: {e}")
        return 'ERR'

    # An empty detection list is reported the same way as a failure.
    return results if results else 'ERR'

## ALL PAGES
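For every page of the PDF we run the whole pipeline in a loop: `read_pdf` renders the page to an image, `get_table` crops the table from it, and `extract_cells_to_csv` reads each cell and appends the rows to the CSV.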


In [None]:
def read_pdf(pdf_path):
    """Render every page of a PDF to an image and pass it to ``get_table``.

    Resets the module-level ``rows_written`` counter, creates the 'outputs'
    directory if needed and truncates 'outputs/output.csv' and
    'outputs/attention.txt' before processing.

    Args:
        pdf_path: Path of the PDF file, passed to ``pymupdf.open``.

    Returns:
        None.
    """
    # `import PIL` alone does not guarantee that the Image submodule is loaded.
    import PIL.Image

    global rows_written

    pdf_document = pymupdf.open(pdf_path)
    try:
        rows_written = 0

        os.makedirs('outputs', exist_ok=True)

        # Clear any output left over from a previous run.
        for stale_output in ('outputs/output.csv', 'outputs/attention.txt'):
            with open(stale_output, mode='w'):
                pass

        zoom = 2  # Adjust zoom level as needed
        mat = pymupdf.Matrix(zoom, zoom)

        for page_number in range(len(pdf_document)):
            page = pdf_document.load_page(page_number)
            # Rasterise the page with the zoom matrix, without an alpha channel.
            pix = page.get_pixmap(matrix=mat, alpha=False)

            # Pixmap -> PPM bytes -> PIL image -> NumPy array.
            img_bytes = pix.tobytes("ppm")
            image = np.array(PIL.Image.open(io.BytesIO(img_bytes)))
            # NOTE(review): the array comes from PIL, while get_table and
            # hand_prediction convert with COLOR_BGR2GRAY; confirm the channel
            # order is what they expect.

            get_table(image, page_number)
    finally:
        # Release the document even if a page fails part-way through.
        pdf_document.close()

In [None]:
def get_table(image, page_number):
    """Find the table on a page image and send its crop to ``extract_cells_to_csv``.

    The page is converted to grayscale, blurred and run through Canny edge
    detection. Only the external contour with the largest area is considered,
    and it is accepted as the table when its polygon approximation (tolerance
    2% of the perimeter) has exactly four vertices. Otherwise "error" is printed.

    Args:
        image: Page image as a 3-channel array.
        page_number: Zero-based page index, forwarded unchanged.

    Returns:
        None.
    """
    # Edge map: grayscale -> 5x5 Gaussian blur -> Canny.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    edge_map = cv2.Canny(smoothed, 50, 150)

    outlines, _ = cv2.findContours(edge_map.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Keep the largest contour only if it simplifies to a quadrilateral.
    table_outline = None
    if outlines:
        largest = max(outlines, key=cv2.contourArea)
        tolerance = 0.02 * cv2.arcLength(largest, True)
        polygon = cv2.approxPolyDP(largest, tolerance, True)
        if len(polygon) == 4:
            table_outline = polygon

    if table_outline is None:
        print("error")
        return

    # Crop the table's bounding box from the original image and process its cells.
    left, top, width, height = cv2.boundingRect(table_outline)
    table_region = image[top:top + height, left:left + width]
    extract_cells_to_csv(table_region, page_number)

In [None]:
def _ocr_cell_text(cell_image):
    """Run OCR once on a cell and return ``(text, needs_review)``.

    ``text`` is the first detection's text, or None when ``ocr_prediction``
    reports 'ERR' / nothing. ``needs_review`` is True when OCR failed or
    returned more than one fragment.
    """
    results = ocr_prediction(cell_image)
    if isinstance(results, str) or not results:
        return None, True
    return results[0][1], len(results) > 1


def extract_cells_to_csv(image, page_number, csv_filename='outputs/output.csv'):
    """Slice a cropped table image into cells and append their contents to a CSV.

    The layout is fixed in pixels: a title area of ``106 + 2 * num_rows``,
    a 75 px header row and 57 px data rows, so the table height is
    ``181 + 59 * num_rows``; the row count is recovered from the image height
    with that formula. The header row is only written for page 0.

    For each data row the serial number is ``row + rows_written``, columns
    1-3 are read with OCR (the user is asked to type the value when OCR fails
    or returns several fragments) and column 4 is read with ``hand_prediction``.

    Args:
        image: Cropped table image (array indexed as rows x columns).
        page_number: Zero-based page index.
        csv_filename: CSV file to append to.

    Returns:
        None. Updates the module-level ``rows_written`` counter.
    """
    global rows_written
    # Allow a direct call that was not preceded by read_pdf.
    if 'rows_written' not in globals():
        rows_written = 0

    # Determine number of rows using page height: height = 181 + 59*rows.
    # Clamp at zero so an unexpectedly small crop cannot yield a negative count.
    height = len(image)
    num_rows = max(0, round((height - 181) / 59))

    # Parameters (adjust these values as needed)
    title_offset = 106 + (num_rows * 2)  # Offset to skip the title
    header_row_height = 75  # Height of the header row
    row_height = 57  # Height of each subsequent row

    # Column names and widths (adjust these values according to your table)
    col_names = ["SNo.","ID","NAME","DOB","INTERVIEW MARKS"]
    column_widths = [67, 73, 392 + (num_rows * 2), 175, 380]

    # Running sum of the widths gives the x position of every column edge.
    column_boundaries = [0]
    for width in column_widths:
        column_boundaries.append(column_boundaries[-1] + width)

    marks_col = len(col_names) - 1  # last column holds the handwritten marks

    with open(csv_filename, mode='a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)

        if page_number == 0:
            header_row = image[title_offset:title_offset + header_row_height, :]
            header_cells = []
            for col in range(len(col_names)):
                cell = header_row[:, column_boundaries[col]:column_boundaries[col + 1]]
                text, _ = _ocr_cell_text(np.array(cell))
                # Fall back to the known column name when OCR finds nothing,
                # instead of failing on the 'ERR' sentinel.
                header_cells.append(text if text is not None else col_names[col])
            csv_writer.writerow(header_cells)

        for row in range(1, num_rows + 1):
            row_start = title_offset + header_row_height + (row - 1) * row_height
            row_end = row_start + row_height
            table_row = image[row_start:row_end, :]

            # The serial number is generated, not read from the image.
            row_cells = [f'{row + rows_written}']

            for col in range(1, len(col_names)):
                cell = table_row[:, column_boundaries[col]:column_boundaries[col + 1]]
                cell_image = np.array(cell)

                if col < marks_col:
                    # OCR is run once per cell and the result reused.
                    text, needs_review = _ocr_cell_text(cell_image)
                    if needs_review:
                        cv2_imshow(cell_image)
                        print(f"PREDICTION:{text if text is not None else 'ERR'}")
                        print("ACTUAL:")
                        row_cells.append(str(input()))
                    else:
                        row_cells.append(text)
                else:
                    row_cells.append(hand_prediction(cell_image)[0])

            csv_writer.writerow(row_cells)

    print(f"Written {num_rows} row(s) to '{csv_filename}'.")
    rows_written += num_rows

ADDING PROBABILITY BASED WARNING FOR CNN

In [None]:
# Run the full pipeline on the demo PDF.
read_pdf(pdf_path="data/pdf-marked-demo.pdf")