# Creating an OCR Model from Scratch

## Imports

### Libraries & Packages

In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [2]:
from tensorflow.python.client import device_lib

In [3]:
def get_available_devices():
    """Return the name of every local device reported by TensorFlow."""
    return [device.name for device in device_lib.list_local_devices()]


print(get_available_devices())

['/device:CPU:0']


In [8]:
# Ask TensorFlow for the physical devices it has registered. As the last
# expression in the cell, the returned list is what the notebook displays.
tf.config.experimental.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [7]:
# Print the physical devices TensorFlow has registered, with a label.
print("Available devices:", tf.config.experimental.list_physical_devices())

Available devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


### Data

In [2]:
# NOTE(review): this cell sits under the "Data" heading but only repeats the
# device listing; no dataset is loaded here, so X_train / y_train / X_val /
# y_val used further down still have to be defined somewhere.
print("Available devices:", tf.config.experimental.list_physical_devices())

Available devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


### Functions

#### Display image

In [None]:
def display_image(image_path):
    """Read an image from disk and show it with matplotlib.

    Args:
        image_path: Path of the image file to display.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``
            (missing file, unreadable file, or unsupported format).
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly rather than letting cvtColor fail with an opaque error.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    # OpenCV loads colour images as BGR, while matplotlib expects RGB.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.imshow(image_rgb)
    plt.axis('off')
    plt.show()

#### Preprocess Image

In [None]:
def preprocess_image(image_path, width=128, height=128):
    """Load an image file and convert it into a normalized grayscale array.

    The image is read with OpenCV, converted to grayscale, resized to
    ``width`` x ``height``, denoised, scaled by 1/255 to float32 and given a
    trailing channel axis.

    Args:
        image_path: Path of the image file to load.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        A float32 array of shape ``(height, width, 1)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``
            (missing file, unreadable file, or unsupported format).
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly rather than letting cvtColor fail with an opaque error.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # cv2.resize takes the target size as (width, height); the resulting
    # array is indexed (row, column), i.e. (height, width).
    resized_image = cv2.resize(gray_image, (width, height))

    # Non-local-means denoising. Positional arguments after the source are:
    # dst=None, h=30 (filter strength), templateWindowSize=7,
    # searchWindowSize=21.
    denoised_image = cv2.fastNlMeansDenoising(resized_image, None, 30, 7, 21)

    # Scale the 8-bit pixel values by 1/255.
    normalized_image = denoised_image.astype('float32') / 255.0

    # Add a trailing channel axis. With the default width/height this gives
    # (128, 128, 1), matching the input_shape used in create_ocr_model.
    preprocessed_image = np.expand_dims(normalized_image, axis=-1)

    return preprocessed_image

#### Creating the model

In [None]:
def create_ocr_model(input_shape=(128, 128, 1), num_classes=26):
    """Build an uncompiled CNN classifier for single-character images.

    The network is two Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with 32
    and 64 filters, followed by Flatten, a 64-unit ReLU Dense layer and a
    softmax output layer.

    Args:
        input_shape: Shape of one input sample as (height, width, channels).
            The default matches 128x128 single-channel images.
        num_classes: Number of output classes. The default of 26 is sized
            for the letters of the alphabet.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    return Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        # One softmax unit per class.
        Dense(num_classes, activation='softmax'),
    ])

#### Compile the model

In [None]:
# Build the network, then configure it for multi-class training: Adam
# optimizer, categorical cross-entropy loss, accuracy reported as a metric.
model = create_ocr_model()
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

#### Data Preparation

In [None]:
import numpy as np

def preprocess_data(images, labels, num_classes=26):
    """Scale image pixel values by 1/255 and one-hot encode the labels.

    Args:
        images: Array-like of pixel values, assumed to be in the 0-255 range
            (TODO confirm against the data source).
        labels: Integer class indices to one-hot encode.
        num_classes: Width of the one-hot encoding. The default of 26
            matches the output layer size used in ``create_ocr_model``.

    Returns:
        A ``(images, labels)`` tuple: ``images`` as float32 scaled by 1/255,
        ``labels`` one-hot encoded with ``num_classes`` columns.
    """
    # NOTE(review): preprocess_image already divides by 255. Do not feed its
    # output through this function, or the pixels are scaled twice.
    # np.asarray lets plain lists be passed in as well as ndarrays.
    images = np.asarray(images).astype('float32') / 255.0
    # One-hot encode the labels.
    labels = tf.keras.utils.to_categorical(labels, num_classes=num_classes)
    return images, labels

# Example usage
# NOTE(review): X_train, y_train, X_val and y_val are not defined in any
# visible cell of this notebook; they must be loaded before this cell runs,
# otherwise it fails with a NameError.
X_train, y_train = preprocess_data(X_train, y_train)
X_val, y_val = preprocess_data(X_val, y_val)

#### Training the model

In [None]:
# Train for 10 epochs with mini-batches of 32 samples.
# Validate on the prepared X_val / y_val set instead of validation_split:
# validation_split would carve the last 20% off X_train before shuffling
# (which can be a skewed sample if the data is ordered) and would leave the
# dedicated validation set unused during training.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val, y_val),
)

#### Model Evaluation

In [None]:
# Evaluate on the validation set. The model was compiled with
# metrics=['accuracy'], so evaluate() returns the loss followed by accuracy.
loss, accuracy = model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {accuracy}")