
# Human Reinforcement Learning Model

This notebook implements a human reinforcement learning model that:
1. Loads the pre-trained CNN weights (`./weights/cnn_weights.pth`) produced by the previous file in this project.
2. Prompts the user to input characters `0-9`, `a-z`, and `A-Z`.
3. Asks the user to redraw characters, prioritizing them from lowest to highest accuracy as measured by the pre-trained model.


In [2]:
import os
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from collections import defaultdict
import tkinter as tk
from PIL import ImageGrab, Image


In [3]:
# Enhanced CNN
class EnhancedCNNModel(nn.Module):
    """Five-stage convolutional classifier for single-channel images.

    Every stage is a 3x3 convolution with padding 1 (spatial size preserved)
    followed by a 2x2 max-pool that halves height and width, rounding down.
    Only the first stage is batch-normalised and no activation is applied
    between the convolutional stages; ReLU and dropout are used only in the
    fully connected head.

    The size of the first fully connected layer depends on the input
    resolution, so the head is created by :meth:`initialize_fc_layers`. That
    method must be called before :meth:`forward`, and before loading a state
    dict that contains ``fc1``/``fc2``/``fc3`` weights.

    Args:
        num_classes: Number of logits produced by the final layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        self.num_classes = num_classes

        # Convolutional stages. For an input of (1, H, W) the feature map after
        # stage k has shape (C_k, H // 2**k, W // 2**k).
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 32 channels, 1/2 size

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 64 channels, 1/4 size

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 128 channels, 1/8 size

        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # 256 channels, 1/16 size

        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)  # 512 channels, 1/32 size

        # Fully connected layers are created later by initialize_fc_layers().
        self.fc1 = None
        self.fc2 = None
        self.fc3 = None

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def _extract_features(self, x):
        """Run the five conv/pool stages and return the un-flattened feature map."""
        x = self.pool1(self.bn1(self.conv1(x)))
        x = self.pool2(self.conv2(x))
        x = self.pool3(self.conv3(x))
        x = self.pool4(self.conv4(x))
        x = self.pool5(self.conv5(x))
        return x

    def initialize_fc_layers(self, input_shape):
        """Create ``fc1``/``fc2``/``fc3`` for inputs of the given shape.

        A dummy batch of one all-zero sample is pushed through the
        convolutional stages to measure the flattened feature size.

        Args:
            input_shape: Shape of a single sample without the batch dimension,
                e.g. ``(1, 160, 120)``.
        """
        device = self.conv1.weight.device

        # torch.no_grad() does not stop BatchNorm from updating its running
        # statistics, so put bn1 in eval mode while probing; otherwise the
        # all-zero dummy batch would leak into running_mean / running_var.
        bn_was_training = self.bn1.training
        self.bn1.eval()
        try:
            with torch.no_grad():
                dummy_input = torch.zeros(1, *input_shape, device=device)
                # Element count of one sample after the conv stages.
                flattened_size = self._extract_features(dummy_input)[0].numel()
        finally:
            self.bn1.train(bn_was_training)

        # Dynamically initialize fully connected layers on the model's device
        self.fc1 = nn.Linear(flattened_size, 512).to(device)
        self.fc2 = nn.Linear(512, 256).to(device)
        self.fc3 = nn.Linear(256, self.num_classes).to(device)

        # Print the initialized sizes for debugging
        print(f"Initialized fc1 with input size {flattened_size} and output size 512")
        print(f"Initialized fc2 with input size 512 and output size 256")
        print(f"Initialized fc3 with input size 256 and output size {self.num_classes}")

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Batch of images shaped ``(batch, 1, H, W)`` whose ``H`` and ``W``
                give the flattened size ``fc1`` was initialised for.

        Raises:
            RuntimeError: If :meth:`initialize_fc_layers` has not been called.
        """
        if self.fc1 is None or self.fc2 is None or self.fc3 is None:
            raise RuntimeError(
                "Fully connected layers are not initialized; call "
                "initialize_fc_layers(input_shape) before forward()."
            )

        x = self._extract_features(x)

        # Flatten everything except the batch dimension, then apply the head.
        x = torch.flatten(x, 1)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        x = self.fc3(x)

        return x

In [4]:
# Instantiate the model and load the weights
num_classes = 62  # 10 digits + 26 uppercase + 26 lowercase
model = EnhancedCNNModel(num_classes=num_classes)

# The fully connected layers must exist before the checkpoint is loaded,
# otherwise the fc1/fc2/fc3 entries of the state dict have nowhere to go.
input_shape = (1, 160, 120)  # Example input shape, adjust as necessary (must yield the checkpoint's fc1 input size)
model.initialize_fc_layers(input_shape)

weights_path = './weights/cnn_weights.pth'
# weights_only=True restricts unpickling to tensors/containers (no arbitrary
# code execution) and silences the torch.load FutureWarning; map_location lets
# a checkpoint saved on GPU load on a CPU-only machine.
state_dict = torch.load(weights_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
model.eval()  # inference mode: disables dropout, BatchNorm uses running stats

print("Model loaded successfully!")

Initialized fc1 with input size 7680 and output size 512
Initialized fc2 with input size 512 and output size 256
Initialized fc3 with input size 256 and output size 62


  model.load_state_dict(torch.load(weights_path))


Model loaded successfully!


In [None]:
def draw_letter_prompt(letter, save_folder="user_drawings"):
    """Open a Tkinter canvas, let the user draw ``letter`` and capture the result.

    The window blocks until the user presses "Submit" or closes it. On submit
    the canvas area is captured from the screen once; that single capture is
    written to ``<save_folder>/<letter>.png`` at full canvas resolution and is
    also down-scaled for the return value.

    Args:
        letter: Character the user is asked to draw; also used as the default
            file stem of the saved PNG.
        save_folder: Directory the PNG is written to (created if missing).

    Returns:
        A float NumPy array with values in ``[0, 1]`` holding the grayscale
        drawing resized with ``Image.resize((160, 120))`` (PIL takes
        ``(width, height)``, so the array has 120 rows and 160 columns), or
        ``None`` if the window was closed without pressing "Submit".

    NOTE(review): the capture relies on ``ImageGrab`` screen coordinates, so the
    window presumably has to be fully visible and unscaled — confirm on
    high-DPI displays.
    """
    # Ensure the save folder exists
    os.makedirs(save_folder, exist_ok=True)

    # Create the main application window
    root = tk.Tk()
    root.title(f"Draw the Letter: {letter}")

    # Set the window position
    window_x, window_y = 100, 100  # Adjust these values as needed
    root.geometry(f"+{window_x}+{window_y}")

    # Canvas for drawing
    canvas_width, canvas_height = 320, 240
    canvas = tk.Canvas(root, width=canvas_width, height=canvas_height, bg="white")
    canvas.pack()

    # Drawing state shared by the event handlers below
    drawing = False
    last_x, last_y = None, None
    drawn_image = None  # Stays None unless the user submits a drawing

    def start_draw(event):
        """Begin a stroke at the pointer position."""
        nonlocal drawing, last_x, last_y
        drawing = True
        last_x, last_y = event.x, event.y

    def draw(event):
        """Extend the current stroke to the pointer position."""
        nonlocal last_x, last_y
        if drawing:
            canvas.create_line(last_x, last_y, event.x, event.y, width=5, fill="black")
            last_x, last_y = event.x, event.y

    def stop_draw(event):
        """End the current stroke."""
        nonlocal drawing
        drawing = False

    def clear_canvas():
        """Remove everything drawn so far."""
        canvas.delete("all")

    def save_drawing(file_name=None):
        """Capture the canvas, store the PNG and the array, then close the window."""
        nonlocal drawn_image
        # Screen-space bounding box of the canvas
        x = root.winfo_rootx() + canvas.winfo_x()
        y = root.winfo_rooty() + canvas.winfo_y()
        x1 = x + canvas.winfo_width()
        y1 = y + canvas.winfo_height()

        # Grab the screen exactly once so the saved file and the returned
        # array are guaranteed to come from the same pixels.
        captured = ImageGrab.grab((x, y, x1, y1)).convert("L")

        # Down-scaled, normalized copy handed back to the caller
        drawn_image = np.array(captured.resize((160, 120))) / 255.0

        # Full-resolution copy written to disk
        image_file_name = f"{file_name if file_name else letter}.png"
        image_path = os.path.join(save_folder, image_file_name)
        captured.save(image_path)
        print(f"Saved image for letter '{letter}' at {image_path}")
        root.destroy()  # Close the window

    # Bind mouse events
    canvas.bind("<ButtonPress-1>", start_draw)
    canvas.bind("<B1-Motion>", draw)
    canvas.bind("<ButtonRelease-1>", stop_draw)

    # Add buttons
    button_frame = tk.Frame(root)
    button_frame.pack()

    clear_button = tk.Button(button_frame, text="Clear", command=clear_canvas)
    clear_button.pack(side="left", padx=10)

    submit_button = tk.Button(button_frame, text="Submit", command=save_drawing)
    submit_button.pack(side="left", padx=10)

    # Run the Tkinter main loop (returns once the window is destroyed)
    root.mainloop()

    return drawn_image

In [None]:
import csv

def collect_or_load_images(characters, save_folder="user_drawings"):
    """Return one drawing per character, loading cached PNGs where available.

    For each character, ``<save_folder>/<char>.png`` is loaded if a file with
    exactly that name exists; otherwise the user is asked to draw it via
    ``draw_letter_prompt``.

    Args:
        characters: Iterable of single-character strings to collect.
        save_folder: Directory holding the cached drawings (created if missing).

    Returns:
        A ``defaultdict(list)`` mapping each character to a list of float
        arrays in ``[0, 1]`` (grayscale, resized with ``(160, 120)``).
        Characters for which no drawing was submitted get no entry.
    """
    # Ensure the save folder exists
    os.makedirs(save_folder, exist_ok=True)

    # Dictionary to store user input images
    user_images = defaultdict(list)

    for char in characters:
        image_file_name = f"{char}.png"
        image_path = os.path.join(save_folder, image_file_name)

        # Compare against the directory listing instead of os.path.exists():
        # on case-insensitive filesystems (Windows, default macOS) "a.png"
        # would otherwise match an existing "A.png" and the uppercase drawing
        # would be loaded under the lowercase label.
        # NOTE(review): on such filesystems both cases still share one file on
        # disk, so the cached PNG can be overwritten by the other case.
        has_cached_file = (
            image_file_name in os.listdir(save_folder) and os.path.isfile(image_path)
        )

        if has_cached_file:
            print(f"Loading saved image for letter '{char}' from {image_path}")
            # Context manager closes the file handle once the pixels are read
            with Image.open(image_path) as saved_image:
                image = saved_image.convert("L").resize((160, 120))
            image_array = np.array(image) / 255.0  # Normalize the image
            user_images[char].append(image_array)
        else:
            # If the image does not exist, prompt the user to draw it
            print(f"Draw the character: {char}")
            user_input = draw_letter_prompt(char, save_folder=save_folder)
            if user_input is None:
                # Window closed without submitting: storing None would crash
                # every later step that treats the entry as an array.
                print(f"No drawing submitted for '{char}'; skipping.")
                continue
            user_images[char].append(user_input)

    return user_images

# Define characters: digits 0-9, then uppercase A-Z, then lowercase a-z.
# NOTE(review): predictions are decoded by position in this list, so the order
# presumably has to match the label order used when the weights were trained —
# confirm against the training code.
characters = [chr(i) for i in range(48, 58)] + [chr(i) for i in range(65, 91)] + [chr(i) for i in range(97, 123)]

# Collect or load images
save_folder = "user_drawings"
user_images = collect_or_load_images(characters, save_folder=save_folder)

# Save images with sequential filenames and create a CSV mapping file
csv_file_path = os.path.join(save_folder, "image_mapping.csv")
with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["image_path", "character"])  # Write header

    image_counter = 1
    for char, images in user_images.items():
        for img in images:
            if img is None:
                # No drawing was submitted for this prompt; nothing to export.
                continue

            # Generate sequential filename
            image_file_name = f"{image_counter:04d}.png"
            image_path = os.path.join(save_folder, image_file_name)

            # Convert the [0, 1] float array back to 8-bit pixels. Round rather
            # than truncate so floating-point error in x / 255 * 255 cannot
            # drop a gray level, and clip to stay inside the uint8 range.
            pixels = np.clip(np.rint(np.asarray(img) * 255), 0, 255).astype(np.uint8)
            Image.fromarray(pixels).save(image_path)

            # Write the mapping to the CSV file
            csv_writer.writerow([image_path, char])

            image_counter += 1

print(f"Image mapping saved to {csv_file_path}")

Draw the character: 0
Saved image for letter '0' at user_drawings\0.png
Draw the character: 1


In [None]:
# Re-prompt the user for misclassified characters.
#
# Accuracy is cumulative over every drawing collected for a character and the
# model is never retrained here, so a single misclassified drawing keeps that
# character below 100% forever. The loop is therefore bounded: it stops when
# every character is at 100%, when the user submits no new drawing in a round,
# or after MAX_REINFORCEMENT_ROUNDS rounds.
MAX_REINFORCEMENT_ROUNDS = 10
rounds_completed = 0

while True:
    y_true = []
    y_pred = []

    # Predict user inputs and collect true/predicted labels
    with torch.no_grad():
        for char in characters:
            for img in user_images[char]:
                if img is None:
                    # Prompt was dismissed without a drawing; nothing to score.
                    continue
                # (H, W) array -> (1, 1, H, W) batch of one single-channel image
                img_tensor = torch.as_tensor(img, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
                output = model(img_tensor)
                predicted_idx = int(output.argmax(dim=1).item())
                y_true.append(char)
                y_pred.append(characters[predicted_idx])

    # Compute confusion matrix (rows: true character, columns: prediction)
    cm = confusion_matrix(y_true, y_pred, labels=characters)

    # Accuracy per character. A character without any drawing would give
    # 0 / 0 = NaN, and `nan < 1.0` is False, so it would silently count as
    # passed; report it as 0% instead so it gets prompted.
    samples_per_char = cm.sum(axis=1)
    accuracy_per_char = np.divide(
        np.diag(cm),
        samples_per_char,
        out=np.zeros(len(characters), dtype=float),
        where=samples_per_char > 0,
    )
    char_accuracy_dict = dict(zip(characters, accuracy_per_char))

    # Display current accuracy
    print("\nCurrent Accuracy Per Character:")
    for char, accuracy in char_accuracy_dict.items():
        print(f"{char}: {accuracy:.2%}")

    # Characters below 100% accuracy, lowest accuracy first
    below_100_accuracy = sorted(
        (char for char, acc in char_accuracy_dict.items() if acc < 1.0),
        key=char_accuracy_dict.get,
    )

    # Break the loop if all characters are 100% accurate
    if not below_100_accuracy:
        print("\nAll characters have achieved 100% accuracy. Process complete!")
        break

    if rounds_completed >= MAX_REINFORCEMENT_ROUNDS:
        print(
            f"\nStopping after {MAX_REINFORCEMENT_ROUNDS} re-prompt rounds; "
            f"{len(below_100_accuracy)} characters are still below 100% accuracy."
        )
        break

    # Re-prompt user for characters below 100% accuracy
    print("\nRe-prompting for characters below 100% accuracy:")
    new_drawings = 0
    for char in below_100_accuracy:
        print(f"Draw the character: {char}")
        user_input = draw_letter_prompt(char, save_folder=save_folder)
        if user_input is None:
            # Window closed without submitting; do not store a None entry.
            continue
        user_images[char].append(user_input)
        new_drawings += 1

    if new_drawings == 0:
        # Nothing changed, so the next evaluation would be identical.
        print("\nNo new drawings were submitted; stopping.")
        break

    rounds_completed += 1


Current Accuracy Per Character:
0: 0.00%
1: 0.00%
2: 0.00%
3: 0.00%
4: 0.00%
5: 0.00%
6: 0.00%
7: 0.00%
8: 0.00%
9: 0.00%
A: 0.00%
B: 0.00%
C: 0.00%
D: 0.00%
E: 0.00%
F: 0.00%
G: 0.00%
H: 0.00%
I: 0.00%
J: 0.00%
K: 0.00%
L: 100.00%
M: 0.00%
N: 0.00%
O: 0.00%
P: 0.00%
Q: 0.00%
R: 0.00%
S: 0.00%
T: 0.00%
U: 0.00%
V: 0.00%
W: 0.00%
X: 0.00%
Y: 0.00%
Z: 0.00%
a: 0.00%
b: 0.00%
c: 0.00%
d: 0.00%
e: 0.00%
f: 0.00%
g: 0.00%
h: 0.00%
i: 0.00%
j: 0.00%
k: 0.00%
l: 0.00%
m: 0.00%
n: 0.00%
o: 0.00%
p: 0.00%
q: 0.00%
r: 0.00%
s: 0.00%
t: 0.00%
u: 0.00%
v: 0.00%
w: 0.00%
x: 0.00%
y: 0.00%
z: 0.00%

Re-prompting for characters below 100% accuracy:
Draw the character: 0
Saved image for letter '0' at user_drawings\0.png
Draw the character: 1
Saved image for letter '1' at user_drawings\1.png
Draw the character: 2
Saved image for letter '2' at user_drawings\2.png
Draw the character: 3
Saved image for letter '3' at user_drawings\3.png
Draw the character: 4
Saved image for letter '4' at user_drawings\4.p