### Character Recognition of Handwritten Hiragana

In [2]:
# %pip install torch
# %pip install torchvision
# %pip install matplotlib
# %pip install Pillow
# %pip install pygame


## Import of the Data

In [3]:
# Import necessary libraries/modules
from torchvision import datasets, transforms  # Import datasets and transforms from the torchvision library
from torch.utils.data import TensorDataset, DataLoader  # Import TensorDataset and DataLoader from torch.utils.data
import torch  # Import the main PyTorch library
import torch.nn as nn  # Import the neural network module from PyTorch
import torch.nn.functional as F  # Import functional components of neural networks from PyTorch
import torch.optim as optim  # Import optimization algorithms from PyTorch
import matplotlib.pyplot as plt  # Import the matplotlib library for plotting
import numpy as np  # Import the NumPy library for numerical operations

# Define a function 'load' that loads data from a given file
def load(data):
    return np.load(data)['arr_0']  # Load data from the specified file using NumPy and return it

# Load the training and testing data and labels
train_data = load('Data\K49-data\k49-train-imgs.npz')  # Load training image data from a file
test_data = load('Data\K49-data\k49-test-imgs.npz')    # Load testing image data from a file

train_labels = load('Data\K49-data\k49-train-labels.npz')  # Load training labels from a file
test_labels = load('Data\K49-data\k49-test-labels.npz')    # Load testing labels from a file
# Convert the data to the appropriate data type (float32)
train_data = train_data.astype(np.float32)
test_data = test_data.astype(np.float32)

# Combine the training data and labels into a PyTorch TensorDataset
train_dataset = TensorDataset(torch.tensor(train_data), torch.tensor(train_labels))
test_dataset = TensorDataset(torch.tensor(test_data), torch.tensor(test_labels))

## Mapping of the Data

In [4]:
# Import the csv module for working with CSV files
import csv

# Define a list of encodings to try
encodings_to_try = ['utf-8', 'latin-1', 'iso-8859-1', 'cp1252']

# Define a function to load the mapping from the CSV file with various encodings
def load_class_mapping(csv_file):
    class_mapping = None  # Initialize class_mapping as None

    for encoding in encodings_to_try:
        try:
            class_mapping = {}  # Initialize an empty dictionary to store the mapping

            # Open the CSV file specified by csv_file for reading with the current encoding
            with open(csv_file, newline='', encoding=encoding) as csvfile:
                # Create a CSV reader object that treats the first row as headers and maps columns to keys
                csvreader = csv.DictReader(csvfile)

                # Iterate through each row (record) in the CSV file
                for row in csvreader:
                    # Extract the 'index' value from the current row and convert it to an integer
                    index = int(row['index'])

                    # Extract the 'char' value from the current row
                    char = row['char']

                    # Add an entry to the class_mapping dictionary, mapping index to char
                    class_mapping[index] = char

            # If successful, break out of the loop
            break
        except UnicodeDecodeError:
            # If there's a decoding error, try the next encoding in the list
            pass

    # Check if class_mapping is still None (indicating no successful decoding)
    if class_mapping is None:
        raise ValueError("Unable to decode the CSV file with any of the specified encodings.")

    # Return the populated class_mapping dictionary containing the mapping from index to char
    return class_mapping

# Specify the path to your CSV file (e.g., "k49_classmap.csv")
csv_file_path = 'k49_classmap.csv'

# Load the class mapping from the specified CSV file by calling the load_class_mapping function
class_mapping = load_class_mapping(csv_file_path)




## Checkup

In [5]:
# import numpy as np
# import matplotlib.pyplot as plt

# # Define the class index you want to look up
# index = 1
# class_index = test_labels[index]

# # Display the image using Matplotlib
# plt.imshow(test_data[index], cmap='gray')
# plt.show()

# # Check if the class index exists in the class_mapping dictionary
# if class_index in class_mapping:
#     # If it exists, retrieve the corresponding character label
#     character = class_mapping[class_index]

#     # Print the result
#     print(f"Class index {class_index} corresponds to character: {character}")
# else:
#     # If it doesn't exist, print a message indicating that it was not found
#     print(f"Class index {class_index} not found in the mapping.")


## Classification of the Neural Network

In [6]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Set parameters for the feedforward neural network
input_size = 784  # Update the input size to 784 (28*28)
hidden_size = 2048*2  # Number of neurons in the hidden layer
num_classes = 49    # Number of output classes (categories)
num_epochs = 20    # Number of training epochs
batch_size = 2048   # Batch size for data processing
learning_rate = 0.0005  # Learning rate for the optimizer

# Create data loaders for training and testing data
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the architecture of the neural network
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        # Define the input layer that accepts 784-dimensional input
        self.input_layer = nn.Linear(input_size, input_size).to(dtype=torch.float32)
        self.l1 = nn.Linear(input_size, hidden_size).to(dtype=torch.float32)  # Fully connected layer
        self.relu = nn.ReLU()  # Rectified Linear Unit (ReLU) activation function
        # Define the output layer
        self.l2 = nn.Linear(hidden_size, num_classes).to(dtype=torch.float32)  # Fully connected layer

    def forward(self, x):  # Forward pass through the network
        x = self.input_layer(x)  # Pass input through the input layer
        out = self.l1(x)  # Pass input through the first hidden layer
        out = self.relu(out)  # Apply ReLU activation
        out = self.l2(out)  # Pass through the output layer to get predictions
        return out

# Create an instance of the neural network model
model = NeuralNet(input_size, hidden_size, num_classes)


## Training of the Neural Network

In [7]:
# # Define the loss function (CrossEntropyLoss) and the optimizer (Adam)
# criterion = nn.CrossEntropyLoss()  # Loss function for classification problems
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # Adam optimizer with specified learning rate

# # Train the model
# total_step = len(train_loader)  # Total number of batches in the training data
# for epoch in range(num_epochs):  # Loop through each training epoch
#     for i, (train_data, train_labels) in enumerate(train_loader):  # Loop through each batch of data
#         # Reshape the input images to have the shape (batch_size, input_size)
#         train_data = train_data.reshape(-1, 28*28)

#         # Forward pass: compute predicted outputs by passing inputs to the model
#         outputs = model(train_data)

#         # Calculate the loss using the defined criterion (CrossEntropyLoss)
#         loss = criterion(outputs, train_labels)

#         # Backward pass and optimization
#         optimizer.zero_grad()  # Clear gradients from previous steps
#         loss.backward()  # Perform backpropagation to compute gradients
#         optimizer.step()  # Update model parameters using the computed gradients

#         # Print training statistics
#         if (i+1) % 100 == 0:
#             print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
#                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


In [8]:
# # After training the model, save it to a file
# torch.save(model.state_dict(), 'mymodel.pth')


## Test of the Neural Network

In [11]:
# Initialize lists to store predictions and ground truth labels
p_test = []  # Model predictions of class index
y_test = []  # Ground truth class indices

# Loop through the test data in batches
for images, labels in test_loader:
    images = images.reshape(-1, 28*28)  # Reshape input images
    images = images.to(dtype=torch.float32)  # Cast images to the correct data type (torch.float32)
    outputs = model(images)  # Forward pass to get model predictions
    _, predicted = torch.max(outputs.data, 1)  # Get the predicted class for each sample

    # Append model predictions and ground truth labels to lists
    p_test.extend(predicted.tolist())
    y_test.extend(labels.tolist())

# Calculate balanced accuracy using the provided implementation
accs = []

# Loop through each class (in this case, there are 49 classes)
for cls in range(49):
    # Create a mask to select samples of the current class in the ground truth
    mask = (torch.tensor(y_test) == cls)
    
    # Calculate the accuracy for samples of the current class
    cls_acc = (torch.tensor(p_test) == cls)[mask].float().mean()  # Accuracy for rows of class cls
    
    # Append the class accuracy to the list of accuracies 
    accs.append(cls_acc)

# Calculate the final balanced accuracy by taking the mean of class accuracies
balanced_accuracy = torch.tensor(accs).mean().item()

# Print the balanced accuracy as a percentage
print('Balanced Accuracy of the model on test images: {:.2f} %'.format(100 * balanced_accuracy))


Balanced Accuracy of the model on test images: 80.96 %


In [12]:
# Import necessary libraries
import sys  # Import the sys library for system-related functionality
import pygame  # Import the pygame library for game development
import ctypes  # Import the ctypes library for low-level operations
from PIL import Image  # Import the Image module from the PIL (Pillow) library for image manipulation
import numpy as np  # Import the numpy library for numerical operations
import torch  # Import the torch library for PyTorch deep learning framework
import tkinter as tk  # Import the tkinter library for GUI components
import csv  # Import the csv library for reading CSV files
import torch.nn as nn
# Increase Dots Per Inch (DPI) awareness for sharper display
ctypes.windll.shcore.SetProcessDpiAwareness(True)

# Define your neural network model and load pre-trained weights
# Replace with your actual model architecture and weights loading code
# Example:
# input_size = 784  # Input size
# hidden_size = 19600  # Hidden layer size
# num_classes = 49  # Number of classes

# class NeuralNet(nn.Module):
#     def __init__(self, input_size, hidden_size, num_classes):
#         super(NeuralNet, self).__init__()
#         self.input_layer = nn.Linear(input_size, hidden_size)
#         self.relu = nn.ReLU()
#         self.output_layer = nn.Linear(hidden_size, num_classes)

#     def forward(self, x):
#         x = self.input_layer(x)
#         x = self.relu(x)
#         x = self.output_layer(x)
#         return x

model = NeuralNet(input_size, hidden_size, num_classes)  # Create an instance of the neural network
model.load_state_dict(torch.load('mymodel.pth'))  # Load pre-trained weights
model.eval()  # Set the model to evaluation mode (no training)

# Pygame Configuration
pygame.init()  # Initialize the pygame library
fps = 300  # Set frames per second
fpsClock = pygame.time.Clock()  # Create a clock object to control frame rate
width, height = (1000, 800)  # Set the window dimensions
screen = pygame.display.set_mode((width, height), pygame.RESIZABLE)  # Create a resizable window
font = pygame.font.SysFont('Arial', 20)  # Define a font for text

# Initialize the Japanese font
japanese_font = pygame.font.Font('font_file.ttf', 30)  # Load a Japanese font from a file

# Variables

# Our Buttons will append themselves to this list
objects = []  # Create an empty list to store button objects

# Initial color for drawing
drawColor = [0, 0, 0]  # Set the initial drawing color as black

# Initial brush size
brushSize = 20  # Set the initial brush size
brushSizeSteps = 3  # Define steps for changing brush size

# Drawing Area Size
canvasSize = [560, 560]  # Define the size of the drawing canvas

# Create a surface for the result label
result_label_surface = pygame.Surface((200, 200))  # Create a surface for displaying recognition results
result_label_surface.fill((255, 255, 255))  # Fill the result label surface with a white background

# Function to update the result label text
def update_result_text(text_lines):
    result_label_surface.fill((255, 255, 255))  # Clear the result label surface
    y_offset = 10  # Starting Y offset for the first line
    for line in text_lines:
        text_surface = japanese_font.render(line, True, (0, 0, 0))  # Render the text with Japanese font
        result_label_surface.blit(text_surface, (10, y_offset))  # Position the text
        y_offset += text_surface.get_height() + 5  # Adjust Y offset for the next line

# Button Class
class Button():
    def __init__(self, x, y, width, height, buttonText='Button', onclickFunction=None, onePress=False):
        self.x = x  # Set the x-coordinate of the button
        self.y = y  # Set the y-coordinate of the button
        self.width = width  # Set the width of the button
        self.height = height  # Set the height of the button
        self.onclickFunction = onclickFunction  # Store the function to be executed on button click
        self.onePress = onePress  # Flag to determine if the button can be pressed only once

        # Define colors for different button states
        self.fillColors = {
            'normal': '#ffffff',  # Normal state color (white)
            'hover': '#666666',  # Hovered state color (gray)
            'pressed': '#333333',  # Pressed state color (dark gray)
        }

        self.buttonSurface = pygame.Surface((self.width, self.height))  # Create a button surface
        self.buttonRect = pygame.Rect(self.x, self.y, self.width, self.height)  # Create a button rectangle

        self.buttonSurf = font.render(buttonText, True, (20, 20, 20))  # Render the button text

        self.alreadyPressed = False  # Flag to track if the button has already been pressed

        objects.append(self)  # Append the button to the list of objects

    def process(self):
        mousePos = pygame.mouse.get_pos()  # Get the mouse cursor position

        self.buttonSurface.fill(self.fillColors['normal'])  # Fill the button surface with the normal color
        if self.buttonRect.collidepoint(mousePos):  # Check if the mouse cursor is over the button
            self.buttonSurface.fill(self.fillColors['hover'])  # Fill with hover color

            if pygame.mouse.get_pressed(num_buttons=3)[0]:  # Check if the left mouse button is pressed
                self.buttonSurface.fill(self.fillColors['pressed'])  # Fill with pressed color

                if self.onePress:  # If the button can be pressed only once
                    self.onclickFunction()  # Execute the button's onclickFunction

                elif not self.alreadyPressed:  # If the button can be pressed multiple times
                    self.onclickFunction()  # Execute the button's onclickFunction
                    self.alreadyPressed = True

            else:
                self.alreadyPressed = False

        self.buttonSurface.blit(self.buttonSurf, [
            self.buttonRect.width/2 - self.buttonSurf.get_rect().width/2,
            self.buttonRect.height/2 - self.buttonSurf.get_rect().height/2
        ])  # Center-align the button text
        screen.blit(self.buttonSurface, self.buttonRect)  # Draw the button on the screen

# Handler Functions

# Clear the canvas
def clear_all():
    canvas.fill((255, 255, 255))  # Fill the canvas with white

# Character recognition function
def classify_handwriting():
    pygame.image.save(canvas, "canvas.png")  # Save the canvas as an image
    im = Image.open('canvas.png')  # Open the saved image

    # Preprocess the image for character recognition
    im = im.convert('L')  # Convert RGB to grayscale
    im = im.resize((28, 28))  # Resize image to 28x28 pixels
    im = np.array(im)  # Convert to NumPy array
    im = im.reshape(-1, 28 * 28)  # Reshape to (1, 784)
    im = 255 - im  # Invert the colors
    im = im.astype(np.float32)  # Convert to float32

    # Convert to PyTorch tensor
    img_tensor = torch.tensor(im, dtype=torch.float32)

    # Predicting the class
    with torch.no_grad():
        outputs = model(img_tensor)  # Forward pass to get model predictions

        # Get the top 3 predicted classes and their probabilities
        _, top_classes = torch.topk(outputs, 3, dim=1)
        top_probabilities = torch.softmax(outputs, dim=1)[0, top_classes].numpy()

        # Initialize an empty list to store the top recognized characters and their confidence percentages
        top_recognized_chars = []

        # Loop through the top 3 predicted classes
        for i in range(3):
            # Get the class index from the top_classes tensor
            class_index = top_classes[0, i].item()
            # Check if the class index exists in the class_mapping dictionary
            if class_index in class_mapping:
                # Retrieve the recognized character for the class index
                recognized_char = class_mapping[class_index]
                # Get the confidence (probability) for the current prediction
                confidence = top_probabilities[0, i]
                # Calculate the accuracy percentage by multiplying confidence by 100 and rounding to 2 decimal places
                accuracy_percentage = round(confidence * 100, 2)
                # Append a string containing the recognized character and its accuracy percentage to the list
                top_recognized_chars.append(f"{recognized_char} ({accuracy_percentage}%)")

        # Check if there are top recognized characters in the list
        if top_recognized_chars:
            update_result_text(top_recognized_chars)  # Update the result label with recognized characters
        else:
            update_result_text(["No recognized characters found in the mapping"])  # Display a message

# Button Variables.
buttonWidth = 120  # Width of buttons
buttonHeight = 35  # Height of buttons

# Buttons and their respective functions.
buttons = [
    ['Recognize', lambda: classify_handwriting()],  # Button for character recognition
    ['Clear', lambda: clear_all()],  # Button to clear the canvas
]

# Making the buttons
for index, buttonName in enumerate(buttons):
    Button(index * (buttonWidth + 10) + 10, 10, buttonWidth,
           buttonHeight, buttonName[0], buttonName[1])  # Create and add buttons to the objects list

# Canvas
canvas = pygame.Surface(canvasSize)  # Create a surface for drawing on the canvas
canvas.fill((255, 255, 255))  # Fill the canvas with white

# Load the class mapping from the CSV file
def load_class_mapping(csv_file):
    # Define a list of encodings to try when reading the CSV file
    encodings_to_try = ['utf-8', 'latin-1', 'iso-8859-1', 'cp1252']

    # Iterate through the list of encodings and attempt to read the CSV file
    for encoding in encodings_to_try:
        try:
            # Initialize an empty dictionary to store the class mapping
            class_mapping = {}
            # Open the CSV file with the specified encoding
            with open(csv_file, newline='', encoding=encoding) as csvfile:
                csvreader = csv.DictReader(csvfile)
                # Iterate through each row in the CSV file
                for row in csvreader:
                    # Extract the 'index' and 'char' values from the row
                    index = int(row['index'])
                    char = row['char']
                    # Add the mapping of 'index' to 'char' in the class_mapping dictionary
                    class_mapping[index] = char
            # Return the class mapping if successful
            return class_mapping
        # Handle Unicode decoding errors
        except UnicodeDecodeError:
            continue
    # Raise an exception if unable to decode the CSV file with any of the specified encodings
    raise ValueError("Unable to decode the CSV file with any of the specified encodings.")

# Specify the path to your CSV file (e.g., "k49_classmap.csv")
csv_file_path = 'k49_classmap.csv'

# Load the class mapping from the CSV file using the function
class_mapping = load_class_mapping(csv_file_path)

# Game loop.
while True:
    screen.fill((30, 30, 30))  # Fill the screen with a dark gray background
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()  # Quit pygame
            sys.exit()  # Exit the program

    # Drawing the Buttons
    for obj in objects:
        obj.process()  # Process and display buttons

    # Draw the Canvas at the center of the screen
    x, y = screen.get_size()
    screen.blit(canvas, [x/2 - canvasSize[0]/2, y/2 - canvasSize[1]/2])  # Center-align the canvas

    # Display the result label on the right side of the drawing area
    screen.blit(result_label_surface, (x/2 + canvasSize[0]/2 + 10, y/2 - canvasSize[1]/2))

    # Drawing with the mouse
    if pygame.mouse.get_pressed()[0]:  # Check if the left mouse button is pressed
        mx, my = pygame.mouse.get_pos()  # Get the mouse coordinates

        # Calculate Position on the Canvas
        dx = mx - x/2 + canvasSize[0]/2
        dy = my - y/2 + canvasSize[1]/2

        pygame.draw.circle(
            canvas,
            drawColor,
            [dx, dy],
            brushSize,
        )  # Draw a circle on the canvas

    # Reference Dot
    pygame.draw.circle(
        screen,
        drawColor,
        [100, 100],
        brushSize,
    )  # Draw a reference dot on the screen

    pygame.display.flip()  # Update the display
    fpsClock.tick(fps)  # Control the frame rate


SystemExit: 