<a href="https://colab.research.google.com/github/meliksahb/Design-of-Intelligent-Machines-ME536-/blob/main/CharacterDetectionCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping
from PIL import Image, ImageDraw, ImageFont
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Define characters and font
# A character's position in CHARZ is the integer class label used for training.
CHARZ = ['B', 'U', 'D', 'R', 'K', 'A', 'E', '6', 'N']
FONT_PATH = '/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf'
IMG_SIZE = 30  # Cell size
fR = 0.8  # font ratio to cell size
nB = int(IMG_SIZE*(1-fR))  # exclusive upper bound of the random x/y offset (pixels)
# Generate synthetic data
def create_training_data(font_path, charz, img_size=30, num_samples_per_char=1000):
    """
    Render random single-character images and return them with integer labels.

    Every sample is a white ``img_size`` x ``img_size`` canvas with one black
    character drawn at a random font size and a random top-left offset, then
    converted to grayscale.

    Args:
        font_path: Path to the TrueType font used for rendering.
        charz: Sequence of characters; a character's index is its label.
        img_size: Side length of the square images in pixels (at least 14 so
            that the font-size range ``[12, img_size - 1)`` is not empty).
        num_samples_per_char: Number of images generated per character.

    Returns:
        (training_data, labels): ``training_data`` is a float array of shape
        ``(N, img_size, img_size, 1)`` scaled to [0, 1]; ``labels`` is an
        integer array of length N holding indices into ``charz``.

    Raises:
        ValueError: If ``img_size`` is too small for the font-size range.
    """
    if img_size < 14:
        raise ValueError("img_size must be at least 14")

    # Derive the jitter bound from the requested size instead of the
    # module-level IMG_SIZE/nB so a non-default img_size stays consistent.
    max_offset = int(img_size * (1 - fR))

    # Loading a TrueType face is expensive; only a few sizes are ever used.
    font_cache = {}

    training_data = []
    labels = []
    for idx, char in enumerate(charz):
        for _ in range(num_samples_per_char):
            # Create an image with a single character
            img = Image.new('RGB', (img_size, img_size), (255, 255, 255))
            draw = ImageDraw.Draw(img)

            # Random draws stay in the order: font size, x offset, y offset.
            size = int(np.random.randint(12, img_size - 1))
            font = font_cache.get(size)
            if font is None:
                font = font_cache[size] = ImageFont.truetype(font_path, size=size)
            x_offset = int(np.random.rand() * max_offset)
            y_offset = int(np.random.rand() * max_offset)
            draw.text((x_offset, y_offset), char, font=font, fill=(0, 0, 0))

            # Convert to grayscale; normalization happens once on the full array
            training_data.append(np.array(img.convert('L')))
            labels.append(idx)

    training_data = np.array(training_data).reshape(-1, img_size, img_size, 1) / 255.0
    labels = np.array(labels)
    return training_data, labels

# Generate training data
# Called with the function defaults for image size and samples per character.
X, y = create_training_data(FONT_PATH, CHARZ)

# Split into train, validation, and test sets
# 80% train; the held-out 20% is then halved into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Build the CNN
def build_cnn(input_shape=(30, 30, 1), num_classes=len(CHARZ)):
    """
    Create and compile the character classifier.

    Two Conv2D + MaxPooling2D stages feed a 128-unit dense layer and a
    softmax output with ``num_classes`` units. The model is compiled with
    Adam and sparse categorical cross-entropy, so labels are plain integers.

    Args:
        input_shape: Shape of one input image, (height, width, channels).
        num_classes: Number of output classes.

    Returns:
        The compiled Sequential model.
    """
    feature_layers = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
    ]
    head_layers = [
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]

    network = Sequential(feature_layers + head_layers)
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Initialize the CNN
model = build_cnn()

# Train the CNN
# Early stopping with a patience of 5 epochs; the best weights seen during
# training are restored when it triggers.
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    callbacks=[early_stopping]
)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

# # Display training history
# plt.plot(history.history['accuracy'], label='Train Accuracy')
# plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
# plt.xlabel('Epochs')
# plt.ylabel('Accuracy')
# plt.legend()
# plt.show()

# Save the model
# Written relative to the current working directory; the inference cell
# loads it from '/content/character_recognition_model.h5'.
model.save('character_recognition_model.h5')

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from PIL import ImageColor


# Define character set and colors
# CHARZ must keep the same order as in the training cell: predicted class
# indices are mapped back to characters through this list.
CHARZ = ['B', 'U', 'D', 'R', 'K', 'A', 'E', '6', 'N']
# Color name used to outline and recolor each recognized character.
CHAR_COLORS = {
    'B': "pink", 'U': "yellow", 'D': "green", 'R': "orange",
    'K': "red", 'A': "cyan", 'E': "magenta", '6': "blue", 'N': "purple"
}
FONT_PATH = "/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf"  # Adjust as needed

# Load the saved CNN model
# NOTE(review): the training cell saves to the relative path
# 'character_recognition_model.h5'; this absolute path only matches when the
# working directory at save time was /content - confirm for non-Colab runs.
model = load_model('/content/character_recognition_model.h5')



# Function to preprocess test image and extract cells
def preprocess_gen_image(img, cell_size=30):
    """
    Split an RGB image into grid cells and keep the ones that contain ink.

    The image is converted to grayscale and scanned in row-major order in
    steps of ``cell_size``. A cell is kept when more than 10 of its pixels
    are darker than pure white (255).

    Only complete ``cell_size`` x ``cell_size`` cells are considered: a
    trailing strip narrower than one cell (image size not a multiple of
    ``cell_size``) is ignored, because partial cells cannot be stacked into
    the fixed-size batch returned here.

    Args:
        img: RGB image (PIL image or array-like convertible by ``np.array``).
        cell_size: Side length of one grid cell in pixels.

    Returns:
        (cells, locations): ``cells`` is a float array of shape
        ``(N, cell_size, cell_size, 1)`` scaled to [0, 1] (N may be 0);
        ``locations`` is a list of the ``(x, y)`` top-left corners of the
        kept cells, in the same order.
    """
    img_array = np.array(img)
    gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
    height, width = gray.shape

    cells = []
    locations = []
    # "+ 1" keeps the last complete cell and drops any partial one after it.
    for y in range(0, height - cell_size + 1, cell_size):
        for x in range(0, width - cell_size + 1, cell_size):
            cell = gray[y:y+cell_size, x:x+cell_size]
            if np.count_nonzero(cell < 255) > 10:  # Check for non-white pixels
                cells.append(cell)
                locations.append((x, y))

    # An empty list reshapes cleanly to (0, cell_size, cell_size, 1).
    cells = np.array(cells).reshape(-1, cell_size, cell_size, 1) / 255.0
    return cells, locations


# Function to annotate results
def annotate_image(img, locations, predictions, char_locations, cell_size=30):
    """
    Outline and recolor each recognized character and record its cell center.

    ``img`` is modified in place. For every cell an outline in the
    character's color is drawn, and every pixel of the cell whose R, G and B
    values are all below 200 is painted in that color.

    Args:
        img: RGB PIL image to annotate (mutated).
        locations: ``(x, y)`` top-left corners of the cells.
        predictions: Class indices into ``CHARZ``, parallel to ``locations``.
        char_locations: Dict mapping character -> list of cell centers;
            each center ``(x + cell_size / 2, y + cell_size / 2)`` is
            appended to the list of its predicted character. A missing key
            is created on demand.
        cell_size: Side length of one grid cell in pixels.
    """
    draw = ImageDraw.Draw(img)

    for (x, y), pred_idx in zip(locations, predictions):
        char = CHARZ[pred_idx]
        color = CHAR_COLORS[char]
        # Color the rectangle without overwriting the character
        draw.rectangle([x, y, x + cell_size, y + cell_size], outline=color, width=2)

        # Convert color name to RGB
        rgb_color = ImageColor.getrgb(color)

        # Work on a copy of the cell: PIL pixels cannot be edited through a
        # NumPy view, so the cell is cropped, edited, and pasted back.
        roi_array = np.array(img.crop((x, y, x + cell_size, y + cell_size)))

        # Identify the pixels that belong to the character (dark pixels)
        mask = np.all(roi_array < [200, 200, 200], axis=-1)  # Adjust threshold as needed
        roi_array[mask] = rgb_color

        img.paste(Image.fromarray(roi_array), (x, y))

        # Store the character's location (cell center)
        center = (x + cell_size / 2, y + cell_size / 2)
        char_locations.setdefault(char, []).append(center)

# # Function to test the CNN on saved images
# def test_saved_images(image_array, output_folder):

#     char_locations = {char: [] for char in CHARZ}

#     if not os.path.exists(output_folder):
#         os.makedirs(output_folder)
#     image = np.load(image_array)
#     img = Image.fromarray(image.astype('uint8'))
#     cells, locations = preprocess_gen_image(img)

#     # Predict characters using the CNN
#     predictions = np.argmax(model.predict(cells), axis=1)

#     annotate_image(img, locations, predictions, char_locations)

#     # Save the annotated image
#     output_path = os.path.join(output_folder, "annotated_image.png")
#     img.save(output_path)
#     print(f"Processed and saved: {output_path}")

#     return char_locations, img

    # for img_path in image_paths:
          # char_locations = {char: [] for char in CHARZ}

    #     # Load the test image
    #     img = np.load(img_path)
    #     img = Image.fromarray(img.astype('uint8'))  # Convert to PIL Image

    #     # Preprocess the image to extract cells and their locations
    #     cells, locations = preprocess_gen_image(img)

    #     # Predict characters using the CNN
    #     predictions = np.argmax(model.predict(cells), axis=1)

    #     annotate_image(img, locations, predictions, char_locations)

    #     # Save the annotated image
    #     output_path = os.path.join(output_folder, os.path.basename(img_path).replace('.npy', '_annotated.png'))
    #     img.save(output_path)
    #     print(f"Processed and saved: {output_path}")
    #     # print(len(char_locations['K']))
    #     return char_locations, annotate_image

# # Paths to test images
# image_folder = '/content/'
# image_paths = [os.path.join(image_folder, fname) for fname in os.listdir(image_folder) if fname.endswith('.npy')]

# # Output folder for annotated images
# output_folder = "/content/"

# # # Test on the saved images
# # test_saved_images(image_paths, output_folder)

# img = '/content/simpleF.npy'
# test_saved_images(img, output_folder)


In [None]:
import math
def distance(p1, p2):
    """Return the straight-line distance between 2D points p1 and p2."""
    delta_x = p2[0] - p1[0]
    delta_y = p2[1] - p1[1]
    return math.hypot(delta_x, delta_y)

def vector(p1, p2):
    """Return the displacement from p1 to p2 as an (x, y) tuple."""
    delta_x = p2[0] - p1[0]
    delta_y = p2[1] - p1[1]
    return delta_x, delta_y

def dot_product(v1, v2):
    """Return the scalar (dot) product of two 2D vectors."""
    x_term = v1[0] * v2[0]
    y_term = v1[1] * v2[1]
    return x_term + y_term

def is_perpendicular(p1, p2, p3, tol=1e-5):
    """
    Tell whether the corner at p2, between the rays p2->p1 and p2->p3,
    is a right angle.

    The test is on the raw dot product of the two rays: its absolute value
    must be below ``tol`` (an absolute tolerance, not an angle).
    """
    ray_to_p1 = vector(p2, p1)
    ray_to_p3 = vector(p2, p3)
    dot = dot_product(ray_to_p1, ray_to_p3)
    return abs(dot) < tol

def is_square(kpt, apt, rpt, ept, dist_tol=1e-5, angle_tol=1e-5):
    """
    Decide whether K, A, R, E, visited in the order K->A->R->E->K, are the
    corners of a square.

    Two conditions must hold:
    1) every side differs from the mean side length by at most ``dist_tol``
    2) the corner at each of A, R, E and K passes ``is_perpendicular``
       with tolerance ``angle_tol``
    """
    corners = (kpt, apt, rpt, ept)

    # Side i joins corner i to the next corner, wrapping from E back to K.
    sides = [distance(corners[i], corners[(i + 1) % 4]) for i in range(4)]
    mean_side = sum(sides) / 4.0

    if any(abs(side - mean_side) > dist_tol for side in sides):
        return False

    # Corners are checked in the order A, R, E, K; index -1 wraps to E.
    return all(
        is_perpendicular(corners[i - 1], corners[i], corners[(i + 1) % 4], angle_tol)
        for i in (1, 2, 3, 0)
    )

def find_best_square(k_points, a_points, r_points, e_points,
                     dist_tol=1e-5, angle_tol=1e-5):
    """
    Search the K, A, R, E candidates for the square with the longest side.

    A combination counts as a square when ``is_square`` accepts it; the side
    length of a square is ``distance(K, A)``. Among squares, the first one
    found with a strictly longer side wins, scanning K then A then R then E
    in input order.

    The search prunes instead of testing every 4-tuple:
    - a (K, A) pair whose side is not longer than the current best can never
      replace it, so its R/E candidates are skipped entirely;
    - an R that does not form a right angle at A cannot pass ``is_square``
      (that corner is one of its checks), so E is only scanned for the
      R candidates that do.
    Neither rule changes which square is returned.

    Args:
        k_points, a_points, r_points, e_points: Re-iterable sequences of
            ``(x, y)`` points for each letter.
        dist_tol: Side-length tolerance forwarded to ``is_square``.
        angle_tol: Right-angle tolerance forwarded to ``is_square`` and
            ``is_perpendicular``.

    Returns:
        The tuple ``(kpt, apt, rpt, ept)`` of the best square, or ``None``
        when no square (with non-zero side) exists.
    """
    best_square_pts = None
    best_side_len = 0.0

    for kpt in k_points:
        for apt in a_points:
            # Use distance(K, A) as side length
            side_len = distance(kpt, apt)
            # Written as "not >" so a NaN side is handled like a failed
            # "side_len > best_side_len" comparison.
            if not side_len > best_side_len:
                continue

            completion = _complete_square(kpt, apt, r_points, e_points,
                                          dist_tol, angle_tol)
            if completion is not None:
                best_side_len = side_len
                best_square_pts = (kpt, apt) + completion

    return best_square_pts


def _complete_square(kpt, apt, r_points, e_points, dist_tol, angle_tol):
    """Return the first (rpt, ept) that turns side K->A into a square, or None."""
    for rpt in r_points:
        # Cheap necessary condition: right angle at A between A->K and A->R.
        if not is_perpendicular(kpt, apt, rpt, angle_tol):
            continue
        for ept in e_points:
            if is_square(kpt, apt, rpt, ept, dist_tol, angle_tol):
                return rpt, ept
    return None

def draw_square_pil(pil_image, square_pts, color='red', width=3):
    """
    Outline the square K->A->R->E->K on ``pil_image`` with four line segments.

    ``square_pts`` holds exactly four ``(x, y)`` points in the order
    K, A, R, E. Coordinates are truncated to integers before drawing.
    The image is modified in place.
    """
    pen = ImageDraw.Draw(pil_image)
    kpt, apt, rpt, ept = square_pts

    # Integer pixel coordinates for every corner, in drawing order
    corners = [(int(pt[0]), int(pt[1])) for pt in (kpt, apt, rpt, ept)]

    # Pair each corner with its successor; the last pair closes E back to K
    successors = corners[1:] + corners[:1]
    for start, end in zip(corners, successors):
        pen.line([start, end], fill=color, width=width)

In [None]:
def SearchInAlphabetSoup(img, txt='KARE', cell_size=30):
    """
    Recognize the characters in a letter-grid image and, for 'KARE', mark
    the largest square formed by the centers of a K, A, R and E.

    Steps:
    - the array is converted to a new PIL image (the input is not modified);
    - non-empty grid cells are extracted and classified with the CNN;
    - every recognized character is outlined and recolored;
    - if ``txt`` is 'KARE' (case-insensitive), the largest K->A->R->E square
      is searched for and, when found, drawn in lime.
    For any other ``txt`` only the character annotation is performed.

    Args:
        img: Image as a NumPy array (converted with ``astype('uint8')``).
        txt: Word to search for; only 'KARE' triggers the square search.
        cell_size: Side length of one grid cell in pixels. It is passed to
            both the cell extraction and the annotation; it has to match
            the input size the loaded model expects.

    Returns:
        The annotated PIL image.
    """
    # Convert the input array to PIL
    pil_img = Image.fromarray(img.astype('uint8'))

    # Preprocess => get cells + their top-left corners
    cells, locations = preprocess_gen_image(pil_img, cell_size=cell_size)

    # We'll store locations for each recognized character
    char_locations = {c: [] for c in CHARZ}

    # Skip the CNN entirely for a blank image: there is nothing to classify
    # and an empty batch should not be sent to model.predict.
    if len(locations) > 0:
        # CNN predictions
        predictions = np.argmax(model.predict(cells), axis=1)

        # Annotate (color) each detected character in the PIL image
        annotate_image(pil_img, locations, predictions, char_locations,
                       cell_size=cell_size)

    # If the user wants 'KARE', attempt to find the square
    if txt.upper() == 'KARE':
        k_centers = char_locations.get('K', [])
        a_centers = char_locations.get('A', [])
        r_centers = char_locations.get('R', [])
        e_centers = char_locations.get('E', [])

        # Only try if we have at least one of each
        if k_centers and a_centers and r_centers and e_centers:
            best_sq = find_best_square(k_centers, a_centers, r_centers, e_centers)
            if best_sq is not None:
                draw_square_pil(pil_img, best_sq, color='lime', width=3)
                print("KARE square found:", best_sq)
            else:
                print("No valid KARE square found.")
        else:
            print("Cannot form KARE because one or more letters are missing.")
    else:
        # If you want to handle other words differently, do so here.
        print(f"No special square detection for '{txt}'.")

    # Return the annotated PIL image
    return pil_img

In [None]:
# test codes -to be added to the end of your code
# the SearchInAlphabetSoup() function you wrote will be called several times
# during this test.

# import for imread, just in case
import matplotlib.pyplot as plt
from matplotlib.image import imread
from timeit import default_timer as timer
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import random

# get the necessary file
!rm *.jpg 2>/dev/null
!wget https://raw.githubusercontent.com/bugrakoku/data4all/main/crashed.jpg



def GenImage(numLetters = 50, fixedFontSize = True, xCount = 15, yCount = 10, pos = None, cvals = None ):
    '''
    Input:
        numLetters = maximum number of letters (a random position that is
                     already occupied is skipped, so fewer may be drawn)
        fixedFontSize = if True every letter uses the same font size and gets
                        a small random position offset; if False every letter
                        gets a random font size and no offset
        xCount, yCount = number of columns and rows of the letter grid
        pos and cvals = pre-allocated [row, column] positions and the
                        characters to print there; lists passed in are
                        extended in place with the random letters. When
                        omitted, a fresh empty list is used for each call.

    Output:
        image as numpy array that contains letters randomly located
    '''
    # Defaults are None rather than []: a list default would be created once
    # and keep the letters appended by earlier calls.
    if pos is None:
        pos = []
    if cvals is None:
        cvals = []

    # in the assignment assessment following will not change in test images
    fontname = '/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf'
    cellSize = 30 #cell within which a single char will be printed
    BackColor = (255,255, 255) # back color
    ForeColor = (0, 0, 0) # font color
    charz = ['B', 'U', 'D', 'R', 'K', 'A', 'E', '6', 'N']
    # in the assignment assessment variable above will not change in generating test images
    fR = 0.8 # font ratio to cell size
    # generate an empty list to be filled in randomly
    #charmap = [['' for i in range(xCount)] for j in range(yCount)] # assign

    fontsize = int(cellSize * fR) # fixed font size
    nB = int(cellSize*(1-fR)) # location noise

    img = Image.new('RGB', (xCount*cellSize,yCount*cellSize), BackColor ) # blank image
    imgPen = ImageDraw.Draw(img) # pen to draw on the blank image

    # first generate random positions over pos
    # note that pos might be already partially filled in
    for _ in range(len(pos), numLetters):
        x = np.random.randint(0, xCount)
        y = np.random.randint(0, yCount)
        if [y,x] not in pos: # an occupied cell is skipped, not retried
            pos.append([y,x])
            cvals.append(random.choice(charz))
    print(f'size: {len(pos)}:{len(cvals)}')
    # now that positions are determined, print the letters in them
    for ([y,x], txt) in zip(pos, cvals):
        if not fixedFontSize: # if set so, select a random font size
            fontsize = np.random.randint(12, cellSize-1)
            dx, dy = 0, 0 # when scaled, position does not change
        else: # add some location noise
            dx = int(np.random.rand()* nB)
            dy = int(np.random.rand()* nB)
        font = ImageFont.truetype(fontname, fontsize) # font instance created
        imgPen.text((x * cellSize + dx, y * cellSize + dy), txt, font=font, fill=ForeColor) # write the character to blank image


    return np.array(img) # finally return image as an numpy array

# Test case names, grouped by difficulty; PerformTest loads '<name>.npy'
# for every name it is given.
test_basic = ['simplest']
test_more = ['simple', 'test50', 'test100']
test_pro = ['test150', 'test250', 'test400', 'testInsane']
test_all = test_basic + test_more + test_pro
# the following are variable font size cases
test_basicF = ['simplestF']
test_moreF = ['simpleF', 'test50F', 'test100F']
test_ProF = ['test150F', 'test250F', 'test400F', 'testInsaneF']
test_allF = test_basicF + test_moreF + test_ProF

def PerformTest(cases = ['simplest'], figSize = (25,25)):
    '''
    Run SearchInAlphabetSoup for every shape on every test case and plot
    the results.

    Input:
        cases = names of the test cases; '<case>.npy' is loaded for each
        figSize = size of the matplotlib figure holding all result images

    For each case and each of the shapes 'KARE', 'BUDUR' and '6KENAR' the
    search is timed and its result image collected. If the search raises,
    the traceback is printed and the 'crashed.jpg' image is shown in place
    of the result, so one failure does not stop the remaining tests.
    '''
    import traceback  # local import: only needed for failure reporting

    shapes = ['KARE', 'BUDUR', '6KENAR']
    crash = imread('crashed.jpg')
    imgz = []
    imgTitles = []
    for case in cases:
        img = np.load(f'{case}.npy')
        for shape in shapes:
            startTime = timer()
            try:
                print(f'Searching for {shape} in {case} ')
                imgR = SearchInAlphabetSoup(img.copy(), shape)
                imgz.append(imgR)
            except Exception:
                # Keep the best-effort behavior (show the crash image) but
                # report what went wrong instead of hiding it. Exception,
                # not a bare except, so Ctrl-C still interrupts the run.
                traceback.print_exc()
                imgz.append(crash)
            endTime = timer()
            imgTitles.append(f'{shape} in {case}: {round(endTime-startTime, 3)} sec.')
            print(imgTitles[-1])

    # nothing to plot (and subplots cannot create zero rows)
    if not imgz:
        return

    # finally plot the results
    # squeeze=False always returns a 2D array of axes, even for one image
    _fig, axz = plt.subplots(len(imgz), 1, figsize=figSize, squeeze=False)
    for ax, image, title in zip(axz.ravel(), imgz, imgTitles):
        ax.imshow(image)
        ax.set_title(title)

# make sure the crashed image shows up
# plt.imshow(plt.imread('crashed.jpg'))
# plt.title('just checking the crash.jpg image... no worries yet...')

In [None]:
# fixed size font test
# if you trust your code, test them all together
# or change the following to run them one by one; try it a few times and you will get to the bottom of it
PerformTest(test_basic)
#PerformTest(test_all)

In [None]:
# variable sized font tests
# runs every case listed in test_allF
PerformTest(test_allF)