In [1]:
import cv2 #type: ignore
import numpy as np  #type: ignore
import matplotlib.pyplot as plt  #type: ignore
import matplotlib.image as mpimg  #type: ignore


import tensorflow as tf  #type: ignore
from keras.models import Model # type: ignore
from keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense, Lambda, Activation, BatchNormalization, Dropout # type: ignore
from keras.optimizers import Adam # type: ignore

from sklearn.metrics import confusion_matrix, f1_score
import csv

In [2]:
alphabets = "ABCDEFGHIJKLMNOPQRSTUVWXYZ-' "  # The set of valid characters; label_to_num() uses a character's index in this string as its numeric code
max_str_len = 50                             # Maximum length of input labels (size of the 'gtruth_labels' model input)
num_of_characters = len(alphabets) + 1       # Number of unique characters, plus 1 for CTC pseudo-blank
num_of_timestamps = 64                       # Equals the sequence length of the model's Reshape layer (64); not referenced elsewhere in the visible code

In [3]:
path_to_model_weight = 'trained_model_13_3.h5' # the best model weight
#path_to_model_weight = 'trained_model_12_3.h5' # previous version


# Define the input layer with a shape of (256, 64, 1) for grayscale images.
# preprocess() builds a 64x256 canvas and rotates it by 90 degrees, and predict()
# reshapes it to (1, 256, 64, 1), which is why the first spatial axis here is 256.
input_data = Input(shape=(256, 64, 1), name='input')

# Convolutional Layer 1: 32 filters, (3, 3) kernel, 'same' padding, He normal initialization
inner = Conv2D(32, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(input_data)
inner = BatchNormalization()(inner)  # Batch normalization
inner = Activation('relu')(inner)  # ReLU activation
inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)  # Max-pooling; expected output (128, 32, 32), assuming default conv strides

# Convolutional Layer 2: 64 filters, (3, 3) kernel, 'same' padding, He normal initialization
inner = Conv2D(64, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)  # expected output (64, 16, 64), assuming default conv strides
inner = Dropout(0.3)(inner)

# Convolutional Layer 3: 128 filters, (3, 3) kernel, 'same' padding, He normal initialization
inner = Conv2D(128, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
# Pool only along the second spatial axis so the first axis keeps its length for the sequence model
inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)  # expected output (64, 8, 128), assuming default conv strides
inner = Dropout(0.3)(inner)

# Reshape the output for sequence processing: 64 steps of 1024 features each
# (1024 = 8 * 128 if the expected shapes noted above hold)
inner = Reshape(target_shape=((64, 1024)), name='reshape')(inner)

# Fully Connected Layer 1: 64 units, ReLU activation, He normal initialization
inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)

# Bidirectional LSTM Layers: 256 units, return sequences
inner = Bidirectional(LSTM(256, return_sequences=True), name='lstm1')(inner)
inner = Bidirectional(LSTM(256, return_sequences=True), name='lstm2')(inner)

# Output Layer: one unit per entry of num_of_characters (alphabet + CTC blank), He normal initialization
inner = Dense(num_of_characters, kernel_initializer='he_normal', name='dense2')(inner)
y_pred = Activation('softmax', name='softmax')(inner)  # Softmax activation
# Create the inference model with input and output layers; predict() calls this model
model = Model(inputs=input_data, outputs=y_pred)

# Load the weights file selected at the top of this cell into the inference model
model.load_weights(path_to_model_weight)

def ctc_lambda_func(args):
    """Compute the CTC batch cost; intended to be wrapped in a Lambda layer.

    Args:
        args: A 4-item sequence ordered as (predictions, labels,
            prediction lengths, label lengths).

    Returns:
        The result of ``tf.keras.backend.ctc_batch_cost`` for the batch.
    """
    predictions, true_labels, prediction_lengths, true_lengths = args
    # The 2 is critical here since the first couple outputs of the RNN tend to be garbage
    trimmed_predictions = predictions[:, 2:, :]
    return tf.keras.backend.ctc_batch_cost(
        true_labels, trimmed_predictions, prediction_lengths, true_lengths
    )


# Define the extra inputs needed only for the CTC loss: the true labels
# (max_str_len entries per sample) plus one length value per sample for the
# input sequence and for the label sequence
labels = Input(name='gtruth_labels', shape=[max_str_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')

# Calculate CTC loss using the ctc_lambda_func function, wrapped in a Lambda layer named 'ctc'
ctc_loss = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

# Create the final model that takes input data, true labels, input length, and label length,
# and whose only output is the CTC loss computed by the 'ctc' layer
model_final = Model(inputs=[input_data, labels, input_length, label_length], outputs=ctc_loss)
# Compile the final model with a pass-through loss: the 'ctc' layer already outputs the loss,
# so the compiled loss function simply returns y_pred and ignores y_true.
# The optimizer used is Adam with a learning rate of 0.0001
# NOTE: model_final is not used by the inference/evaluation code visible in this notebook.
model_final.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=Adam(learning_rate=0.0001))

In [4]:
def preprocess(img):
    """Fit a 2-D grayscale image onto a white 64x256 canvas and rotate it.

    The image is cropped to at most 64 rows and 256 columns, pasted into the
    top-left corner of a canvas filled with 255, and the canvas is rotated
    90 degrees clockwise (so the result is 256x64).

    Args:
        img: 2-D array (height, width).

    Returns:
        The rotated float canvas.
    """
    rows, cols = img.shape

    # White background; anything the image does not cover stays 255
    canvas = np.full((64, 256), 255.0)

    # Slicing past the end is a no-op, so this crops only when the image is too big
    clipped = img[:64, :256]

    # rows/cols may exceed the canvas size; the slice clamps to the canvas bounds
    canvas[:rows, :cols] = clipped
    return cv2.rotate(canvas, cv2.ROTATE_90_CLOCKWISE)

In [5]:
def label_to_num(label):
    """Encode a text label as an array of indices into ``alphabets``.

    Characters that are not in ``alphabets`` are encoded as -1
    (the result of ``str.find``).
    """
    codes = []
    for symbol in label:
        codes.append(alphabets.find(symbol))
    return np.array(codes)


def num_to_label(num):
    """Decode a sequence of indices into text using ``alphabets``.

    Entries equal to -1 are skipped; every other entry is looked up in
    ``alphabets``.
    """
    characters = []
    for code in num:
        if code == -1:
            continue
        characters.append(alphabets[code])
    return ''.join(characters)

# Sentinel code meaning "no character": num_to_label() skips entries equal to -1.
# This name is not referenced elsewhere in the visible code.
blank_label = -1

In [6]:
def resize_with_aspect_ratio(image, target_size=(64, 256)):
    """Scale a 2-D image to fit inside ``target_size`` and pad it with white.

    The aspect ratio is preserved. The resized image is placed in the
    top-left corner of a canvas filled with 255.

    Args:
        image: 2-D grayscale array (height, width).
        target_size: (height, width) of the returned canvas.

    Returns:
        Float array of shape ``target_size``.
    """
    (h, w) = image.shape
    target_h, target_w = target_size

    scale = min(target_h / h, target_w / w)  # Largest scale at which both sides still fit

    # int() truncates, so a very thin or very flat image could end up with a
    # 0-pixel side, which cv2.resize rejects. Clamp each side to [1, target].
    new_h = min(target_h, max(1, int(h * scale)))
    new_w = min(target_w, max(1, int(w * scale)))

    # cv2.resize takes the size as (width, height)
    resized_img = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
    final_img = np.ones((target_h, target_w)) * 255  # Blank white image of target size
    final_img[:new_h, :new_w] = resized_img  # Place resized image in top-left corner

    return final_img

In [7]:
def predict(image):
    """Recognise the text in a single image and return it as a string.

    A 3-axis image is converted from BGR to grayscale first. The image is then
    run through preprocess(), scaled by 1/255, fed to the global ``model`` as a
    batch of one, and the output is greedily CTC-decoded.

    Args:
        image: 2-D grayscale array, or 3-axis BGR array.

    Returns:
        The decoded text for the image.
    """
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    normalised = preprocess(image) / 255.0

    # Add batch and channel axes: batch size 1, single channel
    batch = normalised.reshape(1, 256, 64, 1)

    probabilities = model.predict(batch)

    # One length per batch item, each equal to the number of output steps
    sequence_lengths = np.ones(probabilities.shape[0]) * probabilities.shape[1]
    decode_result = tf.keras.backend.ctc_decode(
        probabilities, input_length=sequence_lengths, greedy=True
    )
    decoded = tf.keras.backend.get_value(decode_result[0][0])

    return num_to_label(decoded[0])

In [8]:
def sliding_window_prediction(image, window_size=(64, 256), step_size=128):
    """Run predict() on horizontally sliding windows and concatenate the text.

    The image is padded with white (255) at the bottom/right if it is smaller
    than the window. Windows start at x = 0, step_size, 2*step_size, ... for as
    long as a full-width window still fits.

    Args:
        image: 2-D grayscale array (height, width).
        window_size: (height, width) of each window.
        step_size: Horizontal distance between window starts.

    Returns:
        The predictions of all windows joined without a separator.
    """
    window_h, window_w = window_size
    (h, w) = image.shape

    if h < window_h:  # Pad height if it's smaller than the window
        image = np.pad(image, ((0, window_h - h), (0, 0)), 'constant', constant_values=255)

    if w < window_w:  # Pad width if it's smaller than the window
        image = np.pad(image, ((0, 0), (0, window_w - w)), 'constant', constant_values=255)

    # Use the width AFTER padding. With the pre-padding width, any image
    # narrower than the window produced an empty range and returned ''.
    padded_w = image.shape[1]

    # NOTE(review): columns to the right of the last full window are never
    # predicted when (width - window width) is not a multiple of step_size.
    results = [
        predict(image[:, x:x + window_w])
        for x in range(0, padded_w - window_w + 1, step_size)
    ]

    return ''.join(results)  # Combine predictions from all windows

In [9]:
def pad_image_to_fixed_size(image, target_size=(64, 256)):
    """Crop and pad a 2-D image to exactly ``target_size``.

    The image is cropped to at most ``target_size`` in both dimensions and
    placed in the top-left corner of a canvas filled with 255.

    Args:
        image: 2-D grayscale array (height, width).
        target_size: (height, width) of the returned canvas.

    Returns:
        Float array of shape ``target_size``.
    """
    target_h, target_w = target_size

    # Crop BOTH dimensions. Cropping only the height made any image wider than
    # the target fail with a broadcast ValueError on the assignment below.
    cropped = image[:target_h, :target_w]
    h, w = cropped.shape

    final_img = np.ones(target_size) * 255  # Blank white image
    final_img[:h, :w] = cropped  # Place (cropped) image in top-left corner

    return final_img

In [10]:
def process_large_image(image, part_width=256):
    """Recognise text in a wide image by predicting fixed-width slices.

    The image is cut into consecutive vertical slices of ``part_width``
    columns (the last one may be narrower). Each slice is passed through
    pad_image_to_fixed_size() and predict().

    Args:
        image: 2-D grayscale array (height, width).
        part_width: Width in pixels of each slice.

    Returns:
        The predictions of all slices joined without a separator.
    """
    _, total_width = image.shape

    return ''.join(
        predict(pad_image_to_fixed_size(image[:, left:left + part_width]))
        for left in range(0, total_width, part_width)
    )

In [11]:
def postprocess(image, text):
    """Clean up a recognised string and return it.

    A trailing 'EPTY' is removed (presumably residue of an 'EMPTY' label -
    TODO confirm), then all spaces are removed.

    Args:
        image: Unused; kept so existing calls keep working.
        text: The recognised text.

    Returns:
        The cleaned text. (The function previously computed this value but
        never returned it, so callers always received None.)
    """
    if text.endswith('EPTY'):
        text = text[:-4]

    return text.replace(' ', '')

In [12]:
import csv

def read_ground_truth(csv_file):
    """Load the ground-truth CSV into a dict of image name -> text.

    The first row is treated as a header and skipped. For every other row,
    column 0 is the image name and column 1 is the text, stored with all
    spaces removed. Rows with fewer than two columns (e.g. blank lines) are
    ignored.

    Args:
        csv_file: Path to the CSV file.

    Returns:
        dict mapping image name to space-free ground-truth text; empty if the
        file has no data rows.
    """
    ground_truth_dict = {}
    # newline='' is what the csv module requires so that it can handle line
    # endings (including newlines inside quoted fields) itself.
    with open(csv_file, mode='r', newline='') as file:
        csv_reader = csv.reader(file)
        next(csv_reader, None)  # Skip the CSV header; tolerate an empty file
        for row in csv_reader:
            if len(row) < 2:
                # csv.reader yields [] for blank lines; indexing it would raise
                continue
            image_name = row[0]
            ground_truth_dict[image_name] = row[1].replace(" ", "")  # Remove all spaces
    return ground_truth_dict



In [13]:
def calculate_char_accuracy(recognized_text, ground_truth):
    """Compare two strings position by position and return character metrics.

    A trailing 'EPTY' is stripped from ``recognized_text`` and both strings
    are upper-cased before comparison. Characters are compared at the same
    index (no alignment / edit distance):

    * TP: positions where both strings have the same character.
    * FP: recognised characters that are wrong or beyond the ground truth.
    * FN: ground-truth characters that are wrong or missing.

    Args:
        recognized_text: Text produced by the recogniser.
        ground_truth: Expected text.

    Returns:
        Tuple ``(accuracy, f1, TP, FP, FN)`` where accuracy is a percentage of
        the longer string's length. Any metric whose denominator is zero is
        reported as 0 (this includes accuracy when both strings are empty,
        which previously raised ZeroDivisionError).
    """
    if recognized_text.endswith('EPTY'):
        recognized_text = recognized_text[:-4]
    recognized_text = recognized_text.upper()
    ground_truth = ground_truth.upper()

    # zip stops at the shorter string, i.e. at the overlapping positions
    TP = sum(1 for rec_ch, true_ch in zip(recognized_text, ground_truth) if rec_ch == true_ch)
    # Every recognised / expected character that is not a match is an error
    FP = len(recognized_text) - TP
    FN = len(ground_truth) - TP

    precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    longest = max(len(recognized_text), len(ground_truth))
    accuracy = TP / longest * 100 if longest != 0 else 0

    return accuracy, f1, TP, FP, FN

In [14]:
def plot_confusion_matrix(y_true, y_pred, labels):
    """Draw the confusion matrix of ``y_true`` vs ``y_pred`` as a heat map.

    Args:
        y_true: Sequence of true class values.
        y_pred: Sequence of predicted class values.
        labels: Class values to include, in the order used for rows/columns.
    """
    matrix = confusion_matrix(y_true, y_pred, labels=labels)
    label_count = len(labels)
    tick_positions = np.arange(label_count)

    plt.figure(figsize=(10, 7))
    plt.imshow(matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix CRNN')
    plt.colorbar()
    plt.xticks(tick_positions, labels, rotation=45)
    plt.yticks(tick_positions, labels)
    plt.ylabel('True Labels')
    plt.xlabel('Predicted Labels')

    # Write each count into its cell (x is the column, y is the row)
    for row, col in np.ndindex(label_count, label_count):
        plt.text(col, row, matrix[row, col], ha='center', va='center', color='red')

    plt.tight_layout()
    plt.show()

In [15]:
def plot_accuracy_graph(accuracies, ground_truth_lengths):
    """Plot per-image accuracy against ground-truth string length.

    Args:
        accuracies: Accuracy values (percent), one per image.
        ground_truth_lengths: Ground-truth length for each image, used as x.
    """
    _, axes = plt.subplots(figsize=(10, 6))
    axes.plot(
        ground_truth_lengths,
        accuracies,
        marker='o',
        linestyle='-',
        color='b',
        label='Akurasi per Gambar',
    )
    axes.set_xlabel('Panjang String Ground Truth')
    axes.set_ylabel('Akurasi (%)')
    axes.set_title('Akurasi CRNN')
    axes.grid(True)
    axes.legend()
    plt.show()

In [16]:
def plot_accuracy_vs_iteration(accuracies, title='Akurasi CRNN 30 Char dengan Rata-rata Kumulatif'):
    """Plot accuracy per iteration together with its cumulative average.

    Args:
        accuracies: Accuracy values (percent), one per iteration, in order.
        title: Figure title. Defaults to the title previously hard-coded here;
            pass a different one when evaluating another data set.
    """
    # Cumulative average in one pass (a running total instead of re-summing
    # the whole prefix for every point)
    avg_accuracies = []
    running_total = 0
    for count, value in enumerate(accuracies, start=1):
        running_total += value
        avg_accuracies.append(running_total / count)

    iterations = range(1, len(accuracies) + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(iterations, accuracies, marker='o', linestyle='-', color='g', label='Akurasi per Iterasi')
    plt.plot(iterations, avg_accuracies, marker='', linestyle='--', color='b', label='Rata-rata Kumulatif')
    plt.xlabel('Nomor Iterasi')
    plt.ylabel('Akurasi (%)')
    plt.title(title)
    plt.grid(True)
    plt.legend()
    plt.show()


In [17]:
def plot_f1_graph(f1_scores):
    """Plot the F1 score of each image against its 1-based position.

    Args:
        f1_scores: F1 values, one per image, in evaluation order.
    """
    image_numbers = range(1, len(f1_scores) + 1)

    _, axes = plt.subplots(figsize=(10, 6))
    axes.plot(
        image_numbers,
        f1_scores,
        marker='o',
        linestyle='-',
        color='r',
        label='F1 Score per Gambar',
    )
    axes.set_xlabel('Nomor Gambar')
    axes.set_ylabel('F1 Score')
    axes.set_title('F1 Convidence CRNN')
    axes.grid(True)
    axes.legend()
    plt.show()

In [18]:
ground_truth_file = '../Capital/26-30/GROUND_TRUTH30.csv'  # Replace with the path to your CSV file
ground_truth_dict = read_ground_truth(ground_truth_file)

# Accumulators for the overall confusion matrix and total F1 score
all_true = []
all_pred = []
f1_scores = []

# Accumulators for the per-image accuracy
accuracies = []
ground_truth_lengths = []  # Stores the character length of each ground truth

In [None]:
# Evaluate TEST_1.png .. TEST_10.png and report per-image and overall metrics.
# NOTE: this cell appends to the accumulator lists created in the previous cell;
# re-run that cell first when re-running this one, or results are counted twice.
for i in range(1,11):
    print(f'index {i}')
    print('prepared image to analyse')
    name = f'../Capital/26-30/TEST_{i}.png'  # Replace with the path of the desired image
    image = cv2.imread(name, cv2.IMREAD_GRAYSCALE)  # Read grayscale image
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail here with a clear message instead of an AttributeError later.
        raise FileNotFoundError(f'Could not read image: {name}')

    image_name = f'TEST_{i}.png'
    ground_truth = ground_truth_dict.get(image_name, "").upper()

    # Compute character accuracy and F1 score for this image
    result_text = process_large_image(image).upper().replace(' ', '')
    accuracy, f1, TP, FP, FN = calculate_char_accuracy(result_text, ground_truth)
    f1_scores.append(f1)

    # Pad the shorter of the two character lists with '' PER IMAGE, so that
    # position k of all_true and all_pred always belongs to the same image.
    # Padding only once at the very end shifted every later image's characters
    # against each other after the first length mismatch.
    # NOTE(review): calculate_char_accuracy strips a trailing 'EPTY' before
    # scoring; the text collected here is not stripped.
    true_chars = list(ground_truth)
    pred_chars = list(result_text)
    length_gap = len(true_chars) - len(pred_chars)
    if length_gap > 0:
        pred_chars.extend([''] * length_gap)
    elif length_gap < 0:
        true_chars.extend([''] * -length_gap)
    all_true.extend(true_chars)
    all_pred.extend(pred_chars)

    # Store the per-image accuracy and the ground-truth length
    accuracies.append(accuracy)
    ground_truth_lengths.append(len(ground_truth))

    print(f"Gambar {i} - Akurasi: {accuracy:.2f}% - F1 Score: {f1:.2f} - TP: {TP} - FP: {FP} - FN: {FN}")

assert len(all_true) == len(all_pred), "true/predicted character lists must stay aligned"

# One sorted label list for both the printed and the plotted matrix, covering
# true AND predicted characters. Named cm_labels so it does not overwrite the
# `labels` Keras input defined in the model cell.
cm_labels = sorted(set(all_true) | set(all_pred))

# Compute the overall confusion matrix and weighted F1 score
conf_matrix = confusion_matrix(all_true, all_pred, labels=cm_labels)
f1_total = f1_score(all_true, all_pred, average='weighted')

# Show the confusion matrix and the total F1 score
print("\nConfusion Matrix:")
print(conf_matrix)
print(f"Total F1 Score (Weighted): {f1_total:.2f}")

# Show the confusion matrix as a plot
plot_confusion_matrix(all_true, all_pred, cm_labels)

# Plot accuracy against ground-truth length (disabled)
# plot_accuracy_graph(accuracies, ground_truth_lengths)
plot_accuracy_vs_iteration(accuracies)

plot_f1_graph(f1_scores)



    