In [None]:
# Show which transformers release is installed in this kernel.
import transformers
print(transformers.__version__)

In [None]:
import csv
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel  # import pretrained model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np
from PIL import Image

In [None]:
def read_ground_truth(csv_file):
    """Load the image-name -> ground-truth-text mapping from a CSV file.

    The first row is treated as a header and skipped. Every following row
    supplies the image file name in its first column and the expected text in
    its second column; additional columns are ignored. When an image name
    appears more than once, the last row wins.

    Args:
        csv_file: Path to the ground-truth CSV file.

    Returns:
        dict mapping image file name (str) to ground-truth text (str). The
        dict is empty when the file contains no usable data rows.
    """
    ground_truth_dict = {}
    # newline='' is what the csv module asks for, so that line endings and
    # quoted fields containing line breaks are handled by the reader itself.
    with open(csv_file, mode='r', newline='') as file:
        csv_reader = csv.reader(file)
        # Skip the header; the None default keeps a completely empty file from
        # raising StopIteration.
        next(csv_reader, None)
        for row in csv_reader:
            # Blank lines are returned as [] and malformed lines may hold a
            # single column; neither can supply (name, text), so skip them
            # instead of failing with IndexError.
            if len(row) < 2:
                continue
            ground_truth_dict[row[0]] = row[1]
    return ground_truth_dict

In [None]:
def plot_accuracy_graph(accuracies, ground_truth_lengths):
    """Plot per-image accuracy against the length of each ground-truth string.

    Args:
        accuracies: Accuracy values, one per image (the y-axis is labelled as
            a percentage).
        ground_truth_lengths: Ground-truth string lengths used as x-values,
            in the same order as ``accuracies``.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(
        ground_truth_lengths,
        accuracies,
        marker='o',
        linestyle='-',
        color='b',
        label='Akurasi per Gambar',
    )
    ax.set_xlabel('Panjang String Ground Truth')
    ax.set_ylabel('Akurasi (%)')
    ax.set_title('Akurasi trOCR')
    ax.grid(True)
    ax.legend()
    plt.show()


In [None]:
def plot_accuracy_vs_iteration(accuracies, title='Akurasi trOCR 30 Char dengan Rata-rata Kumulatif'):
    """Plot per-iteration accuracy together with its cumulative average.

    Args:
        accuracies: Sequence of accuracy values, one per processed image, in
            processing order (the y-axis is labelled as a percentage).
        title: Figure title. Defaults to the caption that was previously
            hard-coded; pass a different caption when evaluating another
            dataset.
    """
    # Cumulative mean in a single pass: keep a running total instead of
    # re-summing a growing slice for every point.
    avg_accuracies = []
    running_total = 0
    for count, accuracy in enumerate(accuracies, start=1):
        running_total += accuracy
        avg_accuracies.append(running_total / count)

    # Iterations are numbered from 1 on the x-axis.
    iterations = range(1, len(accuracies) + 1)

    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(iterations, accuracies, marker='o', linestyle='-', color='g', label='Akurasi per Iterasi')
    ax.plot(iterations, avg_accuracies, marker='', linestyle='--', color='b', label='Rata-rata Kumulatif')
    ax.set_xlabel('Nomor Iterasi')
    ax.set_ylabel('Akurasi (%)')
    ax.set_title(title)
    ax.grid(True)
    ax.legend()
    plt.show()

In [None]:
def plot_char_confusion_matrix(y_true_chars, y_pred_chars, labels):
    """Draw a character-level confusion matrix as an annotated heat map.

    Args:
        y_true_chars: Sequence of ground-truth characters.
        y_pred_chars: Sequence of predicted characters, position-aligned with
            ``y_true_chars``.
        labels: Characters that index the rows (true) and columns (predicted)
            of the matrix, in display order.
    """
    cm = confusion_matrix(y_true_chars, y_pred_chars, labels=labels)
    label_count = len(labels)
    tick_positions = np.arange(label_count)

    fig, ax = plt.subplots(figsize=(10, 7))
    heatmap = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.set_title('Confusion Matrix trOCR')
    fig.colorbar(heatmap, ax=ax)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(labels, rotation=45)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(labels)
    ax.set_ylabel('True Labels')
    ax.set_xlabel('Predicted Labels')

    # Write every count into its cell: x is the predicted column, y the true row.
    for true_idx in range(label_count):
        for pred_idx in range(label_count):
            ax.text(pred_idx, true_idx, cm[true_idx, pred_idx], ha='center', va='center', color='red')

    fig.tight_layout()
    plt.show()

In [None]:

def plot_f1_graph(f1_scores):
    """Plot the F1 score of every image in evaluation order.

    Args:
        f1_scores: Sequence of F1 scores, one per image. Images are numbered
            from 1 on the x-axis.
    """
    image_numbers = range(1, len(f1_scores) + 1)

    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(
        image_numbers,
        f1_scores,
        marker='o',
        linestyle='-',
        color='r',
        label='F1 Score per Gambar',
    )
    ax.set_xlabel('Nomor Gambar')
    ax.set_ylabel('F1 Score')
    ax.set_title('F1 Convidence trOCR')
    ax.grid(True)
    ax.legend()
    plt.show()

In [None]:
def calculate_char_accuracy(recognized_text, ground_truth):
    """Score recognized text against its ground truth, position by position.

    The comparison is case-insensitive (both strings are upper-cased) and
    strictly positional: character i of the prediction is compared with
    character i of the ground truth. No alignment is performed, so a single
    inserted or dropped character makes every later position a mismatch.

    Args:
        recognized_text: Text produced by the recognizer.
        ground_truth: Expected text.

    Returns:
        Tuple ``(accuracy, f1, TP, FP, FN)`` where
        - ``accuracy`` is TP divided by the longer of the two lengths, in percent;
        - ``f1`` is the harmonic mean of precision and recall (0 when undefined);
        - ``TP`` counts positions where both strings hold the same character;
        - ``FP`` counts recognized characters that are wrong or lie beyond the
          end of the ground truth;
        - ``FN`` counts ground-truth characters that are wrong or missing in
          the recognized text.
        When both strings are empty there is nothing to compare and every
        value is reported as 0, matching the zero-denominator guards used for
        precision, recall and F1.
    """
    recognized_text = recognized_text.upper()
    ground_truth = ground_truth.upper()

    # zip stops at the shorter string, so only overlapping positions are compared.
    TP = sum(1 for predicted, expected in zip(recognized_text, ground_truth) if predicted == expected)
    # Every character that is not a hit is either a mismatch or lies past the
    # end of the other string, so both error counts follow directly from TP.
    FP = len(recognized_text) - TP
    FN = len(ground_truth) - TP

    precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    # Guard the denominator: two empty strings previously raised ZeroDivisionError.
    longest = max(len(recognized_text), len(ground_truth))
    accuracy = TP / longest * 100 if longest != 0 else 0.0

    return accuracy, f1, TP, FP, FN

In [None]:
# Initialise TrOCR: load the processor and the encoder-decoder model from the
# same pretrained checkpoint, named once so the two cannot drift apart.
TROCR_CHECKPOINT = 'microsoft/trocr-base-handwritten'
processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)

def recognize_handwriting(image_path):
    """Recognize the handwriting in one image file and return it as text.

    Relies on the module-level ``processor`` and ``model``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The decoded text of the first generated sequence, with special tokens
        removed.
    """
    # Convert to RGB before handing the image to the processor.
    rgb_image = Image.open(image_path).convert("RGB")
    model_inputs = processor(images=rgb_image, return_tensors="pt")

    # Generation only: no gradients are needed.
    with torch.no_grad():
        token_ids = model.generate(model_inputs.pixel_values)

    # Turn the generated ids back into text.
    return processor.decode(token_ids[0], skip_special_tokens=True)

In [None]:
def evaluate_model(csv_file, image_folder, image_indices=range(1, 11),
                   show_accuracy_by_length=False):
    """Evaluate the recognizer on the ``TEST_<i>.png`` images and plot the results.

    For every index in ``image_indices`` the name ``TEST_<i>.png`` is looked up
    in the ground-truth CSV; names without an entry are skipped. Each remaining
    image is recognized, compared case-insensitively with its ground truth and
    reported on stdout. Afterwards the character confusion matrix, the
    per-image F1 scores and the per-image accuracy (with cumulative average)
    are plotted.

    Args:
        csv_file: Path to the ground-truth CSV, read with read_ground_truth.
        image_folder: Folder that contains the test images.
        image_indices: Iterable of image numbers to evaluate. Defaults to
            1..10, the range that was previously hard-coded.
        show_accuracy_by_length: When True, additionally plot accuracy against
            ground-truth length (previously only reachable by un-commenting
            a line in this function).

    Raises:
        ValueError: If none of the requested images has a ground-truth entry,
            so there is nothing to evaluate or plot.
    """
    # Filler for the shorter string of a pair so both character sequences stay
    # position-aligned. NOTE(review): a literal '-' in the text is
    # indistinguishable from this filler in the confusion matrix.
    pad_char = '-'

    ground_truth_dict = read_ground_truth(csv_file)
    accuracies = []
    f1_scores = []
    ground_truth_lengths = []
    y_true_chars = []  # ground-truth characters of all images, padded per image
    y_pred_chars = []  # predicted characters of all images, padded per image

    for i in image_indices:
        image_name = f"TEST_{i}.png"
        if image_name not in ground_truth_dict:
            continue

        image_path = f"{image_folder}/{image_name}"
        # Upper-case both sides so the printed report and the confusion matrix
        # use the same case-insensitive view as the metrics.
        recognized_text = recognize_handwriting(image_path).upper()
        ground_truth = ground_truth_dict[image_name].upper()

        print(f"Nama Gambar: {image_name}")
        print(f"Ground Truth: {ground_truth}")
        print(f"Teks yang Dikenali: {recognized_text}")

        # Only accuracy and F1 are plotted; the raw TP/FP/FN counts are not needed here.
        accuracy, f1, _tp, _fp, _fn = calculate_char_accuracy(recognized_text, ground_truth)
        accuracies.append(accuracy)
        f1_scores.append(f1)
        ground_truth_lengths.append(len(ground_truth))

        # Pad the shorter string of this pair so that the next image starts at
        # the same offset in both character lists.
        pair_length = max(len(ground_truth), len(recognized_text))
        y_true_chars.extend(ground_truth.ljust(pair_length, pad_char))
        y_pred_chars.extend(recognized_text.ljust(pair_length, pad_char))

    if not accuracies:
        raise ValueError(
            f"None of the requested TEST_<i>.png images has an entry in {csv_file!r}; "
            "nothing to evaluate."
        )

    if show_accuracy_by_length:
        plot_accuracy_graph(accuracies, ground_truth_lengths)

    # Use characters from both sides as labels: a misprediction into a
    # character that never occurs in the ground truth must still get a column,
    # otherwise that error is left out of the matrix.
    labels = sorted(set(y_true_chars) | set(y_pred_chars))
    plot_char_confusion_matrix(y_true_chars, y_pred_chars, labels)

    plot_f1_graph(f1_scores)
    plot_accuracy_vs_iteration(accuracies)

In [None]:
# Example run: evaluate the model on one dataset folder.
csv_file = "../Capital/26-30/GROUND_TRUTH30.csv"  # Replace with the path to the ground-truth CSV file
image_folder = "../Capital/26-30/"  # Replace with the path to the folder of images to evaluate
evaluate_model(csv_file=csv_file, image_folder=image_folder)
