In [53]:
import os
import csv
import requests
import base64
import re
import Levenshtein

In [54]:
# === BASIC SETTINGS ===
# Each setting may be overridden with an environment variable (OCR_* names
# below) so the notebook can run on another machine without editing this cell.
# When a variable is not set, the original hard-coded value is used.

# Directory that holds the test images to run OCR on.
FOLDER_GAMBAR = os.environ.get(
    "OCR_FOLDER_GAMBAR",
    "C:\\Users\\User\\assignment\\uas\\UAS_RAFLI\\Optical-Character-Recognition-OCR-with-VLM\\archive\\Indonesian License Plate Recognition Dataset\\images\\test",
)
# CSV file with the manually written labels (columns "image", "ground_truth").
LABEL_MANUAL = os.environ.get("OCR_LABEL_MANUAL", "label.csv")
# CSV file the per-image predictions and CER values are written to.
HASIL_EKSPOR = os.environ.get("OCR_HASIL_EKSPOR", "results.csv")
# Chat-completions endpoint that receives the OCR requests.
LMSTUDIO_API = os.environ.get("OCR_LMSTUDIO_API", "http://localhost:1234/v1/chat/completions")
# Model identifier sent in the request payload.
MODEL_YANG_DIPAKAI = os.environ.get("OCR_MODEL", "google/gemma-3-12b")

In [55]:
# === LOAD LABEL DATA ===
def ambil_ground_truth(file_csv):
    """Load the ground-truth labels from a CSV file.

    The CSV must have the header columns ``image`` and ``ground_truth``.
    Each label is upper-cased and reduced to the characters A-Z and 0-9.

    Args:
        file_csv: Path of the label CSV file.

    Returns:
        dict mapping the stripped image file name to its normalised label.
        Rows without an image name are skipped; a row whose label cell is
        missing maps to an empty string.

    Raises:
        KeyError: if the header lacks the ``image`` or ``ground_truth`` column.
        OSError: if the file cannot be opened.
    """
    peta_label = {}
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order mark;
    # without it a BOM ends up glued to the first header name and the
    # "image" lookup below fails.
    with open(file_csv, newline='', encoding="utf-8-sig") as csvfile:
        for baris in csv.DictReader(csvfile):
            # DictReader fills cells missing from a short row with None.
            nama_file = (baris["image"] or "").strip()
            if not nama_file:
                continue
            label_mentah = (baris["ground_truth"] or "").strip().upper()
            peta_label[nama_file] = re.sub(r'[^A-Z0-9]', '', label_mentah)
    return peta_label

In [56]:
# === CONVERT IMAGE TO BASE64 ===
def gambar_ke_base64(jalur_gambar):
    """Return the content of the file at ``jalur_gambar`` as base64 text.

    The file is read in binary mode; the base64 bytes are decoded to ``str``
    using UTF-8.
    """
    with open(jalur_gambar, mode="rb") as berkas:
        isi_mentah = berkas.read()
    terenkode = base64.b64encode(isi_mentah)
    return str(terenkode, "utf-8")

In [57]:
# === COMPUTE CHARACTER ERROR RATE ===
def hitung_cer(asli, prediksi):
    """Character error rate of ``prediksi`` against the reference ``asli``.

    The rate is the Levenshtein distance between the two strings divided by
    the length of the reference, rounded to three decimals. An empty (or
    otherwise falsy) reference yields 1.0.
    """
    panjang_acuan = len(asli) if asli else 0
    if panjang_acuan == 0:
        return 1.0
    jumlah_edit = Levenshtein.distance(asli, prediksi)
    rasio = jumlah_edit / panjang_acuan
    return round(rasio, 3)


In [58]:
# === OCR REQUEST TO LM STUDIO ===
def kirim_ke_lmstudio(img_b64, timeout=(10, 300)):
    """Send one base64-encoded image to the chat-completions endpoint for OCR.

    Args:
        img_b64: Image content as a base64 string (see ``gambar_ke_base64``).
        timeout: Value passed to ``requests.post`` as ``timeout`` - either a
            single number of seconds or a ``(connect, read)`` tuple. The
            default gives up on connecting after 10 s and on a silent server
            after 300 s, so an unresponsive server cannot block the run
            indefinitely.

    Returns:
        The stripped text of the first choice's message, or a string starting
        with ``"ERROR: "`` followed by the exception text when the request or
        the response parsing fails. This function does not raise for such
        failures; callers detect them by the ``"ERROR"`` prefix.
    """
    header = {"Content-Type": "application/json"}
    # Prompt text is sent to the model verbatim (Indonesian on purpose).
    instruksi = (
        "Kamu adalah model OCR yang dirancang khusus untuk mengenali plat nomor kendaraan Indonesia.\n"
        "Hanya berikan isi plat nomor sebagai satu string tanpa simbol atau penjelasan tambahan.\n"
        "Format bisa berupa kombinasi huruf dan angka, seperti: B1234XYZ, D 123 AB, atau AB1234CD.\n"
        "Balas hanya dengan isi plat nomor, jangan beri komentar atau kalimat tambahan."
    )
    payload = {
        "model": MODEL_YANG_DIPAKAI,
        "messages": [
            {"role": "system", "content": instruksi},
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        # NOTE(review): the data URL always declares image/jpeg,
                        # whatever the actual format of the encoded image is.
                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}
                    },
                    {
                        "type": "text",
                        "text": "Tolong berikan hanya isi plat nomor dari gambar ini."
                    }
                ]
            }
        ],
        # Deterministic decoding and a short reply: only the plate is wanted.
        "temperature": 0,
        "max_tokens": 30
    }
    try:
        resp = requests.post(LMSTUDIO_API, headers=header, json=payload, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
        return data['choices'][0]['message']['content'].strip()
    except Exception as err:
        # Deliberately broad: network errors, HTTP errors, invalid JSON and an
        # unexpected response shape are all reported through the return value.
        return f"ERROR: {err}"


In [59]:
# === CLEAN UP PLATE OUTPUT ===
# A plate written as a stand-alone token: 1-2 letters, 1-4 digits and 0-3
# letters, where the three parts may be separated by spaces, tabs, dots or
# hyphens. The look-arounds keep the match from starting or ending in the
# middle of a longer alphanumeric run.
_POLA_PLAT_TERPISAH = re.compile(
    r"(?<![A-Z0-9])([A-Z]{1,2})[ \t.\-]*([0-9]{1,4})[ \t.\-]*([A-Z]{0,3})(?![A-Z0-9])"
)
# The same plate shape without separators, matched anywhere in the text.
_POLA_PLAT_RAPAT = re.compile(r"[A-Z]{1,2}[0-9]{1,4}[A-Z]{0,3}")


def bersihkan_teks(text):
    """Extract a licence-plate string from the raw model reply.

    The text is upper-cased, then resolved in this order:

    1. A stand-alone plate whose parts may be separated by spaces, tabs, dots
       or hyphens; the parts are joined without separators. This keeps the
       suffix of replies such as ``"B1234 XYZ"`` and ignores surrounding
       words or extra lines.
    2. A compact plate-shaped substring anywhere in the text.
    3. Otherwise, the text with every character except A-Z and 0-9 removed.

    Args:
        text: Raw reply string.

    Returns:
        The extracted plate, containing only the characters A-Z and 0-9.
        Note that a suffix-less plate followed by a separate word of up to
        three letters is read as plate plus suffix.
    """
    teks_bersih = text.upper()

    terpisah = _POLA_PLAT_TERPISAH.search(teks_bersih)
    if terpisah:
        return "".join(terpisah.groups())

    rapat = _POLA_PLAT_RAPAT.search(teks_bersih)
    if rapat:
        return rapat.group(0)

    return re.sub(r'[^A-Z0-9]', '', teks_bersih)


In [60]:
# === MAIN EXECUTION ===
def jalankan_ocr():
    """Run OCR on every labelled image and report the character error rate.

    Steps:
      * load the labels from ``LABEL_MANUAL``;
      * walk the files of ``FOLDER_GAMBAR`` in sorted order, keeping only
        ``.jpg`` / ``.jpeg`` / ``.png`` names that have a non-empty label;
      * send each image to the model, clean the reply and compute its CER;
      * write one row per image to ``HASIL_EKSPOR`` and print the details;
      * print the mean CER over all processed images (1.0 when none were
        processed).

    A failed request (reply starting with ``"ERROR"``) or an exception while
    processing an image is recorded with a CER of 1.0. Every processed image
    contributes exactly once to the total and to the output file.
    """
    ekstensi_gambar = (".jpg", ".jpeg", ".png")
    data_label = ambil_ground_truth(LABEL_MANUAL)
    jumlah_total = 0
    cer_total = 0.0

    # Explicit encoding so the output does not depend on the platform default.
    with open(HASIL_EKSPOR, "w", newline="", encoding="utf-8") as file_out:
        tulis = csv.writer(file_out)
        tulis.writerow(["image", "ground_truth", "hasil_prediksi", "CER"])

        for nama_file in sorted(os.listdir(FOLDER_GAMBAR)):
            if not nama_file.lower().endswith(ekstensi_gambar):
                continue

            gt = data_label.get(nama_file, "")
            if not gt:
                print(f"⚠️ Label tidak ditemukan untuk {nama_file}")
                continue

            path_gambar = os.path.join(FOLDER_GAMBAR, nama_file)

            # Only the OCR work is guarded. Accounting and writing happen once,
            # after the try, so a failure can never count an image twice.
            try:
                b64 = gambar_ke_base64(path_gambar)
                hasil_raw = kirim_ke_lmstudio(b64)

                if hasil_raw.startswith("ERROR"):
                    hasil_akhir = "ERROR"
                    nilai_cer = 1.0
                else:
                    hasil_akhir = bersihkan_teks(hasil_raw)
                    nilai_cer = hitung_cer(gt, hasil_akhir)
            except Exception as err:
                print(f"❌ ERROR pada {nama_file} → {err}")
                hasil_akhir = "UNHANDLED ERROR"
                nilai_cer = 1.0
            else:
                print(f"🖼️  {nama_file}")
                print(f"    🟢 Prediksi : {hasil_akhir}")
                print(f"    🎯 Ground Truth : {gt}")
                print(f"    📉 CER : {nilai_cer}\n")

            cer_total += nilai_cer
            jumlah_total += 1
            tulis.writerow([nama_file, gt, hasil_akhir, nilai_cer])

    rata_rata = round(cer_total / jumlah_total, 3) if jumlah_total else 1.0
    print(f"\n📊 RATA-RATA CER DARI {jumlah_total} GAMBAR: {rata_rata}")

# Entry point: start the OCR evaluation only when this code runs as the main
# program (not when it is imported as a module).
if __name__ == "__main__":
    jalankan_ocr()

🖼️  test001_1.jpg
    🟢 Prediksi : B9140BCD
    🎯 Ground Truth : B9140BCD
    📉 CER : 0.0

🖼️  test001_2.jpg
    🟢 Prediksi : B2407UZO
    🎯 Ground Truth : B2407UZO
    📉 CER : 0.0

🖼️  test001_3.jpg
    🟢 Prediksi : B2842PKM
    🎯 Ground Truth : B2642PKM
    📉 CER : 0.125

🖼️  test002_1.jpg
    🟢 Prediksi : BG1352AE
    🎯 Ground Truth : BG1352AE
    📉 CER : 0.0

🖼️  test003_1.jpg
    🟢 Prediksi : B2634UZF
    🎯 Ground Truth : B2634UZF
    📉 CER : 0.0

🖼️  test003_2.jpg
    🟢 Prediksi : B1995JVK
    🎯 Ground Truth : B1995JVK
    📉 CER : 0.0

🖼️  test004_1.jpg
    🟢 Prediksi : B9062VEH
    🎯 Ground Truth : B9062VEH
    📉 CER : 0.0

🖼️  test005_1.jpg
    🟢 Prediksi : DD2798KM
    🎯 Ground Truth : DD3798KM
    📉 CER : 0.125

🖼️  test006_1.jpg
    🟢 Prediksi : T1329KC
    🎯 Ground Truth : T1329KC
    📉 CER : 0.0

🖼️  test007_1.jpg
    🟢 Prediksi : AD8865EE
    🎯 Ground Truth : AD8865EE
    📉 CER : 0.0

🖼️  test008_1.jpg
    🟢 Prediksi : DK1157AAB
    🎯 Ground Truth : DK1157AAB
    📉 CER : 