In [None]:
# ML pipeline for captcha processing and prediction with visualization
import cv2
import numpy as np
import pytesseract
import os
import pandas as pd
import matplotlib.pyplot as plt

# Tesseract configuration (adjust the path to match your installation).
# The override is applied only when the executable exists at this location;
# otherwise pytesseract keeps its default command, which is looked up on PATH
# (Linux/macOS, or a Windows install in a different folder).
# In a raw string a single backslash is already literal, so the separators
# are not doubled.
TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE

# Helper that displays one image
def show_step(title, img, cmap="gray"):
    """Draw ``img`` in its own 6x4 Matplotlib figure with axes hidden.

    Args:
        title: Caption placed above the image.
        img: Image array. An array with a 2-element ``shape`` is drawn
            as-is using ``cmap``; any other array is converted from BGR to
            RGB before drawing.
        cmap: Colormap for the 2-D case; not used for colour images.
    """
    plt.figure(figsize=(6, 4))
    if len(img.shape) == 2:
        # Single-channel image: the colormap decides how values are drawn.
        pixels, options = img, {"cmap": cmap}
    else:
        # Colour image: swap BGR -> RGB so Matplotlib shows true colours.
        pixels, options = cv2.cvtColor(img, cv2.COLOR_BGR2RGB), {}
    plt.imshow(pixels, **options)
    plt.title(title)
    plt.axis("off")
    plt.show()

# OCR function implementing the captcha preprocessing pipeline
def process_and_ocr(img_path, visualize=False):
    """Preprocess a captcha image and read it with Tesseract.

    Pipeline: grayscale -> median blur (aperture 5) -> keep only dark pixels
    (HSV value <= 55) -> morphological opening with a 3x2 rectangle -> Otsu
    binarisation -> Tesseract restricted to ASCII letters and digits.

    Args:
        img_path: Path of the image file to read.
        visualize: When true, display the original image and the final
            binarised image through ``show_step``.

    Returns:
        The recognised text with leading/trailing whitespace removed, or
        ``""`` when the image cannot be loaded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file with None.
        return ""

    if visualize:
        show_step("Original", img, cmap=None)

    # 1) Grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # 2) Median blur
    gray_blur = cv2.medianBlur(gray, 5)

    # 3) Colour-based mask: keep pixels whose HSV value channel is <= 55.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower = np.array([0, 0, 0])
    upper = np.array([180, 255, 55])
    mask = cv2.inRange(hsv, lower, upper)
    # NOTE(review): pixels outside the mask become 0, i.e. as dark as the
    # darkest glyph pixels, so pure-black strokes may be indistinguishable
    # from the background after this step — confirm this is intended.
    masked = cv2.bitwise_and(gray_blur, gray_blur, mask=mask)

    # 4) Morphological opening to drop small specks.
    # (A further dilation with a 1x1 kernel would be an identity operation,
    # so no dilation step is applied; use a larger kernel here if strokes
    # need thickening.)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 2))
    clean = cv2.morphologyEx(masked, cv2.MORPH_OPEN, kernel)

    # 5) Binarise. With THRESH_OTSU the threshold is computed from the image
    # and the numeric `thresh` argument is ignored, hence 0.
    _, binary = cv2.threshold(clean, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    if visualize:
        show_step("clean", binary)

    config = "--psm 8 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    text = pytesseract.image_to_string(binary, config=config)

    # Tesseract terminates its output with whitespace (newline/form feed);
    # left in place it ends up in the CSV and breaks exact-match scoring.
    return text.strip()


# Folder containing the captcha images
image_dir = "images"
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")
results = []

# os.listdir() returns entries in arbitrary order; sorting keeps the printed
# log and the CSV row order reproducible between runs.
for file in sorted(os.listdir(image_dir)):
    if not file.lower().endswith(IMAGE_EXTENSIONS):
        continue
    path = os.path.join(image_dir, file)

    # visualize=True displays the intermediate images for every file
    pred = process_and_ocr(path, visualize=True)

    print(f"{file} → {pred}")
    results.append({"filename": file, "prediction": pred})

# Export to CSV. Explicit columns keep the header row even when no image was
# found, so later cells can still merge on "filename".
df = pd.DataFrame(results, columns=["filename", "prediction"])
df.to_csv("predictions.csv", index=False, encoding="utf-8")
print("Saved predictions.csv")


In [None]:
import cv2
import numpy as np
import pytesseract
import os
import pandas as pd

def OCR(img_path):
    """Run Tesseract directly on an image, with no preprocessing (baseline).

    Args:
        img_path: Path of the image file to read.

    Returns:
        The recognised text with leading/trailing whitespace removed, or
        ``""`` when the image cannot be loaded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file with None.
        return ""

    # Page-segmentation mode 8, OCR engine mode 3, output restricted to
    # ASCII letters and digits.
    config = "--psm 8 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    pred = pytesseract.image_to_string(img, config=config)

    # Tesseract terminates its output with whitespace (newline/form feed);
    # left in place it ends up in the CSV and breaks exact-match scoring.
    return pred.strip()
# Tesseract configuration (adjust the path to match your installation).
# The override is applied only when the executable exists at this location;
# otherwise pytesseract keeps its default command, which is looked up on PATH.
# In a raw string a single backslash is already literal, so the separators
# are not doubled.
TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE

# Baseline predictions on the unprocessed captcha images (raw prediction)
image_dir = "images"
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")
results = []

# os.listdir() returns entries in arbitrary order; sorting keeps the printed
# log and the CSV row order reproducible between runs.
for file in sorted(os.listdir(image_dir)):
    if not file.lower().endswith(IMAGE_EXTENSIONS):
        continue
    path = os.path.join(image_dir, file)
    pred = OCR(path)
    print(f"{file} → {pred}")
    results.append({"filename": file, "prediction": pred})

# Explicit columns keep the header row even when no image was found, so
# later cells can still merge on "filename".
df = pd.DataFrame(results, columns=["filename", "prediction"])
df.to_csv("raw_predictions.csv", index=False, encoding="utf-8")
print("Saved raw_predictions.csv")

        

captcha_0.png → SSR

captcha_1.png → SREPTU

captcha_10.png → UBS

captcha_100.png → DSUs

captcha_101.png → Nore

captcha_102.png → SeaNrc

captcha_11.png → BRAM

captcha_12.png → BFuItee

captcha_13.png → LawSizer

captcha_14.png → EACErAS

captcha_15.png → AEMSLLE

captcha_16.png → See

captcha_17.png → Ate

captcha_18.png → Se

captcha_19.png → WEQvey

captcha_2.png → MmBEAGA

captcha_20.png → Seco

captcha_21.png → DFA

captcha_22.png → BND

captcha_23.png → Spee

captcha_24.png → Corts

captcha_25.png → SEPaomse

captcha_26.png → roe

captcha_27.png → UiTeaagy

captcha_28.png → Bree

captcha_29.png → Teepedc

captcha_3.png → year

captcha_30.png → oWway7iscy

captcha_31.png → eEeigbs

captcha_32.png → BICC

captcha_33.png → DpESte

captcha_34.png → Sermo

captcha_35.png → Toner

captcha_36.png → eon

captcha_37.png → Bee

captcha_38.png → Reees

captcha_39.png → pepiB

captcha_4.png → soot

captcha_40.png → Pre

captcha_41.png → SRShata

captcha_42.png → ANYSsa

captcha_43.png → 

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score

# Load ground truth and both prediction files.
# dtype=str keeps all-digit captchas as text instead of numbers, and
# keep_default_na=False keeps empty predictions as "" (and strings such as
# "NA"/"null") instead of turning them into NaN.
CSV_OPTIONS = {"dtype": str, "keep_default_na": False}
labels = pd.read_csv("labels.csv", **CSV_OPTIONS)   # columns: filename, label
preds = pd.read_csv("predictions.csv", **CSV_OPTIONS)  # columns: filename, prediction
raw_preds = pd.read_csv("raw_predictions.csv", **CSV_OPTIONS)

# OCR output can carry trailing newlines; compare on the visible text only.
labels = labels.assign(label=labels["label"].str.strip())
preds = preds.assign(prediction=preds["prediction"].str.strip())
raw_preds = raw_preds.assign(prediction=raw_preds["prediction"].str.strip())

# Join on filename (inner join: files missing from either side are dropped)
df = labels.merge(preds, on="filename")
df2 = labels.merge(raw_preds, on="filename")

# Word-level accuracy (exact match of the whole string)
word_acc = accuracy_score(df["label"], df["prediction"])
word_acc_raw = accuracy_score(df2["label"], df2["prediction"])
# character-level accuracy
def _as_text(value):
    """Return ``value`` as a string; missing values (None/NaN) become ""."""
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def char_accuracy(gt, pred):
    """Fraction of ground-truth characters matched position by position.

    Each label/prediction pair is compared character by character from the
    left, up to the length of the shorter string. The denominator is the
    total number of label characters, so missing predicted characters count
    as errors while extra predicted characters beyond the label length are
    not penalised.

    Args:
        gt: Iterable of ground-truth strings.
        pred: Iterable of predicted strings, paired with ``gt`` by position.
            None/NaN entries (e.g. an empty prediction read back from CSV)
            are treated as empty strings; other non-string values are
            converted with ``str``.

    Returns:
        The accuracy as a float in [0, 1]; 0.0 when there are no label
        characters at all.
    """
    total_chars = 0
    correct_chars = 0
    for g, p in zip(gt, pred):
        g, p = _as_text(g), _as_text(p)
        total_chars += len(g)
        correct_chars += sum(gc == pc for gc, pc in zip(g, p))
    return correct_chars / total_chars if total_chars > 0 else 0.0

# Character-level accuracy for both runs. Missing predictions (NaN after a
# CSV round trip) are mapped to "" and values are coerced to str so the
# per-character comparison cannot fail on non-string entries.
char_acc = char_accuracy(
    df["label"].astype(str), df["prediction"].fillna("").astype(str)
)
char_acc_raw = char_accuracy(
    df2["label"].astype(str), df2["prediction"].fillna("").astype(str)
)

print(f"Word-level Accuracy: {word_acc:.2%}")
print(f"Character-level Accuracy: {char_acc:.2%}")

print(f"Word-level raw Accuracy: {word_acc_raw:.2%}")
print(f"Character-level raw Accuracy: {char_acc_raw:.2%}")


Word-level raw Accuracy: 0.00%
Character-level raw Accuracy: 6.32%
