In [None]:
import cv2
import numpy as np
import pytesseract
from PIL import Image
import matplotlib.pyplot as plt
import os

In [None]:
class CardRecognitionResult:
    """Plain container for the values recognised on one card.

    Every field can be supplied as a keyword argument; omitted fields keep the
    "nothing recognised yet" defaults, so ``CardRecognitionResult()`` behaves
    exactly like the original argument-less constructor and attributes can
    still be filled in one by one afterwards.

    Attributes (all defaults shown in the signature):
        is_card, name, index, edition, text, quote, rarity, confidence
    """

    def __init__(self, *, is_card=False, name=None, index=None, edition=None,
                 text=None, quote=None, rarity=None, confidence=0.0):
        self.is_card = is_card
        self.name = name
        self.index = index
        self.edition = edition
        self.text = text
        self.quote = quote
        self.rarity = rarity
        self.confidence = confidence

    def __repr__(self):
        # Show every field so a result is readable as a notebook cell output.
        fields = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"{type(self).__name__}({fields})"

In [None]:
# Open the first sample file as a PIL image; the bare name on the last line
# makes the notebook render it as the cell output.
image = Image.open("test_data/franta.png")
image

In [None]:
# Run Tesseract's data extraction on the whole image with page-segmentation
# mode 6 and the English ('eng') model.
# NOTE(review): `ocr_data` is not read by any later cell visible in this notebook.
ocr_data = pytesseract.image_to_data(image, config='--psm 6', lang='eng')

In [None]:
# Plain-text OCR of the whole `image`, this time with the Slovak ('slk')
# language model; the recognised string is shown as the cell output.
text = pytesseract.image_to_string(image, config='--psm 6', lang='slk')
text

In [None]:
# Hand-picked card rectangle in pixel coordinates: (left, top, right, bottom).
card_bounds = (90, 70, 690, 910)

# Outline the rectangle on the full image to check it visually.
fig, ax = plt.subplots()
ax.imshow(image)
x1, y1, x2, y2 = card_bounds
rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='r', facecolor='none')
ax.add_patch(rect)
plt.show()

# Crop with the very tuple that was drawn above, so the preview and the crop
# cannot drift apart when the bounds are edited.
card = image.crop(card_bounds)
card

In [None]:
# Switch to the second sample file; it is used as the card directly, without
# any cropping (both names refer to the same PIL image object).
image = Image.open("test_data/kata.png")
card = image

In [None]:
def get_card_bounding_box(image_path, threshold=127):
    """Return the bounding box of all dark pixels in an image file.

    The image is read with OpenCV, converted to grayscale and thresholded with
    ``cv2.THRESH_BINARY_INV``, so pixels whose gray value is at most
    ``threshold`` become foreground. The result is the smallest upright
    rectangle containing every foreground pixel.

    Args:
        image_path: Path of the image file to analyse.
        threshold: Gray level used for the inverse binary threshold
            (default 127, the value that was previously hard-coded).

    Returns:
        Tuple ``(x, y, w, h)``: top-left corner, width and height in pixels.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
        ValueError: If no pixel is at or below ``threshold``.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)

    coords = cv2.findNonZero(thresh)
    # Without any foreground pixel there is nothing to put a box around.
    if coords is None or len(coords) == 0:
        raise ValueError(f"No pixels at or below threshold {threshold} in {image_path}")

    x, y, w, h = cv2.boundingRect(coords)
    return x, y, w, h


# image_path = "test_data/franta_foto.jpg"
image_path = "test_data/kata_rotated.png"
bbox = get_card_bounding_box(image_path)
x, y, w, h = bbox
print(f"Bounding box: x={x}, y={y}, w={w}, h={h}")

img = cv2.imread(image_path)
# Crop from the untouched pixels first: the outline below is 20 px thick and
# straddles the box edge, so drawing it before cropping would leave a green
# frame inside `card`.
card = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).crop((x, y, x+w, y+h))
# `img` still ends up annotated with the detected box, as before.
cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), thickness=20)

In [None]:
# height = 800
# width = int(0.7 * height)
# card = card.resize((width, height))

# text = pytesseract.image_to_string(card, config='--psm 6', lang='slk')
# text

In [None]:
class TesseractConfig:
    """Named values for the ``config`` and ``lang`` arguments given to pytesseract."""

    # Language model codes.
    LANG_ENG = "eng"
    LANG_SLK = "slk"

    # Page-segmentation-mode flags.
    PSM_BLOCK = "--psm 6"
    PSM_SINGLE_LINE = "--psm 7"


# Gray-level cutoff for the binarisation used in the per-region cells below:
# pixels below this value are mapped to 0, all others to 255.
binary_threshold = 200

In [None]:
def get_card_bounding_box(image_path, threshold=127):
    """Return the bounding box of all dark pixels in an image file.

    NOTE(review): a function with this name is also defined in an earlier
    cell; this definition replaces it when the notebook runs top to bottom.

    The image is read with OpenCV, converted to grayscale and thresholded with
    ``cv2.THRESH_BINARY_INV``, so pixels whose gray value is at most
    ``threshold`` become foreground. The result is the smallest upright
    rectangle containing every foreground pixel.

    Args:
        image_path: Path of the image file to analyse.
        threshold: Gray level used for the inverse binary threshold
            (default 127, the value that was previously hard-coded).

    Returns:
        Tuple ``(x, y, w, h)``: top-left corner, width and height in pixels.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
        ValueError: If no pixel is at or below ``threshold``.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)

    coords = cv2.findNonZero(thresh)
    # Without any foreground pixel there is nothing to put a box around.
    if coords is None or len(coords) == 0:
        raise ValueError(f"No pixels at or below threshold {threshold} in {image_path}")

    x, y, w, h = cv2.boundingRect(coords)
    return x, y, w, h


# Compute and print the dark-pixel bounding box for one photo from data/.
# Alternative inputs that were tried are kept commented out:
# image_path = "test_data/franta_foto.jpg"
# image_path = "test_data/franta.png"
# image_path = "test_data/jozo_cropped.png"
image_path = "data/1.jpg"
bbox = get_card_bounding_box(image_path)
x, y, w, h = bbox
print(f"Bounding box: x={x}, y={y}, w={w}, h={h}")



In [None]:
data_dir = "data"

# OCR every .jpg in data_dir and write the recognised text next to the image
# as <name>.txt. The loop variable is deliberately not called `image`, so the
# PIL image held in that notebook global is not overwritten with a file name.
for image_name in os.listdir(data_dir):
    if not image_name.endswith(".jpg"):
        continue
    image_path = os.path.join(data_dir, image_name)
    img = cv2.imread(image_path)
    # cv2.imread returns None for unreadable files; skip them instead of
    # aborting the whole batch.
    if img is None:
        print(f"Skipping unreadable image: {image_path}")
        continue
    # OpenCV loads pixels as BGR while pytesseract assumes RGB.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    text = pytesseract.image_to_string(img_rgb, config=TesseractConfig.PSM_BLOCK, lang=TesseractConfig.LANG_ENG)
    # Swap only the extension; str.replace would also rewrite a ".jpg" that
    # happens to occur elsewhere in the path.
    output_path = os.path.splitext(image_path)[0] + ".txt"
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)

In [None]:
# Load the second photo and cut the card out with hand-picked pixel
# coordinates (x, y, w, h); the automatic bounding box is not used here.
image_path = "data/2.jpg"
img = cv2.imread(image_path)
# cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), thickness=20)
x,y,w,h = 500, 500, 2200, 3200
card = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).crop((x, y, x+w, y+h))

# rh = reference height
# Field rectangles as (x1, y1, x2, y2). Every coordinate, x included, is
# divided by rh, which is why the scaling further down multiplies all four
# values by the image height.
rh = 800
boxes_pct = [
    (50/rh, 0/rh, 480/rh, 100/rh),    # Name
    (0/rh, 730/rh, 205/rh, 795/rh),  # Index
    (390/rh, 740/rh, 540/rh, 795/rh), # Trademark
    (140/rh, 460/rh, 440/rh, 510/rh), # Type
    (50/rh, 520/rh, 540/rh, 720/rh),  # Text + Quote
]

# Alternative box set with tighter Name and Index rectangles, kept for reference:
# boxes_pct = [
#     (90/rh, 35/rh, 470/rh, 90/rh),    # Name
#     (45/rh, 745/rh, 205/rh, 795/rh),  # Index
#     (390/rh, 740/rh, 540/rh, 795/rh), # Trademark
#     (140/rh, 460/rh, 440/rh, 510/rh), # Type
#     (50/rh, 520/rh, 540/rh, 720/rh),  # Text + Quote
# ]

# img_width is unpacked but not used below.
img_width, img_height = card.size

# Convert the normalised boxes to pixel coordinates of `card`; int() truncates.
# `boxes` is read again by the per-region OCR cells further down.
boxes = [
    (
        int(x1 * img_height), int(y1 * img_height),
        int(x2 * img_height), int(y2 * img_height)
    )
    for (x1, y1, x2, y2) in boxes_pct
]

# Draw every box on the card for visual inspection.
fig, ax = plt.subplots()
ax.imshow(card)
for box in boxes:
    x1, y1, x2, y2 = box
    rect = plt.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
plt.show()

In [None]:
def extract_and_ocr_card_regions(card_img, binary_threshold=200):
    """OCR five fixed regions of a card image and return the raw strings.

    Each region is defined by a rectangle measured on a card of reference
    height 800. The rectangle is scaled to ``card_img`` using the image
    height for all four coordinates, cropped, resized to a fixed size with
    Lanczos resampling and passed to Tesseract with a per-region
    page-segmentation mode and language.

    Args:
        card_img: PIL image of the card.
        binary_threshold: Currently ignored; the binarisation step that would
            use it is disabled. Kept so existing calls remain valid.

    Returns:
        List of five strings as returned by Tesseract, in the order
        name, index, trademark, type, text + quote.
    """
    rh = 800  # reference height the rectangles were measured against

    # One row per region:
    # (rectangle in reference pixels, Tesseract config, language, OCR input size)
    region_specs = (
        ((90, 35, 470, 90), TesseractConfig.PSM_SINGLE_LINE, TesseractConfig.LANG_SLK, (450, 55)),    # Name
        ((45, 745, 205, 795), TesseractConfig.PSM_BLOCK, TesseractConfig.LANG_SLK, (160, 50)),        # Index
        ((390, 740, 540, 795), TesseractConfig.PSM_SINGLE_LINE, TesseractConfig.LANG_ENG, (150, 55)), # Trademark
        ((140, 460, 440, 510), TesseractConfig.PSM_SINGLE_LINE, TesseractConfig.LANG_SLK, (300, 50)), # Type
        ((50, 520, 540, 720), TesseractConfig.PSM_BLOCK, TesseractConfig.LANG_SLK, (490, 200)),       # Text + Quote
    )

    _, img_height = card_img.size

    results = []
    for ref_box, psm, lang, target_size in region_specs:
        # Normalise by rh first, then scale by the image height and truncate.
        pixel_box = tuple(int((coord / rh) * img_height) for coord in ref_box)
        region = card_img.crop(pixel_box).resize(target_size, Image.LANCZOS)
        results.append(pytesseract.image_to_string(region, config=psm, lang=lang))
    return results

# Example usage:
texts = extract_and_ocr_card_regions(card)
print(f"Name: {texts[0]}")
print(f"Index: {texts[1]}")
print(f"Trademark: {texts[2]}")
print(f"Type: {texts[3]}")
print(f"Text + Quote: {texts[4]}")

In [None]:
# boxes = [
#     (90, 35, 470, 80), # Name
#     (50, 750, 200, 790), # Index
#     (385, 750, 510, 790), # Trademark
#     (140, 470, 410, 510), # Type
#     # (50, 520, 510, 640), # Text
#     # (50, 640, 510, 720), # Quote
#     (50, 520, 510, 720), # Text + Quote

# ]

# fig, ax = plt.subplots()
# ax.imshow(card)
# for box in boxes:
#     x1, y1, x2, y2 = box
#     rect = plt.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, edgecolor='r', facecolor='none')
#     ax.add_patch(rect)
# plt.show()

In [None]:
# Name region (boxes[0]): single-line OCR with the Slovak model.
# NOTE(review): card_binary (grayscale, pixels < binary_threshold -> 0, else 255)
# is built here but the OCR call is given the un-binarised card_croped —
# confirm which input is intended.
card_croped = card.crop(boxes[0])
card_binary = card_croped.convert('L').point(lambda x: 0 if x < binary_threshold else 255, '1')
text = pytesseract.image_to_string(card_croped, config=TesseractConfig.PSM_SINGLE_LINE, lang=TesseractConfig.LANG_SLK)
text

In [None]:
# Index region (boxes[1]): block-mode OCR with the Slovak model.
# NOTE(review): card_binary is built here but the OCR call is given the
# un-binarised card_croped — confirm which input is intended.
card_croped = card.crop(boxes[1])
card_binary = card_croped.convert('L').point(lambda x: 0 if x < binary_threshold else 255, '1')
text = pytesseract.image_to_string(card_croped, config=TesseractConfig.PSM_BLOCK, lang=TesseractConfig.LANG_SLK)
text

In [None]:
# Trademark region (boxes[2]): single-line OCR with the English model.
# NOTE(review): card_binary is built here but the OCR call is given the
# un-binarised card_croped — confirm which input is intended.
card_croped = card.crop(boxes[2])
card_binary = card_croped.convert('L').point(lambda x: 0 if x < binary_threshold else 255, '1')
text = pytesseract.image_to_string(card_croped, config=TesseractConfig.PSM_SINGLE_LINE, lang=TesseractConfig.LANG_ENG)
text

In [None]:
# Type region (boxes[3]): crop it in this cell instead of reusing whatever
# `card_croped` was left behind by the cell that ran last, then binarise
# (pixels < binary_threshold -> 0, else 255) and run single-line OCR with the
# Slovak model on the binarised image.
card_croped = card.crop(boxes[3])
card_binary = card_croped.convert('L').point(lambda x: 0 if x < binary_threshold else 255, '1')
text = pytesseract.image_to_string(card_binary, config=TesseractConfig.PSM_SINGLE_LINE, lang=TesseractConfig.LANG_SLK)
text

In [None]:
# Text + Quote region (boxes[4]): binarise the crop (pixels < binary_threshold
# -> 0, else 255) and run block-mode OCR with the Slovak model on the
# binarised image.
card_croped = card.crop(boxes[4])
card_binary = card_croped.convert('L').point(lambda x: 0 if x < binary_threshold else 255, '1')
text = pytesseract.image_to_string(card_binary, config=TesseractConfig.PSM_BLOCK, lang=TesseractConfig.LANG_SLK)
text

In [None]:
import torch
import clip
from PIL import Image
import os

In [None]:
# Load the CLIP "ViT-B/32" model together with its image preprocessing
# callable, on the CPU.
model, preprocess = clip.load("ViT-B/32", device="cpu")

In [None]:
# List of (file name, CLIP image embedding as a NumPy array), one per .jpg.
embeddings = []

img_path = "data"
# os.listdir returns entries in arbitrary order; sorting keeps the order of
# `embeddings` (and of anything indexed by it) identical on every run.
for img_file in sorted(os.listdir(img_path)):
    if not img_file.endswith(".jpg"):
        continue
    # The context manager closes the file once the input tensor is built.
    with Image.open(os.path.join(img_path, img_file)) as pil_image:
        img = preprocess(pil_image).unsqueeze(0)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        embedding = model.encode_image(img).numpy()
    embeddings.append((img_file, embedding))

In [None]:
# Pairwise cosine similarity between all image embeddings.
# Flatten each embedding and compute its norm once instead of inside the
# O(n^2) loop; the per-pair arithmetic is unchanged.
vectors = [emb.flatten() for _, emb in embeddings]
norms = [np.linalg.norm(vec) for vec in vectors]

correlation_matrix = np.zeros((len(embeddings), len(embeddings)))
for i in range(len(embeddings)):
    for j in range(len(embeddings)):
        correlation = np.dot(vectors[i], vectors[j]) / (norms[i] * norms[j])
        correlation_matrix[i, j] = correlation


print("Correlation Matrix:")
print(correlation_matrix)
print("Best matches (excluding self-matches):")
if len(embeddings) < 2:
    # With fewer than two images there is no other image to match against.
    print("Need at least two images to report a best match.")
else:
    for i in range(len(embeddings)):
        # Exclude the self-match explicitly. Taking the second-largest entry
        # only works when the diagonal is the unique maximum, which rounding
        # or duplicate images can break.
        candidates = correlation_matrix[i].copy()
        candidates[i] = -np.inf
        best_match = int(np.argmax(candidates))
        print(f"{embeddings[i][0]} <-> {embeddings[best_match][0]}: {correlation_matrix[i, best_match]}")

In [None]:
import time

# Time one full embedding pass (file open + preprocessing + CLIP forward).
# perf_counter is a high-resolution clock meant for measuring short
# intervals and, unlike time.time(), is unaffected by system clock changes.
time_start = time.perf_counter()
img = preprocess(Image.open("data/embedding/franta_foto.jpg")).unsqueeze(0)
with torch.no_grad():
    embedding1 = model.encode_image(img).numpy()
time_end = time.perf_counter()
print(f"Time taken to process franta_foto.jpg: {time_end - time_start} seconds")


In [None]:
# Embed the second file (franta.png) with the same preprocessing and model,
# without gradient tracking; the result is kept as a NumPy array.
img = preprocess(Image.open("data/embedding/franta.png")).unsqueeze(0)
with torch.no_grad():
    embedding2 = model.encode_image(img).numpy()

In [None]:
def cosine_similarity(a, b):
    """Return the dot product of ``a`` and ``b`` divided by the product of their norms.

    Callers in this notebook pass flattened (1-D) arrays.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return dot_product / norm_product
# Compare the two embeddings computed in the previous cells; flatten() turns
# each one into a 1-D vector before the similarity is taken.
similarity = cosine_similarity(embedding1.flatten(), embedding2.flatten())
print(f"Similarity: {similarity:.4f}")