In [2]:
import torch
import torchvision
from pytesseract import Output
import pytesseract
import cv2
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Basic first results

def basic_results_on_file(fin: str, min_conf: float = 0.95):
    image = cv2.imread(fin)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_data(rgb, output_type=Output.DICT)
    
    for i in range(0, len(results["text"])):
        # Get the box
        x = results["left"][i]
        y = results["top"][i]
        w = results["width"][i]
        h = results["height"][i]
        
        # Text and confidence
        text = results["text"][i]
        conf = int(results["conf"][i])
        
        # Draw on the image
        if conf > min_conf:
            text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(image, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                1.2, (0, 0, 255), 3)
            
    # Save it to see quality 
    cv2.imwrite("temp.jpg", image)

basic_results_on_file("data/images/league/f_2000.jpg")

In [4]:
class Box:
    def __init__(self, id=1, left=0.0, top=0.0, width=0.0, height=0.0):
        self.id = id
        self.left = left
        self.top = top
        self.width = width
        self.height = height
    
    # Gets the smallest box that contains both these boxes
    @staticmethod
    def union(boxA: "Box", boxB: "Box") -> "Box":
        my_right = boxA.left + boxA.width
        my_bottom = boxA.top + boxA.height
        other_right = boxB.left + boxB.width
        other_bottom = boxB.top + boxB.height

        union_left = min(boxA.left, boxB.left)
        union_top = min(boxA.top, boxB.top)

        return Box(
            left=union_left,
            top=union_top,
            width=max(my_right, other_right) - union_left,
            height=max(my_bottom, other_bottom) - union_top
        )

    # Returns L1 distance + distance between centers
    # Want both because otherwise huge boxes become a problem
    def distance(self, other: "Box") -> float:
        union = Box.union(self, other)
        union.width -= self.width + other.width
        union.height -= self.height + other.height
        union.width = max(0, union.width)
        union.height = max(0, union.height)
        dist_between_centers = abs(self.left + self.width / 2 - other.left - other.width / 2) + abs(self.top + self.height / 2 - other.top - other.height / 2)
        return union.width + union.height + dist_between_centers / 25.0

    # Returns (distance, other_box)
    def closet_to(self, others: list["Box"]) -> tuple[float, "Box"]:
        best = (float("inf"), Box())
        for other in others:
            dist = self.distance(other)
            if dist < best[0]:
                best = (dist, other)
        return best

In [5]:
IMG_SIZE = (1920, 1080)

def results_on_file(fin: str, fout: str, box_merge_dist = 40.0, min_conf: float = 0.95):
    # Apply tesseract
    image = cv2.imread(fin)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_data(rgb, output_type=Output.DICT)

    # Perform box merging
    boxes: list[Box] = []
    for i in range(len(results["block_num"])):
        if results["level"][i] != 3:
            continue
        # Ignore big boxes
        if results["width"][i] > IMG_SIZE[0] / 2 or results["height"][i] > IMG_SIZE[1] / 2:
            continue
        box = Box(
            left=results["left"][i],
            top=results["top"][i],
            width=results["width"][i],
            height=results["height"][i]
        )
        (dist, merge_candidate) = box.closet_to(boxes)
        if dist < box_merge_dist:
            boxes.remove(merge_candidate)
            box = Box.union(merge_candidate, box)
        boxes.append(box)

    # Draw on the image
    for box in boxes:
        cv2.rectangle(image, (box.left, box.top), (box.left + box.width, box.top + box.height), (0, 255, 0), 2)

    cv2.imwrite(fout, image)

results_on_file("data/images/league/f_6000.jpg", "temp.jpg")

In [6]:
# Iterate over all files in data/leage
import os
for fin in os.listdir("data/league"):
    if not fin.endswith(".jpg"):
        continue
    # results_on_file("data/league/" + fin, "explore/league/" + fin)

FileNotFoundError: [Errno 2] No such file or directory: 'data/league'

In [7]:
# Before I go on, let's play around with binarization to see if that improves results significantly

def binarize(img, threshold):
    # TODO: Make threshold adaptive to increase robustness
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    img = cv2.threshold(img, threshold, 255, cv2.THRESH_BINARY)[1]
    img = 255 - img
    return img

start_img = cv2.imread("data/league/f_560000.jpg")
img = cv2.cvtColor(start_img, cv2.COLOR_BGR2RGB)
result = binarize(img, 150)
cv2.imwrite("temp.jpg", result)



[ WARN:0@9.069] global loadsave.cpp:244 findDecoder imread_('data/league/f_560000.jpg'): can't open/read file: check file path/integrity


error: OpenCV(4.7.0) /Users/runner/work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [8]:
IMG_SIZE = (1920, 1080)

def results_on_file_with_binarize(fin: str, fout: str, box_merge_dist = 25.0, min_conf: float = 0.95):
    # Apply tesseract
    image = cv2.imread(fin)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    clean_img = rgb
    clean_img = binarize(rgb, 150)
    results = pytesseract.image_to_data(clean_img, output_type=Output.DICT)

    # Perform box merging
    boxes: list[Box] = []
    for i in range(len(results["block_num"])):
        if results["level"][i] != 3:
            continue
        # Ignore big boxes
        if results["width"][i] > IMG_SIZE[0] / 2 or results["height"][i] > IMG_SIZE[1] / 2:
            continue
        box = Box(
            results["block_num"][i],
            left=results["left"][i],
            top=results["top"][i],
            width=results["width"][i],
            height=results["height"][i]
        )
        (dist, merge_candidate) = box.closet_to(boxes)
        if dist < box_merge_dist:
            boxes.remove(merge_candidate)
            box = Box.union(merge_candidate, box)
        boxes.append(box)

    # Draw on the image
    for box in boxes:
        cv2.rectangle(image, (box.left, box.top), (box.left + box.width, box.top + box.height), (0, 255, 0), 2)

    cv2.imwrite(fout, image)

results_on_file_with_binarize("data/images/league/f_6000.jpg", "temp.jpg")

In [9]:
# Iterate over all files in data/leage
import os
for fin in os.listdir("data/league"):
    if not fin.endswith(".jpg"):
        continue
    # results_on_file_with_binarize("data/league/" + fin, "explore/league/" + fin)

FileNotFoundError: [Errno 2] No such file or directory: 'data/league'

In [11]:
start_img = cv2.imread("data/images/league/f_560000.jpg")
img = cv2.cvtColor(start_img, cv2.COLOR_BGR2RGB)
result = binarize(img, 150)

def get_boxes(img, conf_threshold=80, box_merge_dist=25.0):
    results = pytesseract.image_to_data(img, output_type=Output.DICT)

    # Get block_num of confident texts
    confident_blocks = set()
    for i in range(len(results["block_num"])):
        print(results["conf"][i])
        if results["conf"][i] > conf_threshold:
            confident_blocks.add(results["block_num"][i])

    # Perform box merging
    boxes: list[Box] = []
    for i in range(len(results["block_num"])):
        if results["level"][i] != 3 or results["block_num"][i] not in confident_blocks:
            continue
        # Ignore big boxes
        if results["width"][i] > img.shape[0] / 2 or results["height"][i] > img.shape[1] / 2:
            continue
        box = Box(
            results["block_num"][i],
            left=results["left"][i],
            top=results["top"][i],
            width=results["width"][i],
            height=results["height"][i]
        )
        (dist, merge_candidate) = box.closet_to(boxes)
        if dist < box_merge_dist:
            boxes.remove(merge_candidate)
            box = Box.union(merge_candidate, box)
        boxes.append(box)
    
    return boxes

boxes = get_boxes(result)
for box in boxes:
    cv2.rectangle(result, (box.left, box.top), (box.left + box.width, box.top + box.height), (0, 255, 0), 2)
cv2.imwrite("temp.jpg", result)

-1
-1
-1
-1
95
-1
-1
-1
29
31
0
-1
-1
-1
11
87
-1
68
-1
-1
-1
95
-1
-1
-1
95
95
-1
-1
-1
18
-1
22
64
64
80
20
-1
30
72
87
95
67
-1
34
40
-1
91
-1
10
51
51
93
90
-1
81
85
85
-1
-1
-1
95
-1
-1
-1
57
95
84
70
93
85
-1
9
39
87
46
-1
-1
-1
95
-1
-1
-1
77
25
-1
40
60
-1
-1
-1
59
-1
57
68
-1
36
56
-1
-1
-1
79
-1
-1
-1
95
-1
-1
-1
95
-1
-1
-1
95
-1
-1
-1
83
86
50
50
74
66
-1
-1
-1
89
49
92
68
-1
48
37
-1
34
40
-1
82
78
78
-1
71
-1
-1
-1
95
-1
-1
-1
44
-1
-1
-1
33
89
-1
-1
-1
80
50
69
-1
-1
-1
95


True

In [19]:
f_dataset = datasets.ImageFolder(
    root="data/images",
    transform=transforms.ToTensor()
)

f_loader = DataLoader(f_dataset, batch_size=1, shuffle=False)

In [29]:
def tensor_to_cv2(tens):
    as_numpy = tens.detach().numpy()
    as_numpy = as_numpy.swapaxes(0,1)
    as_numpy = as_numpy.swapaxes(1,2)
    as_numpy = as_numpy * 255
    return as_numpy.astype(np.uint8)

In [31]:
for ix, (images, labels) in enumerate(f_loader):
    fname = f_loader.dataset.samples[ix][0]
    timestamp = fname.split("f_")[-1].split(".")[0]
    color = tensor_to_cv2(images[0])
    bw =  binarize(color, 115)
    boxes = get_boxes(bw)
    # For each box, get the subimage it bounds and write to file
    for box in boxes:
        sub_color = color[box.top:box.top + box.height, box.left:box.left + box.width]
        sub_bw = bw[box.top:box.top + box.height, box.left:box.left + box.width]
        cv2.imwrite(f"explore/boxes/color/{timestamp}_{box.id}.jpg", sub_color)
        cv2.imwrite(f"explore/boxes/bw/{timestamp}_{box.id}.jpg", sub_bw)    

KeyboardInterrupt: 

In [36]:
start_img = cv2.imread("data/images/league/f_560000.jpg")
img = cv2.cvtColor(start_img, cv2.COLOR_BGR2RGB)
result = binarize(img, 115)
# Write to temp.jpg
cv2.imwrite("temp.jpg", result)

True