In [None]:
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import pytesseract
import re
from matplotlib import pyplot as plt
from itertools import chain
from tqdm.notebook import tqdm

# Directory scanned by run() for the *.png screenshots to OCR.
path_screenshots = Path.home() / "Pictures/Screenshots"
# Location of a members CSV, relative to the notebook's working directory.
# NOTE(review): no active code in this notebook reads it — confirm whether it
# is still needed.
member_csv_path = Path("../out/members.csv")
# Tell pytesseract which Tesseract executable to invoke. This is an absolute,
# machine-specific Windows path; adjust it when running elsewhere.
pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract.exe"

In [None]:
def parse_base(items):
    """Split raw OCR text into a list of cleaned, non-empty lines.

    Args:
        items: Multi-line string (one OCR'd table cell per line).

    Returns:
        list[str]: The lines in their original order with surrounding
        whitespace removed. Empty and whitespace-only lines are dropped.
    """
    # Strip *before* filtering. Filtering first lets whitespace-only lines
    # (for instance a trailing form feed) through, and they then turn into
    # empty entries that shift the column alignment downstream.
    stripped = (line.strip() for line in items.split("\n"))
    return [line for line in stripped if line]


def process_member_crop(img, y1, y2, x1, x2, config=None):
    """OCR one rectangular region of a screenshot and return its text lines.

    Args:
        img: Image array indexed as ``img[row, column]``; process() passes a
            grayscale image here.
        y1, y2: Row range of the region (``y1`` inclusive, ``y2`` exclusive).
        x1, x2: Column range of the region (``x1`` inclusive, ``x2`` exclusive).
        config: Extra command-line options forwarded to Tesseract, or None.

    Returns:
        list[str]: The recognised text split into lines by parse_base().
    """
    region = img[y1:y2, x1:x2]
    # Binarise with inverted polarity; Otsu's method picks the threshold
    # automatically. cv2.threshold returns (threshold_used, image).
    _, binarized = cv2.threshold(
        region, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )
    text = pytesseract.image_to_string(binarized, config=config)
    return parse_base(text)


def _clean_rank(text):
    """Return the trailing rank name found in one OCR'd rank cell.

    The pattern keeps the last run of letters (optionally two words separated
    by spaces) at the end of the string, discarding leading OCR noise. When
    nothing matches, the text is returned unchanged instead of failing, so a
    single badly recognised cell does not abort the whole screenshot.
    """
    match = re.search("([A-Za-z]{2,} *[A-Za-z]+)$", text)
    return match.group() if match else text


def process(path, is_top_3=False):
    """OCR the member table of one screenshot into a list of row tuples.

    Args:
        path: Path of the screenshot image file.
        is_top_3: When True only the upper part of the table is read (rows up
            to y=1045 instead of y=1545). The top 3 are processed separately
            to improve thresholding.

    Returns:
        list[tuple]: ``(player, score, rank, win_rate)`` string tuples in
        top-to-bottom order. If the columns yield different numbers of lines,
        rows are aligned from the bottom and surplus leading entries of the
        longer columns are discarded.

    Raises:
        OSError: If the image cannot be read.
    """
    img = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of a later subscript error.
        raise OSError("Could not read image: " + str(path))

    # Hard-coded pixel coordinates of the table columns.
    # NOTE(review): presumably tied to one fixed screenshot resolution —
    # confirm before using screenshots from another display.
    y1 = 435
    y2 = 1045 if is_top_3 else 1545

    # "--psm 6" is a Tesseract page segmentation mode; the character
    # whitelists restrict each column to the symbols expected in it.
    players = process_member_crop(img, y1, y2, 1400, 1950, config="--psm 6")
    scores = process_member_crop(
        img, y1, y2, 1950, 2100, config="--psm 6 -c tessedit_char_whitelist=0123456789"
    )
    ranks = process_member_crop(
        img,
        y1,
        y2,
        2360,
        2650,
        config=r'--psm 6 -c tessedit_char_whitelist="LegendGoldSilverBronzeIV "',
    )
    # Clean rank data. The first row is left as-is since it may be clipped;
    # per the original note it is expected to be dropped later. Slicing with
    # [:1] keeps this safe when OCR returned no rank lines at all.
    ranks = ranks[:1] + [_clean_rank(s) for s in ranks[1:]]
    # TODO: more cleaning

    win_rates = process_member_crop(
        img,
        y1,
        y2,
        2720,
        2950,
        config="--psm 6 -c tessedit_char_whitelist=0123456789%",
    )

    columns = [players, scores, ranks, win_rates]
    # Zip the columns bottom-up so that a missing line at the top of one
    # column does not shift every row, then restore top-to-bottom order.
    rows_bottom_up = list(zip(*(reversed(column) for column in columns)))
    return list(reversed(rows_bottom_up))


def append_merge(lists):
    """Concatenate consecutive row lists, merging each pair on an overlap.

    Rows are compared by their first item (``row[0]``). For each following
    list, the earliest row of the accumulated result whose key also occurs in
    that list is located (the following list is searched from its end). The
    accumulated result is cut after that row and continued with the rows that
    come after the match in the following list.

    Example (keys shown only):
        [[1, 2, 3, a], [b, 3, 4, 5]] -> [1, 2, 3, 4, 5]

    Args:
        lists: Sequence of lists of rows; each row must be indexable so that
            ``row[0]`` is its key.

    Returns:
        list: The merged rows. An empty ``lists`` gives ``[]``; a single list
        is returned as-is.

    Raises:
        ValueError: If a list shares no key with the rows merged so far.
    """
    if not lists:
        return []

    merged = lists[0]
    for position, following in enumerate(lists[1:], start=1):
        # First (i, j) with merged[i] and following[j] sharing a key, scanning
        # `merged` top-down and `following` bottom-up.
        match = next(
            (
                (i, j)
                for i, row in enumerate(merged)
                for j in range(len(following) - 1, -1, -1)
                if row[0] == following[j][0]
            ),
            None,
        )
        if match is None:
            raise ValueError(
                "No overlapping element between list "
                + str(position)
                + " and the rows merged so far"
            )

        i, j = match
        # Slice with a positive index: when the match is the last row of
        # `following`, nothing must be appended (a negative-zero slice such
        # as following[-0:] would wrongly append the entire list).
        merged = merged[: i + 1] + following[j + 1 :]

    return merged


def run(path):
    """OCR every ``*.png`` screenshot in ``path`` into one member table.

    Screenshots are processed in sorted filename order. The first one is read
    in top-3 mode; its rows are placed first, followed by the rows of the
    remaining screenshots merged on their overlapping players.

    Args:
        path: Directory (``pathlib.Path``) containing the screenshots.

    Returns:
        pandas.DataFrame: Columns ``Player``, ``Score``, ``Rank`` and
        ``Win Rate`` with one row per distinct player (first occurrence
        kept). Empty when the directory holds no screenshots.

    Raises:
        Exception: Whatever process() raises is re-raised after the offending
            file name has been printed.
    """
    # df1 = pd.read_csv(str(member_csv_path))

    screenshots = sorted(path.glob("*.png"))
    results = []
    for index, screenshot in enumerate(tqdm(screenshots)):
        try:
            # Only the first screenshot is treated as the top-3 view.
            results.append(process(screenshot, index == 0))
        except Exception:
            print("Error processing file: " + str(screenshot))
            raise

    # Guard the zero- and one-screenshot cases, which would otherwise fail on
    # results[0] or on merging an empty sequence.
    rows = []
    if results:
        rows = list(results[0])
        if len(results) > 1:
            rows = rows + append_merge(results[1:])

    df = pd.DataFrame(rows, columns=["Player", "Score", "Rank", "Win Rate"])
    # Overlapping screenshots can still repeat a player; keep the first hit.
    df = df.drop_duplicates("Player", ignore_index=True)

    return df


# Run the OCR pipeline over all screenshots in the configured directory.
# This executes whenever the cell runs, not only when run() is called elsewhere.
df = run(path_screenshots)


In [None]:
# Bare last expression: the notebook renders the resulting table.
df