In [None]:
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import pytesseract
import re
from matplotlib import pyplot as plt
from itertools import chain
from tqdm.notebook import tqdm

# Directory that is scanned for the "*.png" leaderboard screenshots passed to run().
path_screenshots = Path.home() / "Pictures/Screenshots"
# NOTE(review): not referenced anywhere in the visible cells — confirm whether it is still needed.
member_csv_path = Path("../out/members.csv")
# Point pytesseract at the Tesseract executable (hard-coded Windows path; adjust per machine).
pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract.exe"
# Player classes; run() pairs them, in this order, with consecutive batches of screenshots.
classes = ["Wizard", "Barbarian", "Monk", "Demon Hunter", "Crusader", "Necromancer"]

In [None]:
def parse_base(items):
    """Split a block of text into a list of cleaned, non-empty lines.

    Args:
        items: Text as a single string, entries separated by "\\n".

    Returns:
        list[str]: Every line with surrounding whitespace removed, in the
        original order. Lines that are empty or consist only of whitespace
        are dropped.
    """
    # Strip *before* filtering: a whitespace-only line (for example one that
    # holds just a form feed) is truthy, so filtering first would let it
    # through and it would end up in the result as an empty string.
    stripped_lines = (line.strip() for line in items.split("\n"))
    return [line for line in stripped_lines if line]


def process_member_crop(img, y1, y2, x1, x2, config=None):
    """OCR a rectangular region of an image and return its text lines.

    Args:
        img: Image array indexed as ``img[rows, cols]``.
        y1, y2: Row range of the region (``y1`` inclusive, ``y2`` exclusive).
        x1, x2: Column range of the region (``x1`` inclusive, ``x2`` exclusive).
        config: Extra Tesseract options forwarded to ``image_to_string``.

    Returns:
        The recognised text, split into lines by ``parse_base``.
    """
    region = img[y1:y2, x1:x2]
    # Inverted binary threshold; the level is picked automatically (Otsu).
    _, binarized = cv2.threshold(
        region, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )
    recognised_text = pytesseract.image_to_string(binarized, config=config)
    return parse_base(recognised_text)


def _clean_rank(rank):
    """Repair known OCR artefacts in a single rank string."""
    # "t Legend" -> "Legend": keep only the trailing word(s) of the line.
    match = re.search(r"([A-Za-z]{2,} *[A-Za-z]+)$", rank)
    if match is not None:
        rank = match.group()

    # "GoldV" -> "Gold V": re-insert the space before a trailing "V".
    # NOTE(review): "GoldIV" also matches and becomes "GoldI V" — confirm
    # whether that input can occur.
    if re.match(r"^\w+(?:\wV)$", rank):
        rank = rank[:-1] + " V"

    # "Bronze Ill" -> "Bronze III": a lowercase "l" among the last three
    # characters is a misread "I". Strings of four characters or fewer
    # (e.g. "Gold") are left alone.
    if len(rank) > 4:
        rank = rank[:-3] + rank[-3:].replace("l", "I")

    return rank


def process(path, is_top_3=False):
    """OCR one leaderboard screenshot into per-player rows.

    Four fixed pixel columns of the screenshot are read separately (player
    name, score, rank, win rate), each with its own Tesseract configuration,
    and then combined row by row.

    Args:
        path: Location of the screenshot image.
        is_top_3: When True only the rows between y=435 and y=1045 are read;
            otherwise the region extends down to y=1545.

    Returns:
        list[tuple[str, str, str, str]]: ``(player, score, rank, win_rate)``
        tuples in top-to-bottom order. The columns are aligned from the
        bottom, so if OCR yields columns of different lengths the surplus
        entries at the top of the longer columns are discarded.

    Raises:
        OSError: If the image cannot be read.
    """
    img = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than on the first slice.
        raise OSError("Could not read image: " + str(path))

    # Improve thresholding by processing the top 3 separately
    y2 = 1045 if is_top_3 else 1545

    players = process_member_crop(img, 435, y2, 1400, 1950, config="--psm 6")
    scores = process_member_crop(
        img, 435, y2, 1950, 2100, config="--psm 6 -c tessedit_char_whitelist=0123456789"
    )
    ranks = process_member_crop(
        img,
        435,
        y2,
        2360,
        2650,
        config=r'--psm 6 -c tessedit_char_whitelist="LegendGoldSilverBronzeIV "',
    )
    ranks = [_clean_rank(rank) for rank in ranks]

    win_rates = process_member_crop(
        img,
        435,
        y2,
        2720,
        2950,
        config="--psm 6 -c tessedit_char_whitelist=0123456789%",
    )

    # Zip the reversed columns so rows are matched up from the bottom of the
    # crop, then flip the result back into top-to-bottom order.
    columns = [reversed(column) for column in (players, scores, ranks, win_rates)]
    rows_bottom_up = list(zip(*columns))

    return list(reversed(rows_bottom_up))


def append_merge(lists):
    """Append lists greedily, merging on overlapping elements.

    Two items are considered the same when their first components
    (``item[0]``) are equal. For each following list the earliest item of the
    accumulated result that also occurs in that list is located; the result
    is cut right after that item and continued with whatever follows the
    matching item in the next list.

    Example: [[1,2,3,a],[b,3,4,5]] -> [1,2,3,4,5]

    Args:
        lists: Sequence of lists whose items support ``item[0]``.

    Returns:
        list: The merged list; ``[]`` when ``lists`` is empty.

    Raises:
        StopIteration: If two consecutive lists share no item. The exception
            type is kept for backward compatibility with existing callers.
    """
    if not lists:
        return []

    merged = lists[0]
    for following in lists[1:]:
        # First (idx_a, idx_b) pair, scanning `merged` front to back and
        # `following` back to front, whose keys are equal.
        overlap = next(
            (
                (idx_a, idx_b)
                for idx_a in range(len(merged))
                for idx_b in range(len(following))
                if merged[idx_a][0] == following[-idx_b - 1][0]
            ),
            None,
        )
        if overlap is None:
            raise StopIteration("no overlapping element between consecutive lists")

        idx_a, idx_b = overlap
        # Take the last `idx_b` items of `following`. Slicing from an explicit
        # start index is required: `following[-idx_b:]` would return the
        # entire list when idx_b == 0 instead of nothing.
        merged = merged[: idx_a + 1] + following[len(following) - idx_b :]

    return merged


def _fill_errant_ranks(ranks):
    """Overwrite a rank with its neighbours' value when both neighbours agree.

    The first and last entries have only one neighbour and are returned
    unchanged.
    """
    if len(ranks) < 3:
        return list(ranks)

    # Every comparison reads the original list, so a corrected value never
    # influences the decision for the next position.
    middle = [
        ranks[i + 1] if ranks[i - 1] == ranks[i + 1] else ranks[i]
        for i in range(1, len(ranks) - 1)
    ]
    return [ranks[0]] + middle + [ranks[-1]]


def run(path, batch_size=26):
    """OCR all screenshots below ``path`` and build the combined table.

    The "*.png" files are sorted by path and split into consecutive groups of
    ``batch_size``; the groups are paired, in order, with the entries of
    ``classes``. Screenshots that do not fill a complete group, and groups
    beyond the number of classes, are ignored. The first screenshot of each
    group is processed with ``is_top_3=True``.

    Args:
        path: Directory (``pathlib.Path``) containing the screenshots.
        batch_size: Number of screenshots that belong to one class.

    Returns:
        pandas.DataFrame: Columns "Player", "Class", "Score", "Rank" and
        "Win Rate", with duplicate players removed within each class. An
        empty DataFrame without columns when no complete group was found.

    Raises:
        Exception: Whatever ``process`` raises is re-raised after the
            offending file name has been printed.
    """
    screenshots = sorted(path.glob("*.png"))
    # The same iterator repeated batch_size times makes zip emit consecutive,
    # non-overlapping groups.
    screenshot_batches = zip(*[iter(screenshots)] * batch_size)

    batch_frames = []

    for player_class, screenshot_batch in zip(classes, screenshot_batches):
        batch_results = []
        for position, screenshot in enumerate(tqdm(screenshot_batch)):
            try:
                batch_results.append(process(screenshot, position == 0))
            except Exception:
                print("Error processing file: " + str(screenshot))
                raise

        # The top-3 screenshot is kept whole; the remaining ones overlap each
        # other and are merged on the shared players.
        remaining = batch_results[1:]
        rows = batch_results[0] + (append_merge(remaining) if remaining else [])

        batch_df = pd.DataFrame(rows, columns=["Player", "Score", "Rank", "Win Rate"])

        # Clean rank data
        # Fill errant values if previous and next values match
        ranks = _fill_errant_ranks(batch_df["Rank"].tolist())

        # "Gold" -> "Gold I"
        ranks = [s + " I" if s in ["Gold", "Silver", "Bronze"] else s for s in ranks]

        batch_df["Rank"] = ranks
        batch_df.insert(1, "Class", player_class)
        batch_df = batch_df.drop_duplicates("Player")

        batch_frames.append(batch_df)

    if not batch_frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(batch_frames, ignore_index=True)


# Run the OCR pipeline over every screenshot in `path_screenshots`; `df` is the combined table.
df = run(path_screenshots)


In [None]:
# Write the combined table to CSV without the DataFrame index column.
# The path is relative to the notebook's working directory.
df.to_csv("../out/battleground.csv", index=False)