# Experiment with recognizing times from MK8DX screenshots

In [14]:
import tempfile
from PIL import Image
from pathlib import Path
import os
import sys
import io
import datetime as dt
import subprocess
from mindee import Client, AsyncPredictResponse, product
from dotenv import load_dotenv, dotenv_values

In [15]:
# Load variables from a .env file into the environment; MINDEE_API_KEY is
# read from the environment with os.getenv when the Mindee client is created.
load_dotenv()

True

In [16]:
# Create the Mindee client; the API key comes from the MINDEE_API_KEY
# environment variable.
mindee_client = Client(api_key=os.getenv('MINDEE_API_KEY'))

# One custom endpoint per screenshot kind, created in kind order.
endpoint_kind_1, endpoint_kind_2 = (
    mindee_client.create_endpoint(
        account_name="polimath",
        endpoint_name=endpoint_name,
        version="1",
    )
    for endpoint_name in (
        "mk8dx_screen_capture_kind_1",
        "mk8dx_screen_capture_kind_2",
    )
)
# The endpoint for kind 3 is not created yet; its call is kept here, disabled.
#endpoint_kind_3 = mindee_client.create_endpoint(
#    account_name="polimath",
#    endpoint_name="mk8dx_screen_capture_kind_3",
#    version="1"
#)

## Identify which kind of image we have

We need a way to distinguish among at least three image formats:
1. Blue background, track name at bottom, combo listed with text.
2. Track background, racing alone (no ghost).
3. Track background, racing against ghost.

We also need a way to identify how many laps there are in the race. This can be either three or seven, with three being the most common.

### Determine kind of image

In [9]:
class ImageToOCR():
    """
    Wrap one screenshot file and offer OCR helpers for it.

    Local OCR is done by running the external ``ocrs`` command. Field
    extraction is delegated to the Mindee client and the per-kind
    endpoints defined at module level (``mindee_client``,
    ``endpoint_kind_1``, ``endpoint_kind_2``, ...).
    """

    # Size the image must have to be treated as a Switch screen capture.
    _EXPECTED_SIZE = (1280, 720)

    def __init__(self, image_file_name):
        """Store the path of the image; nothing is read until it is needed."""
        self.image_file_name = image_file_name

    @staticmethod
    def _run_ocrs(file_name) -> str:
        """Run ``ocrs`` on ``file_name`` and return its stripped stdout."""
        ocr_result = subprocess.run(['ocrs', file_name], stdout=subprocess.PIPE)
        return ocr_result.stdout.decode('utf-8').strip()

    @property
    def ocr(self) -> str:
        """Text that ``ocrs`` recognizes in the whole image."""
        return self._run_ocrs(self.image_file_name)

    def ocr_box(self, box: list[int]) -> str:
        """
        Given a box defining opposite corners of a box in pixels,
        return the text ocr'ed from that box.

        box = a list of integers: [x1, y1, x2, y2]
        """
        # NamedTemporaryFile only reserves a unique name here; delete=False
        # keeps the file on disk after the handle closes so that both PIL
        # and ocrs can use it by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temporary_file:
            temporary_file_name = temporary_file.name
        try:
            # Save inside the `with` so the source image is still open while
            # the cropped region is written.
            with Image.open(self.image_file_name) as original_image:
                original_image.crop(box).save(temporary_file_name)
            return self._run_ocrs(temporary_file_name)
        finally:
            # delete=False means nobody else removes the file; do it here so
            # repeated calls do not pile up crops in the temp directory.
            os.unlink(temporary_file_name)

    def ocr_mindee(self, kind: int) -> dict:
        """
        Send the image to the Mindee endpoint for ``kind`` and return the
        predicted fields as ``{field_name: field_value}``.

        Returns an empty dict when ``kind`` is not 1, 2 or 3, or when no
        module-level ``endpoint_kind_<kind>`` is defined.
        """
        if kind not in (1, 2, 3):
            return {}
        # Look the endpoint up by name instead of eval(): same module-level
        # variables, but nothing is executed and a missing endpoint (kind 3
        # is currently commented out) gives {} instead of a NameError.
        endpoint = globals().get(f'endpoint_kind_{kind}')
        if endpoint is None:
            return {}
        input_doc = mindee_client.source_from_path(self.image_file_name)
        result: AsyncPredictResponse = mindee_client.enqueue_and_parse(
            product.GeneratedV1,
            input_doc,
            endpoint=endpoint
        )
        return {k: v.value for k, v in result.document.inference.prediction.fields.items()}

    @property
    def kind(self) -> int:
        """
        Classify the image.

        Returns:
            0 - wrong size, or none of the checks below matched
            1 - 'OK' is read in the lower right corner
            2 - no lap 2 in a lower box of times, but some text is read
                where lap 2 sits in the upper box of times
            3 - '2' is read where lap 2 sits in the lower box of times
                (assumes that the course has three laps)

        Each check runs ``ocrs`` on one crop; later checks are skipped as
        soon as one matches.
        """
        with Image.open(self.image_file_name) as f:
            image_size = f.size

        if image_size != self._EXPECTED_SIZE:
            # Wrong size; can't be a screen capture from the Switch.
            # Checked first so no crop/OCR is attempted on such images.
            return 0
        if self.ocr_box(box=[1170, 646, 1225, 682]) == 'OK':
            # Looks for 'OK' in the lower right corner
            return 1
        if self.ocr_box(box=[1020, 456, 1044, 480]) == '2':
            # Looks for the second lap in the lower box of times.
            # Assumes that the course has three laps.
            return 3
        if self.ocr_box(box=[1162, 220, 1183, 253]):
            # If there's no lower box of times, looks for the second lap
            # in the upper box of times: any recognized text counts.
            return 2
        # Can't positively identify as one of the defined kinds.
        return 0

    def pull_data_from_image(self, kind=None) -> dict:
        """
        Pulls information from image. Returns a dictionary of strings.

        kind = 1, 2 or 3 to force a layout; None (default) to detect it
        with the ``kind`` property. Any other value gives an empty dict.
        """
        if kind is None:
            kind = self.kind
        if kind not in (1, 2, 3):
            return {}
        return self.ocr_mindee(kind=kind)

In [7]:
# Kind 1 sample: print the detected kind, then the fields extracted as kind 1.
image_file = 'data/test_images/2019102710534900_c.jpg'
print(f'{image_file}: {ImageToOCR(image_file).kind}')
print(ImageToOCR(image_file).pull_data_from_image(kind=1))

data/test_images/2019102710534900_c.jpg: 1
{'glider': 'Gold Glider', 'lap_1_time': '0:28.703', 'lap_2_time': '0:28.330', 'lap_3_time': '0:27.706', 'overall_time': '1:24.799', 'racer': 'Ander', 'track': 'GCN Yoshi Circuit', 'vehicle': 'Biddybuggy', 'wheels': 'Roller'}


In [10]:
# Kind 2 sample: print the detected kind, then the fields extracted as kind 2.
image_file = 'data/test_images/2023062418352400_s.jpg'
print(f'{image_file}: {ImageToOCR(image_file).kind}')
print(ImageToOCR(image_file).pull_data_from_image(kind=2))

data/test_images/2023062418352400_s.jpg: 2
{'character_name': 'Dry Bones', 'glider': 'Cloud Glider', 'lap_1_time': '0:37.501', 'lap_2_time': '0:33.703', 'lap_3_time': '0:34.005', 'overall_time': '8:45.209', 'vehicle': 'Biddybuggy', 'wheels': 'Azure Roller'}


In [None]:
# Kind 3 sample: print the detected kind, then the fields extracted as kind 3.
image_file = 'data/test_images/2023070714422000_s.jpg'
print(f'{image_file}: {ImageToOCR(image_file).kind}')
print(ImageToOCR(image_file).pull_data_from_image(kind=3))

In [11]:
# Non-screenshot sample: print the detected kind, then request extraction
# with kind 0, which is not one of the supported kinds.
image_file = 'data/test_images/dog.jpg'
print(f'{image_file}: {ImageToOCR(image_file).kind}')
print(ImageToOCR(image_file).pull_data_from_image(kind=0))

data/test_images/dog.jpg: 0
{}


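There are still some problems with accuracy. In the kind 2 result above, the overall time is read as `8:45.209`, but the three lap times sum to `1:45.209`, so the leading `1` was misread as `8`. In the kind 1 result, the lap times sum to `1:24.739` while the overall time is read as `1:24.799`, so at least one digit was misread there as well.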

In [None]:
# SAVE IN CASE WE NEED THE COORDINATES
# NOTE(review): this cell is a saved fragment, not runnable code: it uses
# `self` and `return` outside any function and its indentation is
# inconsistent. It looks like an earlier body of pull_data_from_image that
# read each field with ocr_box instead of Mindee -- confirm before reuse.
# Every `box` is [x1, y1, x2, y2] in pixels, the format ocr_box takes;
# presumably measured on 1280x720 captures -- TODO confirm.
if kind == None:
            return self.pull_data_from_image(kind=self.kind)
        if kind not in [1,2,3]:
            return {}
        if kind == 1:
            # Kind 1 boxes: racer, times, combo parts and track name.
            racer = self.ocr_box(box=[788, 110, 1074, 143])
            overall_time = self.ocr_box(box=[870, 144, 1075, 190])
            lap_times = {
                1: self.ocr_box(box=[866, 210, 1026, 250]),
                2: self.ocr_box(box=[866, 250, 1026, 290]),
                3: self.ocr_box(box=[866, 290, 1026, 330])
            }
            vehicle = self.ocr_box(box=[830, 350, 1100, 390])
            wheels = self.ocr_box(box=[830, 410, 1100, 450])
            glider = self.ocr_box(box=[830, 470, 1100, 510])
            track = self.ocr_box(box=[300, 640, 830, 690])
            output = {
                'racer': racer,
                'overall_time': overall_time,
                'lap_times': lap_times,
                'vehicle': vehicle,
                'wheels': wheels,
                'glider': glider,
                'track': track
            }
        elif kind == 2:
            # Kind 2 boxes: character, times and combo parts (no track box).
            character_name = self.ocr_box(box=[615, 120, 920, 155])
            overall_time = self.ocr_box(box=[1039, 113, 1210, 152])
            # NOTE(review): the lap 1 box spans y=145..210 (65 px) and
            # overlaps the overall_time box (y=113..152), while the lap 2 and
            # lap 3 boxes are 31 and 34 px tall -- y1=145 may be a typo; confirm.
            lap_times = {
                1: self.ocr_box(box=[1047, 145, 1200, 210]),
                2: self.ocr_box(box=[1047, 222, 1200, 253]),
                3: self.ocr_box(box=[1047, 264, 1200, 298])
            }
            vehicle = self.ocr_box(box=[671, 171, 973, 202])
            wheels = self.ocr_box(box=[671, 220, 973, 252])
            glider = self.ocr_box(box=[671, 265, 973, 298])
            output = {
                'character_name': character_name,
                'overall_time': overall_time,
                'lap_times': lap_times,
                'vehicle': vehicle,
                'wheels': wheels,
                'glider': glider
            }
        elif kind == 3:
            # No coordinates were recorded for kind 3.
            output = {}