# **Instructions**:

### 1 - Open your game that you want to perform detections
### 2 - In the game window, get the name of it's title bar 
### 3 - Update the variable "window_name" with the game title bar name
### 4 - Run all cells to start detecting objects using your trained model

In [1]:
import numpy as np
import win32gui, win32ui, win32con
from PIL import Image
from time import sleep
import cv2 as cv
import os
import random

In [2]:
class WindowCapture:
    w = 0
    h = 0
    hwnd = None

    def __init__(self, window_name):
        self.hwnd = win32gui.FindWindow(None, window_name)
        if not self.hwnd:
            raise Exception('Window not found: {}'.format(window_name))

        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        border_pixels = 8
        titlebar_pixels = 30
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

    def get_screenshot(self):
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.fromstring(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        img = img[...,:3]
        img = np.ascontiguousarray(img) 
            
        return img

    def generate_image_dataset(self):
        if not os.path.exists("images"):
            os.mkdir("images")
        while(True):
            img = self.get_screenshot()
            im = Image.fromarray(img[..., [2, 1, 0]])
            im.save(f"./images/img_{len(os.listdir('images'))}.jpeg")
            sleep(1)
    
    def get_window_size(self):
        return (self.w, self.h)

In [3]:
class ImageProcessor:
    W = 0
    H = 0
    net = None
    ln = None
    classes = {}
    colors = []

    def __init__(self, img_size, cfg_file, weights_file):
        np.random.seed(42)
        self.net = cv.dnn.readNetFromDarknet(cfg_file, weights_file)
        self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        self.ln = self.net.getLayerNames()
        self.ln = [self.ln[i-1] for i in self.net.getUnconnectedOutLayers()]
        self.W = img_size[0]
        self.H = img_size[1]
        
        with open('yolov4-tiny/obj.names', 'r') as file:
            lines = file.readlines()
        for i, line in enumerate(lines):
            self.classes[i] = line.strip()
        
        # If you plan to utilize more than six classes, please include additional colors in this list.
        self.colors = [
            (0, 0, 255), 
            (0, 255, 0), 
            (255, 0, 0), 
            (255, 255, 0), 
            (255, 0, 255), 
            (0, 255, 255),
            (23, 39, 24),
            (12, 8, 50)
        ]
        

    def proccess_image(self, img):

        blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
        self.net.setInput(blob)
        outputs = self.net.forward(self.ln)
        outputs = np.vstack(outputs)
        
        coordinates = self.get_coordinates(outputs, 0.5)

        self.draw_identified_objects(img, coordinates)

        return coordinates

    def get_coordinates(self, outputs, conf):

        boxes = []
        confidences = []
        classIDs = []

        for output in outputs:
            scores = output[5:]
            
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > conf:
                x, y, w, h = output[:4] * np.array([self.W, self.H, self.W, self.H])
                p0 = int(x - w//2), int(y - h//2)
                boxes.append([*p0, int(w), int(h)])
                confidences.append(float(confidence))
                classIDs.append(classID)

        indices = cv.dnn.NMSBoxes(boxes, confidences, conf, conf-0.1)

        if len(indices) == 0:
            return []

        coordinates = []
        for i in indices.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            coordinates.append({'x': x, 'y': y, 'w': w, 'h': h, 'class': classIDs[i], 'class_name': self.classes[classIDs[i]]})
        return coordinates

    def draw_identified_objects(self, img, coordinates):
        for coordinate in coordinates:
            x = coordinate['x']
            y = coordinate['y']
            w = coordinate['w']
            h = coordinate['h']
            classID = coordinate['class']
            
            color = self.colors[classID]
            
            cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv.putText(img, self.classes[classID], (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv.imshow('window',  img)


In [4]:
# Run this cell to initiate detections using the trained model and record the annotated window to a video file.

window_name = "MAME: 三国战纪 - 风云再起 (版本 101, CN) [kovsh101]"
cfg_file_name = "./yolov4-tiny/yolov4-tiny-custom.cfg"
weights_file_name = "./yolov4-tiny/training/yolov4-tiny-custom_last.weights"

wincap = WindowCapture(window_name)
# get width,height as integers for VideoWriter
width, height = wincap.get_window_size()
improc = ImageProcessor((width, height), cfg_file_name, weights_file_name)

# --- Video recording setup ---
# Output path (will create folder if missing)
output_path = './recordings/detection_output.avi'
import os
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# FourCC codec (mp4v works for .mp4 on many systems). Change to 'XVID' for .avi if preferred.
fourcc = cv.VideoWriter_fourcc(*'XVID')
fps = 20.0
frame_size = (int(width), int(height))
writer = cv.VideoWriter(output_path, fourcc, fps, frame_size)

# Ensure OpenCV window exists and prepare a WindowCapture for the display window.
cv.namedWindow('window', cv.WINDOW_NORMAL)
cv_wincap = None

while(True):
    # capture screenshot (BGR ndarray)
    ss = wincap.get_screenshot()
    
    # press 'q' to quit; release writer and close windows
    if cv.waitKey(1) == ord('q'):
        writer.release()
        cv.destroyAllWindows()
        break

    # process image (this function draws rectangles/text on the same img array)
    coordinates = improc.proccess_image(ss)
    
    # Attempt to capture the actual OpenCV display window and record that.
    # If we can't find or capture the display window, fall back to writing the annotated screenshot.
    frame_written = False
    try:
        if cv_wincap is None:
            try:
                cv_wincap = WindowCapture('window')
            except Exception:
                cv_wincap = None

        if cv_wincap is not None:
            win_frame = cv_wincap.get_screenshot()
            # resize if necessary
            if (win_frame.shape[1], win_frame.shape[0]) != frame_size:
                win_frame = cv.resize(win_frame, frame_size)
            writer.write(win_frame)
            frame_written = True
    except Exception as e:
        print('Warning: could not capture/write OpenCV display window:', e)

    if not frame_written:
        try:
            # fallback to the annotated screenshot (ss)
            if (ss.shape[1], ss.shape[0]) != frame_size:
                frame_to_write = cv.resize(ss, frame_size)
            else:
                frame_to_write = ss
            writer.write(frame_to_write)
        except Exception as e:
            print('Warning: could not write fallback frame to video file:', e)

    for coordinate in coordinates:
        print(coordinate)
    print()
    
    # If you have limited computer resources, consider adding a sleep delay between detections.
    # sleep(0.2)

# ensure writer released if loop exits normally
if writer.isOpened():
    writer.release()
print('Finished. Recorded to', output_path)


  img = np.fromstring(signedIntsArray, dtype='uint8')





























{'x': 306, 'y': 272, 'w': 111, 'h': 170, 'class': np.int64(1), 'class_name': 'enemy'}





{'x': 306, 'y': 272, 'w': 111, 'h': 170, 'class': np.int64(1), 'class_name': 'enemy'}
















{'x': 524, 'y': 257, 'w': 440, 'h': 334, 'class': np.int64(3), 'class_name': 'player'}





{'x': 524, 'y': 257, 'w': 440, 'h': 334, 'class': np.int64(3), 'class_name': 'player'}





















































































































































































































































































































































{'x': 532, 'y': 247, 'w': 433, 'h': 359, 'class': np.int64(3), 'class_name': 'player'}




{'x': 532, 'y': 247, 'w': 433, 'h': 359, 'class': np.int64(3), 'class_name': 'player'}















{'x': 614, 'y': 149, 'w': 101, 'h': 58, 'class': np.int64(5), 'clas