# TensorRT plan (engine) model

In [2]:
import os
import cv2
import numpy as np
from glob import glob
import pandas as pd
from PIL import Image
import time
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error
import math

In [3]:
#load engine file

import tensorrt as trt
import numpy as np
import os
import sys

import pycuda.driver as cuda
import pycuda.autoinit

class HostDeviceMem:
    """Holds a host buffer together with the device buffer it is paired with.

    Attributes:
        host: the host-side buffer passed to the constructor.
        device: the device-side buffer passed to the constructor.
    """

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # Four lines: a "Host:" label, the host buffer, a "Device:" label,
        # the device buffer.
        return "\n".join(("Host:", str(self.host), "Device:", str(self.device)))

    def __repr__(self):
        # The repr is deliberately the same text as the str form.
        return self.__str__()

class TrtModel:
    """Run inference with a serialized TensorRT engine file through PyCUDA.

    Construction deserializes the engine, allocates one page-locked host
    buffer and one device buffer per engine binding, and creates an execution
    context. Calling the instance copies the input to the device, runs
    ``execute_async`` and copies every output binding back to the host.

    Limitations visible in this implementation:
      * every buffer is allocated with the single ``dtype`` given to the
        constructor, regardless of the engine's own binding dtypes;
      * only the first input binding is filled by ``__call__``.
    """

    def __init__(self,engine_path,max_batch_size=1,dtype=np.float32):
        """Load the engine and allocate its I/O buffers.

        Args:
            engine_path: path of the serialized engine file.
            max_batch_size: multiplier applied to each binding's volume when
                sizing the buffers.
            dtype: numpy dtype used for every host buffer and for casting the
                input in ``__call__``.

        Raises:
            RuntimeError: if the engine file cannot be deserialized.
        """
        self.engine_path = engine_path
        self.dtype = dtype
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.runtime = trt.Runtime(self.logger)
        self.engine = self.load_engine(self.runtime, self.engine_path)
        self.max_batch_size = max_batch_size
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers()
        self.context = self.engine.create_execution_context()

    @staticmethod
    def load_engine(trt_runtime, engine_path):
        """Read ``engine_path`` and deserialize it with ``trt_runtime``.

        Returns:
            The deserialized engine object.

        Raises:
            RuntimeError: if ``deserialize_cuda_engine`` returns ``None``.
        """
        # Plugins are initialised before deserialization so an engine that
        # uses plugin layers can be loaded.
        trt.init_libnvinfer_plugins(None, "")
        with open(engine_path, 'rb') as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        if engine is None:
            # Fail here with a clear message instead of a confusing error
            # later, when the missing engine is first iterated.
            raise RuntimeError(
                "Failed to deserialize TensorRT engine from {!r}".format(engine_path)
            )
        return engine

    def allocate_buffers(self):
        """Allocate a host/device buffer pair for every engine binding.

        Returns:
            ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
            ``outputs`` are lists of ``HostDeviceMem``, ``bindings`` is the
            list of device addresses (as ints) in binding order, and
            ``stream`` is a new CUDA stream.
        """
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()

        for binding in self.engine:
            # Element count of the binding, scaled by the maximum batch size.
            size = trt.volume(self.engine.get_binding_shape(binding)) * self.max_batch_size
            host_mem = cuda.pagelocked_empty(size, self.dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)

            # The bindings list must contain one entry per binding, inputs
            # and outputs alike, in engine order.
            bindings.append(int(device_mem))

            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))

        return inputs, outputs, bindings, stream

    def __call__(self,x:np.ndarray,batch_size=1):
        """Run one inference pass.

        Args:
            x: input array; it is cast to ``self.dtype`` and flattened, and
                must be copyable into the first input's host buffer.
            batch_size: batch size passed to ``execute_async`` and used as the
                leading dimension of the returned arrays.

        Returns:
            A list with one ``(batch_size, -1)`` array per output binding.
            Each array is an independent copy, so it stays valid after later
            calls reuse the internal host buffers.

        Raises:
            ValueError: raised by numpy when the flattened input cannot be
                broadcast into the first input buffer.
        """
        x = x.astype(self.dtype)

        np.copyto(self.inputs[0].host,x.ravel())

        for inp in self.inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, self.stream)

        self.context.execute_async(batch_size=batch_size, bindings=self.bindings, stream_handle=self.stream.handle)
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, self.stream)

        # All copies and the execution were queued on the stream; wait for
        # them before reading the host buffers.
        self.stream.synchronize()
        # Copy out of the reused host buffers: returning views would let the
        # next call overwrite results the caller is still holding.
        return [out.host.reshape(batch_size,-1).copy() for out in self.outputs]

In [6]:
def get_img_yolo(img_file, img_size=224):
    """Load an image file and letterbox it into a square network input.

    The image is read with OpenCV, converted from BGR to RGB, resized so that
    its longer side equals ``img_size`` (aspect ratio kept), pasted centred on
    a canvas filled with the value 114, divided by 255 and rearranged to
    channels-first layout with a leading batch axis.

    Args:
        img_file: path of the image file to read.
        img_size: side length in pixels of the square output. Defaults to 224.

    Returns:
        ``numpy.ndarray`` of shape ``(1, 3, img_size, img_size)``.

    Raises:
        OSError: if OpenCV cannot read ``img_file``.
    """
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("cv2.imread could not read image file: {!r}".format(img_file))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    imh, imw = img.shape[:2]
    # Scale factor that brings the longer side to img_size.
    r = min(img_size / imh, img_size / imw)
    # Clamp to one pixel so extreme aspect ratios never ask for a 0-sized resize.
    h, w = max(1, round(imh * r)), max(1, round(imw * r))
    # The -0.1 makes an odd amount of padding put the smaller half on the
    # top/left side.
    top, left = round((img_size - h) / 2 - 0.1), round((img_size - w) / 2 - 0.1)

    img_pad = np.full((img_size, img_size, 3), 114, dtype=img.dtype)
    img_pad[top:top + h, left:left + w] = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)

    img_norm = img_pad / 255

    # (H, W, C) -> (1, H, W, C) -> (1, C, H, W)
    return np.transpose(img_norm[np.newaxis], (0, 3, 1, 2))

In [7]:
def prediction_yolo(img_files, model):
    """Classify every image and derive its ground-truth label from the file name.

    For each path the image is preprocessed with ``get_img_yolo`` and passed
    to ``model``; an argmax of 0 over the model output means "female", any
    other index means "male". The ground truth comes from the integer that
    precedes the first "A" in the file's base name: values up to and including
    7380 are labelled "female", larger values "male".

    Args:
        img_files: iterable of image file paths.
        model: callable taking the preprocessed image and returning scores.

    Returns:
        ``(gt_list, pred_list)``: two lists of "female"/"male" strings in the
        order of ``img_files``.
    """
    gt_list, pred_list = [], []

    for path in img_files:
        scores = model(get_img_yolo(path))
        predicted = "female" if np.argmax(scores) == 0 else "male"

        # The numeric prefix of the file name decides the true label.
        file_id = int(os.path.basename(path).split("A")[0])
        actual = "female" if file_id <= 7380 else "male"

        pred_list.append(predicted)
        gt_list.append(actual)

    return gt_list, pred_list

In [10]:
def cal_accuracy(gt, pred):
    """Print the ``accuracy_score`` of ``pred`` against ``gt``; returns nothing."""
    print("accuracy: ", accuracy_score(gt, pred))

# YOLOv8s

In [9]:
# Collect the test image paths, one folder per gender, and show both counts.
img_files_female, img_files_male = glob("test/female/*"), glob("test/male/*")
(len(img_files_female), len(img_files_male))

(287, 287)

In [5]:
# Deserialize the engine file into a TrtModel, then display the shape
# reported for binding 0.
batch_size = 1
trt_engine_path = 'yolov8s_cls_gender.plan'
model = TrtModel(engine_path=trt_engine_path)
shape = model.engine.get_binding_shape(0)
shape

(1, 3, 224, 224)

In [11]:
# inference
# Score each gender folder on its own, then both together.

# female images only
print("female")
gt_female, pred_female = prediction_yolo(img_files_female, model)
cal_accuracy(gt_female, pred_female)

# male images only
print("male")
gt_male, pred_male = prediction_yolo(img_files_male, model)
cal_accuracy(gt_male, pred_male)

# both folders together
print("all")
# NOTE: the male results are appended in place, so from here on gt_female and
# pred_female hold the female results followed by the male results.
gt_female += gt_male
pred_female += pred_male
cal_accuracy(gt_female, pred_female)

female
accuracy:  0.9337979094076655
male
accuracy:  0.9024390243902439
all
accuracy:  0.9181184668989547
