## Convert PyTorch to ONNX

In [251]:
import torch
from train.models import resnet
import numpy as np
import onnxruntime


In [252]:
# Read the checkpoint onto the CPU and keep only its "model_state_dict" entry,
# then load those weights into a freshly constructed ResNet18.
state_dict = torch.load(
    "/Users/lucas.boscatti/Documents/nero/emotion_detection/resnet18_3_classes_300_balanced/ResNet18_epoch300_bs64_lr0.1_momentum0.9_wd0.0001_seed0_smoothTrue_mixupTrue_schedulerreduce_resnet18_3_classes_300_balanced/checkpoints/best_checkpoint.tar",
    map_location=torch.device("cpu"),
)["model_state_dict"]

model = resnet.ResNet18()
model.load_state_dict(state_dict)
model.to(torch.device("cpu"))
# Kept as the last expression so the cell displays the module structure.
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [253]:
# Input to the model
from PIL import Image
import torchvision.transforms as transforms
import cv2
import onnx

def to_numpy(tensor):
    """Return the contents of ``tensor`` as a NumPy array on the CPU.

    A tensor that requires grad is detached first; any other tensor is
    converted directly.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


# Preprocessing pipeline: grayscale -> TenCrop(40) -> crops converted to
# tensors and stacked -> every crop normalised with mean 0 and std 255.
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.TenCrop(40),
    transforms.Lambda(
        lambda crops: torch.stack(list(map(transforms.ToTensor(), crops)))
    ),
    transforms.Lambda(
        lambda tensors: torch.stack(
            list(map(transforms.Normalize(mean=(0,), std=(255,)), tensors))
        )
    ),
])

# PIL decodes colour images in RGB channel order, so the grayscale conversion
# has to use the RGB code: COLOR_BGR2GRAY would apply the red and blue luma
# weights to the wrong channels. convert("RGB") pins the channel layout.
face = np.array(Image.open("./1000_F_390012920_uaK8LRMyy36SkCZn6oFHax1Qv6zZoJFY.jpg").convert("RGB"))
resize_frame = cv2.resize(face, (48, 48))
gray_frame = cv2.cvtColor(resize_frame, cv2.COLOR_RGB2GRAY)
inputs = Image.fromarray(gray_frame)
# unsqueeze(0) adds the leading batch axis that is unpacked as `bs` further down.
inputs = transform(inputs).unsqueeze(0).to(torch.device("cpu"))

with torch.no_grad():
    # `inputs` is unpacked as (batch, crops, channels, height, width); merge
    # the first two axes so each crop goes through the network as one sample.
    bs, ncrops, c, h, w = inputs.shape
    inputs = inputs.view(-1, c, h, w)

    # Forward pass. `torch_out` stays untouched: it is the reference for the
    # ONNX Runtime comparison at the end of this cell.
    torch_out = model(inputs)

    # Average the scores over the crops, then take the highest-scoring class.
    outputs = torch_out.view(bs, ncrops, -1)
    outputs = torch.sum(outputs, dim=1) / ncrops
    _, preds = torch.max(outputs.data, 1)
    preds = preds.cpu().numpy()[0]

    print(preds)

    # Export the model, tracing it with the flattened crop batch. Axis 0 of
    # both the input and the output is declared dynamic ('batch_size').
    torch.onnx.export(
        model,                      # model being run
        inputs,                     # example input used for the export
        "resnet18.onnx",            # destination file
        export_params=True,         # store the trained weights in the file
        opset_version=10,           # ONNX opset to target
        do_constant_folding=True,   # fold constants as an optimisation
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={
            'input': {0: 'batch_size'},
            'output': {0: 'batch_size'},
        },
    )

    # Reload the exported file and run the ONNX checker on it.
    onnx_model = onnx.load("resnet18.onnx")
    onnx.checker.check_model(onnx_model)

    ort_session = onnxruntime.InferenceSession("resnet18.onnx", providers=["CPUExecutionProvider"])

    # Run the same batch through ONNX Runtime.
    input_name = ort_session.get_inputs()[0].name
    ort_inputs = {input_name: to_numpy(inputs)}
    ort_outs = ort_session.run(None, ort_inputs)

    # The ONNX Runtime result must match the PyTorch output within tolerance.
    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)

    print("Exported model has been tested with ONNXRuntime, and the result looks good!")

1
verbose: False, log level: Level.ERROR

Exported model has been tested with ONNXRuntime, and the result looks good!


In [254]:
import onnx

# Reload the exported file from disk and run the ONNX checker on it;
# check_model raises if the model is not valid.
onnx_model = onnx.load("resnet18.onnx")
onnx.checker.check_model(onnx_model)

In [255]:
import onnxruntime

# Open an ONNX Runtime session on the exported file, restricted to the CPU execution provider.
ort_session = onnxruntime.InferenceSession("resnet18.onnx", providers=["CPUExecutionProvider"])

def to_numpy(tensor):
    """Return the contents of ``tensor`` as a NumPy array on the CPU.

    A tensor that requires grad is detached first; any other tensor is
    converted directly.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

# Run the exported graph through ONNX Runtime on the current `inputs` tensor.
input_name = ort_session.get_inputs()[0].name
ort_inputs = {input_name: to_numpy(inputs)}
ort_outs = ort_session.run(None, ort_inputs)

# The ONNX Runtime result must match the PyTorch output within tolerance.
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

Exported model has been tested with ONNXRuntime, and the result looks good!


In [256]:
# Preprocessing pipeline: grayscale -> TenCrop(40) -> crops converted to
# tensors and stacked -> every crop normalised with mean 0 and std 255.
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.TenCrop(40),
    transforms.Lambda(
        lambda crops: torch.stack(list(map(transforms.ToTensor(), crops)))
    ),
    transforms.Lambda(
        lambda tensors: torch.stack(
            list(map(transforms.Normalize(mean=(0,), std=(255,)), tensors))
        )
    ),
])
# Load the picture directly as single-channel ('L') data and shrink it to 48x48.
img = np.array(Image.open("./1000_F_390012920_uaK8LRMyy36SkCZn6oFHax1Qv6zZoJFY.jpg").convert('L'))
resize_frame = cv2.resize(img, (48, 48))
inputs = transform(Image.fromarray(resize_frame)).unsqueeze(0).to("cpu")

# Merge the batch and crop axes so every crop is one sample for the session.
bs, ncrops, c, h, w = inputs.shape
inputs = inputs.view(-1, c, h, w)

# Query the session for its first input/output names instead of hard-coding them.
input_name = ort_session.get_inputs()[0].name
output_name = ort_session.get_outputs()[0].name
ort_inputs = {input_name: to_numpy(inputs)}
ort_outs = ort_session.run([output_name], ort_inputs)
print(ort_outs)

[array([[-0.8155476 ,  1.3353965 , -0.49885798],
       [-0.7843051 ,  1.3514168 , -0.54651946],
       [-0.8433894 ,  1.2995262 , -0.43066955],
       [-0.8092322 ,  1.2985456 , -0.46532828],
       [-0.84739995,  1.2572088 , -0.38902307],
       [-0.86073065,  1.3213009 , -0.43915457],
       [-0.82604635,  1.2793139 , -0.43202597],
       [-0.8922109 ,  1.2684306 , -0.3555634 ],
       [-0.8562209 ,  1.2941206 , -0.41554368],
       [-0.87740606,  1.2149951 , -0.31604144]], dtype=float32)]


In [257]:
def load_trained_model(model_path: str):
    """
    Loads the trained ResNet18 emotion recognition model on the CPU in eval mode.

    Args:
        model_path (str): Path to a checkpoint file whose ``"model_state_dict"``
            entry holds the weights. If it does not point to an existing file
            (callers in this notebook pass the placeholder ``"a"``), the
            notebook's default checkpoint is loaded instead.

    Returns:
        The ``resnet.ResNet18`` instance with the checkpoint weights loaded,
        moved to the CPU and switched to evaluation mode.
    """
    import os  # local import: only needed for the path check below

    default_checkpoint = "/Users/lucas.boscatti/Documents/nero/emotion_detection/resnet18_3_classes_300_balanced/ResNet18_epoch300_bs64_lr0.1_momentum0.9_wd0.0001_seed0_smoothTrue_mixupTrue_schedulerreduce_resnet18_3_classes_300_balanced/checkpoints/best_checkpoint.tar"

    # `model_path` used to be ignored entirely. Honour it when it names a real
    # file; otherwise keep the previous behaviour so placeholder calls still work.
    path_is_usable = isinstance(model_path, (str, os.PathLike)) and os.path.isfile(model_path)
    checkpoint_path = model_path if path_is_usable else default_checkpoint

    device = torch.device("cpu")
    checkpoint = torch.load(checkpoint_path, map_location=device)

    model = resnet.ResNet18()
    model.load_state_dict(checkpoint["model_state_dict"])
    model.to(device)
    model.eval()
    return model

def recognize_emotion(face: np.ndarray, model=None) -> None:
    """
    Classifies a face image with the PyTorch model and prints the class index.

    Args:
        face (np.ndarray): Face image. A colour image is interpreted in BGR
            channel order, because it is converted with ``cv2.COLOR_BGR2GRAY``;
            a single-channel image is used as-is.
        model: Optional already-loaded model to run. When ``None`` (the
            default), the model is loaded with ``load_trained_model`` on every
            call, as before.

    Returns:
        None. The predicted class index is printed, not returned.
    """
    transform = transforms.Compose(
        [
            transforms.Grayscale(),
            transforms.TenCrop(40),
            transforms.Lambda(
                lambda crops: torch.stack(
                    [transforms.ToTensor()(crop) for crop in crops]
                )
            ),
            transforms.Lambda(
                lambda tensors: torch.stack(
                    [
                        transforms.Normalize(mean=(0,), std=(255,))(t)
                        for t in tensors
                    ]
                )
            ),
        ]
    )
    resize_frame = cv2.resize(face, (48, 48))
    # A single-channel input has no colour axis after resizing and would make
    # cvtColor fail, so it is passed through unchanged.
    if resize_frame.ndim == 2:
        gray_frame = resize_frame
    else:
        gray_frame = cv2.cvtColor(resize_frame, cv2.COLOR_BGR2GRAY)
    inputs = Image.fromarray(gray_frame)
    inputs = transform(inputs).unsqueeze(0).to(torch.device("cpu"))

    # Load from disk only when the caller did not supply a model.
    if model is None:
        model = load_trained_model("a")

    with torch.no_grad():
        # Merge the batch and crop axes so each crop is one sample.
        bs, ncrops, c, h, w = inputs.shape
        inputs = inputs.view(-1, c, h, w)

        # forward pass
        outputs = model(inputs)

        # combine results across the crops (mean over the crop axis)
        outputs = outputs.view(bs, ncrops, -1)
        outputs = torch.sum(outputs, dim=1) / ncrops

        _, preds = torch.max(outputs, 1)
        preds = preds.cpu().numpy()[0]

        print(preds)


In [258]:
# recognize_emotion converts its input with cv2.COLOR_BGR2GRAY, i.e. it expects
# OpenCV's BGR channel order, while PIL decodes to RGB. Swap the channels first
# so the grayscale weights are applied to the right colour planes.
img = cv2.cvtColor(
    np.array(Image.open("./1000_F_390012920_uaK8LRMyy36SkCZn6oFHax1Qv6zZoJFY.jpg").convert("RGB")),
    cv2.COLOR_RGB2BGR,
)
recognize_emotion(img)

1


In [259]:
def load_trained_model2(model_path: str):
    """
    Opens an ONNX Runtime inference session for the exported model on the CPU.

    Args:
        model_path (str): Path to an ONNX model file. If it does not point to
            an existing file (callers in this notebook pass the placeholder
            ``"a"``), ``"resnet18.onnx"`` is loaded instead.

    Returns:
        onnxruntime.InferenceSession: Session created with the
        ``CPUExecutionProvider``.
    """
    import os  # local import: only needed for the path check below

    # `model_path` used to be ignored entirely. Honour it when it names a real
    # file; otherwise keep the previous behaviour so placeholder calls still work.
    path_is_usable = isinstance(model_path, (str, os.PathLike)) and os.path.isfile(model_path)
    onnx_path = os.fspath(model_path) if path_is_usable else "resnet18.onnx"
    return onnxruntime.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])

def recognize_emotion2(face: np.ndarray, session=None) -> None:
    """
    Classifies a face image with the exported ONNX model and prints the class index.

    Args:
        face (np.ndarray): Face image. A colour image is interpreted in BGR
            channel order, because it is converted with ``cv2.COLOR_BGR2GRAY``;
            a single-channel image is used as-is.
        session: Optional ONNX Runtime session to run. When ``None`` (the
            default), a session is created with ``load_trained_model2`` on
            every call.

    Returns:
        None. The predicted class index is printed, not returned.
    """
    transform = transforms.Compose(
        [
            transforms.Grayscale(),
            transforms.TenCrop(40),
            transforms.Lambda(
                lambda crops: torch.stack(
                    [transforms.ToTensor()(crop) for crop in crops]
                )
            ),
            transforms.Lambda(
                lambda tensors: torch.stack(
                    [
                        transforms.Normalize(mean=(0,), std=(255,))(t)
                        for t in tensors
                    ]
                )
            ),
        ]
    )
    resize_frame = cv2.resize(face, (48, 48))
    # A single-channel input has no colour axis after resizing and would make
    # cvtColor fail, so it is passed through unchanged.
    if resize_frame.ndim == 2:
        gray_frame = resize_frame
    else:
        gray_frame = cv2.cvtColor(resize_frame, cv2.COLOR_BGR2GRAY)
    inputs = Image.fromarray(gray_frame)
    inputs = transform(inputs).unsqueeze(0).to(torch.device("cpu"))

    # Merge the batch and crop axes so each crop is one sample.
    bs, ncrops, c, h, w = inputs.shape
    inputs = inputs.view(-1, c, h, w)

    # Use the session obtained here. The previous code created a session and
    # then ran inference on the notebook-global `ort_session` instead, so the
    # function only worked when an earlier cell had defined that global.
    if session is None:
        session = load_trained_model2("a")

    # forward pass through ONNX Runtime
    ort_inputs = {session.get_inputs()[0].name: to_numpy(inputs)}
    ort_outs = session.run([session.get_outputs()[0].name], ort_inputs)
    outputs = torch.from_numpy(ort_outs[0])

    # combine results across the crops (mean over the crop axis)
    outputs = outputs.view(bs, ncrops, -1)
    outputs = torch.sum(outputs, dim=1) / ncrops

    _, preds = torch.max(outputs, 1)
    preds = preds.cpu().numpy()[0]

    print(preds)


In [260]:
# recognize_emotion2 converts its input with cv2.COLOR_BGR2GRAY, i.e. it expects
# OpenCV's BGR channel order, while PIL decodes to RGB. Swap the channels first
# so the grayscale weights are applied to the right colour planes.
img = cv2.cvtColor(
    np.array(Image.open("./1000_F_390012920_uaK8LRMyy36SkCZn6oFHax1Qv6zZoJFY.jpg").convert("RGB")),
    cv2.COLOR_RGB2BGR,
)
recognize_emotion2(img)

1
