# Conversion to ONNX

This notebook presents the conversion of the model to ONNX format. The process consists of the following steps:

* Isolating the actor from the actor-critic model since it is the only one responsible for producing actions.
* Saving a collection of states for later validation of the ONNX model's outputs.
* Converting the actor to ONNX format.
* Validating the ONNX model using the collected states.

In [1]:
import sys
sys.path.append('../src')
sys.path.append('../../learning/src')

import torch, logging, joblib, onnxruntime, numpy as np
from torchvision import transforms as T
from torch.nn import functional as F 
from gym_duckietown.simulator import Simulator
from big_experiment_utils.wrappers import DiscreteWrapper, DtRewardWrapper
from collections import deque

from utilities import environment_creator

# Raise the 'gym-duckietown' logger threshold to WARNING so that its
# DEBUG/INFO records no longer show up in the cell outputs.
logger = logging.getLogger('gym-duckietown')
logger.setLevel(logging.WARNING)

DEBUG:commons:version: 6.2.4 *
DEBUG:typing:version: 6.2.3
DEBUG:duckietown_world:duckietown-world version 6.2.38 path /home/kanagnostopoulos/anaconda3/envs/rl/lib/python3.8/site-packages
DEBUG:geometry:PyGeometry-z6 version 2.1.4 path /home/kanagnostopoulos/anaconda3/envs/rl/lib/python3.8/site-packages
DEBUG:aido_schemas:aido-protocols version 6.0.59 path /home/kanagnostopoulos/anaconda3/envs/rl/lib/python3.8/site-packages
DEBUG:nodes:version 6.2.13 path /home/kanagnostopoulos/anaconda3/envs/rl/lib/python3.8/site-packages pyparsing 3.0.6
DEBUG:gym-duckietown:gym-duckietown version 6.1.31 path /home/kanagnostopoulos/Desktop/ReinforcementLearning/gym-duckietown/src



{'audio': ('xaudio2', 'directsound', 'openal', 'pulse', 'silent'), 'debug_font': False, 'debug_gl': True, 'debug_gl_trace': False, 'debug_gl_trace_args': False, 'debug_graphics_batch': False, 'debug_lib': False, 'debug_media': False, 'debug_texture': False, 'debug_trace': False, 'debug_trace_args': False, 'debug_trace_depth': 1, 'debug_trace_flush': True, 'debug_win32': False, 'debug_x11': False, 'graphics_vbo': True, 'shadow_window': True, 'vsync': None, 'xsync': True, 'xlib_fullscreen_override_redirect': False, 'darwin_cocoa': True, 'search_local_libs': True, 'headless': False, 'headless_device': 0}


## Isolation of the Actor

In [4]:
# Environment the policy is rolled out in further down
# (its reset/step/render/close methods are used there).
env = environment_creator()

# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model files that come from a trusted source.
model = joblib.load('../models/duckieTown_PPO_simple.joblib')

class Actor(torch.nn.Module):
    """Inference-only wrapper around the actor part of an actor-critic model.

    Only the convolutional core, the actor head and the input transform of
    the wrapped object are referenced, so the policy can be run (and
    exported) without the critic.
    """

    def __init__(self, 
                actor_critic_model,
                device = 'cpu'):
        """Take over the layers required for action inference.

        Args:
            actor_critic_model: object exposing ``conv_core``, ``actor_head``
                and ``transform`` attributes.
            device: device the two sub-modules are moved to and on which the
                forward pass is computed.

        Note:
            The sub-modules are referenced, not copied. For ``torch.nn.Module``
            instances ``.to(device)`` works in place, so the layers of
            ``actor_critic_model`` itself end up on ``device`` as well.
        """
        super().__init__()
        self.device = device
        self.conv_core = actor_critic_model.conv_core.to(device)
        self.actor_head = actor_critic_model.actor_head.to(device)
        self.transform = actor_critic_model.transform

    def forward(self, x):
        """Compute the action log-probabilities for a batch of states.

        Args:
            x: 4-D tensor whose channel axis is last (axis 3); presumably
                laid out as (batch, height, width, channels) -- TODO confirm.

        Returns:
            CPU tensor holding the log-softmax (over dim 1) of the actor
            head's output.
        """
        x = torch.permute(x, (0, 3, 1, 2))   # Move the channel axis from last to second position
        x = self.transform(x)               # Apply the transform taken from the actor-critic model
        # Keep rows 20..59 and columns 0..79 of the transformed image.
        x = T.functional.crop(x, top=20, left=0, height=40, width=80)
        x = x.to(device=self.device)
        # Drop the two trailing axes of the conv output when they have size 1.
        visual_repr = self.conv_core(x).squeeze(-1).squeeze(-1)
        log_probs = F.log_softmax(self.actor_head(visual_repr), dim=1).to('cpu')
        return log_probs

    def infer_action(self, x):
        """Sample an action from the policy for the given state.

        Args:
            x: input accepted by :meth:`forward`.

        Returns:
            The action sampled for the first batch element, as a NumPy scalar.
        """
        # Sampling needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            log_probs = self.forward(x)
        dist = torch.distributions.Categorical(logits=log_probs)
        return dist.sample().cpu().numpy()[0]
    
# Rebinds `model`: from here on the name refers to the actor-only wrapper,
# not to the actor-critic object loaded above.
model = Actor(model)

INFO:duckietown_world: data: /home/kanagnostopoulos/anaconda3/envs/rl/lib/python3.8/site-packages/duckietown_world/data


## Collect environment states

In [5]:
# Rollout settings.
N_STACKED_FRAMES = 5   # number of most recent frames concatenated into one state
MAX_STEPS = 500        # upper bound on the number of recorded states

saved_states = []

try:
    frame = env.reset()
    # Start the frame history with all-zero frames so that the very first
    # state already contains N_STACKED_FRAMES frames.
    blank_frame = torch.zeros(size=frame.shape).unsqueeze(0)
    stacked_frames = deque([blank_frame] * N_STACKED_FRAMES,
                           maxlen=N_STACKED_FRAMES)
    env.render()

    for _ in range(MAX_STEPS):

        # Add a leading batch axis, push the frame into the history and
        # build the state by concatenating the history along the last axis.
        frame = torch.FloatTensor(frame).unsqueeze(0)
        stacked_frames.append(frame)
        state = torch.cat(tuple(stacked_frames), dim=-1)

        saved_states.append(state)
        action = model.infer_action(state)
        next_frame, reward, done, _info = env.step(action)
        env.render()
        frame = next_frame
        if done:
            break
finally:
    # Close the environment even when the rollout is interrupted or fails.
    env.close()

## Convert Pytorch model to ONNX format

In [38]:
# Example input used to trace the model during export; index 10 is an
# arbitrary (fixed) pick among the recorded states.
dummy_input = saved_states[10]

torch.onnx.export(
    model,                       # model being exported
    dummy_input,                 # example input (or a tuple for multiple inputs)
    "../models/actor.onnx",      # output path (a file-like object is accepted too)
    export_params=True,          # store the trained weights inside the model file
    opset_version=11,            # ONNX opset version to target
    do_constant_folding=True,    # fold constant sub-graphs as an optimization
    input_names=['input'],       # name given to the graph input
    output_names=['output'],     # name given to the graph output
    # Axis 0 of both input and output is exported as a variable-length batch axis.
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
)

For easier use, an ONNXActor class is provided that exposes the same methods as the Actor class; the only difference is that _forward()_ is backed by an _onnxruntime.InferenceSession()_.

In [39]:
class ONNXActor():
    """Actor whose forward pass is executed by ONNX Runtime.

    Offers the same ``forward`` / ``infer_action`` methods as the PyTorch
    ``Actor`` so the two can be used interchangeably.
    """

    def __init__(self, onnx_path, providers):
        """Open an inference session for the exported model.

        Args:
            onnx_path: path of the ``.onnx`` file to load.
            providers: execution providers handed to
                ``onnxruntime.InferenceSession``.
        """
        self.ort_session = onnxruntime.InferenceSession(onnx_path, providers=providers)
        # Read the graph's input name from the model instead of hard-coding it.
        self.input_name = self.ort_session.get_inputs()[0].name

    def forward(self, x):
        """Run the ONNX model on a batch of states.

        Args:
            x: ``torch.Tensor`` (any device, with or without grad) or an
                array-like; it is converted to a C-contiguous float32 array.

        Returns:
            ``torch.Tensor`` built from the first output of the session.
        """
        if isinstance(x, torch.Tensor):
            # .numpy() only works for CPU tensors that do not require grad.
            x = x.detach().cpu().numpy()
        feed = np.ascontiguousarray(x, dtype=np.float32)
        output = self.ort_session.run(None, {self.input_name: feed})[0]
        return torch.Tensor(output)

    def infer_action(self, x):
        """Sample an action from the policy for the given state.

        Args:
            x: input accepted by :meth:`forward`.

        Returns:
            The action sampled for the first batch element, as a NumPy scalar.
        """
        log_probs = self.forward(x)
        dist = torch.distributions.Categorical(logits=log_probs)
        return dist.sample().numpy()[0]


# Load the exported actor and run it with ONNX Runtime's CPU execution provider.
model_onnx = ONNXActor(onnx_path='../models/actor.onnx', providers=['CPUExecutionProvider'])

## Validating correctness of ONNX model

Using the collected environment states, we validate that the original PyTorch model and the ONNX-based model produce the same log-probabilities, to within six decimal places.

In [40]:
# Stack every recorded state into one batch (each state already carries a
# leading batch axis of size 1).
batch_input = torch.cat(saved_states)

# Reference outputs from the PyTorch actor and the outputs of the ONNX model.
golden_probs = model(batch_input).detach().numpy()
onnx_probs = model_onnx.forward(batch_input).numpy()

try:
    np.testing.assert_array_almost_equal(golden_probs, onnx_probs, decimal=6)
except AssertionError as mismatch:
    # Catch only the comparison failure and show its report; any other
    # error (or a keyboard interrupt) propagates instead of being hidden.
    print('Test failed')
    print(mismatch)
else:
    print('Test passed')

Test passed
