# Acceleration Benchmarks

In [24]:
import sys
sys.path.append('../src')
sys.path.append('../../learning/src')

import onnxruntime, torch, logging, joblib, numpy as np
from torchvision import transforms as T
from torch.nn import functional as F 
from utilities import create_environment, time_inference, collect_states
from collections import deque

logger = logging.getLogger('gym-duckietown')
logger.setLevel(logging.WARNING)

env = create_environment()
states = collect_states(env=env, 
                        golden_model_path='../models/duckieTown_PPO_simple.joblib',
                        max_steps=3500)

## Time Pytorch models

In [47]:
class Actor(torch.nn.Module):
    #Implements the inference of only the actor model coming from the acotr critic object

    def __init__(self, 
                actor_critic_model,
                device = 'cpu'):
        #Initilizes actor by copying necesseary layers
        super(Actor, self).__init__()
        self.device = device
        self.conv_core = actor_critic_model.conv_core.to(device)
        self.actor_head = actor_critic_model.actor_head.to(device)
        self.transform = actor_critic_model.transform

    def forward(self, x):
        #Implements forward pass of model 
        x = x.to(device=self.device)
        x = torch.permute(x, (0, 3, 1, 2))   # Place channel axis in correct position
        #x = self.transform(x)               # Apply transform
        x = x / 255
        #x = T.functional.crop(x, top=20, left=0, height=40, width=80)
        x = x[:,:,20:,:]
        #x = x.to(device=self.device)
        visual_repr = self.conv_core(x).squeeze(-1).squeeze(-1)  
        dist = F.log_softmax(self.actor_head(visual_repr), dim=1)
        return dist
    
    def infer_action(self, x):
        # Utilizes torch distributions to return an action
        dist_probs = self.forward(x)
        dist = torch.distributions.Categorical(logits=dist_probs)
        return dist.sample().cpu().numpy()[0]

In [48]:
model = Actor(actor_critic_model=joblib.load('../models/duckieTown_PPO_simple.joblib'), device='cpu')

avg_duration, n_frames = time_inference(states=states, model=model)

Average inference time 1.1214316728790956ms calculated on 3500 frames


In [49]:
model = Actor(actor_critic_model=joblib.load('../models/duckieTown_PPO_simple.joblib'), device='cuda')

avg_duration, n_frames = time_inference(states=states, model=model)

Average inference time 1.5132223806244187ms calculated on 3500 frames


## Time ONNX models

In [50]:
class ONNXActor():
    # Implements actor using ONNX runtime
    
    def __init__(self, onnx_path, providers):
        # Initiliaze model
        self.ort_session = onnxruntime.InferenceSession(onnx_path, providers=providers)
    
    def forward(self, x):
        # Implements forward pass of model
        output = self.ort_session.run(None, {'input' : x.numpy().astype(np.float32)})[0]
        return torch.Tensor(output)
    
    def infer_action(self, x):
        # Utilizes torch distributions to return an action
        dist_probs = self.forward(x)
        dist = torch.distributions.Categorical(logits=dist_probs)
        return dist.sample().numpy()[0]

In [51]:
model = ONNXActor(onnx_path='../models/actor.onnx', providers=['CPUExecutionProvider'])

avg_duration, n_frames = time_inference(states=states, model=model)

Average inference time 0.42981160960126935ms calculated on 3500 frames


In [52]:
model = ONNXActor(onnx_path='../models/actor.onnx', providers=['CUDAExecutionProvider'])

avg_duration, n_frames = time_inference(states=states, model=model)

2022-03-30 00:20:49.511757040 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:535 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Please reference https://onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html#requirements to ensure all dependencies are met.


Average inference time 0.425264579177435ms calculated on 3500 frames
