In [1]:
import sys
from pathlib import Path
import time
import math
# sys.path = [sys.path[-1]] + sys.path[:-1] # move conda env to beginning of sys.path

import numpy as np
import torch
import torch.nn.functional as F

import onnx
import onnxruntime as ort
import tensorrt

import wandb
from tqdm.notebook import tqdm

sys.path.append('/home/nikita/e2e-driving')

from dataloading.nvidia import NvidiaValidationDataset
from metrics.metrics import calculate_open_loop_metrics
from _tensorrt.tensorrt_util import TensorrtModel


In [42]:
def evaluate(model, dataloader, engine='ort', channel_order=[0,1,2]):
    all_predictions = []
    inference_times = []
    progress_bar = tqdm(total=len(dataloader), smoothing=0)
    inputs_name = model.get_inputs()[0].name if 'ort' in engine  else None
    batch_size = dataloader.batch_size

    epoch_mae = 0.0
    ask_batch_timestamp = time.time()
    for i, (input, target, _) in enumerate(dataloader):

        if input['image'].shape[0] != batch_size:
            print(f'Dropping batch with {input["image"].shape[0]} samples')
            continue

        # if i < 3400:
        #     all_predictions.extend(np.array([0]).repeat(batch_size))
        #     progress_bar.update(1)
        #     continue

        # inputs = input['image'].cpu().numpy()[:, channel_order, ...]
        inputs = input['image'].cpu().numpy()[:, channel_order, ...]

        # cv2 convert rgb to bgr

        # print(type(inputs), inputs.shape, inputs.dtype, inputs.min(), inputs.max(), inputs.mean())

        trt_inputs = np.ascontiguousarray(inputs)
        ort_inputs = {inputs_name: inputs}

        inference_start = time.perf_counter()
        if 'trt' in engine:
            preds = model.predict(trt_inputs)
        elif 'ort' in engine:
            preds = model.run(None, ort_inputs)[0]
            preds = preds.squeeze().reshape(-1, 1)
        inference_end = time.perf_counter()

        # print('preds:', preds)

        inference_time = inference_end - inference_start
        inference_times.append(inference_time)

        mae = F.l1_loss(torch.as_tensor(preds).reshape(-1,1), target.view(-1, 1))
        mae_degrees = math.degrees(mae.item())
        epoch_mae += mae_degrees

        if 'ort' in engine:
            all_predictions.extend(preds)
        elif 'trt' in engine:
            all_predictions.append(preds)

        progress_bar.update(1)
        progress_description = f'MAE: {(epoch_mae / (i + 1)):.4f}'
        if 'trt' in engine:
            progress_description += f', Preds: {preds:.4f}'
        progress_bar.set_description(progress_description)


    avg_mae = epoch_mae / len(dataloader)
    result = np.array(all_predictions)
    return avg_mae, result


def calculate_metrics(fps, predictions, valid_loader):
    '''For steering angle only.'''

    frames_df = valid_loader.dataset.frames.copy()

    # support dropping smaller batches
    batch_size = valid_loader.batch_size
    n_drop = frames_df.shape[0] % batch_size
    if n_drop > 0:
        frames_df = frames_df[:-n_drop]

    true_steering_angles = frames_df.steering_angle.to_numpy()
    metrics = calculate_open_loop_metrics(predictions, true_steering_angles, fps=fps)

    left_turns = frames_df["turn_signal"] == 0  # TODO: remove magic values
    left_metrics = calculate_open_loop_metrics(predictions[left_turns], true_steering_angles[left_turns], fps=fps)
    metrics["left_mae"] = left_metrics["mae"]

    straight = frames_df["turn_signal"] == 1
    straight_metrics = calculate_open_loop_metrics(predictions[straight], true_steering_angles[straight], fps=fps)
    metrics["straight_mae"] = straight_metrics["mae"]

    right_turns = frames_df["turn_signal"] == 2
    right_metrics = calculate_open_loop_metrics(predictions[right_turns], true_steering_angles[right_turns], fps=fps)
    metrics["right_mae"] = right_metrics["mae"]

    return metrics

def convert_static_to_dynamic_bs(onnx_path_in, onnx_path_out):
    model = onnx.load(onnx_path_in)
    for inputs in model.graph.input:
        dim1 = inputs.type.tensor_type.shape.dim[0]
        dim1.dim_param = 'batch'
    onnx.save(model, onnx_path_out)

def convert_dynamic_to_static_bs(onnx_path_in, onnx_path_out):
    model = onnx.load(onnx_path_in)
    for inputs in model.graph.input:
        dim1 = inputs.type.tensor_type.shape.dim[0]
        dim1.dim_value = 1
    onnx.save(model, onnx_path_out)

In [44]:
dataset_path = '/data/Bolt/dataset-new-small/summer2021'
output_modality = 'steering_angle'
n_branches = 1
n_waypoints = 1
batch_size = 1
num_workers = 8

validset = NvidiaValidationDataset(Path(dataset_path), output_modality, n_branches, n_waypoints=1)
valid_loader = torch.utils.data.DataLoader(validset, batch_size=batch_size, shuffle=False,
                                          num_workers=num_workers, pin_memory=True,
                                          persistent_workers=True)

/data/Bolt/dataset-new-small/summer2021/2021-05-28-15-19-48_e2e_sulaoja_20_30: lenght=10708, filtered=0
/data/Bolt/dataset-new-small/summer2021/2021-06-07-14-20-07_e2e_rec_ss6: lenght=25836, filtered=1
/data/Bolt/dataset-new-small/summer2021/2021-06-07-14-06-31_e2e_rec_ss6: lenght=3003, filtered=0
/data/Bolt/dataset-new-small/summer2021/2021-06-07-14-09-18_e2e_rec_ss6: lenght=4551, filtered=1
/data/Bolt/dataset-new-small/summer2021/2021-06-07-14-36-16_e2e_rec_ss6: lenght=25368, filtered=1
/data/Bolt/dataset-new-small/summer2021/2021-09-24-14-03-45_e2e_rec_ss11_backwards: lenght=25172, filtered=0
/data/Bolt/dataset-new-small/summer2021/2021-10-26-10-49-06_e2e_rec_ss20_elva: lenght=33045, filtered=0
/data/Bolt/dataset-new-small/summer2021/2021-10-26-11-08-59_e2e_rec_ss20_elva_back: lenght=33281, filtered=0
/data/Bolt/dataset-new-small/summer2021/2021-10-20-15-11-29_e2e_rec_vastse_ss13_17_back: lenght=26763, filtered=0
/data/Bolt/dataset-new-small/summer2021/2021-10-11-14-50-59_e2e_rec_va

In [40]:
# vanilla_pilotnet_path = '/home/nikita/e2e-driving/_models/vanilla-pilotnet.onnx'
# autumn_v3_path = '/home/nikita/e2e-driving/_models/autumn-v3.onnx'
# autumn_v3_ort_path = '/home/nikita/e2e-driving/_models/dynamic_autumn-v3.onnx'


# model_paths = ['/home/nikita/e2e-driving/_models/dynamic_autumn-v3.onnx']
# model_paths = [models_dir / 'ibc-dfo-3-1024-static.onnx']
# model_paths = [models_dir / '.onnx']
model_paths = ['/home/nikita/e2e-driving/_models/vanilla-pilotnet-static.onnx']
models = [TensorrtModel(model_path) for model_path in model_paths]
model_names = ['vanilla-pilotnet']
model_runtimes = [f'trt{tensorrt.__version__}']

# vanilla_pilotnet = ort.InferenceSession(vanilla_pilotnet_path, providers=['CUDAExecutionProvider'])
# autumn_v3 = ort.InferenceSession(autumn_v3_path, providers=['CUDAExecutionProvider'])

# autumn_v3_trt = TensorrtModel(autumn_v3_path)
# autumn_v3_ort = ort.InferenceSession(autumn_v3_ort_path, providers=['CUDAExecutionProvider'])

# model = TensorrtModel('/home/nikita/e2e-driving/_models/autumn-v3.onnx')


In [21]:
# verify that dynamic model & bs1_model give same results when fed with batch vs single image

dynamic_model = models[0]

inputs, _, __ = iter(valid_loader).next()
print('raw inputs:', inputs['image'].shape)

batched_input = {dynamic_model.get_inputs()[0].name: inputs['image'].cpu().numpy()}
single_input = {dynamic_model.get_inputs()[0].name: inputs['image'].cpu().numpy()[0, None, ...]}

batch_input_np =  batched_input[list(batched_input.keys())[0]]
single_input_np = single_input[list(single_input.keys())[0]]

print('onnx inputs batched:', batch_input_np.shape)
print('onnx inputs single:', single_input_np.shape)

np.testing.assert_equal(batch_input_np[0], single_input_np[0])
print('inputs are equal')

dynamic_batched_outs = dynamic_model.run(None, batched_input)[0][0]
dynamic_single_outs = dynamic_model.run(None, single_input)[0][0]
# bs1_single_outs = bs1_model.run(None, single_input)[0][0]

print('dynamic_batch_outs:', dynamic_batched_outs.shape)
print('dynamic_single_outs:', dynamic_single_outs.shape)
# print('bs1_single_outs:', bs1_single_outs.shape)

np.testing.assert_almost_equal(dynamic_batched_outs, dynamic_single_outs)
# np.testing.assert_almost_equal(bs1_single_outs, dynamic_single_outs)
print('outs are equal')

raw inputs: torch.Size([256, 3, 68, 264])
onnx inputs batched: (256, 3, 68, 264)
onnx inputs single: (1, 3, 68, 264)
inputs are equal
dynamic_batch_outs: (1,)
dynamic_single_outs: (1,)
outs are equal


In [22]:
%%timeit

dynamic_single_outs = dynamic_model.run(None, single_input)[0]

416 µs ± 65.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [45]:
import json

fps = 30
channel_order = [0, 1, 2]
# channel_orders = [[0,1,2], [0,2,1], [1,0,2], [1,2,0], [2,0,1], [2,1,0]]


# for channel_order in channel_orders:
for model, model_name, model_path, inference_eng in zip(models, model_names, model_paths, model_runtimes):
    wandb.init(project="ibc-tuning", name=model_name, config={"model_path": model_path, "channel_order": channel_order}, tags=[inference_eng])

    print(f'Evaluating model {model_name}...')
    mae, preds = evaluate(model, valid_loader, inference_eng, channel_order)
    metrics = calculate_metrics(fps, preds.squeeze(), valid_loader)
    wandb.log(metrics)
    print('experiment metrics:')
    print(json.dumps(str(metrics), indent=2))
    print()
    wandb.finish()

VBox(children=(Label(value='0.054 MB of 0.054 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Evaluating model vanilla-pilotnet...


  0%|          | 0/222380 [00:00<?, ?it/s]

experiment metrics:
"{'mae': 8.460469773816776, 'rmse': 28.71684608129731, 'max': 774.800240904782, 'whiteness': 166.29128, 'expert_whiteness': 25.61257234059799, 'left_mae': 42.27578282599571, 'straight_mae': 5.821816372862548, 'right_mae': 68.15964181056624, '_timestamp': 1657270421, '_runtime': 2103}"



VBox(children=(Label(value='0.054 MB of 0.054 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
expert_whiteness,▁
left_mae,▁
mae,▁
max,▁
right_mae,▁
rmse,▁
straight_mae,▁
whiteness,▁

0,1
expert_whiteness,25.61257
left_mae,42.27578
mae,8.46047
max,774.80024
right_mae,68.15964
rmse,28.71685
straight_mae,5.82182
whiteness,166.29128


In [39]:
inputs = np.random.randn(1, 3, 68, 264).astype(np.float32)
surgery_out_path = '/home/nikita/e2e-driving/_models/autumn-v3-bs1.onnx'

# ORT before surgery
autumn_v3_ort = ort.InferenceSession(autumn_v3_path, providers=['CUDAExecutionProvider'])
outs = autumn_v3_ort.run(None, {'input.1': inputs.repeat(64, axis=0)})[0][0]
print('before surgery:', outs)

# surgery
autumn_v3_onnx = onnx.load(autumn_v3_path)
autumn_v3_onnx.graph.input[0].type.tensor_type.shape.dim[0].dim_value = 1
onnx.save_model(autumn_v3_onnx, surgery_out_path)

# ORT after surgery
autumn_v3_b1_ort = ort.InferenceSession(surgery_out_path, providers=['CUDAExecutionProvider'])
outs = autumn_v3_b1_ort.run(None, {'input.1': inputs})[0]
print('after surgery:', outs)


before surgery: [4.1178102]
after surgery: [[4.1178093]]


In [38]:
# convert_static_to_dynamic_bs('/home/nikita/e2e-driving/_models/autumn-v3.onnx', '/home/nikita/e2e-driving/_models/autumn-v3-dynamic.onnx')
models_dir = Path('/home/nikita/ros-e2e-workspace/src/e2e_platform/config/nvidia_e2e_models/')

convert_dynamic_to_static_bs('/home/nikita/e2e-driving/_models/vanilla-pilotnet.onnx', '/home/nikita/e2e-driving/_models/vanilla-pilotnet-static.onnx')