In [1]:
import numpy as np
import torch 
import torchvision 
import torch.nn as nn
from torchvision import datasets
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor
from torchvision.transforms import Compose
from torch.utils.data import DataLoader
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Softmax
from torch.nn import Module
from torch.optim import Adam
import torch.optim as optim
from torch.nn import CrossEntropyLoss
from tqdm import tqdm 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [3]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier for 1x28x28 inputs, 10 classes.

    Pipeline:
        Conv2d(1 -> 16, 3x3, pad 1) -> ReLU -> MaxPool2d(2)
        Conv2d(16 -> 32, 3x3, pad 1) -> ReLU -> MaxPool2d(2)
        Dropout(0.5) -> flatten -> Linear(1568 -> 10) -> Softmax(dim=1)

    The attribute names (``Conv1``, ``Conv2``, ``fc1``, ...) are the prefixes
    of the ``state_dict`` keys, so they are deliberately kept as-is to stay
    compatible with previously saved checkpoints.
    """

    # 32 channels x 7 x 7: a 28x28 image halved by each of the two 2x2 pools.
    _FLAT_FEATURES = 32 * 7 * 7

    def __init__(self):
        super().__init__()

        # Stage 1: 2-D convolution (padding keeps the spatial size), ReLU, 2x2 pool.
        self.Conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), stride=1, padding=1)
        self.act1 = nn.ReLU()
        self.Max1 = nn.MaxPool2d(2)

        # Stage 2: same layout, 16 -> 32 channels.
        self.Conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), stride=1, padding=1)
        self.act2 = nn.ReLU()
        self.max2 = nn.MaxPool2d(2)

        self.dropout = nn.Dropout(p=0.5)

        # Fully connected read-out layer followed by a per-row softmax.
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x, return_logits=False):
        """Run the network on a batch of images.

        Args:
            x: input batch; the layer sizes require shape (batch, 1, 28, 28).
            return_logits: when True, return the raw output of ``fc1`` and
                skip the softmax. ``nn.CrossEntropyLoss`` expects unnormalized
                logits, so use this when computing that loss to avoid
                normalizing twice. Defaults to False, which keeps the
                original behavior.

        Returns:
            Tensor of shape (batch, 10): softmax probabilities by default,
            or logits when ``return_logits`` is True.
        """
        x = self.Max1(self.act1(self.Conv1(x)))
        x = self.max2(self.act2(self.Conv2(x)))
        x = self.dropout(x)

        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        logits = self.fc1(x)

        if return_logits:
            return logits
        return self.softmax(logits)

In [4]:
# Build the network, then move its parameters and buffers onto `device`.
model = CNN()
model = model.to(device)

In [5]:
from torchsummary import summary

In [6]:
# Print the per-layer summary for a single 1x28x28 input. The trailing
# semicolon stops the notebook from echoing the call's return value, which
# otherwise repeats the same table a second time below the printed one.
summary(model, (1, 28, 28));

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 16, 28, 28]          160
├─ReLU: 1-2                              [-1, 16, 28, 28]          --
├─MaxPool2d: 1-3                         [-1, 16, 14, 14]          --
├─Conv2d: 1-4                            [-1, 32, 14, 14]          4,640
├─ReLU: 1-5                              [-1, 32, 14, 14]          --
├─MaxPool2d: 1-6                         [-1, 32, 7, 7]            --
├─Dropout: 1-7                           [-1, 32, 7, 7]            --
├─Linear: 1-8                            [-1, 10]                  15,690
├─Softmax: 1-9                           [-1, 10]                  --
Total params: 20,490
Trainable params: 20,490
Non-trainable params: 0
Total mult-adds (M): 1.03
Input size (MB): 0.00
Forward/backward pass size (MB): 0.14
Params size (MB): 0.08
Estimated Total Size (MB): 0.22


Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 16, 28, 28]          160
├─ReLU: 1-2                              [-1, 16, 28, 28]          --
├─MaxPool2d: 1-3                         [-1, 16, 14, 14]          --
├─Conv2d: 1-4                            [-1, 32, 14, 14]          4,640
├─ReLU: 1-5                              [-1, 32, 14, 14]          --
├─MaxPool2d: 1-6                         [-1, 32, 7, 7]            --
├─Dropout: 1-7                           [-1, 32, 7, 7]            --
├─Linear: 1-8                            [-1, 10]                  15,690
├─Softmax: 1-9                           [-1, 10]                  --
Total params: 20,490
Trainable params: 20,490
Non-trainable params: 0
Total mult-adds (M): 1.03
Input size (MB): 0.00
Forward/backward pass size (MB): 0.14
Params size (MB): 0.08
Estimated Total Size (MB): 0.22

In [7]:
# Cross-entropy loss, and an Adam optimizer over all model parameters
# (no arguments passed, so Adam runs with its default hyperparameters).
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters())

In [8]:
# Restore the trained weights. `map_location=device` remaps the stored tensors
# onto the device chosen earlier, so a checkpoint saved on a GPU still loads
# when the notebook has fallen back to the CPU.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
model.load_state_dict(torch.load('Fashion_Mnist.pth', map_location=device))

<All keys matched successfully>

In [9]:
# Random dummy batch (one 1x28x28 image) on the model's device; it is passed
# to the ONNX exporter as the example input.
sample_inputs = torch.rand(1, 1, 28, 28, device=device)

In [10]:
# Echo the module so the notebook displays its layer structure.
model

CNN(
  (Conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (act1): ReLU()
  (Max1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (act2): ReLU()
  (max2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=1568, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)

In [11]:
# Export the network to ONNX, using `sample_inputs` as the example input.
torch.onnx.export(
    model,
    sample_inputs,
    './model_Fashion_Mnist.onnx',
    opset_version=14,
    export_params=True,  # store the trained weights inside the ONNX file
)

verbose: False, log level: Level.ERROR



In [12]:
# Build a TensorRT engine from the exported ONNX model with the --fp16 flag
# (trtexec reports the precision as "FP32+FP16") and save it as a .plan file.
!trtexec \
  --onnx=model_Fashion_Mnist.onnx \
  --saveEngine=model_Fashion_mnist_trt16.plan \
  --fp16

&&&& RUNNING TensorRT.trtexec [TensorRT v8503] # trtexec --onnx=model_Fashion_Mnist.onnx --saveEngine=model_Fashion_mnist_trt16.plan --fp16
[05/22/2023-17:19:25] [I] === Model Options ===
[05/22/2023-17:19:25] [I] Format: ONNX
[05/22/2023-17:19:25] [I] Model: model_Fashion_Mnist.onnx
[05/22/2023-17:19:25] [I] Output:
[05/22/2023-17:19:25] [I] === Build Options ===
[05/22/2023-17:19:25] [I] Max batch: explicit batch
[05/22/2023-17:19:25] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default
[05/22/2023-17:19:25] [I] minTiming: 1
[05/22/2023-17:19:25] [I] avgTiming: 8
[05/22/2023-17:19:25] [I] Precision: FP32+FP16
[05/22/2023-17:19:25] [I] LayerPrecisions: 
[05/22/2023-17:19:25] [I] Calibration: 
[05/22/2023-17:19:25] [I] Refit: Disabled
[05/22/2023-17:19:25] [I] Sparsity: Disabled
[05/22/2023-17:19:25] [I] Safe mode: Disabled
[05/22/2023-17:19:25] [I] DirectIO mode: Disabled
[05/22/2023-17:19:25] [I] Restricted mode: Disabled
[05/22/2023-1

In [13]:
!trtexec \
  --onnx=model_Fashion_Mnist.onnx \
  --saveEngine=model_Fashion_mnist_trt32.plan \
  

&&&& RUNNING TensorRT.trtexec [TensorRT v8503] # trtexec --onnx=model_Fashion_Mnist.onnx --saveEngine=model_Fashion_mnist_trt32.plan
[05/22/2023-17:34:07] [I] === Model Options ===
[05/22/2023-17:34:07] [I] Format: ONNX
[05/22/2023-17:34:07] [I] Model: model_Fashion_Mnist.onnx
[05/22/2023-17:34:07] [I] Output:
[05/22/2023-17:34:07] [I] === Build Options ===
[05/22/2023-17:34:07] [I] Max batch: explicit batch
[05/22/2023-17:34:07] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default
[05/22/2023-17:34:07] [I] minTiming: 1
[05/22/2023-17:34:07] [I] avgTiming: 8
[05/22/2023-17:34:07] [I] Precision: FP32
[05/22/2023-17:34:07] [I] LayerPrecisions: 
[05/22/2023-17:34:07] [I] Calibration: 
[05/22/2023-17:34:07] [I] Refit: Disabled
[05/22/2023-17:34:07] [I] Sparsity: Disabled
[05/22/2023-17:34:07] [I] Safe mode: Disabled
[05/22/2023-17:34:07] [I] DirectIO mode: Disabled
[05/22/2023-17:34:07] [I] Restricted mode: Disabled
[05/22/2023-17:34:07] [I]