In [5]:
import time
import torch
from torchvision.models import mobilenet_v3_small
from torch import nn
import gc
import numpy as np

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [23]:
def calculate_time(model,ip,batch,cuda):
    if(cuda):
        model=model.to('cuda')
        ip=ip.to('cuda')
        print(f'Using cuda\n')

    start_time = 0
    end_time = 0

    # Avoiding init time
    '''
    we run some dummy examples through the network to do a ‘GPU warm-up.’ 
    This will automatically initialize the GPU and prevent it from going into power-saving mode 
    when we measure time. (https://deci.ai/blog/measure-inference-time-deep-neural-networks/)
    '''
    
    with torch.inference_mode():
        _ = model(ip) # GPU WARMUP
    
    with torch.inference_mode():
        start_time = time.perf_counter()
        _ = model(ip*5)                       # Multiplied by 5 to make sure old results aren't cached and just returned
        torch.cuda.synchronize()
        end_time = time.perf_counter()
    
    duration = end_time - start_time
    
    # Print the duration in seconds
    print(f"Duration: {duration} seconds\nFPS: {batch/duration:.0f}")

    del ip
    del model
    del _
    gc.collect()
    if(cuda):
        torch.cuda.empty_cache()

In [3]:
def build_generator():
    
    class ResidualBlock(nn.Module):
        def __init__(self, in_channels, out_channels, expansion=6, stride=1, alpha=1.0):
            super(ResidualBlock, self).__init__()
            self.expansion = expansion
            self.stride = stride
            self.in_channels = in_channels
            self.out_channels = int(out_channels * alpha)
            self.pointwise_conv_filters = self._make_divisible(self.out_channels, 8)
            self.conv1 = nn.Conv2d(in_channels, in_channels * expansion, kernel_size=1, stride=1, padding=0, bias=True)
            self.bn1 = nn.BatchNorm2d(in_channels * expansion)
            self.conv2 = nn.Conv2d(in_channels * expansion, in_channels * expansion, kernel_size=3, stride=stride, padding=1, groups=in_channels * expansion, bias=True)
            self.bn2 = nn.BatchNorm2d(in_channels * expansion)
            self.conv3 = nn.Conv2d(in_channels * expansion, self.pointwise_conv_filters, kernel_size=1, stride=1, padding=0, bias=True)
            self.bn3 = nn.BatchNorm2d(self.pointwise_conv_filters)
            self.relu = nn.ReLU(inplace=True)
            self.skip_add = (stride == 1 and in_channels == self.pointwise_conv_filters)

        def forward(self, x):
            identity = x

            out = self.conv1(x)
            out = self.bn1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.bn2(out)
            out = self.relu(out)

            out = self.conv3(out)
            out = self.bn3(out)

            if self.skip_add:
                out = out + identity

            return out

        @staticmethod
        def _make_divisible(v, divisor, min_value=None):
            if min_value is None:
                min_value = divisor
            new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
            if new_v < 0.9 * v:
                new_v += divisor
            return new_v

    class Generator(nn.Module):
        def __init__(self, in_channels, num_residual_blocks, gf):
            super(Generator, self).__init__()
            self.num_residual_blocks = num_residual_blocks
            self.gf = gf

            self.conv1 = nn.Conv2d(in_channels, gf, kernel_size=3, stride=1, padding=1)
            self.bn1 = nn.BatchNorm2d(gf)
            self.prelu1 = nn.PReLU()

            self.residual_blocks = self.make_layer(ResidualBlock, gf, num_residual_blocks)

            self.conv2 = nn.Conv2d(gf, gf, kernel_size=3, stride=1, padding=1)
            self.bn2 = nn.BatchNorm2d(gf)

            self.upsample1 = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
                nn.Conv2d(gf, gf, kernel_size=3, stride=1, padding=1),
                nn.PReLU()
            )

            self.upsample2 = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
                nn.Conv2d(gf, gf, kernel_size=3, stride=1, padding=1),
                nn.PReLU()
            )

            self.conv3 = nn.Conv2d(gf, 3, kernel_size=3, stride=1, padding=1)
            self.tanh = nn.Tanh()

        def make_layer(self, block, out_channels, blocks):
            layers = []
            for _ in range(blocks):
                layers.append(block(out_channels, out_channels))
            return nn.Sequential(*layers)

        def forward(self, x):
            out1 = self.prelu1(self.bn1(self.conv1(x)))
            out = self.residual_blocks(out1)
            out = self.bn2(self.conv2(out))
            out = out + out1
            out = self.upsample1(out)
            out = self.upsample2(out)
            out = self.tanh(self.conv3(out))
            return out

    return Generator(3, 6, 32)



In [6]:
model_d=mobilenet_v3_small()
model_d.classifier[3]=nn.Linear(in_features=1024,out_features=2,bias=True)

model_g=build_generator().to(device)
model_g.load_state_dict(torch.load('./generator_weight.pt'))


<All keys matched successfully>

#### Detection - method 1 (direct)

In [8]:
model_d.load_state_dict(torch.load('method1(0.668).pt'))
calculate_time(model_d,torch.rand((1000,3,224,224)),1000,True)

Using cuda

Duration: 0.44500826299918117 seconds
FPS: 2247


#### Detection - method 2 (edge detection)

In [7]:
model_d.load_state_dict(torch.load('method2(0.960).pt'))
calculate_time(model_d,torch.rand((1000,3,224,224)),1000,True)

Using cuda

Duration: 0.44565098600287456 seconds
FPS: 2244


#### Generator

In [24]:
calculate_time(model_g,torch.rand((5,3,443,180)),5,True)

Using cuda

Duration: 0.002014044002862647 seconds
FPS: 2483
