In [8]:
import numpy as np
import torch
import time
from torchvision.models import *
import pandas as pd
import os
from apex import amp

In [9]:
# make results
os.makedirs("results", exist_ok=True)

In [10]:
def computeTime(model, input_size=[1, 3, 224, 224], device='cuda', FP16=False):
    inputs = torch.randn(input_size)
    if device == 'cuda':
        model = model.cuda()
        inputs = inputs.cuda()
    if FP16:
        model = model.half()
        inputs = inputs.half()

    model.eval()

    i = 0
    time_spent = []
    while i < 200:
        start_time = time.time()
        with torch.no_grad():
            _ = model(inputs)

        if device == 'cuda':
            torch.cuda.synchronize() # wait for cuda to finish (cuda is asynchronous!)
        if i != 0:
            time_spent.append(time.time() - start_time)
        i += 1
    print('Avg execution time (ms): {:.3f}'.format(np.mean(time_spent)))
    return np.mean(time_spent)

In [11]:
modellist = ["resnet18", "resnet34", "resnet50", "resnet101", "resnet152", \
 "resnext50_32x4d", "resnext101_32x8d", "mnasnet1_0", "squeezenet1_0", "densenet121", "densenet169", "inception_v3"]

# resnet is enought for now
modellist = ["resnet18", "resnet34", "resnet50"]

In [17]:
# test amp
model_name = "resnet18"
print("model: {}".format(model_name))
mdl = globals()[model_name]
model = mdl().to("cuda")
model = amp.initialize(model, opt_level=opt_level)
print("Looks ok!")

model: resnet18
Looks ok!


In [20]:
for i, model_name in enumerate(modellist):

    runtimes = []
    
    # define model
    print("model: {}".format(model_name))
    mdl = globals()[model_name]
    model = mdl()
    
    # Run FP32
    runtimes.append(computeTime(model, input_size=[1, 3, 256, 256], device="cuda", FP16=False))
    # Run FP16
    runtimes.append(computeTime(model, input_size=[1, 3, 256, 256], device="cuda", FP16=True))
    
    # Amp Initialization
    opt_level = 'O1'  # for only use FP32
    mdl = globals()[model_name]
    model = mdl().to("cuda")
    model = amp.initialize(model, opt_level=opt_level)
    
    # Run FP16
    runtimes.append(computeTime(model, input_size=[1, 3, 256, 256], device="cuda", FP16=False))
    
    if i == 0:
        df = pd.DataFrame({model_name: runtimes},
                         index = ["FP32", "FP16_torch", "FP16_apex"])
    else:
        df[model_name] = runtimes
        

model: resnet18
Avg execution time (ms): 0.010
Avg execution time (ms): 0.009
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Avg execution time (ms): 0.009
model: resnet34
Avg execution time (ms): 0.017
Avg execution time (ms): 0.017
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimi

In [21]:
df.to_csv("results/fp16.csv")
df

Unnamed: 0,resnet18,resnet34,resnet50
FP32,0.009891,0.01694,0.019452
FP16_torch,0.009034,0.017017,0.019448
FP16_apex,0.009057,0.016978,0.020046
