In [1]:
from clf_funcs import env_builder

import pandas as pd
from tqdm import trange

import torch

# Value interpolated into the name of the results CSV written further down.
INDEX = 2

# Only forward passes are run in this notebook, so autograd is switched off globally.
torch.set_grad_enabled(False)

# Pick the GPU when torch reports one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
	device = torch.device("cuda")
else:
	device = torch.device("cpu")
print(f'CUDA enabled: {use_cuda}')

CUDA enabled: True


In [2]:
# Column-oriented store for the measurements: one independent list per column,
# each receiving one entry per timed repetition in the benchmark loop.
telemetry = {
	column: []
	for column in ('framework', 'model_name', 'rep', 'batch_size', 'elapsed_time')
}

# Number of untimed forward passes, then number of timed forward passes,
# run for every (model, batch size) combination.
warmup_steps, repetitions = 100, 50
# The benchmark times each forward pass with a pair of CUDA events, so it can
# only run on a CUDA device even though `device` may fall back to the CPU.
# Fail here with an explicit message rather than relying on whatever error
# torch.cuda raises later when no GPU is available.
if not use_cuda:
	raise RuntimeError(
		"This benchmark measures elapsed time with torch.cuda.Event and requires a CUDA device."
	)

# Events created with enable_timing=True can report the elapsed time between them.
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

In [3]:
# Measure forward-pass time for every (model, batch size) combination and
# store one row per timed repetition in `telemetry`.

# Start from empty columns so that re-running this cell (for example after an
# interrupted run) replaces the measurements instead of appending a second,
# overlapping set that would end up in the CSV and in the grouped means.
for column_values in telemetry.values():
	column_values.clear()

for model_name in ['FullyConnectedNet', 'SimpleConvNet', 'ResNet-50', 'DenseNet-121', 'MobileNet-v2', 'ConvNeXt-Tiny']:
	for batch_size in [1, 16, 32, 64, 96, 128, 192, 256]:
		# The first two values returned by env_builder are used as the model and
		# an iterable of batches; the other two are discarded.
		# NOTE(review): the meaning of the `10` and `None` arguments is defined in
		# clf_funcs.env_builder and is not visible here - confirm.
		model, dl, _, _ = env_builder(model_name, 10, batch_size, None)
		model.eval()
		model = model.to(device)
		# A single batch is fetched once and reused for every forward pass, so
		# data loading is not part of the measurement. Element 0 of the batch is
		# what gets fed to the model (presumably the inputs - confirm).
		sample = next(iter(dl))[0].to(device)

		# Untimed forward passes before any measurement is taken.
		for _ in trange(warmup_steps, desc=f"Warmup for {model_name} (batch of {batch_size})"):
			model(sample)

		for rep in trange(1, repetitions+1, desc=f"Benchmark for {model_name} (batch of {batch_size})"):
			# Bracket the forward pass with the two events, then wait for the GPU
			# to finish so that elapsed_time() can be read.
			start.record()
			_ = model(sample)
			end.record()
			torch.cuda.synchronize()

			measurement = {
				'framework': "PyTorch",
				'model_name': model_name,
				'rep': rep,
				'batch_size': batch_size,
				# torch.cuda.Event.elapsed_time reports milliseconds.
				'elapsed_time': start.elapsed_time(end),
			}
			for column, value in measurement.items():
				telemetry[column].append(value)

Warmup for FullyConnectedNet (batch of 1): 100%|██████████| 100/100 [00:06<00:00, 15.02it/s]
Benchmark for FullyConnectedNet (batch of 1): 100%|██████████| 50/50 [00:00<00:00, 4193.21it/s]
Warmup for FullyConnectedNet (batch of 16): 100%|██████████| 100/100 [00:00<00:00, 1677.38it/s]
Benchmark for FullyConnectedNet (batch of 16): 100%|██████████| 50/50 [00:00<00:00, 2092.59it/s]
Warmup for FullyConnectedNet (batch of 32): 100%|██████████| 100/100 [00:00<00:00, 2466.37it/s]
Benchmark for FullyConnectedNet (batch of 32): 100%|██████████| 50/50 [00:00<00:00, 5084.62it/s]
Warmup for FullyConnectedNet (batch of 64): 100%|██████████| 100/100 [00:00<00:00, 8407.61it/s]
Benchmark for FullyConnectedNet (batch of 64): 100%|██████████| 50/50 [00:00<00:00, 3877.80it/s]
Warmup for FullyConnectedNet (batch of 96): 100%|██████████| 100/100 [00:00<00:00, 8138.59it/s]
Benchmark for FullyConnectedNet (batch of 96): 100%|██████████| 50/50 [00:00<00:00, 2161.54it/s]
Warmup for FullyConnectedNet (batch of 

Files already downloaded and verified
Files already downloaded and verified


Warmup for ResNet-50 (batch of 1): 100%|██████████| 100/100 [00:00<00:00, 102.56it/s]
Benchmark for ResNet-50 (batch of 1): 100%|██████████| 50/50 [00:00<00:00, 148.55it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ResNet-50 (batch of 16): 100%|██████████| 100/100 [00:01<00:00, 94.67it/s]
Benchmark for ResNet-50 (batch of 16): 100%|██████████| 50/50 [00:00<00:00, 87.26it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ResNet-50 (batch of 32): 100%|██████████| 100/100 [00:01<00:00, 59.61it/s]
Benchmark for ResNet-50 (batch of 32): 100%|██████████| 50/50 [00:00<00:00, 53.49it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ResNet-50 (batch of 64): 100%|██████████| 100/100 [00:02<00:00, 33.91it/s]
Benchmark for ResNet-50 (batch of 64): 100%|██████████| 50/50 [00:01<00:00, 29.82it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ResNet-50 (batch of 96): 100%|██████████| 100/100 [00:04<00:00, 23.14it/s]
Benchmark for ResNet-50 (batch of 96): 100%|██████████| 50/50 [00:02<00:00, 20.28it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ResNet-50 (batch of 128): 100%|██████████| 100/100 [00:05<00:00, 18.32it/s]
Benchmark for ResNet-50 (batch of 128): 100%|██████████| 50/50 [00:03<00:00, 16.06it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ResNet-50 (batch of 192): 100%|██████████| 100/100 [00:07<00:00, 13.08it/s]
Benchmark for ResNet-50 (batch of 192): 100%|██████████| 50/50 [00:04<00:00, 11.40it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ResNet-50 (batch of 256): 100%|██████████| 100/100 [00:09<00:00, 10.07it/s]
Benchmark for ResNet-50 (batch of 256): 100%|██████████| 50/50 [00:05<00:00,  8.80it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for DenseNet-121 (batch of 1): 100%|██████████| 100/100 [00:01<00:00, 62.18it/s]
Benchmark for DenseNet-121 (batch of 1): 100%|██████████| 50/50 [00:00<00:00, 70.12it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for DenseNet-121 (batch of 16): 100%|██████████| 100/100 [00:01<00:00, 69.82it/s]
Benchmark for DenseNet-121 (batch of 16): 100%|██████████| 50/50 [00:00<00:00, 71.96it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for DenseNet-121 (batch of 32): 100%|██████████| 100/100 [00:01<00:00, 62.58it/s]
Benchmark for DenseNet-121 (batch of 32): 100%|██████████| 50/50 [00:00<00:00, 60.53it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for DenseNet-121 (batch of 64): 100%|██████████| 100/100 [00:02<00:00, 36.50it/s]
Benchmark for DenseNet-121 (batch of 64): 100%|██████████| 50/50 [00:01<00:00, 34.91it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for DenseNet-121 (batch of 96): 100%|██████████| 100/100 [00:03<00:00, 25.93it/s]
Benchmark for DenseNet-121 (batch of 96): 100%|██████████| 50/50 [00:02<00:00, 24.63it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for DenseNet-121 (batch of 128): 100%|██████████| 100/100 [00:05<00:00, 19.90it/s]
Benchmark for DenseNet-121 (batch of 128): 100%|██████████| 50/50 [00:02<00:00, 18.80it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for DenseNet-121 (batch of 192): 100%|██████████| 100/100 [00:07<00:00, 13.92it/s]
Benchmark for DenseNet-121 (batch of 192): 100%|██████████| 50/50 [00:03<00:00, 13.09it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for DenseNet-121 (batch of 256): 100%|██████████| 100/100 [00:09<00:00, 10.46it/s]
Benchmark for DenseNet-121 (batch of 256): 100%|██████████| 50/50 [00:05<00:00,  9.88it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for MobileNet-v2 (batch of 1): 100%|██████████| 100/100 [00:00<00:00, 162.31it/s]
Benchmark for MobileNet-v2 (batch of 1): 100%|██████████| 50/50 [00:00<00:00, 184.55it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for MobileNet-v2 (batch of 16): 100%|██████████| 100/100 [00:00<00:00, 170.26it/s]
Benchmark for MobileNet-v2 (batch of 16): 100%|██████████| 50/50 [00:00<00:00, 178.73it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for MobileNet-v2 (batch of 32): 100%|██████████| 100/100 [00:00<00:00, 177.45it/s]
Benchmark for MobileNet-v2 (batch of 32): 100%|██████████| 50/50 [00:00<00:00, 156.67it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for MobileNet-v2 (batch of 64): 100%|██████████| 100/100 [00:00<00:00, 106.85it/s]
Benchmark for MobileNet-v2 (batch of 64): 100%|██████████| 50/50 [00:00<00:00, 90.38it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for MobileNet-v2 (batch of 96): 100%|██████████| 100/100 [00:01<00:00, 74.08it/s]
Benchmark for MobileNet-v2 (batch of 96): 100%|██████████| 50/50 [00:00<00:00, 62.23it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for MobileNet-v2 (batch of 128): 100%|██████████| 100/100 [00:01<00:00, 56.76it/s]
Benchmark for MobileNet-v2 (batch of 128): 100%|██████████| 50/50 [00:01<00:00, 47.52it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for MobileNet-v2 (batch of 192): 100%|██████████| 100/100 [00:02<00:00, 39.29it/s]
Benchmark for MobileNet-v2 (batch of 192): 100%|██████████| 50/50 [00:01<00:00, 33.03it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for MobileNet-v2 (batch of 256): 100%|██████████| 100/100 [00:03<00:00, 31.12it/s]
Benchmark for MobileNet-v2 (batch of 256): 100%|██████████| 50/50 [00:01<00:00, 26.21it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ConvNeXt-Tiny (batch of 1): 100%|██████████| 100/100 [00:00<00:00, 146.46it/s]
Benchmark for ConvNeXt-Tiny (batch of 1): 100%|██████████| 50/50 [00:00<00:00, 176.80it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ConvNeXt-Tiny (batch of 16): 100%|██████████| 100/100 [00:00<00:00, 134.84it/s]
Benchmark for ConvNeXt-Tiny (batch of 16): 100%|██████████| 50/50 [00:00<00:00, 113.74it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ConvNeXt-Tiny (batch of 32): 100%|██████████| 100/100 [00:01<00:00, 90.16it/s]
Benchmark for ConvNeXt-Tiny (batch of 32): 100%|██████████| 50/50 [00:00<00:00, 76.12it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ConvNeXt-Tiny (batch of 64): 100%|██████████| 100/100 [00:01<00:00, 55.81it/s]
Benchmark for ConvNeXt-Tiny (batch of 64): 100%|██████████| 50/50 [00:01<00:00, 47.24it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ConvNeXt-Tiny (batch of 96): 100%|██████████| 100/100 [00:02<00:00, 40.36it/s]
Benchmark for ConvNeXt-Tiny (batch of 96): 100%|██████████| 50/50 [00:01<00:00, 33.88it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ConvNeXt-Tiny (batch of 128): 100%|██████████| 100/100 [00:03<00:00, 30.56it/s]
Benchmark for ConvNeXt-Tiny (batch of 128): 100%|██████████| 50/50 [00:01<00:00, 25.63it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ConvNeXt-Tiny (batch of 192): 100%|██████████| 100/100 [00:04<00:00, 21.39it/s]
Benchmark for ConvNeXt-Tiny (batch of 192): 100%|██████████| 50/50 [00:02<00:00, 17.86it/s]


Files already downloaded and verified
Files already downloaded and verified


Warmup for ConvNeXt-Tiny (batch of 256): 100%|██████████| 100/100 [00:06<00:00, 15.70it/s]
Benchmark for ConvNeXt-Tiny (batch of 256): 100%|██████████| 50/50 [00:03<00:00, 13.16it/s]


In [4]:
# Write every raw measurement (one row per timed repetition) to a CSV file
# whose name includes INDEX; the row index is not written.
telemetry_frame = pd.DataFrame(telemetry)
csv_path = f"../../results_ultimate_0/pytorch-batch-size-comp-{INDEX}.csv"
telemetry_frame.to_csv(csv_path, index=False)

In [5]:
# Keep only the columns needed for the summary and preview the raw rows.
measurements = pd.DataFrame(telemetry).drop(columns=["framework", "rep"])
display(measurements.head())

# Mean elapsed time per (model, batch size); the first 15 groups are shown.
results = measurements.groupby(["model_name", "batch_size"])
results.mean().head(15).reset_index()

Unnamed: 0,model_name,batch_size,elapsed_time
0,FullyConnectedNet,1,0.408576
1,FullyConnectedNet,1,0.27136
2,FullyConnectedNet,1,0.251904
3,FullyConnectedNet,1,0.246784
4,FullyConnectedNet,1,0.237568


Unnamed: 0,model_name,batch_size,elapsed_time
0,ConvNeXt-Tiny,1,5.606831
1,ConvNeXt-Tiny,16,7.924182
2,ConvNeXt-Tiny,32,11.814195
3,ConvNeXt-Tiny,64,19.027518
4,ConvNeXt-Tiny,96,26.365583
5,ConvNeXt-Tiny,128,34.863514
6,ConvNeXt-Tiny,192,49.874738
7,ConvNeXt-Tiny,256,67.723489
8,DenseNet-121,1,14.162842
9,DenseNet-121,16,13.801103
