In [1]:
import tome
import torch
import torch.nn as nn
import timm
import os
from torchvision import datasets, transforms
import copy
import dill
import time

In [2]:
# Evaluation-time preprocessing: resize to a fixed square, convert to a tensor,
# then normalise each channel. The mean/std values match the commonly used
# ImageNet statistics (presumably what the checkpoints were trained with - verify).
_EVAL_IMAGE_SIZE = (224, 224)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

test_transforms = transforms.Compose([
    transforms.Resize(_EVAL_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

In [3]:
# Worker processes and pinned host memory are only requested when CUDA is available;
# otherwise the DataLoader defaults are used.
kwargs = {'num_workers': 4, 'pin_memory': True} if torch.cuda.is_available() else {}

# Location of the test split (one sub-folder per class, as ImageFolder expects).
# Set the CALTECH256_TEST_DIR environment variable to run the notebook on another
# machine; the original hard-coded location is kept as the default.
test_dir = os.environ.get("CALTECH256_TEST_DIR", r"C:\Users\Oleg\Desktop\Caltech256\test")

test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transforms)
# shuffle=False keeps the evaluation order deterministic.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, **kwargs)

In [4]:
criterion = nn.CrossEntropyLoss()

def test(model, loader, dataset):
    epoch_val_accuracy = 0
    epoch_val_loss = 0
    model.eval()
    start = time.time()
    with torch.no_grad():
        for data, label in loader:
            data = data.to(device)
            label = label.to(device)

            val_output = model(data)
            val_loss = criterion(val_output, label)

            acc = (val_output.argmax(dim=1) == label).float().sum()
            epoch_val_accuracy += acc
            epoch_val_loss += val_loss
        end = time.time()
    epoch_val_accuracy /= len(dataset)
    epoch_val_loss /= len(dataset)
    return epoch_val_accuracy, epoch_val_loss, (end - start)/len(test_dataset)

In [5]:
# Checkpoint locations keyed by a short architecture name. All files live in one
# directory; the file names appear to encode dataset, epochs, learning rate and
# test accuracy (inferred from the names only - verify).
_CHECKPOINT_DIR = r'C:\Users\Oleg\Desktop\CW\models'
_CHECKPOINT_FILES = {
    'mobilevit_s': 'mobilevits-caltech256-e10-lr001-t69.pt',
    'efficientformer': 'eficcientformer-caltech256-e10-lr0003-t78.pt',
    'deit_tiny': 'deit_tiny_distilled_patch16_224-caltech256-e10-lr0001-t79.pt',
    'vit_small': 'vit_small_patch16_224-caltech256-e10-lr0002-t80.pt',
    'swin_s3_tiny': 'swin_s3_tiny_224-caltech256-e10-lr0002-t81.pt',
    'swin_tiny_window7': 'swin_tiny_patch4_window7_224-caltech256-e10-lr0001-t83.pt',
}
model_paths = {
    arch: _CHECKPOINT_DIR + '\\' + file_name
    for arch, file_name in _CHECKPOINT_FILES.items()
}

In [6]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines
# without CUDA (the same availability check gates the DataLoader options).
# Note: the result printouts are labelled "GPU" regardless of this choice.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [7]:
# Load the fine-tuned DeiT-tiny checkpoint through the model_paths registry instead
# of repeating the path literal. map_location lets a checkpoint that was saved from
# a CUDA device be restored on whatever `device` is in use.
# SECURITY: torch.load unpickles the file, which can execute arbitrary code - only
# load checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which rejects
# fully pickled modules; pass weights_only=False there if loading fails - confirm
# against the installed version.
model = torch.load(model_paths['deit_tiny'], map_location=device).to(device)

In [8]:
# Reference numbers for the unmodified model on the test split.
baseline_metrics = test(model.to(device), test_loader, test_dataset)
test_accuracy, test_loss, inf_time = baseline_metrics
print(f"test_acc: {test_accuracy:.4f} - inference time GPU: {inf_time}\n")

test_acc: 0.7996 - inference time GPU: 0.004864293549742017



In [9]:
# Settings shared by every tome.utils.benchmark call that follows.
runs, batch_size = 50, 256
# Benchmark input size is taken from the loaded model's default_cfg.
input_size = model.default_cfg["input_size"]

In [10]:
# Work on an independent deep copy: the copy is the object that gets patched and
# benchmarked below, so `model` itself is not touched by those steps.
tome_model = copy.deepcopy(model)

In [11]:
# Throughput of the copy before any ToMe patch is applied; later cells divide
# their throughput by this value to report the speed-up.
baseline_benchmark_options = dict(
    device=device,
    verbose=True,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
)
baseline_throughput = tome.utils.benchmark(tome_model, **baseline_benchmark_options)

Benchmarking: 100%|████████████████████████████████████████████████████████████████████| 50/50 [00:20<00:00,  2.38it/s]


Throughput: 448.67 im/s


In [12]:
# Apply the ToMe patch to the copy, then benchmark with r = 10.
# NOTE(review): r is presumably the number of tokens merged per layer - confirm
# against the ToMe documentation.
tome.patch.timm(tome_model)
tome_model.r = 10

tome_throughput = tome.utils.benchmark(
    tome_model, device=device, verbose=True,
    runs=runs, batch_size=batch_size, input_size=input_size,
)
# Speed-up relative to the unpatched baseline measured earlier.
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|████████████████████████████████████████████████████████████████████| 50/50 [00:17<00:00,  2.80it/s]


Throughput: 656.93 im/s
Throughput improvement: 1.46x


In [13]:
# Test-split metrics of the patched model at its current setting (r = 10).
tome_metrics = test(tome_model.to(device), test_loader, test_dataset)
test_accuracy, test_loss, inf_time = tome_metrics
print(f"test_acc: {test_accuracy:.4f} - inference time GPU: {inf_time}\n")

test_acc: 0.7943 - inference time GPU: 0.0032330473162689986



In [14]:
# Repeat the throughput benchmark with r raised to 16.
tome_model.r = 16

tome_throughput = tome.utils.benchmark(
    tome_model, device=device, verbose=True,
    runs=runs, batch_size=batch_size, input_size=input_size,
)
# Speed-up relative to the unpatched baseline measured earlier.
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|████████████████████████████████████████████████████████████████████| 50/50 [00:13<00:00,  3.77it/s]


Throughput: 886.25 im/s
Throughput improvement: 1.98x


In [15]:
# Test-split metrics of the patched model at its current setting (r = 16).
tome_metrics = test(tome_model.to(device), test_loader, test_dataset)
test_accuracy, test_loss, inf_time = tome_metrics
print(f"test_acc: {test_accuracy:.4f} - inference time GPU: {inf_time}\n")

test_acc: 0.7781 - inference time GPU: 0.003083465886967523



In [16]:
# Repeat the throughput benchmark with r raised to 20.
tome_model.r = 20

tome_throughput = tome.utils.benchmark(
    tome_model, device=device, verbose=True,
    runs=runs, batch_size=batch_size, input_size=input_size,
)
# Speed-up relative to the unpatched baseline measured earlier.
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|████████████████████████████████████████████████████████████████████| 50/50 [00:11<00:00,  4.53it/s]


Throughput: 1067.36 im/s
Throughput improvement: 2.38x


In [17]:
# Test-split metrics of the patched model at its current setting (r = 20).
tome_metrics = test(tome_model.to(device), test_loader, test_dataset)
test_accuracy, test_loss, inf_time = tome_metrics
print(f"test_acc: {test_accuracy:.4f} - inference time GPU: {inf_time}\n")

test_acc: 0.7213 - inference time GPU: 0.002936875941802044



In [18]:
# Set r back to 10 (the previous experiment left it at 20) before the model is saved.
tome_model.r = 10

In [19]:
# Save the patched model with dill.
# The path is relative, so the file is written to the current working directory.
PATH = "tome_deit_tiny_distilled_patch16_224.pt"
with open(PATH, mode="wb") as checkpoint_file:
    dill.dump(tome_model, checkpoint_file)