MobileNet is slower than ResNet34, when: cuda + batch_size=1
else: MobileNet is faster than ResNet34

In [21]:
from model.mobilenet import MobileNetV3_Small, MobileNetV3_Large
from model.resnet34 import ResNet
import clip
import torch
import time
device = 'cpu'
batch_size = 1
non_blocking = True
def throughput(images, model):
    model.eval()
    batch_size, token_length = images[0].shape[0:2]
    for i in range(50):
        model(*images)
    torch.cuda.synchronize()
    tic1 = time.time()
    for i in range(100):
        model(*images)
    torch.cuda.synchronize()
    tic2 = time.time()
    print(f"batch_size {batch_size} token_length {token_length} throughput {100 * batch_size / (tic2 - tic1)}")
    MB = 1024.0 * 1024.0
    print('memory:', torch.cuda.max_memory_reserved() / MB)
    return (tic2 - tic1) / (100 * batch_size)

In [14]:
model = MobileNetV3_Small().to(device, non_blocking=non_blocking)
data = torch.rand((batch_size, 3, 224, 224)).to(device, non_blocking=non_blocking)
latency = throughput([data], model)
print(latency)

batch_size 1 token_length 3 throughput 114.94270135232865
memory: 1538.0
0.008699986934661865


In [15]:
model = ResNet(img_channels=3, layers=(3,4,6,3)).to(device, non_blocking=non_blocking)
data = torch.rand((batch_size, 3, 224, 224)).to(device, non_blocking=non_blocking)
latency = throughput([data], model)
print(latency)

batch_size 1 token_length 3 throughput 176.05623988925296
memory: 1544.0
0.0056800031661987304


In [22]:
names = ['ViT-L/14', 'ViT-B/16', '"RN101"']
for name in names:
    model, preprocess = clip.load(name, device=device)
    image = torch.rand((batch_size, 3, 224, 224)).to(device, non_blocking=non_blocking)
    text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)

    model.eval()
    batch_size, token_length = image.shape[0:2]
    for i in range(50):
        image_features = model.encode_image(image)
        # text_features = model.encode_text(text)
    torch.cuda.synchronize()
    tic1 = time.time()
    for i in range(100):
         image_features = model.encode_image(image)
    torch.cuda.synchronize()
    tic2 = time.time()
    print(f"batch_size {batch_size} throughput {100 * batch_size / (tic2 - tic1)}")
    MB = 1024.0 * 1024.0
    print('memory:', torch.cuda.max_memory_reserved() / MB)
    print((tic2 - tic1) / (100 * batch_size))

batch_size 1 throughput 116.00925019589317
memory: 606.0
0.008620002269744874
