In [1]:
import time
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision.models import resnet50, densenet121, mobilenet_v2, convnext_tiny

from clf_funcs import fit, test, get_cifar10_loaders, get_mnist_loaders, FullyConnectedNet, SimpleConvNet

In [2]:
if torch.cuda.is_available():
	device = torch.device("cuda")
	device_cap = torch.cuda.get_device_capability()
	print(f"GPU available with compatibility {device_cap}")
	if device_cap not in ((7, 0), (8, 0), (9, 0)):
		print("GPU is not NVIDIA V100, A100, or H100. Speedup numbers may be lower than expected.")
else:
	device = torch.device("cpu")
	print("GPU unavailable")

GPU available with compatibility (6, 1)
GPU is not NVIDIA V100, A100, or H100. Speedup numbers may be lower than expected.


In [3]:
def timed(fn):
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    result = fn()
    end.record()
    torch.cuda.synchronize()
    return result, start.elapsed_time(end) / 1000

In [4]:
cifar10, _ = get_cifar10_loaders(1024)
batch, target = next(iter(cifar10))
batch, target = batch.to(device), target.to(device)
batch.size(), batch.dtype

Files already downloaded and verified
Files already downloaded and verified


(torch.Size([1024, 3, 32, 32]), torch.float32)

In [5]:
convnext = convnext_tiny()
convnext.classifier[2] = nn.Linear(in_features=768, out_features=10, bias=True)
convnext = convnext.to(device)

import torch._dynamo
torch._dynamo.reset()
convnext_opt = torch.compile(convnext, mode="reduce-overhead")

In [6]:
with torch.no_grad():
	print('eager:', timed(lambda: convnext(batch)[1]))
	print('compiled:', timed(lambda: convnext_opt(batch)[1]))

eager: (tensor([-0.0240, -0.8221,  0.4009,  0.1233, -0.4076, -0.3061, -0.5997, -0.2484,
         0.1649, -0.3498], device='cuda:0'), 1.0466334228515626)


BackendCompilerFailed: backend='inductor' raised:
RuntimeError: Found NVIDIA GeForce GTX 1050 which is too old to be supported by the triton GPU compiler, which is used as the backend. Triton only supports devices of CUDA Capability >= 7.0, but your device is of CUDA capability 6.1

Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information


You can suppress this exception and fall back to eager by setting:
    import torch._dynamo
    torch._dynamo.config.suppress_errors = True
