In [2]:
pip install timm

Collecting timm
  Downloading timm-1.0.15-py3-none-any.whl.metadata (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface_hub (from timm)
  Downloading huggingface_hub-0.29.2-py3-none-any.whl.metadata (13 kB)
Collecting safetensors (from timm)
  Downloading safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl.metadata (3.8 kB)
Downloading timm-1.0.15-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading huggingface_hub-0.29.2-py3-none-any.whl (468 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.1/468.1 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl (418 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m418.4/418.4 kB[0m [31m10.3 MB/s[0m eta [36m

In [4]:
# This is not original work; I use a pretrained model from the Hugging Face model repository.

import timm
import torch
import torchvision.transforms as transforms
from torchvision import datasets

# Location of the fine-tuned MNIST checkpoint and the file name it is cached under.
# NOTE(review): the weights come from a third-party repository and are
# deserialised on load — confirm the source is trusted.
CHECKPOINT_URL = "https://huggingface.co/gpcarl123/resnet18_mnist/resolve/main/resnet18_mnist.pth"
CHECKPOINT_FILE = "resnet18_mnist.pth"

# Build a ResNet-18 with a 10-class head. timm's own pretrained weights are
# not requested (pretrained=False); the parameters come from the checkpoint.
model = timm.create_model("resnet18", pretrained=False, num_classes=10)

# Replace the stem convolution with one that takes a single input channel.
model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Download the checkpoint (mapped onto the CPU) and copy its tensors into the model.
model.load_state_dict(
    torch.hub.load_state_dict_from_url(
        CHECKPOINT_URL,
        map_location="cpu",
        file_name=CHECKPOINT_FILE,
    )
)


tensor([[  1.0592,   2.6631,   0.4409,  11.4708,   5.9391,   1.6281,  -8.9831,
          15.0629,   7.4719,  10.4136],
        [  4.1827,  -0.4211,  18.2950,   7.5906,  -7.0604,  -0.8214,  -3.4411,
           1.2572,   4.3333,  -3.0270],
        [ -1.6343,  13.6814,   0.6990,   2.2076,   0.8563,  -2.3933,  -2.7086,
           2.5865,   4.4610,   3.5285],
        [ 13.4636,  -2.2299,  -5.2741,   0.8120,  -0.9029,   7.2803,   6.0733,
         -12.0386,   1.2335,   4.4484],
        [ -6.9018,  -4.6472,  -7.9347, -11.1203,  12.1219,   0.7148,   5.7340,
          -4.0119,   2.3946,   4.6144]], grad_fn=<AddmmBackward0>)
tensor([7, 2, 1, 0, 4])


In [12]:
# Define the preprocessing transformation.
# Single-channel normalisation with mean 0.1307 and std 0.3081.
preprocessor = transforms.Normalize((0.1307,), (0.3081,))
mnist_transform = transforms.Compose([
    # NOTE(review): images are upsampled to 224x224 — confirm this matches the
    # resolution the checkpoint was trained at.
    transforms.Resize((224, 224)),
    # ToTensor must run before Normalize: Normalize operates on tensors and
    # rejects the PIL images that the dataset yields.
    transforms.ToTensor(),
    preprocessor,
])

# Load the MNIST test dataset
testset = datasets.MNIST(root='./datasets/mnist', train=False, download=True, transform=mnist_transform)
# The loader wraps `testset`, the dataset defined on the line above.
testloader = torch.utils.data.DataLoader(testset, batch_size=5, shuffle=False, num_workers=2)

In [13]:
# Put the network in evaluation mode before inference, so layers such as
# BatchNorm use their tracked running statistics rather than per-batch ones.
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act1): ReLU(inplace=True)
      (aa): Identity()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act2): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, m

In [15]:
import numpy as np
# Evaluate the classifier on every batch from `testloader`, accumulating the
# overall accuracy and a 10x10 confusion matrix
# (row = true label, column = predicted label).
net = model
total_correct = 0
total_images = 0
confusion_matrix = np.zeros([10, 10], int)

with torch.no_grad():  # inference only; skip autograd bookkeeping
    for images, labels in testloader:
        logits = net(images)
        # Index of the largest logit along the class dimension.
        predicted = torch.max(logits, 1).indices
        total_images += labels.size(0)
        total_correct += (predicted == labels).sum().item()
        for true_cls, pred_cls in zip(labels.tolist(), predicted.tolist()):
            confusion_matrix[true_cls, pred_cls] += 1

model_accuracy = total_correct / total_images * 100
print('Model accuracy on {0} test images: {1:.2f}%'.format(total_images, model_accuracy))

Model accuracy on 10000 test images: 99.02%


In [7]:
# Sanity check: show the stem convolution to confirm it is the
# one-input-channel layer installed when the model was built.
print(model.conv1)  # Should be Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
