In [1]:
import numpy as np
from torchvision.models import resnet18, ResNet18_Weights
import torch 
import os
from PIL import Image
from torchvision import datasets

In [2]:
model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [3]:
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [36]:
def classify_image(image_path):
    image = Image.open(image_path)
    image = transform(image).unsqueeze(0)
    with torch.no_grad():
        outputs = model(image)
        print(outputs.shape)
        #print(outputs)
        _, predicted = torch.max(outputs, 1)
    return predicted.item(), _, outputs

In [5]:
import ast

def get_labels():
    with open('imagenet1000_clsidx_to_labels.txt', 'r') as file:
        data = file.read()

    labels_dict = ast.literal_eval(data)
    return labels_dict
labels = get_labels()

In [7]:
IMG_PATH = 'doc.png'
image = Image.open(IMG_PATH)
np.array(image).shape

(270, 320, 3)

In [37]:
if os.path.exists(IMG_PATH):
    prediction, _, outputs = classify_image(IMG_PATH)

torch.Size([1, 1000])


In [12]:
prediction, labels[prediction], _

(919, 'street sign', tensor([6.6958]))

In [21]:
import torch.nn.functional as F

In [35]:
probability, _= torch.max(F.softmax(outputs, dim=-1), dim=1)
probability.item()

0.07169125974178314

In [22]:
outputs.shape

torch.Size([1, 1000])

In [38]:
outputs

tensor([[-2.1958e+00, -2.2677e+00, -5.7663e-01, -8.2234e-01,  2.1285e-01,
          1.8938e+00, -2.2570e+00, -2.3974e+00, -3.7170e+00, -1.7143e+00,
         -3.6930e+00, -5.1755e+00, -1.6495e+00, -3.9218e+00,  3.9825e-01,
         -2.6958e+00, -3.1265e+00, -8.1859e-02,  4.7422e-02, -2.3331e+00,
         -3.8024e+00, -2.0613e+00, -1.9951e+00,  1.6014e-01,  5.0796e-01,
         -4.3565e+00, -3.5461e+00, -2.2444e+00, -4.2824e+00, -3.1028e+00,
         -2.1172e+00,  1.1887e-01,  5.9312e-02, -3.1008e+00, -2.3475e+00,
         -4.5183e+00, -2.7342e+00, -4.1876e+00, -2.7919e-01, -6.0350e-01,
         -1.4276e+00, -1.4183e+00, -1.0110e+00, -1.2377e+00, -1.5296e+00,
         -2.7963e+00, -6.0364e-02,  3.0157e+00, -3.4746e+00, -2.4767e+00,
         -2.6560e+00,  4.8175e-01, -2.3552e+00, -1.9622e+00, -7.9943e-01,
          3.7406e-01, -3.2294e+00, -4.3856e+00, -9.8966e-01,  1.8014e+00,
          9.8405e-01, -1.5945e+00, -2.0829e+00, -2.3095e+00,  7.4977e-01,
          8.2497e-01,  5.3673e-01, -4.

In [None]:
labels = get_labels()

'street sign'