In [1]:
import sys

from sympy.strategies.core import switch

print(sys.executable)

C:\Users\Game_\AppData\Local\Programs\Python\Python311\python.exe


In [2]:
# first install required dependencies with:
# pip install torch torchvision transformers datasets
# Then load the model from huggingface
import transformers

from transformers import MobileNetV2ForImageClassification, AutoImageProcessor

# Load the model and image processor
model_name = "google/mobilenet_v2_1.0_224"
model = MobileNetV2ForImageClassification.from_pretrained(model_name)
image_processor = AutoImageProcessor.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
#Test on an image of a car
from PIL import Image
import requests
from torch.nn.functional import softmax

#Load image
url = "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3b/BlkStdSchnauzer2.jpg/440px-BlkStdSchnauzer2.jpg"  
# Replace with any sample image URL
image = Image.open(requests.get(url, stream=True).raw)
inputs = image_processor(images=image, return_tensors="pt")

#pass raw values to model
outputs = model(**inputs)
logits = outputs.logits

#normalize outputs to a probability distribution
probabilities = softmax(logits, dim=1)
num_predictions = 5
top_probs, top_classes = probabilities.topk(num_predictions, dim=1)
# This will print out the post-softmax values along with their class labels (still unintelligible to humans)
print("Top probabilities:", top_probs)
print("Top classes:", top_classes)

Top probabilities: tensor([[0.6593, 0.1369, 0.0238, 0.0233, 0.0137]], grad_fn=<TopkBackward0>)
Top classes: tensor([[199, 198, 263, 197, 200]])


In [4]:
#now labeling the outputs, we can read what the predictions are
labels = model.config.id2label
for i in range(num_predictions):
    class_id = top_classes[0][i].item()
    label = labels.get(class_id, "Unknown")
    probability = top_probs[0][i].item()
    print(f"Class: {label}, Probability: {probability:.4f}")

Class: standard schnauzer, Probability: 0.6593
Class: giant schnauzer, Probability: 0.1369
Class: Brabancon griffon, Probability: 0.0238
Class: miniature schnauzer, Probability: 0.0233
Class: Scotch terrier, Scottish terrier, Scottie, Probability: 0.0137


## Now we seek to get a sense of the model and data and make needed changes

In [5]:
#first we need to get a sense of the model
print(model)
#the model is made up of many 3x3 convolution layers that are then reduced and eventually coded to categorize them

MobileNetV2ForImageClassification(
  (mobilenet_v2): MobileNetV2Model(
    (conv_stem): MobileNetV2Stem(
      (first_conv): MobileNetV2ConvLayer(
        (convolution): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
        (normalization): BatchNorm2d(32, eps=0.001, momentum=0.997, affine=True, track_running_stats=True)
        (activation): ReLU6()
      )
      (conv_3x3): MobileNetV2ConvLayer(
        (convolution): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), groups=32, bias=False)
        (normalization): BatchNorm2d(32, eps=0.001, momentum=0.997, affine=True, track_running_stats=True)
        (activation): ReLU6()
      )
      (reduce_1x1): MobileNetV2ConvLayer(
        (convolution): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (normalization): BatchNorm2d(16, eps=0.001, momentum=0.997, affine=True, track_running_stats=True)
      )
    )
    (layer): ModuleList(
      (0): MobileNetV2InvertedResidual(
        (expand_1x1): MobileNe

In [30]:
# this block will help us get a sense of the data we are working with.
from datasets import load_dataset

dataset = load_dataset("pcuenq/oxford-pets")
print(dataset)
# Now printing a single element
print(dataset['train'][0]['image']['bytes'])

DatasetDict({
    train: Dataset({
        features: ['path', 'label', 'dog', 'image'],
        num_rows: 7390
    })
})
191


In [32]:
#as the structure of the data is as raw bytes the easiest way to check the sizing is to change it into another format
from PIL import Image
from io import BytesIO

# we will look at 5 images to get a better sense of the data, as mobilenet v2 only accepts 224x224 images
num_check = 5
for i in range(num_check):
    image_raw_test = dataset['train'][i]['image']['bytes']
    image_conv = Image.open(BytesIO(image_raw_test))
    print(f"Image size: {image_conv.size}")

Image size: (345, 500)
Image size: (290, 370)
Image size: (333, 500)
Image size: (500, 375)
Image size: (416, 500)


In [12]:
#as the data is all different sizes we continue by resizing all images in our dataset to comply with the pre-trained model
dataset_size = 7390
def resize(datapoint):
    image_raw = datapoint['image']['bytes']
    image_workable = Image.open(BytesIO(image_raw)).convert('RGB')
    image_workable = image_workable.resize((224, 224))
    # Apply feature extractor to get the pixel values for the model
    datapoint['image']['bytes'] = image_processor(images=image_workable, return_tensors="pt")["pixel_values"]
    return datapoint

dataset_processed = dataset.map(resize, batched=False)

In [44]:
#noticing that the dataset is not split we do so making sure to keep a consistent random seed for reproducibility
dataset_split = dataset_processed['train'].train_test_split(test_size=0.2, seed=1)

# Access the splits
train_data = dataset_split["train"]
test_data = dataset_split["test"]
print(train_data)
print(test_data)

Dataset({
    features: ['path', 'label', 'dog', 'image'],
    num_rows: 5912
})
Dataset({
    features: ['path', 'label', 'dog', 'image'],
    num_rows: 1478
})


### Now we begin the process of fine-tuning via transfer learning

In [None]:
from torch import nn
#now we restructure the model to have 37 classes to match the pets dataset
num_classes = 37
model.classifier = nn.Linear(model.classifier.in_features, num_classes)