In [105]:
import torch
from torch import nn

# Import torchvision 
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

# Import matplotlib for visualization
import matplotlib.pyplot as plt

# Check versions
# Note: your PyTorch version shouldn't be lower than 1.10.0 and torchvision version shouldn't be lower than 0.11
print(f"PyTorch version: {torch.__version__}\ntorchvision version: {torchvision.__version__}")

PyTorch version: 2.2.2
torchvision version: 0.17.2


In [106]:
device = torch.device("cuda" if torch.cuda.is_available else "cpu")
import numpy as np

In [107]:
import pandas as pd

In [108]:
import torch

def set_seeds(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    

In [109]:
#for the vision transformer
from torchinfo import summary

In [110]:
label_csv_path = r'C:\Users\Inzayn\Downloads\dog-breed-identification\labels.csv'

In [111]:
data= pd.read_csv(label_csv_path)

In [112]:
breeds=data.breed

In [113]:
unique_breed=breeds.unique()

In [118]:
device


device(type='cuda')

In [114]:
len(unique_breed)

120

In [115]:
food_vision_classes= ['pizza', 'stake', 'sushi']

In [116]:
food_vision_classes

['pizza', 'stake', 'sushi']

In [None]:
from typing import List, Tuple
import os
from PIL import Image
import torch
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Function to load the model (make sure to define it correctly)
def load_model(model_path: str, model_name: str) -> torch.nn.Module:
    # This function should include your model loading logic
    # Here's an example based on the previous discussion
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if model_name == "vit_dog":
        model = torchvision.models.vit_b_16(pretrained=False).to(device)
        model.heads = nn.Linear(in_features=768, out_features=120).to(device)
    elif model_name=="vit_food":
        model= torchvision.models.vit_b_16(pretrained=False).to(device)
        model.heads= nn.Linear(in_features=768, out_features=3).to(device)
    elif model_name == "effnetb0_dog":
        model = torchvision.models.efficientnet_b0(pretrained=False).to(device)
        model.classifier = nn.Sequential(
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(in_features=model.classifier[1].in_features, out_features=120)
        ).to(device)
    elif model_name == "effnetb2_dog":
        model = torchvision.models.efficientnet_b2(pretrained=False).to(device)
        model.classifier = nn.Sequential(
            nn.Dropout(p=0.3, inplace=True),
            nn.Linear(in_features=model.classifier[1].in_features, out_features=120)
        ).to(device)
    elif model_name == "mobilenet_dog":
        model = torchvision.models.mobilenet_v2(pretrained=False).to(device)
        model.classifier[1] = nn.Linear(in_features=model.classifier[1].in_features, out_features=120).to(device)
    else:
        raise ValueError(f"Model name {model_name} is not recognized. Choose from 'vit_dog', 'effnetb0_dog', 'effnetb2_dog', 'mobilenet_dog', 'vit_food'.")

    # Load the model state dictionary
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    return model

# Function to predict and plot an image
def pred_and_plot_image(model: torch.nn.Module,
                        image_path: str, 
                        class_names: List[str],
                        image_size: Tuple[int, int] = (224, 224),
                        transform: transforms = None,
                        device: torch.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    
    # Open image
    img = Image.open(image_path)

    # Create transformation for image (if one doesn't exist)
    if transform is not None:
        image_transform = transform
    else:
        image_transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    # Make sure the model is on the target device
    model.to(device)

    # Turn on model evaluation mode and inference mode
    model.eval()
    with torch.inference_mode():
        # Transform and add an extra dimension to image (model requires samples in [batch_size, color_channels, height, width])
        transformed_image = image_transform(img).unsqueeze(dim=0)

        # Make a prediction on image with an extra dimension and send it to the target device
        target_image_pred = model(transformed_image.to(device))

        # Convert logits -> prediction probabilities (using torch.softmax() for multi-class classification)
        target_image_pred_probs = torch.softmax(target_image_pred, dim=1)

        # Convert prediction probabilities -> prediction labels
        target_image_pred_label = torch.argmax(target_image_pred_probs, dim=1)

    # Plot image with predicted label and probability 
    plt.figure()
    plt.imshow(img)
    plt.title(f"Pred: {class_names[target_image_pred_label]} | Prob: {target_image_pred_probs.max():.3f}")
    plt.axis('off')
    plt.show()

# Example usage
if __name__ == "__main__":
    # Define the path to your model and the model name
    vit_model_5_dog = "models/07_vision transformer rotary_dog_vision_5_epochs.pth"
    effnetb0_model_10_dog = "models/07_effnetb0_dog_vision_10_epochs.pth"
    effnetb2_model_10_dog = "models/07_effnetb2_dog_vision_10_epochs.pth"
    mobilenet_model_10_dog = "models/07_mobilenet_dog_vision_10_epochs.pth"
    vit_model_10_food = "models/07_Vision_Transformer with code_foodvision_10_epochs.pth"
    
    model_choice = input("choice: ")
    if model_choice == "vit_dog":
        ready_model = load_model(vit_model_5_dog, model_choice)
    elif model_choice == "effnetb0_dog":
        ready_model = load_model(effnetb0_model_10_dog, model_choice)
    elif model_choice == "effnetb2_dog":
        ready_model = load_model(effnetb2_model_10_dog, model_choice)
    elif model_choice == "mobilenet_dog":
        ready_model = load_model(mobilenet_model_10_dog, model_choice)
    elif model_choice == "vit_food":
        ready_model = load_model(vit_model_10_food, model_choice)
    else:
        print("Enter valid input")

    

    # Directory containing images
    image_directory = r"C:\Users\Inzayn\Desktop\to_test"

    # Loop through each image in the directory and make predictions
    for image_file in os.listdir(image_directory):
        if image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(image_directory, image_file)
            pred_and_plot_image(ready_model, image_path, unique_breed)



In [86]:
ready_model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat