# PyTorch implementation

This file is included to show how resnet50_torch.pth was produced.

Note: The model was trained on big_train_data_1008 only; it never saw the images in "data/images" or the other data from the drive.

In [3]:
import os
import torch
from torch import nn
from torchvision.models import resnet50, ResNet50_Weights
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision.datasets import ImageFolder
from torchvision.transforms import Lambda
import torchvision.transforms as transforms

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Class names keyed by their integer index (0-9), in this exact order
labels = dict(enumerate([
    "dog",
    "horse",
    "elephant",
    "butterfly",
    "rooster",
    "cat",
    "cow",
    "sheep",
    "spider",
    "squirrel",
]))

In [None]:
path_train = ""  # Root folder of the training images

# The default ResNet-50 weights entry provides its own pre-processing
# pipeline; fetch it so the training images are transformed by it
weights = ResNet50_Weights.DEFAULT
preprocess = weights.transforms()


def _one_hot(class_index):
    """Turn an integer class index into a length-10 float one-hot vector."""
    index = torch.tensor(class_index)
    return nn.functional.one_hot(index, num_classes=10).to(torch.float)


# Training set: images are pre-processed, class indices become one-hot targets
data_train = ImageFolder(
    path_train,
    transform=preprocess,
    target_transform=Lambda(_one_hot),
)

# Serve the training set in shuffled batches of 128 samples
train_dataloader = DataLoader(dataset=data_train, shuffle=True, batch_size=128)

In [28]:
# Load ResNet-50 with its ImageNet-pretrained weights
model = resnet50(weights="IMAGENET1K_V2")

# Freeze every pretrained parameter; only the new head added below is trained
for param in model.parameters():
    param.requires_grad = False

num_classes = 10

# Replace the fully connected layer of the model with a new sequential head.
# Freshly created layers have requires_grad=True, so they stay trainable.
model.fc = nn.Sequential(
    nn.Linear(2048, 100),  # 2048 - shape after ResNet50's average pooling
    nn.ReLU(),
    nn.Linear(100, num_classes),  # one output per class
)

# NOTE: training mode also lets the backbone's BatchNorm layers keep updating
# their running statistics, even though their weights are frozen.
model.train()  # Set the model to training mode
model.to(device)  # Move the model to the specified device (CPU or GPU)

# CrossEntropyLoss accepts class-probability targets, which is what the
# one-hot vectors coming from the dataloader are
criterion = nn.CrossEntropyLoss()

# Only the head has trainable parameters, so only those are handed to Adam
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)

# Training loop
for epoch in range(5):  # We did just 5 epochs
    # The batch targets are named `targets` (not `labels`) so the class-name
    # dictionary `labels` defined earlier in the notebook is not overwritten
    for i, (images, targets) in enumerate(train_dataloader):
        images = images.to(device)  # Move the input images to the specified device
        targets = targets.to(device)  # Move the one-hot targets to the specified device

        optimizer.zero_grad()  # Zero the gradients

        output = model(images)  # Forward pass

        # Index of the largest entry per row: predicted class for the model
        # output, ground-truth class for the one-hot target
        output_predict = output.argmax(dim=1)
        label_predict = targets.argmax(dim=1)

        # Fraction of samples in this batch that were classified correctly
        accuracy = (output_predict == label_predict).float().mean().item()
        loss = criterion(output, targets)  # Calculate the loss

        loss.backward()  # Backward pass (compute gradients)
        optimizer.step()  # Update model parameters

        # Print the current iteration's loss and accuracy
        print(f"Epoch {epoch+1}, Iteration {i+1}: Loss = {loss.item()}: Accuracy = {accuracy},")

# Persist the learned weights (state dict only, not the whole module object)
torch.save(model.state_dict(), "resnet50.pth")

Epoch 1, Iteration 1: Loss = 2.3279340267181396: Accuracy = 0.0546875,
Epoch 1, Iteration 2: Loss = 2.230929136276245: Accuracy = 0.3984375,
Epoch 1, Iteration 3: Loss = 2.133934736251831: Accuracy = 0.6171875,
Epoch 1, Iteration 4: Loss = 2.0644583702087402: Accuracy = 0.6484375,
Epoch 1, Iteration 5: Loss = 1.924604892730713: Accuracy = 0.8125,
Epoch 1, Iteration 6: Loss = 1.8178476095199585: Accuracy = 0.8203125,
Epoch 1, Iteration 7: Loss = 1.7037529945373535: Accuracy = 0.890625,
Epoch 1, Iteration 8: Loss = 1.6186394691467285: Accuracy = 0.890625,
Epoch 1, Iteration 9: Loss = 1.4860680103302002: Accuracy = 0.875,
Epoch 1, Iteration 10: Loss = 1.3224585056304932: Accuracy = 0.875,
Epoch 1, Iteration 11: Loss = 1.2022466659545898: Accuracy = 0.9140625,
Epoch 1, Iteration 12: Loss = 1.202772855758667: Accuracy = 0.890625,
Epoch 1, Iteration 13: Loss = 1.0467451810836792: Accuracy = 0.8984375,
Epoch 1, Iteration 14: Loss = 0.9276514053344727: Accuracy = 0.921875,
Epoch 1, Iteration 1

In [5]:
from resnet50Pytorch import AnimalClassifier  # Project-local classifier wrapper (see resnet50Pytorch)

network = AnimalClassifier()  # Create an instance of the AnimalClassifier

# Folder with the images to classify. Replace "scoiattolo" with any other
# animal folder from data/images, or point the path at your own data.
path = "../data/images/scoiattolo/"

# Iterate over the images in the directory
for animal in os.listdir(path):
    # os.path.join builds a valid file path whether or not `path` ends with
    # a separator, so the path above can be changed freely
    print(network(os.path.join(path, animal)))  # Print the classifier's result for this image

squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
squirrel
dog
squirrel
squirrel
squirrel
