# 1. Overview

# 2. Libraries

In [34]:
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# 3. Global variables

In [35]:
# Root folders of the training and test images; each one is handed to
# ImageFolder in the data-loading section below.
# The paths are relative, so they resolve against the notebook's working directory.
PATH_TRAINING_PICTURES = "../data/fruits-360_3-body-problem/Training"
PATH_TEST_PICTURES = "../data/fruits-360_3-body-problem/Test"

# 4. Functions

# 5. Code

## 5.1 Data Loading and Transforming

All images are first resized to a common size of 100×100 pixels. In the same pipeline, `ToTensor` converts each picture into a PyTorch tensor (the format the neural network expects) and scales the pixel values to the range [0, 1]. Finally, `Normalize` with a mean of 0.5 and a standard deviation of 0.5 per channel maps the values to the range [-1, 1], which helps the optimization during training.

In [36]:
# Preprocessing applied to every image, in order:
#   1. resize to 100x100 pixels,
#   2. convert to a tensor,
#   3. normalize each of the 3 channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(100, 100)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

We use `ImageFolder` to load the training and test images from disk. It applies the transformation pipeline defined above to every image and labels each image according to the sub-folder it is stored in.

In [37]:
# Build one dataset per split; both share the same preprocessing pipeline.
train_data = ImageFolder(
    PATH_TRAINING_PICTURES,
    transform=transform,
)
test_data = ImageFolder(
    PATH_TEST_PICTURES,
    transform=transform,
)

Next we create a `DataLoader` for each dataset. A `DataLoader` wraps a dataset and serves it to the model in batches; here we choose the batch size and whether the samples are shuffled.

In [38]:
# Serve both datasets in batches of 32 images.
# Training data is shuffled so that the classes are mixed within the batches;
# test data keeps its original order.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_data,
    batch_size=32,
    shuffle=False,
)

In [39]:
# Display the class names that ImageFolder found for the training set.
train_data.classes

['Apple', 'Cherry', 'Tomatoe']

## 5.2 Architecture

In [None]:
class FruitDetector(nn.Module):
    """Small convolutional network that classifies RGB fruit images.

    Architecture: (3x3 convolution -> ReLU -> 2x2 max-pool) twice, followed by
    two fully connected layers that return one raw score (logit) per class.

    Args:
        num_classes: Number of classes, i.e. the size of the output layer.
        input_size: Height and width of the input images, given either as a
            single int (square images) or as a ``(height, width)`` pair.
            Defaults to 100, which matches the 100x100 resize used in the
            preprocessing pipeline.

    Raises:
        ValueError: If the images are too small to survive both 2x2 poolings.
    """

    def __init__(self, num_classes, input_size=100):
        super().__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # The convolutions (kernel 3, stride 1, padding 1) keep height and width
        # unchanged; each 2x2 max-pool halves them, rounding down.
        # For 100x100 inputs: 100 -> 50 -> 25.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for two 2x2 max-pool layers"
            )

        # 3 input channels (RGB) -> 16 feature maps.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        # 16 feature maps -> 32 feature maps.
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        # Flattened input: the 32 feature maps produced by conv2, each of size
        # pooled_height x pooled_width (32*25*25 for 100x100 images).
        # 128 is the number of hidden neurons.
        self.fc1 = nn.Linear(32 * pooled_height * pooled_width, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the class scores for a batch of images.

        Shapes in the comments are for a batch of N images of size 100x100.

        Args:
            x: Image batch of shape [N, 3, H, W], where H and W match the
                ``input_size`` the model was built with.

        Returns:
            Tensor of shape [N, num_classes] with unnormalized scores.
        """
        x = F.relu(self.conv1(x))   # [N, 16, 100, 100]
        x = F.max_pool2d(x, 2)      # [N, 16, 50, 50]
        x = F.relu(self.conv2(x))   # [N, 32, 50, 50]
        x = F.max_pool2d(x, 2)      # [N, 32, 25, 25]
        x = torch.flatten(x, 1)     # [N, 32*25*25]; the batch dimension is kept
        x = F.relu(self.fc1(x))     # [N, 128]
        x = self.fc2(x)             # [N, num_classes]
        return x
        

In [41]:
# Choose where the model runs: the GPU through CUDA when one is available,
# otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# One output per class found in the training data; move the model to the device.
model = FruitDetector(num_classes=len(train_data.classes))
model = model.to(device)

## 5.3 Training

In [None]:
# Cross-entropy is the loss used for this multi-class classification problem.
criterion = nn.CrossEntropyLoss()

# Adam updates the model parameters from their gradients; learning rate 0.001.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [44]:
# Train for 20 epochs (full passes over the training data).
for epoch in range(20):
    model.train()  # put the model in training mode
    actual_loss = 0  # sum of the batch losses seen in this epoch

    for image, label in train_loader:
        # The batch must live on the same device as the model.
        image = image.to(device)
        label = label.to(device)

        optimizer.zero_grad()  # discard the gradients of the previous batch
        output = model(image)  # forward pass
        loss = criterion(output, label)
        loss.backward()  # back-propagation: compute the gradients
        optimizer.step()  # update the network parameters

        actual_loss += loss.item()

    # Report the mean batch loss of the epoch.
    print(f"Epoch {epoch+1}, Loss: {actual_loss/len(train_loader):.4f}")

Epoch 1, Loss: 0.0003
Epoch 2, Loss: 0.0001
Epoch 3, Loss: 0.0000
Epoch 4, Loss: 0.0000
Epoch 5, Loss: 0.0000
Epoch 6, Loss: 0.0000
Epoch 7, Loss: 0.0000
Epoch 8, Loss: 0.0000
Epoch 9, Loss: 0.0000
Epoch 10, Loss: 0.0000
Epoch 11, Loss: 0.0000
Epoch 12, Loss: 0.0000
Epoch 13, Loss: 0.0000
Epoch 14, Loss: 0.0000
Epoch 15, Loss: 0.0000
Epoch 16, Loss: 0.0000
Epoch 17, Loss: 0.0000
Epoch 18, Loss: 0.0000
Epoch 19, Loss: 0.0000
Epoch 20, Loss: 0.0000


## 5.4 Model evaluation

In [None]:
# Measure the classification accuracy on the test set.
correct, total = 0, 0
model.eval()  # put the model in evaluation mode
with torch.no_grad():  # gradients are not needed when only predicting
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the highest score in each row.
        # argmax is used instead of `_, preds = torch.max(outputs, 1)` so that
        # the name `_` is not bound: IPython/Jupyter uses `_` for the output of
        # the last executed cell.
        preds = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

print(f"Accuracy en test: {100*correct/total:.2f}%")