In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from PIL import Image, ImageFile

# Tell PIL to load image files whose data is truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES=True

## Setting up DataLoaders

There are two key elements in PyTorch for working with data: `datasets` and `data loaders`. A dataset class helps manage the data you want to use, while data loaders feed this data into your neural network.

The `torchvision` package provides a class called `ImageFolder`, which simplifies this process, especially when your data is organized in directories, with each directory representing a specific category or label. In our case we have a dataset with images of cats and fish, one directory per label, so `ImageFolder` is a handy choice.

The `check_image` function is a small utility function that we supply to the `is_valid_file` parameter within the ImageFolder class. Its purpose is to perform a quick check to ensure that PIL (Python Imaging Library) can successfully open the file. By using this function, we can avoid the need to manually clean up the downloaded dataset.

In [2]:
def check_image(path):
    """Return True if PIL can open the file at ``path``, False otherwise.

    Intended as the ``is_valid_file`` callback of ``ImageFolder`` so that
    files PIL cannot open are skipped instead of breaking the dataset.
    ``Image.open`` only identifies the file; pixel data is not decoded here.
    """
    try:
        # The context manager closes the file handle that Image.open keeps open.
        with Image.open(path):
            return True
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must still propagate so a long directory scan can be interrupted.
        return False

Set up the transforms for every image:

- Resize to 64x64
- Convert to tensor
- Normalize using ImageNet mean & std

In [5]:
# Per-image preprocessing, applied in order: resize to 64x64, convert to a
# tensor, then normalize each channel with the ImageNet mean and std.
img_transforms = transforms.Compose(
    [
        transforms.Resize(size=(64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [6]:
# Training split: files that check_image rejects are left out of the dataset.
train_data_path = "./train/"
train_data = torchvision.datasets.ImageFolder(
    root=train_data_path,
    transform=img_transforms,
    is_valid_file=check_image,
)

In [7]:
# Validation split: same transforms and file check as the training split.
val_data_path = "./val/"
val_data = torchvision.datasets.ImageFolder(
    root=val_data_path,
    transform=img_transforms,
    is_valid_file=check_image,
)

In [8]:
# Test split: same transforms and file check as the training split.
test_data_path = "./test/"
test_data = torchvision.datasets.ImageFolder(
    root=test_data_path,
    transform=img_transforms,
    is_valid_file=check_image,
)

In [9]:
# Number of images per batch served by each DataLoader.
batch_size = 64

In [10]:
# Shuffle the training set only: ImageFolder lists its samples grouped by
# class, so unshuffled batches would each hold (almost) a single label.
train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
# Evaluation metrics do not depend on sample order, so these stay sequential.
val_data_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)
test_data_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

## Our First Model, SimpleNet
SimpleNet is a very simple combination of three Linear layers with ReLU activations between them. Note that `forward()` returns raw scores: we don't apply `softmax()` there, so we need to apply it ourselves in the training function during the validation phase.

In [13]:
class SimpleNet(nn.Module):
    """Fully connected classifier: three Linear layers with ReLU in between.

    ``forward`` returns raw scores (logits); no softmax is applied.

    Args:
        in_features: Number of values in one flattened input. The default,
            12288 = 64 * 64 * 3, matches a 64x64 image with 3 (RGB) channels.
        num_classes: Number of scores produced per input.
    """

    def __init__(self, in_features=12288, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 84)
        self.fc2 = nn.Linear(84, 50)
        self.fc3 = nn.Linear(50, num_classes)

    def forward(self, x):
        """Flatten ``x`` into rows of ``in_features`` values and return the logits."""
        # reshape (unlike view) also accepts non-contiguous tensors such as the
        # result of permute(); the row width is read from fc1 so it cannot
        # drift away from the layer definition.
        x = x.reshape(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [14]:
# Create the SimpleNet instance that the rest of the notebook trains and evaluates.
simplenet = SimpleNet()

## Create an optimizer
Here, we're just using Adam as our optimizer with a learning rate of 0.001.

In [15]:
# Adam updates every parameter of simplenet, using a learning rate of 0.001.
optimizer = optim.Adam(simplenet.parameters(), lr=0.001)

## Copy the model to GPU
Copy the model to the GPU if available.

In [17]:
# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Module.to() moves the parameters and returns the module itself, which the
# notebook displays as this cell's output.
simplenet.to(device)

SimpleNet(
  (fc1): Linear(in_features=12288, out_features=84, bias=True)
  (fc2): Linear(in_features=84, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=2, bias=True)
)

## Training
Trains the model: copies batches to the GPU if required, calculates losses, optimizes the network, and performs validation after each epoch.

In [35]:
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    """Train ``model`` and report validation metrics after every epoch.

    Args:
        model: Module mapping a batch of inputs to per-class scores (logits).
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar
            tensor. It is treated as the batch-mean loss: each value is
            weighted by the batch size before averaging over the epoch.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        device: Device every batch is copied to before the forward pass.

    Prints one summary line per epoch and returns nothing. If a loader yields
    no samples, the corresponding metrics are reported as ``nan``.
    """
    for epoch in range(1, epochs + 1):
        # ---- Training pass ------------------------------------------------
        model.train()
        training_loss = 0.0
        num_train_examples = 0
        for inputs, targets in train_loader:
            # Gradients accumulate across backward() calls; clear them per batch.
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)

            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()

            # Weight by batch size because the last batch may be smaller.
            batch_len = inputs.size(0)
            training_loss += loss.item() * batch_len
            num_train_examples += batch_len
        # Average over the samples actually seen, which stays correct with
        # drop_last or a sampler that covers only part of the dataset.
        if num_train_examples:
            training_loss /= num_train_examples
        else:
            training_loss = float("nan")

        # ---- Validation pass ----------------------------------------------
        model.eval()
        valid_loss = 0.0
        num_correct = 0
        num_examples = 0
        # No gradients are needed for evaluation; skipping autograd saves
        # memory and time.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)

                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)

                # Predicted class = index of the largest softmax probability.
                predicted = torch.max(F.softmax(output, dim=1), dim=1)[1]
                correct = torch.eq(predicted, targets)
                num_correct += torch.sum(correct).item()
                num_examples += correct.shape[0]

        # Guard against an empty validation loader instead of dividing by zero.
        if num_examples:
            valid_loss /= num_examples
            accuracy = num_correct / num_examples
        else:
            valid_loss = accuracy = float("nan")

        print('Epoch: {}, Training Loss: {:.2f}, Validation Loss: {:.2f}, accuracy = {:.2f}'.format(
            epoch, training_loss, valid_loss, accuracy))

In [36]:
# Run five epochs of training with cross-entropy loss on the selected device.
train(
    model=simplenet,
    optimizer=optimizer,
    loss_fn=torch.nn.CrossEntropyLoss(),
    train_loader=train_data_loader,
    val_loader=val_data_loader,
    epochs=5,
    device=device,
)

Epoch: 1, Training Loss: 0.26, Validation Loss: 0.84, accuracy = 0.68
Epoch: 2, Training Loss: 0.28, Validation Loss: 0.56, accuracy = 0.76
Epoch: 3, Training Loss: 0.14, Validation Loss: 0.76, accuracy = 0.72
Epoch: 4, Training Loss: 0.16, Validation Loss: 0.62, accuracy = 0.75
Epoch: 5, Training Loss: 0.12, Validation Loss: 0.71, accuracy = 0.72


## Making predictions
Labels are in alphanumeric order, so `cat` will be 0, `fish` will be 1. We'll need to transform the image and also make sure that the resulting tensor is copied to the appropriate device before applying our model to it.

In [37]:
labels = ['cat','fish']

# ImageFolder's default loader converts every image to RGB; do the same here so
# grayscale/RGBA files match the 3-channel Normalize step. The ``with`` block
# closes the file once the pixels have been read.
with Image.open("./val/fish/100_1422.JPG") as img_file:
    img = img_transforms(img_file.convert("RGB")).to(device)
img = torch.unsqueeze(img, 0) # add a dimension for batch

simplenet.eval()
# Inference needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    prediction = F.softmax(simplenet(img), dim=1)
prediction = prediction.argmax()
print(labels[prediction.item()])

fish


## Saving Models
We can either save the entire model using `save` or just the parameters using `state_dict`. Using the `state_dict` is normally preferable, as it allows you to reuse parameters even if the model's structure changes (or apply parameters from one model to another).

In [40]:
# Save the whole module object (pickled), then read it back.
torch.save(simplenet, "./tmp/simplenet")
# Recent PyTorch releases default torch.load to weights_only=True, which refuses
# a pickled module, so opt out explicitly for this full-model checkpoint.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
# map_location places the restored parameters on the device used by this notebook.
simplenet = torch.load("./tmp/simplenet", map_location=device, weights_only=False)

In [41]:
# Save only the parameters, then load them into a freshly built network.
torch.save(simplenet.state_dict(), "./tmp/simplenet")
# A new SimpleNet starts on the CPU; move it to the notebook's device so it
# can be used with inputs prepared via .to(device).
simplenet = SimpleNet().to(device)
# map_location lets a checkpoint written from the GPU load on a CPU-only machine.
simplenet_state_dict = torch.load("./tmp/simplenet", map_location=device)
simplenet.load_state_dict(simplenet_state_dict)

<All keys matched successfully>