# Classification on Image Data with PyTorch

In this example, we build a simple CNN with `PyTorch` to classify images of 10 types of flowers. Then, we build a small application that lets us upload an image and get its predicted label in real time.

### Load data

Image data should be in a `zip` file and organized as one folder per label. More specifically, all images with the same label are placed in the same folder, and the folder name is the label name.

Please set `data_path` to the location of the `zip` file in your Google Drive. In Google Colab, curly brackets `{}` allow us to use a Python variable inside a terminal command (such as `!unzip`).

In this example, we use the flowers.zip dataset which is originally from Kaggle: https://www.kaggle.com/datasets/jonathanflorez/extended-flowers-recognition


In [None]:
data_path = 'flowers.zip'
!unzip '/content/flowers.zip'

After unzipping, the images will be stored in the `resized` folder, in 10 subfolders representing the 10 classes.

### Process data

This part can be run as is.

In [None]:
import numpy as np
import torch
from torchvision import datasets
from torchvision.transforms import v2

In [None]:
batch_size = 32
data_dir = '/content/resized/'
# Use the first GPU when one is available; otherwise fall back to the CPU so
# the notebook still runs on a runtime without an accelerator.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Training-time preprocessing: random crop + flip for augmentation, then
# conversion to a float32 tensor scaled to [0, 1].
# ToImage() must come before ToDtype(): ToDtype only acts on tensors, so
# placing it ahead of the conversion (as with the deprecated ToTensor) makes
# it a no-op on the PIL images that ImageFolder yields.
train_process = v2.Compose([
    v2.RandomResizedCrop(size=(224, 224), antialias=True),
    v2.RandomHorizontalFlip(p=0.5),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
])

# Inference/validation preprocessing: deterministic resize, same tensor
# conversion and scaling as training, no augmentation.
infer_process = v2.Compose([
    v2.Resize(size=(224, 224)),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
])

train_set = datasets.ImageFolder(data_dir, transform=train_process)

# Persist the label-name -> class-index mapping so the application part of the
# notebook can translate predicted indices back to label names.
import pickle
with open('class_dict.dict', 'wb') as f:
    pickle.dump(train_set.class_to_idx, f)

# Same image folder, but with the non-augmenting transform for validation.
valid_set = datasets.ImageFolder(data_dir, transform=infer_process)

# Random 80/20 split by index: the first `split` shuffled indices are used for
# validation, the remainder for training. The two datasets list the same files
# in the same order, so the index sets do not overlap.
indices = np.arange(len(train_set))
np.random.shuffle(indices)
split = int(np.floor(0.2 * len(train_set)))
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, sampler=valid_sampler)



### Modeling

We can change a few hyperparameters to see if the performance improves. Save the model when you are happy with its performance.
- `num_epochs`: like in the previous module, this is the number of training iterations (full passes over the training data)
- `num_cnns`: number of CNN blocks - including one Conv2D layer and one MaxPooling layer per block.
- `learning_rate`: how fast the model will update in each iteration
- `batch_size`: how many images are used in each batch in one iteration
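- `weight_decay_rate`: strength of the weight-decay (L2) penalty that shrinks the model weights during training; this is different from lowering the learning rate, and the cells below do not set it, so add it to the optimizer (as `weight_decay`) if you want to experiment with it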

Finally, we will save the best model during training in terms of validation accuracy.

In [None]:
# Hyperparameters for the modeling cells below.
# NOTE: the data loaders were already built in the data-processing cell with
# the batch_size defined there; changing this value alone does not rebuild them.
batch_size = 32
# Step size passed to the optimizer.
learning_rate = 0.001
# Number of Conv2d -> ReLU -> MaxPool2d blocks in the network.
num_cnns = 3
# Number of passes over the training data.
num_epochs = 10

In [None]:
from torch import nn

# Feature extractor: Conv2d -> ReLU -> MaxPool2d blocks. The first block maps
# the 3 input channels to 32; each further block keeps 32 channels. At least
# one block is always built, matching the behaviour for num_cnns <= 1.
layers = []
in_channels = 3
for _ in range(max(num_cnns, 1)):
    layers.append(nn.Conv2d(in_channels, 32, 3))
    layers.append(nn.ReLU())
    layers.append(nn.MaxPool2d(2, 2))
    in_channels = 32
layers.append(nn.Flatten())
neural_net = nn.Sequential(*layers)

# Push one dummy 224x224 image through the (still CPU-resident) extractor to
# discover the flattened feature size; no gradients are needed for this probe.
with torch.no_grad():
    out = neural_net(torch.randn(3, 224, 224).unsqueeze(0))

# Classifier head. The ReLU between the two Linear layers is required:
# without a non-linearity, two stacked Linear layers are equivalent to a
# single linear map and the 256-unit hidden layer adds no capacity.
neural_net.append(nn.Linear(out.shape[1], 256))
neural_net.append(nn.ReLU())
num_classes = len(train_set.classes)
neural_net.append(nn.Linear(256, num_classes))

# Move the finished model to the training device in one step.
neural_net = neural_net.to(device)

In [None]:
criterion = nn.CrossEntropyLoss()

from torch.optim import Adam
optimizer = Adam(neural_net.parameters(), lr = learning_rate)
# Number of training images (kept for reference; the loop below counts batches
# directly instead of deriving the count from this value).
train_len = len(train_set) - split
best_accuracy = 0

for epoch in range(num_epochs):
    # ---- training pass ----
    neural_net.train()
    running_loss = 0.0
    num_batches = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = neural_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1
    # Report the mean loss over every batch of the epoch, including the last
    # (possibly smaller) one; max() guards against an empty training loader.
    print('epoch %d, loss: %.3f' % (epoch + 1, running_loss / max(num_batches, 1)), end=', ')

    # ---- validation pass ----
    neural_net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = neural_net(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Accuracy in percent; 0 when the validation set is empty.
    accuracy = (100 * correct / total) if total else 0.0

    # Keep only the best model seen so far. best_accuracy must be updated
    # here, otherwise every epoch would overwrite the checkpoint.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(neural_net, 'best-cnn.pt')
    print('valid accuracy: %d %%' % accuracy)

epoch 1, loss: 1.885, valid accuracy: 38 %
epoch 2, loss: 1.635, valid accuracy: 43 %
epoch 3, loss: 1.605, valid accuracy: 44 %
epoch 4, loss: 1.562, valid accuracy: 52 %
epoch 5, loss: 1.513, valid accuracy: 46 %
epoch 6, loss: 1.500, valid accuracy: 50 %
epoch 7, loss: 1.476, valid accuracy: 53 %
epoch 8, loss: 1.456, valid accuracy: 49 %
epoch 9, loss: 1.428, valid accuracy: 52 %
epoch 10, loss: 1.400, valid accuracy: 50 %


# Image Classification Application

Now we will build our application. This should be much easier since we don't need large forms like in tabular data. For applications with image inputs, we just need one button to upload an image and another to run the prediction.

First, load the trained model.

In [None]:
# File name of the saved model; the training loop above writes it with torch.save.
model_path = 'best-cnn.pt'

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from PIL import Image
import io
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output
import pickle

# The checkpoint is a fully pickled nn.Module (saved with torch.save(model)),
# so it must be loaded with weights_only=False; recent PyTorch versions
# default to weights_only=True and would refuse it.
# NOTE(security): both torch.load(weights_only=False) and pickle.load execute
# arbitrary code from the file. Only load files you created yourself.
model = torch.load(model_path, map_location=device, weights_only=False)
# Inference only from here on.
model.eval()

# class_dict.dict stores label name -> class index; invert it so a predicted
# index can be turned back into its label name.
with open('class_dict.dict', 'rb') as f:
  class_to_idx = pickle.load(f)
id2label = {idx: name for name, idx in class_to_idx.items()}

In [None]:
#button to predict
button_predict = widgets.Button(description="Predict")
#upload button
uploader = widgets.FileUpload(multiple=False)
#output
output = widgets.Output()
#display everything
display(button_predict, uploader, output)

#prediction function attached to the predict button
@output.capture()
def on_predict_clicked(b):
  """Classify the uploaded image with the loaded model and display the result.

  Args:
    b: the Button instance that was clicked (unused; required by on_click).
  """
  output.clear_output()

  # FileUpload.value is a dict of name -> file info in ipywidgets 7 and a
  # tuple of file-info dicts in ipywidgets 8; normalise both to a list.
  uploads = uploader.value
  uploaded_files = list(uploads.values()) if isinstance(uploads, dict) else list(uploads)
  if not uploaded_files:
    print('please upload an image first')
    return

  try:
    # Force 3-channel RGB so grayscale / RGBA uploads match the model input.
    image = Image.open(io.BytesIO(uploaded_files[0]['content'])).convert('RGB')
  except OSError:
    # Raised by PIL when the bytes are not a readable image.
    print('the uploaded file could not be read as an image')
    return

  image_tensor = infer_process(image).to(device)
  # Use the model loaded from disk, not the in-memory training variable.
  model.eval()
  with torch.no_grad():
    prediction = model(torch.unsqueeze(image_tensor, 0))
  # Plain int so the lookup key has the same type as the dictionary keys.
  prediction = int(np.argmax(prediction.cpu().numpy()))
  label = id2label[prediction]
  plt.imshow(image)
  plt.title('this image is classified as ' + label, y=-0.2)
  plt.show()

button_predict.on_click(on_predict_clicked)

Button(description='Predict', style=ButtonStyle())

FileUpload(value={}, description='Upload')

Output()