In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

import torchvision
from torchvision import transforms

In [2]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
#This tells PIL to load what it can, even from truncated images, rather than throwing an error

In [3]:
import os
base_dir = './data/weather_pics/'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

### Data augmentation
some examples of common operations:  
- `transforms.RandomCrop`
- `transforms.RandomRotation`
- `transforms.RandomHorizontalFlip()`
- `transforms.RandomVerticalFlip()`
- `transforms.ColorJitter(brightness)`
- `transforms.ColorJitter(contrast)`
- `transforms.ColorJitter(saturation)`
- `transforms.ColorJitter(hue)`
- `transforms.RandomGreyscale()`

In [5]:
#data augmentation only applies to training data
train_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomCrop(192), #crop 192*192
    transforms.RandomHorizontalFlip(0.5), #0.5 probability of beiing flipped
    transforms.RandomVerticalFlip(), #by default, it's 0.5
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness = 0.5), # adjusts brightness randomly up to ±50%
    transforms.ColorJitter(contrast = 0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

Applying the transforms:
- The number of images in your dataset remains the same.
- Each image gets a fresh version of the transformation every time it’s loaded (e.g., during every epoch).
- No permanent changes are written to disk unless you explicitly save them yourself.

In [6]:
train_ds = torchvision.datasets.ImageFolder(train_dir, transform = train_transform)
test_ds = torchvision.datasets.ImageFolder(test_dir, transform = test_transform)

In [7]:
batch_size = 32
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = batch_size, shuffle = True)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = batch_size*2)

In [8]:
#load pretrained model
model = torchvision.models.vgg16(pretrained = True)



In [9]:
#obeserve the pretrained model
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [10]:
#freeze the parameters of the features. The parameters in classifier/fc layers can still be tuned.
for param in model.features.parameters():
    param.required_grad = False

In [11]:
#change the classifier layer's output to align with our objective
model.classifier[-1].out_features = 4
#another method:
# model.classifier[-1] = torch.nn.Linear(model.classifier[-1],4)

In [12]:
# device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
device = "cpu" #mps has some problem with the model
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [13]:
optimizer = optim.Adam(model.parameters(), lr = 0.001)
loss_fn = nn.CrossEntropyLoss()

In [14]:
#learning rate decay
from torch.optim import lr_scheduler
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size = 7, gamma = 0.1) #decay to 0.1 every 7 steps

In [15]:
def fit (epoch, model, train_loader, test_loader):
    correct = 0
    total = 0
    running_loss = 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        with torch.no_grad():
            y_pred = torch.argmax(y_pred, dim = 1)
            correct += (y_pred==y).sum().item()
            total += y.size(0)
            running_loss +=loss.item()

    step_lr_scheduler.step()
    epoch_loss = running_loss/len(train_loader.dataset)
    epoch_acc = correct/total
    
    #test
    test_correct = 0
    test_running_loss = 0
    test_total = 0

    with torch.no_grad():
        for x, y in train_loader:
            #put the data to GPU
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            y_pred = torch.argmax(y_pred, dim = 1)
            test_correct += (y_pred==y).sum().item()
            test_total += y.size(0)
            test_running_loss +=loss.item()
    
    test_epoch_loss = test_running_loss/len(test_loader.dataset)
    test_epoch_acc = test_correct/test_total

    print(f'Epoch: {epoch}, loss:{round(epoch_loss, 3)}, accuracy: {round(epoch_acc, 3)}, test_loss: {round(test_epoch_loss, 3)}, test_accuracy: {round(test_epoch_acc, 3)}')
    
    return  epoch_loss, epoch_acc, test_epoch_loss, test_epoch_acc 

epochs = 2
train_loss = []
train_acc = []
test_loss = []
test_acc = []
for epoch in range(epochs):
    epoch_loss, epoch_acc, test_epoch_loss, test_epoch_acc = fit(epoch, model, train_dl, test_dl)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    test_loss.append(test_epoch_loss)
    test_acc.append(test_epoch_acc)

Without learning rate decay, the first 2 epoch performance is  

Epoch: 0, loss:0.176, accuracy: 0.254, test_loss: 0.197, test_accuracy: 0.256  
Epoch: 1, loss:0.035, accuracy: 0.507, test_loss: 0.183, test_accuracy: 0.615

With learning rate decay, the first 2 epoch performance is  

Epoch: 0, loss:0.137, accuracy: 0.287, test_loss: 0.174, test_accuracy: 0.39  
Epoch: 1, loss:0.03, accuracy: 0.579, test_loss: 0.085, test_accuracy: 0.665

Add data augmentation, the first 2 epoch performance is  

Epoch: 0, loss:0.091, accuracy: 0.296, test_loss: 0.203, test_accuracy: 0.32  
Epoch: 1, loss:0.035, accuracy: 0.505, test_loss: 0.106, test_accuracy: 0.593

The result is not necessarily bettwe with data augmentation, this means selecting the appoach to change the image is very important. Especially for weather images, some colorjitters are not very helpful.

## Save the model

In [18]:
#.state_dict() is a dictionary that saves all the parameters of the model
model.state_dict()

OrderedDict([('features.0.weight',
              tensor([[[[-0.5499,  0.1451,  0.5307],
                        [-0.5788,  0.3591,  0.7676],
                        [-0.6850, -0.0446,  0.4866]],
              
                       [[ 0.1776,  0.0115, -0.0800],
                        [ 0.0465, -0.0686, -0.2589],
                        [ 0.1352, -0.1708, -0.1307]],
              
                       [[ 0.3143, -0.1642, -0.4256],
                        [ 0.4765, -0.0811, -0.4851],
                        [ 0.6332,  0.0207, -0.2759]]],
              
              
                      [[[ 0.2317,  0.1255,  0.1843],
                        [-0.4289, -0.2447,  0.2445],
                        [-0.2519,  0.1402, -0.0074]],
              
                       [[-0.1419, -0.2206,  0.1482],
                        [-0.8424, -0.3533,  0.5618],
                        [-0.2436,  0.5172,  0.5367]],
              
                       [[-0.3172, -0.3735, -0.1345],
                     

In [21]:
dict2 = model.state_dict()

In [22]:
type(dict2)

collections.OrderedDict

In [24]:
len(dict2)

32

In [30]:
list(dict2.keys())[1]

'features.0.bias'

In [31]:
dict2['features.0.bias']

tensor([ 0.3993,  0.3838,  0.4718, -0.3204,  0.3867, -0.3791,  0.3974, -0.5562,
         0.2692, -0.7465, -0.3542,  0.2378, -1.3275, -0.1686,  0.3957, -0.0988,
         0.0435, -0.6846,  0.1346,  0.5599,  0.3031,  0.3361,  0.1089,  0.4559,
         0.1156,  0.0851, -0.0510, -0.5560,  0.1537, -0.3992, -0.0655,  0.0746,
         0.2798,  0.3267,  0.5644, -1.2680, -0.8218, -0.9467,  0.1411,  0.2680,
         0.1793, -0.5430,  0.3466, -0.0698, -1.0372, -0.6794, -0.7562,  0.2502,
         0.3982, -0.4699, -0.2973,  0.5665, -1.3420,  0.4806, -0.8223,  0.2372,
         0.2290,  0.5002,  0.5492,  0.5177, -0.2224,  0.0047, -0.5582,  0.2057])

In [34]:
dict2['features.0.weight'][:1]

tensor([[[[-0.5499,  0.1451,  0.5307],
          [-0.5788,  0.3591,  0.7676],
          [-0.6850, -0.0446,  0.4866]],

         [[ 0.1776,  0.0115, -0.0800],
          [ 0.0465, -0.0686, -0.2589],
          [ 0.1352, -0.1708, -0.1307]],

         [[ 0.3143, -0.1642, -0.4256],
          [ 0.4765, -0.0811, -0.4851],
          [ 0.6332,  0.0207, -0.2759]]]])

In [35]:
list(dict2)

['features.0.weight',
 'features.0.bias',
 'features.2.weight',
 'features.2.bias',
 'features.5.weight',
 'features.5.bias',
 'features.7.weight',
 'features.7.bias',
 'features.10.weight',
 'features.10.bias',
 'features.12.weight',
 'features.12.bias',
 'features.14.weight',
 'features.14.bias',
 'features.17.weight',
 'features.17.bias',
 'features.19.weight',
 'features.19.bias',
 'features.21.weight',
 'features.21.bias',
 'features.24.weight',
 'features.24.bias',
 'features.26.weight',
 'features.26.bias',
 'features.28.weight',
 'features.28.bias',
 'classifier.0.weight',
 'classifier.0.bias',
 'classifier.3.weight',
 'classifier.3.bias',
 'classifier.6.weight',
 'classifier.6.bias']

#### Save the parameteres

In [37]:
path = './vgg16.pth'
torch.save(model.state_dict(), path)

#### The saved model is over 500mb, really big!

#### Recover the model

As we generated the model from transfer learning, it's difficult to create a model from an existing instance.