In [None]:
from google.colab import drive
drive.mount('/content/drive/')

## **Import necessarcy packages and setup environment**

In [2]:
#Import neccessary package
import numpy as np
import random
import torch
from torch import nn
from torch import optim
from torchvision import datasets,transforms,models

In [3]:
#Setup for getting the reproducbility of results
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
#Check GPU 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## **Load datasets**

In [None]:
# !gdown --id 14GeD4qruCPN1nM2LZ4w0H3_i3OZ2m2xf

In [None]:
# !tar -xvf /content/flower_data.tar.gz -C /content/drive/MyDrive/flower/

In [13]:
#Directory of dataset
data_dir = '/content/drive/MyDrive/flower'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
test_dir = data_dir +'/test'

In [14]:
# Define trainformation
trans = transforms.Compose([transforms.Resize(256),
                              transforms.CenterCrop(224),
                              transforms. ToTensor(),
                              transforms. Normalize([0.485, 0.456, 0.406],
                                                    [0.229, 0.224, 0.225])])
# Load the datasets with ImageFolder
training_set = datasets. ImageFolder(train_dir, transform=trans)
validation_set = datasets. ImageFolder(valid_dir, transform=trans)
testing_set = datasets. ImageFolder(test_dir, transform=trans)
# Using the image datasets and the trainforms, define the dataloaders
train_loader = torch.utils.data.DataLoader(training_set, batch_size=64,
  shuffle=True)
validate_loader = torch.utils.data.DataLoader(validation_set, batch_size=32)
test_loader = torch.utils.data.DataLoader(testing_set, batch_size=32) 

## **Load model**

In [17]:
model_pretrained = models.vgg16(pretrained = True)
model_nopretrained = models.vgg16(pretrained = False)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [18]:
# Build custom classifier
classifier = nn.Sequential(nn.Linear(25088, 4096),
                            nn.ReLU(inplace=True),
                            nn.Dropout(p=0.5, inplace=False),
                            nn.Linear(4096, 4096),
                            nn.ReLU(inplace=True),
                            nn.Dropout(p=0.5, inplace=False),
                            nn.Linear(4096, 102),
                            nn.LogSoftmax(dim=1)
                            )

In [19]:
#Build new classifier
model_pretrained.classifier = classifier
model_nopretrained.classifier = classifier

## **Train model**

Function for validating model

In [20]:
# Function for validation model
def validation(model, validateloader, criterion):
    val_loss = 0
    accuracy = 0
    for images, labels in iter(validateloader):
          images, labels = images.to('cuda'), labels.to('cuda')
          output = model. forward(images)
          val_loss += criterion(output, labels).item()
          probabilities = torch.exp(output)
          equality = (labels.data == probabilities.max(dim=1)[1])
          accuracy += equality.type(torch.FloatTensor).mean()
    return val_loss, accuracy

Function for training model

In [21]:
import time
def train_model(model,criterion,optimizer):
      max_epoch = 10
      print_every = 20
      n_steps = 0
      model.to('cuda')
      start_train = time.time()
      for n_epochs in range(max_epoch):
        print(f'- Epoch {n_epochs + 1}')
        start_epoch = time.time()
        model.train()
        epoch_loss = 0
        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to('cuda'), labels.to('cuda')
            optimizer.zero_grad()
            output = model.forward( images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            if n_steps % print_every == 0 :
                  model.eval()
                  #Turn off gradients for validation, saves memory and computations
                  with torch.no_grad():
                        validation_loss, accuracy = validation(model, validate_loader, criterion)
                        print(f'Batch: {batch_idx}/{len(train_loader)}.. '
                               f'Training Loss: {epoch_loss/print_every:.3f}..'
                               f'Validation Loss: {validation_loss/len(validate_loader): 3f} ..',
                               f'Validation Accuracy: {accuracy/len(validate_loader): 3f}')
                        epoch_loss = 0
                        model.train()
            n_steps += 1
        end_epoch = time.time()
        print(f'--> Epoch {n_epochs + 1} costs {end_epoch - start_epoch} seconds')
        end_train = time.time()
        print(f'Training time: {end_train - start_train}')

In [22]:
from torch.nn.modules.loss import NLLLoss

Train with pretrain

In [23]:
criterion = NLLLoss()
optimizer = optim.Adam(model_pretrained.classifier.parameters(),lr = 0.001)
train_model(model_pretrained,criterion,optimizer)

- Epoch 1
Batch: 0/103.. Training Loss: 0.232..Validation Loss:  7.404507 .. Validation Accuracy:  0.022837
Batch: 20/103.. Training Loss: 4.778..Validation Loss:  3.144641 .. Validation Accuracy:  0.365118
Batch: 40/103.. Training Loss: 2.756..Validation Loss:  1.795306 .. Validation Accuracy:  0.573451
Batch: 60/103.. Training Loss: 1.938..Validation Loss:  1.312778 .. Validation Accuracy:  0.646635
Batch: 80/103.. Training Loss: 1.704..Validation Loss:  1.161159 .. Validation Accuracy:  0.709669
Batch: 100/103.. Training Loss: 1.633..Validation Loss:  0.997223 .. Validation Accuracy:  0.742121
--> Epoch 1 costs 744.8585646152496 seconds
Training time: 744.859753370285
- Epoch 2
Batch: 17/103.. Training Loss: 0.931..Validation Loss:  0.900267 .. Validation Accuracy:  0.754140
Batch: 37/103.. Training Loss: 1.015..Validation Loss:  0.930229 .. Validation Accuracy:  0.738248
Batch: 57/103.. Training Loss: 1.032..Validation Loss:  0.815124 .. Validation Accuracy:  0.786859
Batch: 77/103

Train without pretrain

In [24]:
criterion = NLLLoss()
optimizer = optim.Adam(model_nopretrained.parameters(),lr = 0.001)
train_model(model_nopretrained,criterion,optimizer)

- Epoch 1
Batch: 0/103.. Training Loss: 0.376..Validation Loss:  56.195982 .. Validation Accuracy:  0.006010
Batch: 20/103.. Training Loss: 8.937..Validation Loss:  4.587228 .. Validation Accuracy:  0.016827
Batch: 40/103.. Training Loss: 4.570..Validation Loss:  4.563555 .. Validation Accuracy:  0.033654
Batch: 60/103.. Training Loss: 4.549..Validation Loss:  4.537372 .. Validation Accuracy:  0.033654
Batch: 80/103.. Training Loss: 4.535..Validation Loss:  4.535018 .. Validation Accuracy:  0.033654
Batch: 100/103.. Training Loss: 4.510..Validation Loss:  4.510748 .. Validation Accuracy:  0.033654
--> Epoch 1 costs 712.2042028903961 seconds
Training time: 712.2059652805328
- Epoch 2
Batch: 17/103.. Training Loss: 4.070..Validation Loss:  4.510929 .. Validation Accuracy:  0.033654
Batch: 37/103.. Training Loss: 4.537..Validation Loss:  4.518927 .. Validation Accuracy:  0.033654
Batch: 57/103.. Training Loss: 4.514..Validation Loss:  4.527546 .. Validation Accuracy:  0.033654
Batch: 77/1

## **Save model**

In [None]:
def save_checkpoint(model,path):
  model.class_to_idx = training_set.class_to_idx
  checkpoint = {'arch':'vgg16',
                'class_to_idx' : model.class_to_idx,
                'model_state_dict': model.state_dict()
      
              }
  torch.save(checkpoint,path)

In [None]:
save_checkpoint(model_pretrained,'/content/drive/MyDrive/flower/checkpoint/checkpoint_use_pretrained.pth')

In [None]:
save_checkpoint(model_nopretrained,'/content/drive/MyDrive/flower/checkpoint/checkpoint_no_use_pretrained.pth')

## **Test model**

In [None]:
def test_model(model,test_loader):
  #Do validation on the test set
  model.eval()
  model.to('cuda')

  with torch.no_grad():
    accuracy = 0
    for images,labels in iter(test_loader):
      images, labels = images.to('cuda'),labels.to('cuda')
      output = model. forward(images)
      probabilities = torch.exp(output)
      equality = (labels.data == probabilities.max(dim=1)[1])
      accuracy += equality.type(torch.FloatTensor).mean()
    print("Test Accuracy : {}".format(accuracy/len(test_loader)))

In [None]:
test_model(model_pretrained,test_loader)

In [None]:
test_model(model_nopretrained,test_loader)

Train mô hình trừ pretrain đạt hiệu quả cao hơn so với train lại mô hình từ đầu (Nếu train cùng số epoch).

Do số epoch của mô hình train lại từ đầu còn khá nhỏ, đồng thời data lớn, nên độ chính xác của mô hình rất thấp.