In [1]:
from google.colab import drive
import torch
import torchvision.transforms as T
import torchvision.datasets as dset
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, ConcatDataset
import numpy as np
import random

# import torch.nn.functional as F
# import torchvision

In [2]:
# Hyper Parameter 超參數

NUM_EPOCHS = 20
BATCH_SIZE = 20  # DataLoader每次抓取的數量
LEARNING_RATE = 1e-4

# 其他可調參數
PRINT_EVERY = 100


In [3]:
drive.mount("/content/drive",force_remount=True)

Mounted at /content/drive


In [4]:
FOLDERPATH = 'Colab\ Notebooks/Poster'

In [5]:
%cd drive/MyDrive/$FOLDERPATH

/content/drive/.shortcut-targets-by-id/1OC3Immm4L7H1Rp4Po9pQSFc3-pDwQpi4/Poster


In [6]:
#Check if gpu is available
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(f'計算裝置:{device}')

計算裝置:cuda


In [7]:
# 添加DataLoader的random seed
# 方法來源：https://yanwei-liu.medium.com/pytorch-reproducibility-db8458111b75
# Ensure that the output of all random values ​​is consistent

def set_seed(seed=42, loader=None):
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
  torch.backends.cudnn.benchmark = False
  torch.backends.cudnn.deterministic = True
  try:
    loader.sampler.generator.manual_seed(seed)
  except AttributeError:
    pass

set_seed()

In [8]:
SIZE = 224

In [9]:
# data preprocessing: re-size images to SIZE*SIZE and convert image to tensor

transform_turing = T.Compose([
    T.Resize((300,300)),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomAutocontrast(),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    T.RandomRotation(10),
    T.RandomCrop((224,224)),
    T.ToTensor()
])
transform_new = T.Compose([
    T.Resize((300,300)),
    T.RandomCrop((224,224)),
    T.ToTensor()
])
transform_test = T.Compose([
    T.Resize((224,224)),
    T.ToTensor()
])

In [10]:
# Load data in train and val and test from google drive


# Load train data
# 0: portrait  1: SD
train_data_1 = dset.ImageFolder('Dataset/2_classes/train', transform=transform_turing)
train_data_2 = dset.ImageFolder('new_test', transform=transform_new)
train_data_3 = dset.ImageFolder('new_test', transform=transform_turing)
train_data = ConcatDataset([train_data_1, train_data_2, train_data_3])

# Load val and test Data
# 0: portrait  1: SD
val_data = dset.ImageFolder('Dataset/2_classes/val', transform=transform_test)
test_data = dset.ImageFolder('Dataset/2_classes/test', transform=transform_test)

NUM_TRAIN = len(train_data)
NUM_VAL = len(val_data)
NUM_TEST = len(test_data)

print('NUM_TRAIN:', NUM_TRAIN)
print('NUM_VAL: ', NUM_VAL)
print('NUM_TEST:', NUM_TEST)

NUM_TRAIN: 1440
NUM_VAL:  80
NUM_TEST: 80


In [11]:
def seed_worker(worker_id):
  worker_seed = torch.initial_seed() % 2**32
  np.random.seed(worker_seed)
  random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x7e18c86170b0>

In [12]:
# Create Mini-Batch
from torch.utils.data import DataLoader
mini_trains = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, worker_init_fn=seed_worker, generator=g)
mini_vals = DataLoader(val_data, batch_size=BATCH_SIZE, worker_init_fn=seed_worker, generator=g)
mini_test = DataLoader(test_data, batch_size=BATCH_SIZE)

print(mini_trains)

<torch.utils.data.dataloader.DataLoader object at 0x7e18c8682b30>


In [13]:
# Check data in mini_trains
iterable = iter(mini_trains)
x, y = next(iterable)

iterable = iter(mini_vals)
x, y = next(iterable)

In [14]:
# # input data
# x.shape
# y.shape

# Check data dimension
print(train_data[0][0].shape)
print(val_data[0][1])

torch.Size([3, 224, 224])
0


In [15]:
# Building model EfficientNet
import torch.nn as nn
from torchvision import models

efficientnet = models.efficientnet_b4(pretrained = True)
#print(model)
num_flatten = efficientnet.classifier[1].in_features # 獲取 EfficientNet 最後一層的輸入特徵數量
efficientnet.classifier[1] = nn.Linear(num_flatten, 2) # 替換為適應2元分類的新線性層（2個輸出）
model = efficientnet.to(device)



In [16]:
# Define loss function & optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-4 )

In [17]:
# Training procedure
def train(model, mini_trains, optimizer, device, NUM_EPOCHS):
  for epoch in range(NUM_EPOCHS):
    for count, (x, y) in enumerate(mini_trains):
        model.train() #Turn on training mode
        # Move data to device
        x = x.to(device)
        y = y.to(device)
        # ForwardProp
        scores = model(x)
        # Calculate loss
        loss = loss_function(scores, y)
        if count % PRINT_EVERY == 0:
            print(f'[Epoch {epoch+1}] Training Loss:', loss.item())
            validation(mini_vals, epoch, model, device)
        # Clear the previous gradients
        optimizer.zero_grad()
        # Get the gradients
        loss.backward()
        # Update the weights
        optimizer.step()
    print('-'*50)

In [18]:
# Validation Procedure
def validation(mini_vals, epoch, model, device):
  # Turn on val mode
  model.eval()
  with torch.no_grad():
    Val_Acc = 0
    for x, y in mini_vals:
      # move data to device
      x = x.to(device)
      y = y.to(device)
      # Forward Prop
      scores = model(x)
      # Calculate accuracy
      predictions = scores.max(1)[1] # scores.argmax(1)
      acc = predictions.eq(y).sum().item()
      Val_Acc += acc
    print(f'[Epoch {epoch+1}] Accuracy:', Val_Acc/NUM_VAL)

In [19]:
train(model, mini_trains, optimizer, device, NUM_EPOCHS)

[Epoch 1] Training Loss: 0.6715043783187866
[Epoch 1] Accuracy: 0.5
--------------------------------------------------
[Epoch 2] Training Loss: 0.6437995433807373
[Epoch 2] Accuracy: 0.6625
--------------------------------------------------
[Epoch 3] Training Loss: 0.4669240117073059
[Epoch 3] Accuracy: 0.6875
--------------------------------------------------
[Epoch 4] Training Loss: 0.4839194715023041
[Epoch 4] Accuracy: 0.75
--------------------------------------------------
[Epoch 5] Training Loss: 0.12260200083255768
[Epoch 5] Accuracy: 0.8125
--------------------------------------------------
[Epoch 6] Training Loss: 0.19490507245063782
[Epoch 6] Accuracy: 0.85
--------------------------------------------------
[Epoch 7] Training Loss: 0.07363544404506683
[Epoch 7] Accuracy: 0.875
--------------------------------------------------
[Epoch 8] Training Loss: 0.08867534250020981
[Epoch 8] Accuracy: 0.875
--------------------------------------------------
[Epoch 9] Training Loss: 0.04

In [20]:
# Test score procedure
def test(mini_tests, model, device):
  # Use val mode to test
  model.eval()
  with torch.no_grad():
    Val_Acc = 0
    for x, y in mini_tests:
      # move data to device
      x = x.to(device)
      y = y.to(device)
      # Last Forward Prop
      score_test = model(x)
      # Calculate Accuracy
      predictions = score_test.max(1)[1]
      acc = predictions.eq(y).sum().item()
      Val_Acc += acc
    print(f'[Final] Test Accuracy:', Val_Acc/NUM_TEST)

In [21]:
test(mini_test, model, device)

[Final] Test Accuracy: 0.8875


In [25]:
# save trained weights
torch.save(efficientnet.state_dict(), 'model_EfficientNetb4_weights.pth')

In [26]:
# Load model and trained weights

efficientnet = models.efficientnet_b4(pretrained=False)
num_flatten = efficientnet.classifier[1].in_features # 獲取 EfficientNet 最後一層的輸入特徵數量
efficientnet.classifier[1] = nn.Linear(num_flatten, 2) # 替換為適應2元分類的新線性層（2個輸出）

PATH = 'model_EfficientNetb4_weights.pth'
efficientnet.load_state_dict(torch.load(PATH))
model = efficientnet.to(device)


  efficientnet.load_state_dict(torch.load(PATH))


In [27]:
%ls

 checkpoint.pt                           module_VIiT_0908.ipynb
'Copy of ensemble.ipynb'                 [0m[01;34mnew_test[0m/
'Copy of module_ResNet50.ipynb'          resnet10_max+avgpool_1e4_100_2classes_0918.pth
 [01;34mDataset[0m/                                resnet18_avgpool_1e3_100.pth
 Densenet121.ipynb                       resnet18_maxpool_1e3_100_2classes.pth
 [01;34mDensenet121_weights_920[0m/                [01;34mresnet50_2classes_weights_0918[0m/
 Densenet121_weights_920.pth             resnet50_2classes_weights_0918.pth
 Densenet121_weights_earlystop_920.pth   [01;34mresnet50_2classes_weights_0920[0m/
'EfficientNet_b0 920.ipynb'              resnet50_2classes_weights_0920.pth
 [01;34mEfficientNetb0_weights_920[0m/             resnet50_finetuned_weights_0907_output.csv
 EfficientNetb0_weights_920.pth          resnet_avgpool_1e3_200.pth
 EfficientNet_b4.ipynb                   resnet_with_avgpool.ipynb
 ensemble_920.ipynb                      ResNeXt.ipynb


In [28]:
# Load test data
new_test_data = dset.ImageFolder('milestone', transform=transform_test)
NUM_NEW_TEST = len(new_test_data)
print('Number of new test:', NUM_NEW_TEST)
mini_tests = DataLoader(new_test_data, batch_size=1, worker_init_fn=seed_worker, generator=g)

Number of new test: 400


In [29]:
new_test_data[0][0].shape

torch.Size([3, 224, 224])

In [30]:
# Test score procedure
def milestone_test(mini_tests, model, device):
    # Use eval mode to test
    model.eval()
    #valid_classes = [0, 2]  # Classes Filter
    with torch.no_grad():
        acc_count = 0
        total_samples = 0
        for x, y in mini_tests:
            # move data to device
            x = x.to(device)
            y = y.to(device)

            # Last Forward Prop
            score_test = model(x)

            # Calculate predictions
            predictions = score_test.max(1)[1]

            # Calculate Accuracy
            acc = predictions.eq(y).sum().item()
            acc_count += acc

        print(f'[Final] Test Accuracy:', acc_count / NUM_NEW_TEST)

In [31]:
milestone_test(mini_tests, model, device)

[Final] Test Accuracy: 0.84
