<a href="https://colab.research.google.com/github/thaianh1210/DeepLearning-exercise/blob/main/Untitled50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import os
from zipfile import ZipFile

from PIL import Image
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
root_dir = '/content/drive/MyDrive/img_cls_scenes_classification (1)/scenes_classification'
train_dir = os.path.join(root_dir, 'train')
test_dir = os.path.join(root_dir, 'val')
classes = {
    label_idx : class_name \
    for label_idx, class_name in enumerate(sorted(os.listdir(train_dir)))
}
print(classes)

X_train, X_test = [], []
y_train, y_test = [], []
for dataset_path in [train_dir, test_dir]:
  for label_idx, class_name in classes.items():
    class_dir = os.path.join(dataset_path, class_name)
    for img_filename in os.listdir(class_dir):
      img_path = os.path.join(class_dir, img_filename)

      if 'train' in dataset_path:
        X_train.append(img_path)
        y_train.append(label_idx)
      else:
        X_test.append(img_path)
        y_test.append(label_idx)

print(f'X_train: {len(X_train)}')
print(f'X_test: {len(X_test)}')
print(f'y_train: {len(y_train)}')
print(f'y_test: {len(y_test)}')



{0: 'buildings', 1: 'forest', 2: 'glacier', 3: 'mountain', 4: 'sea', 5: 'street'}
X_train: 4468
X_test: 3000
y_train: 4468
y_test: 3000


In [4]:
#Train test split
seed = 0
val_size = 0.3
is_shuffle = True
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = val_size, random_state = seed, shuffle = is_shuffle)


In [5]:
class SceneDataset(Dataset):
  def __init__(self, X, y, transform = None):
    self.img_paths = X
    self.labels = y
    self.transform = transform

  def __len__(self):
    return len(self.img_paths)

  def __getitem__(self, idx):
    img_path = self.img_paths[idx]
    img = Image.open(img_path).convert('RGB')

    if self.transform:
      img = self.transform(img)
    return img, self.labels[idx]

def transform(img, img_size = (224, 224)):
  img = img.resize(img_size)
  img = np.array(img)[..., :3]
  img = torch.tensor(img).permute(2, 0, 1).float()
  return img / 255.0

In [6]:
train_dataset = SceneDataset(X_train, y_train, transform = transform)
val_dataset = SceneDataset(X_val, y_val, transform = transform)
test_dataset = SceneDataset(X_test, y_test, transform = transform)

train_batch_size = 64
test_batch_size = 8
train_dataloader = DataLoader(train_dataset, batch_size = train_batch_size, shuffle = is_shuffle)
test_dataloader = DataLoader(test_dataset, batch_size = test_batch_size, shuffle = False)
val_dataloader = DataLoader(val_dataset, batch_size = test_batch_size, shuffle = is_shuffle)

In [7]:
class BottleNeckBlock(nn.Module):
  def __init__(self, in_channels, growth_rate):
    super(BottleNeckBlock, self).__init__()
    self.bn1 = nn.BatchNorm2d(in_channels)
    self.conv1 = nn.Conv2d(in_channels, 4 * growth_rate, kernel_size = 1, bias = False)
    self.bn2 = nn.BatchNorm2d(4* growth_rate)
    self.conv2 = nn.Conv2d(4* growth_rate, growth_rate, kernel_size = 3, padding = 1, bias = False)
    self.relu = nn.ReLU()

  def forward(self, x):
    res = x.clone().detach()
    x = self.bn1(x)
    x = self.relu(x)
    x = self.conv1(x)
    x = self.bn2(x)
    x = self.relu(x)
    x = self.conv2(x)
    x = torch.cat([res, x], 1)
    return x

class DenseBlock(nn.Module):
  def __init__(self, num_layers, in_channels, growth_rate):
    super(DenseBlock, self).__init__()
    layers = []
    for i in range(num_layers):
      layers.append(BottleNeckBlock(in_channels + i* growth_rate, growth_rate))
    self.block = nn.Sequential(*layers)

  def forward(self, x):
    return self.block(x)

In [8]:
class DenseNet(nn.Module):
  def __init__(self, num_blocks, growth_rate, num_classes):
    super(DenseNet, self).__init__()
    self.conv1 = nn.Conv2d(3, 2 * growth_rate, kernel_size = 7, stride = 2, padding = 3, bias = True)
    self.bn1 = nn.BatchNorm2d(2* growth_rate)
    self.pool1 = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)

    self.dense_blocks = nn.ModuleList()
    in_channels = 2* growth_rate
    for i, num_layers in enumerate(num_blocks):
      self.dense_blocks.append(DenseBlock(num_layers, in_channels, growth_rate))
      in_channels += num_layers * growth_rate
      if i != len(num_blocks) - 1:
        out_channels = in_channels // 2
        self.dense_blocks.append(nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.Conv2d(in_channels, out_channels, kernel_size = 1, bias = False),
            nn.AvgPool2d(kernel_size = 2, stride = 2)
        )
        )
        in_channels = out_channels
    self.bn2 = nn.BatchNorm2d(in_channels)
    self.relu = nn.ReLU()
    self.pool2 = nn.AvgPool2d(kernel_size = 7)
    self.fc = nn.Linear(in_channels, num_classes)

  def forward(self, x):
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.pool1(x)
    for block in self.dense_blocks:
      x = block(x)
    x = self.bn2(x)
    x = self.relu(x)
    x = self.pool2(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return x


In [9]:
n_classes = len(list(classes.keys()))
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = DenseNet([6, 12, 24, 16], growth_rate = 32, num_classes = n_classes).to(device)

In [16]:
def evaluate(model, test_dataloader, criterion, device):
  model.eval()
  correct = 0
  total = 0
  losses = []
  with torch.no_grad():
    for images, labels in test_dataloader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      loss = criterion(outputs, labels)
      losses.append(loss.item())
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
  val_acc = correct / total
  val_loss = sum(losses) / len(losses)
  return val_acc, val_loss



In [11]:
def fit(model, train_dataloader, criterion, optimizer, num_epochs, device):
  train_losses = []
  train_accuracies = []
  for epoch in range(num_epochs):
    batch_train_losses = []
    correct = 0
    total = 0
    model.train()
    for idx, (inputs, labels) in enumerate(train_dataloader):
      inputs, labels = inputs.to(device), labels.to(device)
      outputs = model(inputs)
      loss = criterion(outputs, labels)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      batch_train_losses.append(loss.item())
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
    acc = correct / total
    train_loss = sum(batch_train_losses) / len(batch_train_losses)
    train_losses.append(train_loss)
    train_accuracies.append(acc)
    print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {acc:.4f}')
  return train_losses, train_accuracies


In [13]:
lr = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = lr)
num_epochs = 50
train_losses, train_accuracies = fit(model, train_dataloader, criterion, optimizer, num_epochs, device)

Epoch 1/50, Train Loss: 0.4785, Train Acc: 0.8438
Epoch 2/50, Train Loss: 0.4554, Train Acc: 0.8509
Epoch 3/50, Train Loss: 0.4421, Train Acc: 0.8536
Epoch 4/50, Train Loss: 0.4269, Train Acc: 0.8597
Epoch 5/50, Train Loss: 0.4158, Train Acc: 0.8583
Epoch 6/50, Train Loss: 0.4061, Train Acc: 0.8597
Epoch 7/50, Train Loss: 0.3981, Train Acc: 0.8650
Epoch 8/50, Train Loss: 0.3920, Train Acc: 0.8583
Epoch 9/50, Train Loss: 0.3828, Train Acc: 0.8655
Epoch 10/50, Train Loss: 0.3749, Train Acc: 0.8666
Epoch 11/50, Train Loss: 0.3723, Train Acc: 0.8711
Epoch 12/50, Train Loss: 0.3685, Train Acc: 0.8695
Epoch 13/50, Train Loss: 0.3601, Train Acc: 0.8740
Epoch 14/50, Train Loss: 0.3579, Train Acc: 0.8693
Epoch 15/50, Train Loss: 0.3492, Train Acc: 0.8744
Epoch 16/50, Train Loss: 0.3458, Train Acc: 0.8767
Epoch 17/50, Train Loss: 0.3402, Train Acc: 0.8798
Epoch 18/50, Train Loss: 0.3365, Train Acc: 0.8778
Epoch 19/50, Train Loss: 0.3340, Train Acc: 0.8787
Epoch 20/50, Train Loss: 0.3331, Train A

In [17]:
val_accuracies, val_losses = evaluate(model, val_dataloader, criterion, device)


KeyboardInterrupt: 