In [6]:
!unzip "/content/img_cls_weather_dataset.zip" -d "/content/"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/weather-dataset/dataset/frost/3922.jpg  
  inflating: /content/weather-dataset/dataset/frost/3923.jpg  
  inflating: /content/weather-dataset/dataset/frost/3924.jpg  
  inflating: /content/weather-dataset/dataset/frost/3925.jpg  
  inflating: /content/weather-dataset/dataset/frost/3926.jpg  
  inflating: /content/weather-dataset/dataset/frost/3927.jpg  
  inflating: /content/weather-dataset/dataset/frost/3928.jpg  
  inflating: /content/weather-dataset/dataset/frost/3929.jpg  
  inflating: /content/weather-dataset/dataset/frost/3930.jpg  
  inflating: /content/weather-dataset/dataset/frost/3931.jpg  
  inflating: /content/weather-dataset/dataset/frost/3932.jpg  
  inflating: /content/weather-dataset/dataset/frost/3933.jpg  
  inflating: /content/weather-dataset/dataset/frost/3934.jpg  
  inflating: /content/weather-dataset/dataset/frost/3935.jpg  
  inflating: /content/weather-dataset/dataset/frost/3

In [7]:
import torch
import torch.nn as nn
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [8]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed = 59
set_seed(seed)

In [10]:
root_dir = '/content/weather-dataset/dataset'
img_paths = []
labels = []
classes = {
    label_idx: class_name \
        for label_idx, class_name in enumerate(
                sorted(os.listdir(root_dir))
            )
}

for label_idx, class_name in classes.items():
    class_dir = os.path.join(root_dir, class_name)
    for img_filename in os.listdir(class_dir):
        img_path = os.path.join(class_dir, img_filename)
        img_paths.append(img_path)
        labels.append(label_idx)

In [11]:
val_size = 0.2
test_size = 0.125
is_shuffle = True

X_train, X_val, y_train, y_val = train_test_split(
    img_paths, labels,
    test_size=val_size,
    shuffle=is_shuffle
)

X_val, X_test, y_val, y_test = train_test_split(
    X_val, y_val,
    test_size=test_size,
    shuffle=is_shuffle
)

In [12]:
class WeatherDataset(Dataset):
    def __init__(
        self,
        X, y,
        transform=None
    ):
        self.img_paths = X
        self.labels = y
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, self.labels[idx]

In [13]:
def transform(img, img_size=(224,224)):
    img = img.resize(img_size)
    img = np.array(img)[..., :3]
    img = torch.tensor(img).permute(2, 0, 1).float()
    normalized_img = img / 255.0

    return normalized_img

In [14]:
train_dataset = WeatherDataset(
    X_train, y_train,
    transform=transform
)

val_dataset = WeatherDataset(
    X_val, y_val,
    transform=transform
)

test_dataset = WeatherDataset(
    X_test, y_test,
    transform=transform
)

In [15]:
train_batch_size = 512
test_batch_size = 8

train_loader = DataLoader(
    train_dataset,
    batch_size=train_batch_size,
    shuffle=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=test_batch_size,
    shuffle=False
)

test_loader = DataLoader(
    test_dataset,
    batch_size=test_batch_size,
    shuffle=False
)

In [16]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1
        )
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1
        )
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.downsample = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )
        self.relu = nn.ReLU()


    def forward(self, x):
        shortcut = x.clone()
        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.batch_norm2(x)
        x += self.downsample(shortcut)
        x = self.relu(x)

        return x

In [17]:
class ResNet(nn.Module):
    def __init__(self, residual_block, n_blocks_lst, n_classes):
        super (ResNet, self ).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = self.create_layer(residual_block, 64, 64, n_blocks_lst[0], 1)
        self.conv3 = self.create_layer(residual_block, 64, 128, n_blocks_lst[1], 2)
        self.conv4 = self.create_layer(residual_block, 128, 256, n_blocks_lst[2], 2)
        self.conv5 = self.create_layer(residual_block, 256, 512, n_blocks_lst[3], 2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(512, n_classes)

    def create_layer(self, residual_block, in_channels, out_channels, n_blocks, stride):
        blocks = []
        first_block = residual_block(in_channels, out_channels, stride)
        blocks.append(first_block)

        for idx in range(1, n_blocks):
            block = residual_block(out_channels, out_channels, stride)
            blocks.append(block)

        block_sequential = nn.Sequential(*blocks)
        return block_sequential

    def forward(self, x):
        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = self.maxpool(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.fc1(x)

        return x

In [18]:
n_classes = len(list(classes.keys()))
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = ResNet(
    residual_block=ResidualBlock,
    n_blocks_lst=[2, 2, 2, 2],
    n_classes=n_classes
).to(device)

In [19]:
def evaluate(model, dataloader, criterion, device):
    model.eval()
    correct, total = 0, 0
    losses = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            losses.append(loss.item())

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    acc = correct / total
    loss = sum(losses) / len(losses)
    return loss, acc


In [20]:
def fit(
    model, train_dataloader, val_dataloader, criterion, optimizer, device, epochs
):
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        batch_train_lossses = []

        model.train()
        for idx, (inputs, labels) in enumerate(train_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_train_lossses.append(loss.item())

        train_loss = sum(batch_train_lossses) / len(batch_train_lossses)

        train_losses.append(train_loss)

        val_loss, val_acc = evaluate(model, val_dataloader, criterion, device)

        val_losses.append(val_loss)

        print(f'EPOCH {epoch + 1}:\tTrain loss: {train_loss:.4f}\tVal loss: {val_loss:.4f}')

    return train_losses, val_losses

In [21]:
lr=1e-2
epochs=25

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

train_losses, val_losses = fit(
    model, train_loader, val_loader, criterion, optimizer, device, epochs
)

EPOCH 1:	Train loss: 1.8591	Val loss: 2.3834
EPOCH 2:	Train loss: 1.2584	Val loss: 2.3625
EPOCH 3:	Train loss: 0.9795	Val loss: 2.1546
EPOCH 4:	Train loss: 0.7819	Val loss: 1.5431
EPOCH 5:	Train loss: 0.6314	Val loss: 1.1446
EPOCH 6:	Train loss: 0.4854	Val loss: 1.1338
EPOCH 7:	Train loss: 0.3843	Val loss: 1.0264
EPOCH 8:	Train loss: 0.2659	Val loss: 1.0297
EPOCH 9:	Train loss: 0.1929	Val loss: 0.9533
EPOCH 10:	Train loss: 0.1334	Val loss: 0.9361
EPOCH 11:	Train loss: 0.1044	Val loss: 0.9746
EPOCH 12:	Train loss: 0.1037	Val loss: 0.9213
EPOCH 13:	Train loss: 0.0587	Val loss: 0.9151
EPOCH 14:	Train loss: 0.0502	Val loss: 0.9633
EPOCH 15:	Train loss: 0.0417	Val loss: 0.9243
EPOCH 16:	Train loss: 0.0338	Val loss: 0.9196
EPOCH 17:	Train loss: 0.0282	Val loss: 0.9175
EPOCH 18:	Train loss: 0.0261	Val loss: 0.9356
EPOCH 19:	Train loss: 0.0371	Val loss: 0.9314
EPOCH 20:	Train loss: 0.0200	Val loss: 0.9307
EPOCH 21:	Train loss: 0.0184	Val loss: 0.9247
EPOCH 22:	Train loss: 0.0174	Val loss: 0.93

In [22]:
val_loss, val_acc = evaluate(model, val_loader, criterion, device)

test_loss, test_acc = evaluate(model, test_loader, criterion, device)

print(f'Val loss: {val_loss:.4f}\tVal acc: {val_acc:.4f}')
print(f'Test loss: {test_loss:.4f}\tTest acc: {test_acc:.4f}')

Val loss: 0.9423	Val acc: 0.7119
Test loss: 0.7487	Test acc: 0.7326
