In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
import zipfile
import os

# Path to the zip file (Adjust as per your Google Drive structure)
zip_path = '/content/drive/MyDrive/AML Mod 13 data/archive 5.49.00 PM.zip'

# Destination folder where the zip file will be extracted
extract_to = '/content/sample_data/'

# Unzipping the dataset
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)


In [3]:
# Question1

import os
import cv2
import numpy as np

IMGSIZE = (128, 128)
CNAMES = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

# Define paths for training and testing data
base_path = extract_to
train_path = os.path.join(base_path, 'seg_train/seg_train/')
test_path = os.path.join(base_path, 'seg_test/seg_test/')

X_tr, y_tr = [], []
X_ts, y_ts = [], []

# Load training data
for label in CNAMES:
    path = train_path + label
    for f in sorted([_ for _ in os.listdir(path) if _.lower().endswith('.jpg')]):
        img = cv2.imread(os.path.join(path, f))
        img_resized = cv2.resize(img, IMGSIZE)
        X_tr.append(img_resized)
        y_tr.append(CNAMES.index(label))

# Convert lists to numpy arrays
X_tr = np.array(X_tr)
y_tr = np.array(y_tr)

# Load test data
X_ts, y_ts = [], []
for label in CNAMES:
    path = test_path + label
    for f in sorted([_ for _ in os.listdir(path) if _.lower().endswith('.jpg')]):
        img = cv2.imread(os.path.join(path, f))
        img_resized = cv2.resize(img, IMGSIZE)
        X_ts.append(img_resized)
        y_ts.append(CNAMES.index(label))

# Convert lists to numpy arrays
X_ts = np.array(X_ts)
y_ts = np.array(y_ts)


In [4]:
# Question 1

import torch
from torch.utils.data import TensorDataset, DataLoader
import torchvision.transforms as transforms

# Convert data to PyTorch tensors and create datasets
tensor_x_tr = torch.Tensor(X_tr) / 255.0  # Scale images to [0, 1]
tensor_x_tr = tensor_x_tr.permute(0, 3, 1, 2)  # Reshape to [N, C, H, W]
tensor_y_tr = torch.Tensor(y_tr).long()

tensor_x_ts = torch.Tensor(X_ts) / 255.0
tensor_x_ts = tensor_x_ts.permute(0, 3, 1, 2)
tensor_y_ts = torch.Tensor(y_ts).long()

train_dataset = TensorDataset(tensor_x_ts, tensor_y_ts) # training dataset actually has the test data
test_dataset = TensorDataset(tensor_x_tr, tensor_y_tr)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [5]:
# Question 1

import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 32 * 32, 512)
        self.fc2 = nn.Linear(512, 6)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 32 * 32)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [6]:
# Question 1

import torch.optim as optim

model = CNN()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training the model
def train_model(num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch + 1} completed, Loss: {running_loss / len(train_loader)}')

train_model()


Epoch 1 completed, Loss: 1.6862241657490427
Epoch 2 completed, Loss: 1.0510969301487536
Epoch 3 completed, Loss: 0.9075314751330842
Epoch 4 completed, Loss: 0.7939764432450558
Epoch 5 completed, Loss: 0.6213615752281026
Epoch 6 completed, Loss: 0.3801120655967834
Epoch 7 completed, Loss: 0.18967113366469424
Epoch 8 completed, Loss: 0.0901393237067981
Epoch 9 completed, Loss: 0.060291363491102104
Epoch 10 completed, Loss: 0.024450086859708772


In [7]:
# Question 1

def evaluate_model(data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in data_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Reclassification Performance: {accuracy}%')

evaluate_model(train_loader)


Reclassification Performance: 99.9%


In [8]:
# Question 2

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNWithDropout(nn.Module):
    def __init__(self):
        super(CNNWithDropout, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.25)  # Adding dropout
        self.fc1 = nn.Linear(64 * 32 * 32, 512)
        self.dropout2 = nn.Dropout(0.5)  # Adding dropout
        self.fc2 = nn.Linear(512, 6)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout1(x)
        x = x.view(-1, 64 * 32 * 32)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return x


In [9]:
# Question 2

# Load your model
model = CNNWithDropout()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

# Training the model
def train_model():
    model.train()
    for epoch in range(10):  # Continue with 10 epochs
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch + 1} completed, Loss: {running_loss / len(train_loader)}')
    print('Training complete')

train_model()


Epoch 1 completed, Loss: 1.3889086791809568
Epoch 2 completed, Loss: 1.0200879307503397
Epoch 3 completed, Loss: 0.8458650999880851
Epoch 4 completed, Loss: 0.7090086718188956
Epoch 5 completed, Loss: 0.5668134652870767
Epoch 6 completed, Loss: 0.48024125872774326
Epoch 7 completed, Loss: 0.3306287279788484
Epoch 8 completed, Loss: 0.2682206523703768
Epoch 9 completed, Loss: 0.22305361783884942
Epoch 10 completed, Loss: 0.14974628698001516
Training complete


In [10]:
# Question 2

def evaluate_model(data_loader):
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for data in data_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

reclassification_performance = evaluate_model(train_loader)
print(f'Reclassification Performance: {reclassification_performance}%')


Reclassification Performance: 98.63333333333334%


**Question 2**

**Why Decreased Performance Standard Deviation Indicates Robustness:**
A model with a lower standard deviation in performance across different runs or datasets is considered more robust because it shows consistent results regardless of slight variations in the input data. This consistency is crucial in practical applications where input data can vary in quality or specifics. Regularization techniques like dropout help achieve lower variance in performance by preventing the model from overfitting to noise or specific details in the training set, thereby improving the model's ability to generalize to new data.


In [11]:
# Question 3

class CNNWithBatchNorm(nn.Module):
    def __init__(self):
        super(CNNWithBatchNorm, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 32 * 32, 512)
        self.bn3 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 6)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.dropout1(x)
        x = x.view(-1, 64 * 32 * 32)
        x = F.relu(self.bn3(self.fc1(x)))
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

net_with_bn = CNNWithBatchNorm()


In [12]:
# Question 3

best_val_loss = float('inf')
patience = 2
trigger_times = 0

for epoch in range(20):  # increase epochs since we're using early stopping
    net_with_bn.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net_with_bn(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Validation loss
    net_with_bn.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = net_with_bn(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        trigger_times = 0
    else:
        trigger_times += 1
        if trigger_times >= patience:
            print(f"Early stopping at epoch {epoch}")
            break

    print(f'Epoch {epoch + 1} loss: {running_loss / len(train_loader)}, val_loss: {val_loss / len(test_loader)}')


Epoch 1 loss: 1.9306779947686703, val_loss: 1.8270292635116057
Epoch 2 loss: 1.929624136458052, val_loss: 1.8260516229686
Epoch 3 loss: 1.9288318829333528, val_loss: 1.826689156149949
Epoch 4 loss: 1.9162208577419848, val_loss: 1.8246269557361994
Epoch 5 loss: 1.920689142764883, val_loss: 1.8264216578359755
Early stopping at epoch 5


In [13]:
def evaluate_model(data_loader):
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for data in data_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

reclassification_performance = evaluate_model(train_loader)
print(f'Reclassification Performance: {reclassification_performance}%')


Reclassification Performance: 98.63333333333334%
