In [1]:
import torch
from torchvision import datasets, transforms
# from torchvision.transforms import v2
from torch.utils.data import DataLoader, Dataset, Subset, random_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as  np
from torchvision import utils
import matplotlib.pyplot as plt
import os

In [2]:
# !unzip "Train_dataset_small.zip" -d "dataset"

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [4]:
path='dataset/Train_dataset_new'
n_channels = 3
n_classes = 10
batch_size = 64
learning_rate = 0.001
transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

In [5]:
dataset = datasets.ImageFolder(root=path, transform=transform)

# Split the dataset into train, test and validation.
total_dataset_len = len(dataset)
train_dataset_len = int(0.7 * total_dataset_len)
val_dataset_len = int(0.15 * total_dataset_len)
test_dataset_len = total_dataset_len - train_dataset_len - val_dataset_len
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_dataset_len, val_dataset_len, test_dataset_len])

train_loader = DataLoader(dataset=train_dataset, batch_size=64, num_workers=2, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, num_workers=2, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, num_workers=2, shuffle=False)
print(len(train_loader))

532


In [42]:
dataset.classes

['airport_terminal',
 'auditorium',
 'bedroom',
 'bookstore',
 'bus_station-indoor',
 'clothing_store',
 'computer_room',
 'food_court',
 'jewelry_shop',
 'railroad_track']

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

class AlaxNet_custom(nn.Module):
    def __init__(self, in_channels, out_classes):
        super(AlaxNet_custom, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=11, stride=4, padding=2)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=192, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm2d(192)
        self.conv3 = nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(384)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(256)
        
        self.relu = nn.ReLU(inplace=True)
        self.maxPool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=9216, out_features=4096)
        self.fc2 = nn.Linear(in_features=4096, out_features=4096)
        self.fc3 = nn.Linear(in_features=4096, out_features=out_classes)

    def forward(self, x):
        img = self.bn1(self.conv1(x))
        img = self.relu(img)
        img = self.maxPool(img)
        # 2nd
        img = self.bn2(self.conv2(img))
        img = self.relu(img)
        img = self.maxPool(img)
        # 3rd
        img = self.bn3(self.conv3(img))
        img = self.relu(img)
        # 4th
        img = self.bn4(self.conv4(img))
        img = self.relu(img)
        # 5th
        img = self.bn5(self.conv5(img))
        img = self.relu(img)
        img = self.maxPool(img)
        img = torch.flatten(img, 1)
        # 6th
        img = self.dropout(img)
        img = self.fc1(img)
        img = self.relu(img)
        # 7th
        img = self.dropout(img)
        img = self.fc2(img)
        img = self.relu(img)
        # output
        img = self.fc3(img)
        return img



# Create an instance of the model
model = AlaxNet_custom(3, 10)
# model.to(device)

# Print the model architecture
print(model)

# Reference: https://blog.paperspace.com/alexnet-pytorch/
# Reference: https://github.com/dansuh17/alexnet-pytorch/blob/d0c1b1c52296ffcbecfbf5b17e1d1685b4ca6744/model.py#L40

AlaxNet_custom(
  (conv1): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxPool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.

In [7]:
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, n_epochs):
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []

    for epoch in range(n_epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

        train_loss = train_loss / len(train_loader)
        train_acc = (train_correct / train_total) * 100
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        if scheduler:
            scheduler.step()

        # Validation
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss = val_loss / len(val_loader)
        val_acc = (val_correct / val_total) * 100
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        print(f'Epoch {epoch+1}/{n_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

    return train_losses, train_accs, val_losses, val_accs

In [8]:
criterion = nn.CrossEntropyLoss().cuda()

optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

In [12]:
train_losses, train_accs, val_losses, val_accs = train_model(model, train_loader, val_loader, criterion, optimizer,scheduler, 20)

Epoch 1/20:
Train Loss: 0.7714, Train Acc: 74.2103
Val Loss: 0.8394, Val Acc: 72.2161
Epoch 2/20:
Train Loss: 0.7693, Train Acc: 74.3337
Val Loss: 0.8391, Val Acc: 72.2436
Epoch 3/20:
Train Loss: 0.7689, Train Acc: 74.4072
Val Loss: 0.8386, Val Acc: 72.1887
Epoch 4/20:
Train Loss: 0.7640, Train Acc: 74.6452
Val Loss: 0.8253, Val Acc: 72.7098
Epoch 5/20:
Train Loss: 0.7576, Train Acc: 74.7216
Val Loss: 0.8334, Val Acc: 72.0653
Epoch 6/20:
Train Loss: 0.7596, Train Acc: 74.6511
Val Loss: 0.8354, Val Acc: 72.5590
Epoch 7/20:
Train Loss: 0.7546, Train Acc: 74.9126
Val Loss: 0.8259, Val Acc: 72.5041
Epoch 8/20:
Train Loss: 0.7491, Train Acc: 74.8215
Val Loss: 0.8235, Val Acc: 72.6138
Epoch 9/20:
Train Loss: 0.7462, Train Acc: 74.9567
Val Loss: 0.8294, Val Acc: 72.3807
Epoch 10/20:
Train Loss: 0.7384, Train Acc: 75.3181
Val Loss: 0.8096, Val Acc: 74.0126
Epoch 11/20:
Train Loss: 0.7357, Train Acc: 75.3034
Val Loss: 0.8081, Val Acc: 73.6972
Epoch 12/20:
Train Loss: 0.7451, Train Acc: 75.0213


In [16]:
# torch.save(model.state_dict(), 'alaxnet_final_project_60.pth')

In [9]:
model.load_state_dict(torch.load('alaxnet_final_project_60.pth'))
model = model.to(device)

In [20]:
def eval_model(model, data_loader, type='validation'):
  correct=0
  total=0
  running_loss = 0.0
  with torch.no_grad():
    model.eval()
    for inputs, labels in data_loader:
      inputs, labels = inputs.to(device), labels.to(device)
      pred = model(inputs)
      total += labels.size(0)
      loss = criterion(pred, labels.squeeze().long())
      running_loss += loss.item()
      _, predicted = torch.max(pred.data, 1)
      correct += predicted.eq(labels.squeeze().long()).sum().item()
    # print(f'Accuracy of {type},  {100 * correct // total} %')
  test_val_loss = running_loss / len(data_loader)
  test_val_accuracy = 100 * correct / total
  return test_val_loss, test_val_accuracy

In [21]:
test_loss, test_accuracy = eval_model(model, test_loader, type='testing')

In [22]:
print(test_loss, test_accuracy)

0.7063335373736265 76.32300520976145


In [27]:
import cv2

image = cv2.imread('00000012.jpg')
image_size = 224  # Assuming this matches your training image size
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

resized = cv2.resize(image, (image_size, image_size))
normalized = (resized / 255.0 - mean)


In [36]:
transformed_image = torch.from_numpy(normalized.astype(np.float32)).permute(2, 0, 1)
transformed_image = transformed_image.unsqueeze(0) 
transformed_image = transformed_image.to(device)

In [43]:
model.eval()
with torch.no_grad():  # Disable gradient calculation for efficiency
    outputs = model(transformed_image)
    _, predicted = torch.max(outputs.data, 1)
    class_names = ['airport_terminal',
                     'auditorium',
                     'bedroom',
                     'bookstore',
                     'bus_station-indoor',
                     'clothing_store',
                     'computer_room',
                     'food_court',
                     'jewelry_shop',
                     'railroad_track']
    predicted_class = class_names[predicted.item()]
    print(f'Predicted class: {predicted_class}')


Predicted class: airport_terminal


In [39]:
predicted.item()

0