In [10]:
import torch
import torch.nn as nn
import numpy as np 
import torch 
import torch.nn as nn
import os 
import glob
from PIL import Image
import torch.utils.data as data
from torchvision import models, transforms as T
from tqdm import tqdm_notebook
from torchvision import transforms, datasets
import torch.optim as optim
import time

### Implementation of AlexNet
# ![title](AlexNet-architecture-Includes-5-convolutional-layers-and-3-fullyconnected-layers.png)

In [74]:
class AlexNet(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1=nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=0)
        self.relu=nn.ReLU(inplace=True)
        self.conv2=nn.Conv2d(in_channels=96,out_channels=256,kernel_size=5,padding=2)
        self.pool=nn.MaxPool2d(kernel_size=3,stride=2)
        self.conv3=nn.Conv2d(in_channels=256,out_channels=384,kernel_size=3,padding=1)
        ### another pooling layer 27--->13
        self.conv4=nn.Conv2d(in_channels=384,out_channels=384,kernel_size=3,padding=1)
        self.conv5=nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,padding=1)
        #### another pooling before the maxpooling 13 --> 6
        self.do1 = nn.Dropout(0.5)   
        self.fc1=nn.Linear(256*6*6,4096)
        self.do2 = nn.Dropout(0.5)
        self.fc2=nn.Linear(4096,4096)
        self.do3 = nn.Dropout(0.5)
        ### For IntelImage https://www.kaggle.com/datasets/puneet6060/intel-image-classification  -- so final layer has 6
        self.fc3=nn.Linear(4096,6)
    
    def forward(self,x):
        x=self.relu(self.conv1(x))
        x=self.relu(self.pool(self.conv2(x)))
        x=self.relu(self.pool(self.conv3(x)))
        x=self.relu(self.conv4(x))
        x=self.relu(self.pool(self.conv5(x)))
        x = torch.flatten(x, 1)
        x=self.relu(self.fc1(x))
        x=self.relu(self.fc2(x))
        x=self.fc3(x)
        return x
        
        
        

In [44]:
model=AlexNet()

In [45]:
model

AlexNet(
  (conv1): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
  (relu): ReLU(inplace=True)
  (conv2): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (do1): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=4096, bias=True)
  (do2): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=4096, out_features=4096, bias=True)
  (do3): Dropout(p=0.5, inplace=False)
  (fc3): Linear(in_features=4096, out_features=6, bias=True)
)

In [46]:
train_data_dir = 'D:\\fastai\Beginning\pytorch\Chapters\Classification\seg_train\seg_train'
test_data_dir = 'D:\\fastai\Beginning\pytorch\Chapters\Classification\seg_test\seg_test'
batch_size=32


In [47]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [69]:

transform = transforms.Compose(
    [transforms.Resize([227,227]),
     transforms.ToTensor(),
    transforms.Normalize(mean=[0.4489, 0.4713, 0.4594], std=[0.2436, 0.2379, 0.2648], inplace=True)
    ])

train_dataset = datasets.ImageFolder(train_data_dir, 
                               transform=transform)

train_dataloader = data.DataLoader(train_dataset,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         drop_last=True)
  
test_dataset = datasets.ImageFolder(test_data_dir, 
                               transform=transform)

test_dataloader = data.DataLoader(test_dataset, 
                                              batch_size=batch_size,
                                              shuffle=False,
                                              drop_last=True)

In [70]:
def batch_mean_and_sd(loader):
    
    cnt = 0
    fst_moment = torch.empty(3)
    snd_moment = torch.empty(3)

    for images, _ in loader:
        b, c, h, w = images.shape
        nb_pixels = b * h * w
        sum_ = torch.sum(images, dim=[0, 2, 3])
        sum_of_square = torch.sum(images ** 2,
                                  dim=[0, 2, 3])
        fst_moment = (cnt * fst_moment + sum_) / (
                      cnt + nb_pixels)
        snd_moment = (cnt * snd_moment + sum_of_square) / (
                            cnt + nb_pixels)
        cnt += nb_pixels

    mean, std = fst_moment, torch.sqrt(
      snd_moment - fst_moment ** 2)        
    return mean,std

In [50]:
mean, std = batch_mean_and_sd(train_dataloader)

In [51]:
mean,std

(tensor([-2.6859e-05,  1.2928e-05,  2.3778e-04]),
 tensor([1.0000, 0.9999, 1.0000]))

In [87]:
model = AlexNet()

if torch.cuda.is_available():
    model.cuda()
        
epochs = 20
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
train_losses = []
test_losses =[]
test_accuracy = []

In [89]:
n_epochs = 10
for epoch in range(n_epochs):
    model.train()
    training_loss = 0
    for data, target in train_dataloader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        training_loss += loss.item()

    model.eval()
    valid_loss = 0
    with torch.no_grad():
        for data, target in test_dataloader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target) 
            valid_loss += loss.item()

    training_loss /= len(train_dataloader)
    valid_loss /= len(test_dataloader)
    print(f'Epoch: {epoch+1}/{n_epochs}.. Training loss: {training_loss}.. Validation Loss: {valid_loss}')

Epoch: 1/10.. Training loss: 1.6710090322853768.. Validation Loss: 1.3411654060886753
Epoch: 2/10.. Training loss: 1.1236276509554963.. Validation Loss: 1.0337562913535743
Epoch: 3/10.. Training loss: 1.0181729496886196.. Validation Loss: 0.9342211725250367
Epoch: 4/10.. Training loss: 0.9456320194076729.. Validation Loss: 0.9137428752196732
Epoch: 5/10.. Training loss: 0.8870949682579737.. Validation Loss: 0.8611683488212606
Epoch: 6/10.. Training loss: 0.8470055818829907.. Validation Loss: 0.9544979397968579
Epoch: 7/10.. Training loss: 0.7985458267878179.. Validation Loss: 0.7637127218067005
Epoch: 8/10.. Training loss: 0.7299161916726256.. Validation Loss: 0.7378473706463332
Epoch: 9/10.. Training loss: 0.6800540484249864.. Validation Loss: 0.7914672280030866
Epoch: 10/10.. Training loss: 0.6363267878147021.. Validation Loss: 0.7282481727500757


In [90]:
def predict(model, batch_size=32, device=device, dataloader=test_dataloader):
    classes = ('buildings', 'forest', 'glacier', 'mountain',
           'sea', 'street')
    
    with torch.no_grad():
        n_correct = 0
        n_samples = 0
        n_class_correct = [0 for i in range(6)]
        n_class_samples = [0 for i in range(6)]
        for images, labels in test_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            n_samples += labels.size(0)
            n_correct += (predicted == labels).sum().item()
        
            for i in range(batch_size):
                label = labels[i]
                pred = predicted[i]
                if (label == pred):
                    n_class_correct[label] += 1
                n_class_samples[label] += 1

        acc = 100.0 * n_correct / n_samples
        print(f'Accuracy of the network: {acc} %')

        for i in range(6):
            acc = 100.0 * n_class_correct[i] / n_class_samples[i]
            print(f'Accuracy of {classes[i]}: {acc} %')

In [91]:
model.eval()
predict(model)

Accuracy of the network: 73.95833333333333 %
Accuracy of buildings: 56.52173913043478 %
Accuracy of forest: 92.61603375527426 %
Accuracy of glacier: 89.5117540687161 %
Accuracy of mountain: 68.19047619047619 %
Accuracy of sea: 53.72549019607843 %
Accuracy of street: 81.34171907756813 %
