# **Classification of the "Intel Image Classification" Dataset**

We start by importing the required libraries.

In [None]:
from pathlib import Path
import matplotlib.pyplot as plt
from time import perf_counter

from random import randint as rand
import numpy as np

import torch
from torch import nn
import torch.nn.functional as F
from torchvision.transforms import v2
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
# Root of the training split; every sub-directory's name is used as the label of the
# images it contains.
path = Path("./Data/seg_train/seg_train")

# Build the resize transform once instead of re-creating it for every image.
resize = v2.Resize((150, 150))

img = []
targets = []

# Path.iterdir() yields entries in arbitrary order; sorting keeps the sample order
# reproducible between runs and machines.
for folder in sorted(path.iterdir()):
    if not folder.is_dir():
        # A stray file next to the class folders would make folder.iterdir() raise.
        continue
    for file in sorted(folder.iterdir()):
        if not file.is_file():
            continue
        img.append(resize(read_image(str(file))))
        targets.append(folder.name)

In [None]:
# Join the list of per-image tensors into one tensor along a new leading axis.
img = torch.stack(img, dim=0)

In [None]:
# np.unique returns the sorted distinct class names and, with return_inverse=True,
# each sample's index into that sorted array. This replaces one np.where scan per
# sample with a single vectorised pass and yields the same integer labels.
target_dict, target_idx = np.unique(targets, return_inverse=True)
targets = torch.as_tensor(np.asarray(target_idx).reshape(-1), dtype=torch.long)

In [None]:
class ImageDataset(Dataset):
    """In-memory dataset that pairs each image tensor with its label.

    Both tensors are copied, moved to the notebook-level ``device`` and cast to
    ``float32`` once at construction time; indexing is then a plain tensor lookup.
    Note that the labels are stored as ``float32`` as well.
    """

    def __init__(self, img, targets):
        def _prepare(tensor):
            # clone() + detach() give the dataset its own copy, free of autograd history.
            return tensor.clone().detach().to(device).to(torch.float32)

        self.img = _prepare(img)
        self.targets = _prepare(targets)

    def __len__(self):
        # One sample per entry along the leading image dimension.
        return len(self.img)

    def __getitem__(self, idx):
        # Return the (image, label) pair stored at position ``idx``.
        return self.img[idx], self.targets[idx]

In [None]:
# Wrap the stacked image tensor and the integer labels in an ImageDataset.
data = ImageDataset(img, targets)

In [None]:
# Display the shape of the stacked image tensor.
img.shape

In [None]:
# random.randint includes BOTH endpoints, so the upper bound must be len(img) - 1;
# with len(img) the draw can land one past the last image and raise IndexError.
r = rand(0, len(img) - 1)
plt.imshow(v2.ToPILImage()(img[r]))
plt.show()

In [None]:
# Display the randomly drawn sample index.
r

In [None]:
# Display the encoded label stored for sample r.
targets[r]

Data targets:

0. buildings
1. forest
2. glacier
3. mountain
4. sea
5. street

In [None]:
class Model(torch.nn.Module):
    """Convolutional classifier: two conv + max-pool stages, then three linear layers.

    Shape trace for a (3, 150, 150) input. The convolutions use no padding and
    stride 1, so output size = (input size - kernel size) / stride + 1:

        conv1 (5x5)      -> (6, 146, 146)
        max-pool (2x2)   -> (6, 73, 73)
        conv2 (5x5)      -> (16, 69, 69)
        max-pool (2x2)   -> (16, 34, 34)
        flatten          -> 16 * 34 * 34 = 18496

    Args:
        num_classes: Number of output scores produced by the last layer.
            Defaults to 6, the number of classes listed in this notebook.
    """

    def __init__(self, num_classes=6):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        # A single pooling module is reused after both convolutions; it halves
        # the spatial size (rounding down).
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Dense layers. The fc1 input size is tied to 150x150 inputs (see shape trace).
        self.fc1 = nn.Linear(16 * 34 * 34, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Map a batch of images (N, 3, 150, 150) to raw class scores (N, num_classes)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Keep the batch dimension, flatten everything else for the linear layers.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the scores are returned unnormalised.
        x = self.fc3(x)
        return x

In [None]:
# Move the model to the device selected earlier instead of calling .cuda()
# unconditionally, which raises on machines without a usable CUDA GPU.
model = Model().to(device)
model

In [None]:
# SGD with momentum over every model parameter.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

# Cross-entropy loss applied to the model's output scores, placed on the selected device.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

In [None]:
# Display the dtype of the images held by the dataset (cast to float32 in ImageDataset.__init__).
data.img.dtype

In [None]:
# Display the dtype of the labels held by the dataset (also cast to float32 in ImageDataset.__init__).
data.targets.dtype

In [None]:
# Mini-batches of 4 samples, reshuffled every epoch.
loader = DataLoader(data, batch_size=4, shuffle=True)

epoch = 100
time_start = perf_counter()
for e in range(epoch):
    # Accumulate the loss over the whole epoch so the printed value reflects
    # every batch, not only the last one.
    running_loss = 0.0
    n_batches = 0
    # The batch variables are deliberately NOT called `img`/`target`: reusing `img`
    # here would overwrite the notebook-level tensor holding the full image set.
    for batch_img, batch_target in loader:
        # The dataset stores labels as float32; the loss is fed integer class indices.
        batch_target = batch_target.long()
        optimizer.zero_grad()
        output = model(batch_img)
        loss = criterion(output, batch_target)
        loss.backward()
        optimizer.step()
        # detach() keeps the running sum out of the autograd graph.
        running_loss = running_loss + loss.detach()
        n_batches += 1
    # Guard against an empty loader, where no loss was ever computed.
    mean_loss = float(running_loss) / n_batches if n_batches else float("nan")
    print(f"Epoch: {e}, Loss: {mean_loss}")
time_end = perf_counter()
print(f"Time taken: {time_end - time_start}")

In [None]:
def predict(img):
    """Return the class name the notebook-level ``model`` predicts for one image.

    Args:
        img: Image tensor with a leading batch dimension of size 1, as produced
            by ``image.unsqueeze(0)``. A single unbatched 3-D image is also
            accepted and gets the batch dimension added here.

    Returns:
        The entry of ``target_dict`` at the index of the highest output score.
    """
    if img.dim() == 3:
        img = img.unsqueeze(0)
    # Inference only: skip autograd bookkeeping so no graph is built per call.
    with torch.no_grad():
        scores = model(img)
    return target_dict[torch.argmax(scores).item()]

Data targets:

0. buildings
1. forest
2. glacier
3. mountain
4. sea
5. street

In [None]:
# Spot-check ten randomly chosen samples: print the predicted and the stored label.
for _ in range(10):
    # random.randint includes its upper bound, hence len(...) - 1 to stay in range.
    r = rand(0, len(data.img) - 1)
    print(f"{r = }")
    print(f"Predicted: {predict(data.img[r].unsqueeze(0))}")
    print(f"Real: {target_dict[int(data.targets[r])]}")

In [None]:
# Count correct predictions over 10000 samples drawn at random (with replacement)
# from `data`, i.e. the same samples the model was trained on.
accuracy = 0
for _ in range(10000):
    # random.randint includes its upper bound, hence len(...) - 1 to stay in range.
    r = rand(0, len(data.img) - 1)
    if predict(data.img[r].unsqueeze(0)) == target_dict[int(data.targets[r])]:
        accuracy += 1

In [None]:
# `accuracy` is a count of correct predictions; dividing by 10000 and scaling by 100 prints it as a percentage.
print(f"Accuracy: {accuracy/10000*100}%")

Epochs / Accuracy: 

- 10 : 18%
- 30 : 30%
- 50 : 60%
- 100 : 70%
- 150 : 18%