In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
import torchvision.transforms as transforms
import torch.nn.functional as F

In [2]:
transform = transforms.Compose([
    transforms.Resize((50, 50)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    # transforms.Normalize((0,5, ), (0.5, ))
])

In [3]:
batch_size = 4
trainset = torchvision.datasets.ImageFolder(root = '/content/data/train', transform=transform)
testset = torchvision.datasets.ImageFolder(root = '/content/data/test', transform=transform)

trainloader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=True)

In [4]:
class ImageMultiClassClassification(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 3) # BS, 32, 48 * 48
    self.pool = nn.MaxPool2d(2, 2) # BS, 32, 24, 24
    self.relu = nn.ReLU()
    self.conv2 = nn.Conv2d(32, 64, 3) # BS, 64, 22, 22
    self.conv3 = nn.Conv2d(64, 64, 3)
    self.softmax = nn.Softmax()
    self.fc1 = nn.Linear(64*4*4, 128)
    self.fc2 = nn.Linear(128, 3)

  def forward(self, x):
    # input image size = BS, 1, 50, 50
    x = self.conv1(x) # BS, 32, 48, 48
    x = self.pool(x) # BS, 32, 24, 24
    x = self.relu(x)
    x = self.conv2(x) # BS, 64, 22, 22
    x = self.pool(x) # BS, 64, 11, 11
    x = self.relu(x)
    x = self.conv3(x) # BS, 64, 9, 9
    x = self.pool(x) # BS, 64, 4, 4
    x = torch.flatten(x, 1)
    x = self.fc1(x)
    x = self.relu(x)
    x = self.fc2(x)
    x = self.softmax(x)
    return x

model = ImageMultiClassClassification()
print(model)

ImageMultiClassClassification(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (softmax): Softmax(dim=None)
  (fc1): Linear(in_features=1024, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=3, bias=True)
)


In [5]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [6]:
epochs = 10
for epoch in range(epochs):
  for i, data in enumerate(trainloader, 0):
    input, labels = data
    optimizer.zero_grad()
    y_hat = model(input)
    # print("y_hat : ", y_hat)
    loss = loss_fn(y_hat, labels)
    loss.backward()
    optimizer.step()

  print("loss : ", loss)

  return self._call_impl(*args, **kwargs)


loss :  tensor(0.7180, grad_fn=<NllLossBackward0>)
loss :  tensor(0.6898, grad_fn=<NllLossBackward0>)
loss :  tensor(0.6748, grad_fn=<NllLossBackward0>)
loss :  tensor(0.7863, grad_fn=<NllLossBackward0>)
loss :  tensor(0.7326, grad_fn=<NllLossBackward0>)
loss :  tensor(0.5568, grad_fn=<NllLossBackward0>)
loss :  tensor(0.5520, grad_fn=<NllLossBackward0>)
loss :  tensor(0.8015, grad_fn=<NllLossBackward0>)
loss :  tensor(0.5524, grad_fn=<NllLossBackward0>)
loss :  tensor(0.5515, grad_fn=<NllLossBackward0>)


In [7]:
y_test = []
y_test_hat = []
for i, data in enumerate(testloader, 0):
    inputs, y_test_temp = data
    with torch.no_grad():
        y_test_hat_temp = model(inputs).round()

    y_test.extend(y_test_temp.numpy())
    y_test_hat.extend(y_test_hat_temp.numpy())

# %%
acc = accuracy_score(y_test, np.argmax(y_test_hat, axis=1))
print(f'Accuracy: {acc*100:.2f} %')

Accuracy: 93.33 %


In [8]:
confusion_matrix(y_test, np.argmax(y_test_hat, axis=1))

array([[19,  0,  1],
       [ 0, 20,  0],
       [ 3,  0, 17]])