In [1]:
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import SubsetRandomSampler, ConcatDataset
from torch.utils.data import random_split

import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torchvision.datasets import ImageFolder

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [2]:
# --- Data -------------------------------------------------------------------
PATH = 'Training_dataset'   # root folder handed to ImageFolder
IMAGE_SIZE = 100            # side length (pixels) of the square crops fed to the network
BATCH_SIZE = 128            # samples per optimisation step

# --- Model ------------------------------------------------------------------
MAX_FILTERS = 64            # channel width of the first conv stage; later stages use multiples of it

# --- Optimisation -----------------------------------------------------------
EPOCHS = 10                 # full passes over the training set
LEARNING_RATE = 1e-3        # step size given to the Adam optimiser

In [3]:
class CNN(nn.Module):
    """Four-stage convolutional image classifier that returns raw class logits.

    Each convolutional stage is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU
    -> MaxPool2d(2)``; the stage widths are ``base_filters * (1, 2, 4, 8)``.
    Dropout (p=0.2) is applied after every stage, and a four-layer MLP head maps
    the flattened feature map to ``num_classes`` scores.

    The network deliberately does NOT end in a softmax: ``nn.CrossEntropyLoss``
    applies log-softmax itself, so feeding it probabilities would apply softmax
    twice and flatten the gradients. For probabilities at inference time use
    ``torch.softmax(model(x), dim=1)``; ``argmax`` over the logits gives the
    same predicted class as ``argmax`` over the probabilities.

    Args:
        num_classes: Number of output classes (size of the final linear layer).
        base_filters: Channel count of the first conv stage. When ``None`` the
            notebook-level constant ``MAX_FILTERS`` is used.
        image_size: Side length of the (square) input images, used to size the
            first linear layer. When ``None`` the notebook-level constant
            ``IMAGE_SIZE`` is used.

    Raises:
        ValueError: If ``image_size`` is smaller than 16, i.e. nothing would be
            left of the feature map after four 2x2 poolings.
    """

    def __init__(self, num_classes: int = 131, base_filters=None, image_size=None):
        super().__init__()

        # Resolve the notebook-level defaults lazily so that explicit arguments
        # make the class usable without those globals being defined.
        if base_filters is None:
            base_filters = MAX_FILTERS
        if image_size is None:
            image_size = IMAGE_SIZE

        # Every stage halves the spatial size (floor division), so four stages
        # leave image_size // 16 pixels per side.
        spatial = image_size // 16
        if spatial < 1:
            raise ValueError(
                f"image_size must be at least 16 to survive four 2x2 poolings, got {image_size}"
            )

        widths = [base_filters * factor for factor in (1, 2, 4, 8)]

        # Conv and BatchNorm widths now come from the same list, so they can no
        # longer drift apart when the base width is changed.
        self.conv_layer_1 = self._conv_block(3, widths[0])
        self.conv_layer_2 = self._conv_block(widths[0], widths[1])
        self.conv_layer_3 = self._conv_block(widths[1], widths[2])
        self.conv_layer_4 = self._conv_block(widths[2], widths[3])

        # With the defaults used in this notebook (64 filters, 100 px images)
        # this is 512 * 6 * 6 = 18432, the previously hard-coded value.
        flat_features = widths[3] * spatial * spatial

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=flat_features, out_features=3144),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(in_features=3144, out_features=786),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(in_features=786, out_features=786),
            nn.ReLU(),
            nn.Dropout(0.2),
            # Final layer emits logits; no Softmax here (see class docstring).
            nn.Linear(in_features=786, out_features=num_classes),
        )

        # Shared dropout applied to the feature map after each conv stage.
        self.dropout = nn.Dropout(p=0.2)

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one Conv(3x3) -> BatchNorm -> ReLU -> MaxPool(2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: Image batch with 3 channels; the spatial size must match the
                ``image_size`` the model was built for, otherwise the first
                linear layer rejects the flattened features.

        Returns:
            Tensor of unnormalised class scores (logits), one row per image
            and ``num_classes`` columns.
        """
        for stage in (
            self.conv_layer_1,
            self.conv_layer_2,
            self.conv_layer_3,
            self.conv_layer_4,
        ):
            x = self.dropout(stage(x))
        return self.classifier(x)

In [4]:
# Instantiate the classifier with the constructor's default arguments.
# No device is selected here, so the parameters stay wherever PyTorch creates
# them by default.
model = CNN()

In [5]:
# Training-time preprocessing: random augmentations first (flip, rotation,
# scaled crop, colour jitter), then conversion to a tensor and per-channel
# normalisation. The order of the steps matters and is kept as is.
transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.RandomResizedCrop((IMAGE_SIZE, IMAGE_SIZE), scale=(0.8, 1.0)),
        transforms.ColorJitter(
            brightness=0.1,
            contrast=0.1,
            saturation=0.1,
            hue=0.1,
        ),
        transforms.ToTensor(),
        # These mean/std values are the widely used ImageNet channel statistics.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [6]:
# Dataset: ImageFolder reads images from PATH and applies the augmentation
# pipeline to every sample it returns.
train_dataset = ImageFolder(PATH, transform=transform)

# Loader: reshuffled mini-batches of BATCH_SIZE samples.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Objective and optimiser for the model created above.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
# Train for EPOCHS passes over train_loader and report the mean loss per epoch.
#
# The reported value is the sample-weighted average over the whole epoch rather
# than the loss of whichever mini-batch happened to come last, which is noisy
# and says little about overall progress.

# Follow the model to whatever device its parameters live on, so the loop keeps
# working if the model is later moved to an accelerator.
model_device = next(model.parameters()).device

for epoch in range(EPOCHS):
    model.train()
    loss_sum = 0.0      # sum of per-sample losses seen this epoch
    sample_count = 0    # number of samples seen this epoch

    for images, labels in train_loader:
        images = images.to(model_device)
        labels = labels.to(model_device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        sample_count += batch_size

    # An empty loader previously crashed with NameError on `loss`; report NaN instead.
    epoch_loss = loss_sum / sample_count if sample_count else float('nan')
    print(f'Epoch {epoch + 1}/{EPOCHS}, Loss: {epoch_loss}')

In [None]:
# Switch the model to evaluation mode: dropout layers become no-ops and
# BatchNorm layers use their running statistics instead of batch statistics.
model.eval()