In [1]:
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import SubsetRandomSampler, ConcatDataset
from torch.utils.data import random_split


import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torchvision.datasets import ImageFolder

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

import PIL

In [2]:
LEARNING_RATE = 0.001
EPOCHS = 3
IMAGE_SIZE = 100
BATCH_SIZE = 256
PATH = 'Training_dataset'

In [3]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Flatten(), 
            nn.Linear(9216, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024, 131))
        
    def forward(self, xb):
        return self.network(xb)

In [4]:
def random_split_ratio(dataset, test_size=.2, random_state=None):
    L = len(dataset)
    n_second = int(L*test_size)
    n_first = L - n_second
    if random_state:
        first_split, second_split = random_split(dataset, lengths=[n_first, n_second], generator=torch.Generator().manual_seed(random_state))
    else:
        first_split, second_split = random_split(dataset, lengths=[n_first, n_second])

    return first_split, second_split

In [5]:
def verify_image(fp):
    try:
        PIL.Image.open(fp).verify()
        return True
    except:
        return False

In [6]:
model = CNN()

In [7]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=20),
    transforms.RandomResizedCrop(size=(IMAGE_SIZE, IMAGE_SIZE), scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [8]:
dataset = ImageFolder(root=PATH, transform=transform, is_valid_file=verify_image)

train_dataset, valid_dataset = random_split_ratio(dataset, random_state=42)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [9]:
for epoch in range(EPOCHS):
    model.train()
    
    epoch_train_running_loss = 0
    epoch_valid_running_loss = 0
    
    bar = tqdm(enumerate(train_loader), total=len(train_loader.dataset)/train_loader.batch_size)
    
    for batch_idx, (x_batch, y_batch) in bar:
        x_batch, y_batch = x_batch.to('cpu'), y_batch.to('cpu')
        outs = model(x_batch)
        loss = criterion(outs, y_batch)
        loss.backward() 
        optimizer.step()
        optimizer.zero_grad()

        epoch_train_running_loss += (loss.item() * x_batch.size(0))

    print(f'Epoch {epoch + 1}/{EPOCHS}, Loss: {loss.item()}')
    
    with torch.no_grad():
        
        model.eval()

        valid_predict = []  
        valid_targets = []  
        
        vbar = tqdm(valid_loader, total=len(valid_loader.dataset)/valid_loader.batch_size)
        
        for x_batch, y_batch in vbar:
            x_batch, y_batch = x_batch.to('cpu'), y_batch.to('cpu')
            outs = model(x_batch)
            loss = criterion(outs, y_batch)
            
            epoch_valid_running_loss += (loss.item() * x_batch.size(0))

            valid_predict.extend(torch.argmax(outs, dim=1).cpu().numpy())
            valid_targets.extend(y_batch.cpu().numpy())
            
        model.train()
    
    average_train_loss = epoch_train_running_loss / len(train_loader.dataset)
    average_valid_loss = epoch_valid_running_loss / len(valid_loader.dataset)
    validation_accuracy = accuracy_score(valid_targets, valid_predict)

    print(f"epoch {epoch+1}/{EPOCHS} | avg. training loss: {average_train_loss:.3f}, avg. validation loss: {average_valid_loss:.3f}, validation accuracy: {validation_accuracy:.3f}")


  full_bar = Bar(frac,
100%|████████████████████████████| 212/211.5390625 [1:53:17<-1:59:46, 32.06s/it]


Epoch 1/3, Loss: 2.634983539581299


100%|███████████████████████████████████| 53/52.8828125 [06:21<00:00,  7.20s/it]


epoch 1/3 | avg. training loss: 3.663, avg. validation loss: 2.641, validation accuracy: 0.200


  full_bar = Bar(frac,
100%|████████████████████████████| 212/211.5390625 [1:34:41<-1:59:48, 26.80s/it]


Epoch 2/3, Loss: 1.2720056772232056


100%|███████████████████████████████████| 53/52.8828125 [06:18<00:00,  7.15s/it]


epoch 2/3 | avg. training loss: 1.989, avg. validation loss: 1.218, validation accuracy: 0.601


  full_bar = Bar(frac,
100%|████████████████████████████| 212/211.5390625 [1:45:50<-1:59:47, 29.96s/it]


Epoch 3/3, Loss: 0.6703135371208191


100%|███████████████████████████████████| 53/52.8828125 [07:31<00:00,  8.52s/it]

epoch 3/3 | avg. training loss: 0.808, avg. validation loss: 0.482, validation accuracy: 0.832



