In [1]:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from sklearn.metrics import precision_score, recall_score, f1_score

from tqdm import tqdm
from torchsummary import summary

import matplotlib.pyplot as plt

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Caminho para o seu dataset
data_dir = './editedDataset'

# Transformação para converter as imagens em tensores
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Redimensiona as imagens (ajuste conforme necessário)
    transforms.ToTensor(),
])

# Carregar o dataset
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# DataLoader para iterar pelo dataset
dataloader = DataLoader(dataset, batch_size=8, shuffle=False, num_workers=1)

def calculate_mean_std(dataloader):
    mean = 0.0
    std = 0.0
    total_images = 0

    for images, _ in dataloader:
        # Redimensiona o batch: (batch_size, 3, height, width) para (3, batch_size*height*width)
        images = images.view(3, -1)
        
        # Calcula a média e soma
        mean += images.mean(1)
        
        # Calcula o desvio padrão e soma
        std += images.std(1)
        
        total_images += 1

    # Média total
    mean /= total_images
    
    # Desvio padrão total
    std /= total_images

    return mean, std

# Calcula a média e o desvio padrão
mean, std = calculate_mean_std(dataloader)
print(f'Mean: {mean}')
print(f'Std: {std}')

Mean: tensor([0.4521, 0.4465, 0.4503])
Std: tensor([0.2017, 0.2024, 0.2017])


In [4]:
def data_loader(data_dir, batch_size, random_seed=42, train_size=0.8, valid_size=0.1, shuffle=True):
    # Define as transformações
    normalize = transforms.Normalize(
        mean=[0.4521, 0.4465, 0.4503],  # Ajuste esses valores para o seu dataset
        std=[0.2017, 0.2024, 0.2017],
    )

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    # Carregar o dataset
    dataset = datasets.ImageFolder(
        root=data_dir,
        transform=transform
    )

    num_images = len(dataset)
    print(f"Total de imagens no dataset: {num_images}")

    num_train = len(dataset)
    indices = list(range(num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # Divisão dos índices
    train_split = int(np.floor(train_size * num_train))
    valid_split = int(np.floor((train_size + valid_size) * num_train))

    train_idx, valid_idx, test_idx = indices[:train_split], indices[train_split:valid_split], indices[valid_split:]

    # Criar samplers
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    # Criar data loaders
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    valid_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)

    print(f"Imagens de treinamento: {len(train_idx)}")
    print(f"Imagens de validação: {len(valid_idx)}")
    print(f"Imagens de teste: {len(test_idx)}")

    return (train_loader, valid_loader, test_loader)

In [5]:
batch_size = 8
train_size= 0.8
valid_size = 0.1

train_loader, valid_loader, test_loader = data_loader(data_dir=data_dir, batch_size=batch_size, 
                                                      train_size=train_size, valid_size=valid_size)

Total de imagens no dataset: 4194
Imagens de treinamento: 3355
Imagens de validação: 419
Imagens de teste: 420


In [6]:
class DSSCConv(nn.Module):
    def __init__(self, in_channels, mid_channels=None, kernel_size=3, padding=1):
        super(DSSCConv, self).__init__()

        if mid_channels is None:
            mid_channels = in_channels

        # Depthwise Separable Convolution for k2
        self.k2 = nn.Sequential(
                    nn.AvgPool2d(kernel_size=4, stride=4),
                    nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, dilation=1, stride=1, bias=False, groups=in_channels),
                    nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False),
                    nn.BatchNorm2d(in_channels),
                    )
        
        # Depthwise Separable Convolution for k3
        self.k3 = nn.Sequential(
                    nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, dilation=1, stride=1, bias=False, groups=in_channels),
                    nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False),
                    nn.BatchNorm2d(in_channels),
                    )
        
        # Depthwise Separable Convolution for k4
        self.k4 = nn.Sequential(
                    nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, dilation=1, stride=1, bias=False, groups=in_channels),
                    nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False),
                    nn.BatchNorm2d(mid_channels),
                    )

    def forward(self, x):
        identity = x

        out = torch.sigmoid(torch.add(identity, F.interpolate(self.k2(x), identity.size()[2:])))
        out = torch.mul(self.k3(x), out)
        out = self.k4(out)

        return out


class DSSCVGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(DSSCVGG16, self).__init__()
        self.features = self._make_layers()
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_layers(self):
        layers = []
        in_channels = 3
        
        # Configuração da arquitetura VGG16
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']

        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [DSSCConv(in_channels, v),
                           nn.ReLU(inplace=True)]
                in_channels = v
                
        return nn.Sequential(*layers)

In [7]:
num_classes = 5
num_epochs = 10
learning_rate = 0.0001 # for Adam 0.0001 or 0.0004
weight_decay = 0.005
momentum = 0.9

model = DSSCVGG16(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=momentum)  
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_loader)
classes = ['Bicycle', 'Bridge', 'Bus', 'Car', 'Traffic Light']
class_correct = list(0. for i in range(num_classes))
class_total = list(0. for i in range(num_classes))

In [8]:
if torch.cuda.is_available():
    torch.cuda.synchronize()

start_time = time.time()

for epoch in range(num_epochs):
    # Training loop
    model.train()
    for i, (images, labels) in enumerate(train_loader):  
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.6f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Validation loop
    with torch.no_grad():
        correct = 0
        total = 0
        model.eval()
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
        print('Accuracy of the network on the validation images: {:.6f} %'.format(100 * correct / total))

    # Testing loop
    true_labels = []
    predictions = []
    with torch.no_grad():
        correct = 0
        total = 0
        model.eval()
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            true_labels.extend(labels.cpu().numpy())
            predictions.extend(predicted.cpu().numpy())

        print('Accuracy of the network on the test images: {:.6f} %'.format(100 * correct / total))

    # Calcula as métricas com tratamento de divisão por zero
    precision = precision_score(true_labels, predictions, average='weighted', zero_division=0)
    recall = recall_score(true_labels, predictions, average='weighted', zero_division=0)
    f1 = f1_score(true_labels, predictions, average='weighted', zero_division=0)
    print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}')

# Synchronize after training completion if using CUDA
if torch.cuda.is_available():
    torch.cuda.synchronize()

end_time = time.time()
total_training_time = end_time - start_time

print(f"Total training time: {total_training_time:.6f} seconds.")

Epoch [1/10], Step [420/420], Loss: 1.622693
Accuracy of the network on the validation images: 32.935561 %
Accuracy of the network on the test images: 32.857143 %
Precision: 0.3562, Recall: 0.3286, F1-Score: 0.2438
Epoch [2/10], Step [420/420], Loss: 1.835402
Accuracy of the network on the validation images: 33.651551 %
Accuracy of the network on the test images: 33.809524 %
Precision: 0.4177, Recall: 0.3381, F1-Score: 0.2519
Epoch [3/10], Step [420/420], Loss: 2.342988
Accuracy of the network on the validation images: 42.482100 %
Accuracy of the network on the test images: 45.000000 %
Precision: 0.5082, Recall: 0.4500, F1-Score: 0.4447
Epoch [4/10], Step [420/420], Loss: 1.015090
Accuracy of the network on the validation images: 48.448687 %
Accuracy of the network on the test images: 53.571429 %
Precision: 0.5521, Recall: 0.5357, F1-Score: 0.5168
Epoch [5/10], Step [420/420], Loss: 1.479457
Accuracy of the network on the validation images: 57.040573 %
Accuracy of the network on the te

In [9]:
print('VGG16 and Depthwise Separable Convolution Summary:')
summary(model, (3, 224, 224))

VGG16 and Depthwise Separable Convolution Summary:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         AvgPool2d-1            [-1, 3, 56, 56]               0
            Conv2d-2            [-1, 3, 56, 56]              27
            Conv2d-3            [-1, 3, 56, 56]               9
       BatchNorm2d-4            [-1, 3, 56, 56]               6
            Conv2d-5          [-1, 3, 224, 224]              27
            Conv2d-6          [-1, 3, 224, 224]               9
       BatchNorm2d-7          [-1, 3, 224, 224]               6
            Conv2d-8          [-1, 3, 224, 224]              27
            Conv2d-9         [-1, 64, 224, 224]             192
      BatchNorm2d-10         [-1, 64, 224, 224]             128
         DSSCConv-11         [-1, 64, 224, 224]               0
             ReLU-12         [-1, 64, 224, 224]               0
        AvgPool2d-13           [-1, 64, 56, 56]     

In [10]:
print(model)

DSSCVGG16(
  (features): Sequential(
    (0): DSSCConv(
      (k2): Sequential(
        (0): AvgPool2d(kernel_size=4, stride=4, padding=0)
        (1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3, bias=False)
        (2): Conv2d(3, 3, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (3): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (k3): Sequential(
        (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3, bias=False)
        (1): Conv2d(3, 3, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (k4): Sequential(
        (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3, bias=False)
        (1): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   