In [1]:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from sklearn.metrics import precision_score, recall_score, f1_score

from tqdm import tqdm
from torchsummary import summary

import matplotlib.pyplot as plt

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Caminho para o seu dataset
data_dir = './editedDataset'

# Transformação para converter as imagens em tensores
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Redimensiona as imagens (ajuste conforme necessário)
    transforms.ToTensor(),
])

# Carregar o dataset
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# DataLoader para iterar pelo dataset
dataloader = DataLoader(dataset, batch_size=8, shuffle=False, num_workers=1)

def calculate_mean_std(dataloader):
    mean = 0.0
    std = 0.0
    total_images = 0

    for images, _ in dataloader:
        # Redimensiona o batch: (batch_size, 3, height, width) para (3, batch_size*height*width)
        images = images.view(3, -1)
        
        # Calcula a média e soma
        mean += images.mean(1)
        
        # Calcula o desvio padrão e soma
        std += images.std(1)
        
        total_images += 1

    # Média total
    mean /= total_images
    
    # Desvio padrão total
    std /= total_images

    return mean, std

# Calcula a média e o desvio padrão
mean, std = calculate_mean_std(dataloader)
print(f'Mean: {mean}')
print(f'Std: {std}')

Mean: tensor([0.4521, 0.4465, 0.4503])
Std: tensor([0.2017, 0.2024, 0.2017])


In [4]:
def data_loader(data_dir, batch_size, random_seed=42, train_size=0.8, valid_size=0.1, shuffle=True):
    # Define as transformações
    normalize = transforms.Normalize(
        mean=[0.4521, 0.4465, 0.4503],  # Ajuste esses valores para o seu dataset
        std=[0.2017, 0.2024, 0.2017],
    )

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    # Carregar o dataset
    dataset = datasets.ImageFolder(
        root=data_dir,
        transform=transform
    )

    num_images = len(dataset)
    print(f"Total de imagens no dataset: {num_images}")

    num_train = len(dataset)
    indices = list(range(num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # Divisão dos índices
    train_split = int(np.floor(train_size * num_train))
    valid_split = int(np.floor((train_size + valid_size) * num_train))

    train_idx, valid_idx, test_idx = indices[:train_split], indices[train_split:valid_split], indices[valid_split:]

    # Criar samplers
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    # Criar data loaders
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    valid_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)

    print(f"Imagens de treinamento: {len(train_idx)}")
    print(f"Imagens de validação: {len(valid_idx)}")
    print(f"Imagens de teste: {len(test_idx)}")

    return (train_loader, valid_loader, test_loader)

In [5]:
batch_size = 8
train_size= 0.8
valid_size = 0.1

train_loader, valid_loader, test_loader = data_loader(data_dir=data_dir, batch_size=batch_size, 
                                                      train_size=train_size, valid_size=valid_size)

Total de imagens no dataset: 4194
Imagens de treinamento: 3355
Imagens de validação: 419
Imagens de teste: 420


In [6]:
class SCConv(nn.Module):

    def __init__(self, in_channels, mid_channels=None, kernel_size=3, padding=1):
        super(SCConv, self).__init__()
        #print(in_channels,mid_channels)

        self.k2 = nn.Sequential(
                    nn.AvgPool2d(kernel_size=4, stride=4),
                    nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, dilation=1, stride=1, bias=False, groups=1),
                    nn.BatchNorm2d(in_channels),
                    )
        self.k3 = nn.Sequential(
                    nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, dilation=1, stride=1, bias=False, groups=1),
                    nn.BatchNorm2d(in_channels),
                    )
        self.k4 = nn.Sequential(
                    nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, dilation=1, stride=1, bias=False, groups=1),
                    nn.BatchNorm2d(mid_channels),
                    )

    def forward(self, x):
        identity = x

        out = torch.sigmoid(torch.add(identity, F.interpolate(self.k2(x), identity.size()[2:]))) # sigmoid(identity + k2)
        out = torch.mul(self.k3(x), out) # k3 * sigmoid(identity + k2)
        out = self.k4(out) # k4

        return out

class SCVGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(SCVGG16, self).__init__()
        self.features = self._make_layers()
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_layers(self):
        layers = []
        in_channels = 3
        
        # Configuração da arquitetura VGG16
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']

        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [SCConv(in_channels, v),
                           nn.ReLU(inplace=True)]
                in_channels = v
                
        return nn.Sequential(*layers)

In [7]:
num_classes = 5
num_epochs = 100
learning_rate = 0.0001 # for Adam 0.0001 or 0.0004

model = SCVGG16(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

classes = ['Bicycle', 'Bridge', 'Bus', 'Car', 'Traffic Light']
class_correct = list(0. for i in range(num_classes))
class_total = list(0. for i in range(num_classes))

In [8]:
total_step = len(train_loader)

if torch.cuda.is_available():
    torch.cuda.synchronize()

start_time = time.time()

best_accuracy = 0.0
for epoch in range(num_epochs):
    # Training loop
    model.train()
    for i, (images, labels) in enumerate(train_loader):  
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.6f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
    if (epoch + 1) % 10 == 0:
        checkpoint_path = f'checkpoint/checkpoint_{type(model).__name__}_num_epochs-{num_epochs}_epoch_{epoch+1}.pt'
        torch.save(model.state_dict(), checkpoint_path)
    

    # Validation loop
    with torch.no_grad():
        correct = 0
        total = 0
        model.eval()
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            accuracy = 100 * correct / total
            
        print('Accuracy of the network on the validation images: {:.6f} %'.format(accuracy))
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), f'best/best_{type(model).__name__}_epoch_{num_epochs}.pt')

    # Testing loop
    true_labels = []
    predictions = []
    with torch.no_grad():
        correct = 0
        total = 0
        model.eval()
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            accuracy = 100 * correct / total
            true_labels.extend(labels.cpu().numpy())
            predictions.extend(predicted.cpu().numpy())

        print('Accuracy of the network on the test images: {:.6f} %'.format(accuracy))

    # Calcula as métricas com tratamento de divisão por zero
    precision = precision_score(true_labels, predictions, average='weighted', zero_division=0)
    recall = recall_score(true_labels, predictions, average='weighted', zero_division=0)
    f1 = f1_score(true_labels, predictions, average='weighted', zero_division=0)
    print(f'Precision: {precision:.6f}, Recall: {recall:.6f}, F1-Score: {f1:.6f}')

# Synchronize after training completion if using CUDA
if torch.cuda.is_available():
    torch.cuda.synchronize()

end_time = time.time()
total_training_time = end_time - start_time

print(f"Total training time: {total_training_time:.6f} seconds.")

Epoch [1/100], Step [420/420], Loss: 1.429330
Accuracy of the network on the validation images: 19.809069 %
Accuracy of the network on the test images: 25.476190 %
Precision: 0.064904, Recall: 0.254762, F1-Score: 0.103452
Epoch [2/100], Step [420/420], Loss: 1.002724
Accuracy of the network on the validation images: 35.322196 %
Accuracy of the network on the test images: 39.047619 %
Precision: 0.320913, Recall: 0.390476, F1-Score: 0.307948
Epoch [3/100], Step [420/420], Loss: 1.268179
Accuracy of the network on the validation images: 43.436754 %
Accuracy of the network on the test images: 45.714286 %
Precision: 0.414278, Recall: 0.457143, F1-Score: 0.395516
Epoch [4/100], Step [420/420], Loss: 2.403227
Accuracy of the network on the validation images: 41.766110 %
Accuracy of the network on the test images: 43.809524 %
Precision: 0.447905, Recall: 0.438095, F1-Score: 0.396779
Epoch [5/100], Step [420/420], Loss: 1.446445
Accuracy of the network on the validation images: 47.971360 %
Accu

In [9]:
print('VGG16 and Self Calibrated Convolution Summary:')
summary(model, (3, 224, 224))

VGG16 and Self Calibrated Convolution Summary:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         AvgPool2d-1            [-1, 3, 56, 56]               0
            Conv2d-2            [-1, 3, 56, 56]              81
       BatchNorm2d-3            [-1, 3, 56, 56]               6
            Conv2d-4          [-1, 3, 224, 224]              81
       BatchNorm2d-5          [-1, 3, 224, 224]               6
            Conv2d-6         [-1, 64, 224, 224]           1,728
       BatchNorm2d-7         [-1, 64, 224, 224]             128
            SCConv-8         [-1, 64, 224, 224]               0
              ReLU-9         [-1, 64, 224, 224]               0
        AvgPool2d-10           [-1, 64, 56, 56]               0
           Conv2d-11           [-1, 64, 56, 56]          36,864
      BatchNorm2d-12           [-1, 64, 56, 56]             128
           Conv2d-13         [-1, 64, 224, 224]         

In [10]:
print(model)

SCVGG16(
  (features): Sequential(
    (0): SCConv(
      (k2): Sequential(
        (0): AvgPool2d(kernel_size=4, stride=4, padding=0)
        (1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (k3): Sequential(
        (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (k4): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): ReLU(inplace=True)
    (2): SCConv(
      (k2): Sequential(
        (0): AvgPool2d(kernel_size=4, stride=4, padding=0)
        (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (2): BatchNorm2

In [11]:
# # Função para desnormalizar as imagens
# def imshow(img):
#     img = img / 2 + 0.5  # desnormalizar
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()

# # Função para obter o nome da classe
# def classes(index):
#     classes = ['Bicycle', 'Bridge', 'Bus', 'Car', 'Traffic Light']
#     return classes[index]

# dataiter = iter(test_loader)
# images, labels = next(dataiter)

# # Mover as imagens para o dispositivo correto e obter previsões
# images = images.to(device)
# outputs = model(images)
# _, predicted = torch.max(outputs, 1)

# # Plotar imagens com previsões e etiquetas verdadeiras
# fig = plt.figure(figsize=(25, 4))
# for idx in np.arange(20):
#     ax = fig.add_subplot(2, 10, idx+1, xticks=[], yticks=[])
#     img = images[idx].cpu().numpy()  # Mover a imagem para a CPU antes de usar np.transpose
#     img = img / 2 + 0.5  # desnormalizar
#     plt.imshow(np.transpose(img, (1, 2, 0)))
#     ax.set_title("{} ({})".format(classes(predicted[idx]), classes(labels[idx])),
#                  color=("green" if predicted[idx]==labels[idx].item() else "red"))
# plt.show()

In [12]:
# def visualize_predictions(loader, model, num_images=8):
#     images, labels = next(iter(loader))
#     outputs = model(images)
#     _, predicted = torch.max(outputs, 1)

#     plt.figure(figsize=(12, 8))
#     for i in range(num_images):
#         plt.subplot(2, 4, i + 1)
#         plt.imshow(images[i].permute(1, 2, 0))
#         plt.title(f'{"Green" if predicted[i] == labels[i] else "Red"}: {predicted[i]}({labels[i]})')
#         plt.axis('off')
#     plt.show()

# # Visualizar as previsões
# visualize_predictions(test_loader, model)

In [13]:
# epochs = range(1, len(train_loss) + 1)
# plt.figure(figsize=(12, 4))
# plt.subplot(1, 2, 1)
# plt.plot(epochs, train_loss, 'r', label='Training loss')
# plt.plot(epochs, val_loss, 'b', label='Validation loss')
# plt.title('Training and Validation Loss')
# plt.legend()

# plt.subplot(1, 2, 2)
# plt.plot(epochs, train_accuracy, 'r', label='Training Accuracy')
# plt.plot(epochs, val_accuracy, 'b', label='Validation Accuracy')
# plt.title('Training and Validation Accuracy')
# plt.legend()

# plt.show()