In [None]:
import os
import cv2
import torch
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import torch.nn as nn
import torch.optim as optim

In [None]:
class CustomDataset(Dataset):
    """Image dataset driven by a folder of images and a text file of labels.

    Every non-empty line of the label file has the form
    ``<image file name>,<integer label>``. Items are returned as
    ``(image, label)`` pairs of float32 tensors; the image is scaled by 1/255
    and normalised with the per-channel mean/std constants set in ``__init__``.
    """

    def __init__(self, imgs_folder, labels_txt):
        """Index the samples listed in ``labels_txt``.

        Args:
            imgs_folder: Folder that contains the image files.
            labels_txt: Path to the ``<image>,<label>`` text file.

        Raises:
            ValueError: If a non-empty line is not ``<image>,<int label>``.
        """
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
        self.dataset = self._read_labels(imgs_folder, labels_txt)

        # Kept from the original implementation: samples are stored in random order.
        random.shuffle(self.dataset)

    @staticmethod
    def _read_labels(imgs_folder, labels_txt):
        """Parse the label file into a list of ``(image path, int label)`` tuples."""
        samples = []
        with open(labels_txt, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    # Blank lines (typically a trailing newline at EOF) carry no sample.
                    continue
                try:
                    # Split on the LAST comma so file names containing commas still parse.
                    img_file, label = line.rsplit(",", 1)
                    label = int(label)
                except ValueError as exc:
                    raise ValueError(
                        f"{labels_txt}:{line_no}: expected '<image>,<int label>', got {line!r}"
                    ) from exc
                samples.append((os.path.join(imgs_folder, img_file.strip()), label))
        return samples

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            Tuple ``(img, label)``: the transformed image as a float32 tensor
            and the label as a float32 scalar tensor.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        img_path, label = self.dataset[index]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV loads BGR; convert to RGB and scale pixel values by 1/255.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

        img = self.transform(img)
        img = img.to(torch.float32)

        label = torch.tensor(label, dtype=torch.float32)
        return img, label

In [None]:
dataset_path = "/media/work/matheusvieira/truck_axle/Dataset/"

# Each split lives under <dataset_path><split>/ with an images folder and a
# number_of_axles_<split>.txt label file. Built in train, valid, test order.
train_dataset, valid_dataset, test_dataset = (
    CustomDataset(
        f"{dataset_path}{split}/images",
        f"{dataset_path}{split}/number_of_axles_{split}.txt",
    )
    for split in ("train", "valid", "test")
)

BATCH_SIZE = 8
# Only the training loader shuffles and drops the incomplete final batch.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
valid_loader, test_loader = (
    DataLoader(split_dataset, batch_size=BATCH_SIZE, shuffle=False)
    for split_dataset in (valid_dataset, test_dataset)
)

In [None]:
class SequentialModel(nn.Module):
    """ResNet-152 whose ``fc`` layer is replaced by a small MLP with a single output."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)

        # Replacement head: 2048 -> 512 -> 256 -> 32 -> 1.
        head_layers = [
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        backbone.fc = nn.Sequential(*head_layers)
        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and the replacement head."""
        return self.model(x)

# Pick CUDA when it is available (CPU otherwise) and build the model on that device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = SequentialModel().to(device)

In [None]:
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
MAE = nn.L1Loss()

NUM_EPOCHS = 25
CHECKPOINT_PATH = "best_resnet.pth"

best_val_loss = float('inf')
patience = 3  # epochs without validation improvement tolerated before stopping
counter = 0   # consecutive epochs without validation improvement

for epoch in range(NUM_EPOCHS):
    model.train()
    train_loss = 0.0
    mae_train_loss = 0.0
    train_samples = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        # squeeze(1) removes only the output dimension, so a batch holding a
        # single sample keeps shape (1,) and still lines up with `labels`.
        outputs = model(images).squeeze(1)
        loss = criterion(outputs, labels)
        mae_loss = MAE(outputs, labels)
        loss.backward()
        optimizer.step()

        n_batch = images.size(0)
        train_loss += loss.item() * n_batch
        mae_train_loss += mae_loss.item() * n_batch
        train_samples += n_batch

    # The train loader uses drop_last=True, so average over the samples that
    # were actually processed rather than over len(train_loader.dataset).
    train_loss /= max(train_samples, 1)
    mae_train_loss /= max(train_samples, 1)

    # Validation
    model.eval()
    val_loss = 0.0
    mae_val_loss = 0.0
    val_samples = 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images).squeeze(1)
            loss = criterion(outputs, labels)
            mae_loss = MAE(outputs, labels)

            n_batch = images.size(0)
            val_loss += loss.item() * n_batch
            mae_val_loss += mae_loss.item() * n_batch
            val_samples += n_batch

    val_loss /= max(val_samples, 1)
    mae_val_loss /= max(val_samples, 1)

    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Train MAE: {mae_train_loss:.4f}, Val Loss: {val_loss:.4f}, Val MAE Loss: {mae_val_loss:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), CHECKPOINT_PATH)
        print(f"Best model saved at epoch {epoch+1}")
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping")
            break

# Restore the best checkpoint whether training stopped early or ran every
# epoch; otherwise later cells would evaluate the last-epoch weights.
# The guard skips the reload when no checkpoint was written in this run.
if best_val_loss < float('inf'):
    model.load_state_dict(torch.load(CHECKPOINT_PATH))

Epoch 1, Train Loss: 2.3463, Train MAE: 1.1166, Val Loss: 0.5995, Val MAE Loss: 0.6330
Best model saved at epoch 1
Epoch 2, Train Loss: 0.9273, Train MAE: 0.7113, Val Loss: 0.3362, Val MAE Loss: 0.4113
Best model saved at epoch 2
Epoch 3, Train Loss: 0.7539, Train MAE: 0.6271, Val Loss: 0.5334, Val MAE Loss: 0.5536
Epoch 4, Train Loss: 0.5425, Train MAE: 0.5370, Val Loss: 0.3325, Val MAE Loss: 0.3904
Best model saved at epoch 4
Epoch 5, Train Loss: 0.3801, Train MAE: 0.4524, Val Loss: 0.7028, Val MAE Loss: 0.6176
Epoch 6, Train Loss: 0.2939, Train MAE: 0.3920, Val Loss: 0.3519, Val MAE Loss: 0.4345
Epoch 7, Train Loss: 0.2433, Train MAE: 0.3590, Val Loss: 0.3257, Val MAE Loss: 0.4032
Best model saved at epoch 7
Epoch 8, Train Loss: 0.2764, Train MAE: 0.3794, Val Loss: 0.1255, Val MAE Loss: 0.2664
Best model saved at epoch 8
Epoch 9, Train Loss: 0.1674, Train MAE: 0.3003, Val Loss: 0.1144, Val MAE Loss: 0.2409
Best model saved at epoch 9
Epoch 10, Train Loss: 0.1826, Train MAE: 0.3021, 

In [None]:
# Evaluate the current model weights on the held-out test split.
model.eval()
test_loss = 0.0
test_mae_loss = 0.0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        # squeeze(1) removes only the output dimension, so a final batch with
        # one sample keeps shape (1,) and still matches `labels`.
        outputs = model(images).squeeze(1)
        loss = criterion(outputs, labels)
        mae = MAE(outputs, labels)

        # Losses are batch means; weight by batch size to get a per-sample average below.
        n_batch = images.size(0)
        test_loss += loss.item() * n_batch
        test_mae_loss += mae.item() * n_batch

test_loss /= len(test_loader.dataset)
test_mae_loss /= len(test_loader.dataset)

print(f"Test Loss: {test_loss:.4f} - Test MAE Loss: {test_mae_loss:.4f}")

Test Loss: 0.0983 - Test MAE Loss: 0.2354


In [None]:
def make_prediction(model, img_path):
    """Run ``model`` on a single image and return its raw scalar output.

    The image is preprocessed like the dataset samples: BGR -> RGB, scaled by
    1/255, converted to a tensor and normalised with the same mean/std.

    Args:
        model: Network to evaluate; it is called with a batch of one image.
        img_path: Path of the image file to load.

    Returns:
        float: The unrounded value at ``outputs[0][0]``.

    Raises:
        FileNotFoundError: If the image cannot be read from disk.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    img = transform(img)
    img = img.to(torch.float32)
    img = img.to(device)

    # A batch of one cannot pass through BatchNorm1d in training mode, so run
    # in eval mode and put the model back in its previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(img.unsqueeze(0))
            return outputs[0][0].item()
    finally:
        model.train(was_training)

In [None]:
from sklearn.metrics import confusion_matrix

y_true = []
y_pred = []

test_images_dir = os.path.join(dataset_path, "test/images")
test_labels_file = os.path.join(dataset_path, "test/number_of_axles_test.txt")

with open(test_labels_file, 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Ignore blank lines such as a trailing newline at the end of the file.
            continue
        img_file, label = line.split(",")
        label = int(label)
        y_true.append(label)

        img_file = os.path.join(test_images_dir, img_file)

        # The model outputs a continuous value; round it to the nearest integer class.
        raw_pred = make_prediction(model, img_file)
        rounded_pred = round(raw_pred)
        if rounded_pred != label:
            # Log every miss as "<true>: <rounded prediction> - <raw prediction>".
            print(f"{label}: {rounded_pred} - {raw_pred}")
        y_pred.append(rounded_pred)

confusion_matrix(y_true, y_pred)

7: 6 - 6.328375816345215
3: 4 - 3.5770022869110107
3: 4 - 3.5590291023254395
3: 4 - 3.524505615234375
3: 4 - 3.649664878845215
3: 2 - 2.169879198074341
4: 3 - 3.399392604827881
6: 5 - 4.894862651824951
6: 5 - 5.385087490081787
4: 3 - 2.83590030670166


In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Macro-averaged scores computed from the true labels and rounded predictions.
macro_scores = precision_recall_fscore_support(y_true, y_pred, average="macro")
print(macro_scores)

(0.853077478077478, 0.8661522874288831, 0.854937154348919, None)


In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the rounded predictions against the true labels.
test_accuracy = accuracy_score(y_true, y_pred)
print(test_accuracy)

0.908256880733945