In [1]:
import os
import torch
from torch import nn
from torch import optim
from torchmetrics import Accuracy
from torch.utils.data import DataLoader
from generate_landmark_data import label_dict_from_config_file
from custom_nn_utils import CustomImageDataset, NeuralNetwork, EarlyStopper

In [2]:
# Seed torch's global RNG before any model or DataLoader is created so that a
# fresh "Restart & Run All" repeats the same shuffling order and the same
# default parameter initialisation.
SEED = 42
torch.manual_seed(SEED)

# Locations of the landmark CSV splits and of the checkpoint directory.
DATA_FOLDER_PATH = "./data/"
# Label mapping read from the gesture config; its length is used as the number of classes.
list_label = label_dict_from_config_file("hand_gesture.yaml")
train_path = os.path.join(DATA_FOLDER_PATH, "landmark_train.csv")
val_path = os.path.join(DATA_FOLDER_PATH, "landmark_val.csv")
test_path = os.path.join(DATA_FOLDER_PATH, "landmark_test.csv")
save_path = "models"
os.makedirs(save_path, exist_ok=True)  # idempotent: safe to re-run the cell

In [3]:
# Batch sizes per split, named so they can be tuned in one place.
TRAIN_BATCH_SIZE = 40
VAL_BATCH_SIZE = 50
TEST_BATCH_SIZE = 20

# Only the training split is shuffled; validation and test keep file order.
trainset = CustomImageDataset(train_path)
trainloader = DataLoader(trainset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)

# val_path is already a complete path, so it is passed directly.
valset = CustomImageDataset(val_path)
val_loader = DataLoader(valset, batch_size=VAL_BATCH_SIZE, shuffle=False)

testset = CustomImageDataset(test_path)
test_loader = DataLoader(testset, batch_size=TEST_BATCH_SIZE, shuffle=False)

In [4]:
def train(trainloader, val_loader, model, loss_function, early_stopper, optimizer, max_epochs=300, save_path="models"):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Each epoch runs one pass over ``trainloader`` (with parameter updates) and
    one pass over ``val_loader`` (without gradients), prints accuracy and mean
    loss for both, and writes ``best_model.pth`` whenever the mean validation
    loss improves. ``last_model.pth`` is always written when the loop ends.

    Args:
        trainloader: Iterable of ``(inputs, labels)`` training batches; must support ``len()``.
        val_loader: Iterable of ``(inputs, labels)`` validation batches; must support ``len()``.
        model: Module to train. Besides being callable it must provide
            ``predict_with_known_class(inputs)``, whose output is passed to the
            accuracy metric as the predictions.
        loss_function: Callable ``(preds, labels) -> loss`` tensor.
        early_stopper: Object with ``early_stop(val_loss)`` returning a truthy
            value when training should stop, and a ``watched_metrics`` attribute
            that is printed as the minimum validation loss.
        optimizer: Optimizer that updates ``model``'s parameters.
        max_epochs: Upper bound on the number of epochs.
        save_path: Directory that receives the checkpoints; created if missing.

    Returns:
        ``(model, best_model_path)``. ``best_model_path`` is ``None`` when no
        epoch produced a validation loss below the initial ``inf`` (for example
        ``max_epochs=0``).

    Raises:
        ValueError: If either loader yields no batches.
    """
    # Fail fast instead of hitting a ZeroDivisionError when averaging the loss.
    if len(trainloader) == 0 or len(val_loader) == 0:
        raise ValueError("trainloader and val_loader must each contain at least one batch")

    # The caller may pass a directory other than the one the notebook created.
    os.makedirs(save_path, exist_ok=True)

    num_classes = len(list_label)  # label mapping defined at notebook level
    best_vloss = float("inf")
    best_model_path = None

    for epoch in range(max_epochs):
        # Training phase
        model.train(True)
        running_loss = 0.0
        acc_train = Accuracy(num_classes=num_classes, task="multiclass")  # fresh metric per epoch

        for inputs, labels in trainloader:
            optimizer.zero_grad()  # Reset gradients
            preds = model(inputs)  # Forward pass
            loss = loss_function(preds, labels)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update model parameters

            # This second forward pass only feeds the metric, so no autograd
            # graph is needed for it.
            with torch.no_grad():
                acc_train.update(model.predict_with_known_class(inputs), labels)
            running_loss += loss.item()  # Sum training loss

        avg_loss = running_loss / len(trainloader)  # Mean loss per batch

        # Validation phase
        model.eval()
        running_vloss = 0.0
        acc_val = Accuracy(num_classes=num_classes, task="multiclass")

        with torch.no_grad():
            for vinputs, vlabels in val_loader:
                preds = model(vinputs)  # Forward pass
                vloss = loss_function(preds, vlabels)  # Compute validation loss
                acc_val.update(model.predict_with_known_class(vinputs), vlabels)
                running_vloss += vloss.item()  # Sum validation loss

        avg_vloss = running_vloss / len(val_loader)  # Mean loss per batch

        # Log metrics for the epoch
        print(f"Epoch {epoch}:")
        print(f"Accuracy - Train: {acc_train.compute().item():.4f}, Val: {acc_val.compute().item():.4f}")
        print(f"Loss - Train: {avg_loss:.4f}, Val: {avg_vloss:.4f}\n")

        # Save the best model based on validation loss
        if avg_vloss < best_vloss:
            best_vloss = avg_vloss
            best_model_path = os.path.join(save_path, "best_model.pth")
            print(f"Saving best model to {best_model_path}")
            torch.save(model.state_dict(), best_model_path)

        # Check if early stopping condition is met
        if early_stopper.early_stop(avg_vloss):
            print(f"Stopping training at epoch {epoch}, min val_loss: {early_stopper.watched_metrics:.4f}")
            break

    # Save the last model after training (weights from the final epoch that ran)
    model_path = os.path.join(save_path, "last_model.pth")
    print(f"Saving last model to {model_path}")
    torch.save(model.state_dict(), model_path)

    return model, best_model_path

In [5]:
# Build the network first, then the objective, stopping rule and optimizer.
model = NeuralNetwork()
loss_function = nn.CrossEntropyLoss()
early_stopper = EarlyStopper(min_delta=0.01, patience=30)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Run training; keep the trained model and the path of the best checkpoint
# for the evaluation cell.
model, best_model_path = train(
    trainloader=trainloader,
    val_loader=val_loader,
    model=model,
    loss_function=loss_function,
    early_stopper=early_stopper,
    optimizer=optimizer,
)

Epoch 0:
Accuracy - Train: 0.3385, Val: 0.5125
Loss - Train: 1.5520, Val: 1.4078

Saving best model to models\best_model.pth
Epoch 1:
Accuracy - Train: 0.5086, Val: 0.7159
Loss - Train: 1.1989, Val: 0.7027

Saving best model to models\best_model.pth
Epoch 2:
Accuracy - Train: 0.7751, Val: 0.8798
Loss - Train: 0.6705, Val: 0.3357

Saving best model to models\best_model.pth
Epoch 3:
Accuracy - Train: 0.9232, Val: 0.9736
Loss - Train: 0.3201, Val: 0.1479

Saving best model to models\best_model.pth
Epoch 4:
Accuracy - Train: 0.9830, Val: 0.9793
Loss - Train: 0.1463, Val: 0.0610

Saving best model to models\best_model.pth
Epoch 5:
Accuracy - Train: 0.9851, Val: 0.9798
Loss - Train: 0.0761, Val: 0.0533

Saving best model to models\best_model.pth
Epoch 6:
Accuracy - Train: 0.9890, Val: 0.9846
Loss - Train: 0.0604, Val: 0.0440

Saving best model to models\best_model.pth
Epoch 7:
Accuracy - Train: 0.9881, Val: 0.9880
Loss - Train: 0.0465, Val: 0.0343

Saving best model to models\best_model.pth


In [6]:
# Initialize and load the best model.
# The checkpoint was written with torch.save(model.state_dict(), ...), i.e. it
# holds only tensors, so the restricted unpickler (weights_only=True) is
# sufficient and avoids executing arbitrary pickled code.
network = NeuralNetwork()
network.load_state_dict(torch.load(best_model_path, weights_only=True))

# Set the model to evaluation mode
network.eval()

# Initialize the accuracy tracker for multi-class classification
acc_test = Accuracy(num_classes=len(list_label), task="multiclass")

# Iterate through the test set and compute accuracy; gradients are not needed
# for evaluation, so skip building the autograd graph.
# NOTE(review): the raw network outputs are fed to the metric here, whereas
# train() feeds model.predict_with_known_class(...) — confirm both are intended.
with torch.no_grad():
    for test_input, test_label in test_loader:
        preds = network(test_input)
        acc_test.update(preds, test_label)

# Print the model class name and accuracy
print(network.__class__.__name__)
print(f"Accuracy of model: {acc_test.compute().item():.4f}")

NeuralNetwork
Accuracy of model: 0.9899
