In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard.writer import SummaryWriter
from torch.utils.data import random_split
from datetime import datetime as dt
from tqdm import tqdm
import numpy as np
#import pandas as pd
import math
from pathlib import Path
from dataloader import *
from model import *
from typing import Optional
from timeit import default_timer as timer


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "c:\Users\lucyc\anaconda3\envs\ur_path_planning\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\Users\lucyc\anaconda3\envs\ur_path_planning\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\Users\lucyc\anaconda3\envs\ur_path_planning\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "c:\Users\lucyc\anaconda3\envs\ur_path_planning\lib\site-packages\traitlets\config\application.py", lin

In [2]:
# Run bookkeeping: create one TensorBoard log directory and one checkpoint
# directory per run, both tagged with the SAME timestamp so a set of logs can
# always be matched to the weights produced by that run. (Previously the
# timestamp was computed twice and could differ between the two folders.)
# NOTE: the variable name `datetime` is kept for backward compatibility with
# other cells; the datetime class itself is available as `dt`.
datetime = dt.now().strftime('%Y-%m-%d_%H-%M-%S')

# TensorBoard event files go to runs/<timestamp>.
log_folder = f"runs/{datetime}"
Path(log_folder).mkdir(parents=True, exist_ok=True)
writer = SummaryWriter(log_dir=log_folder)

# Model checkpoints go to trained_models/model-<timestamp>.
model_folder = Path('trained_models') / f"model-{datetime}"
model_folder.mkdir(parents=True, exist_ok=True)

In [3]:
# Pick the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
# Both `is_cuda` and `device` stay defined as notebook-level names.
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")

GPU not available, CPU used


In [4]:
# --- Training hyperparameters -------------------------------------------------
# Optimisation
LR = 1e-3                  # learning rate passed to the optimizer
EPOCH = 25                 # number of passes over the training set
BATCH_SIZE_TRAININIG = 1   # training batch size (name kept as-is: other cells reference it)

# Architecture
NUM_HIDDEN_LAYER = 1       # forwarded to the model constructor as `hidden_layer`

# Early stopping
EARLY_STOP = False         # when True, training may stop before EPOCH epochs
ES_THRESHOLD = 5           # epochs without a new best validation loss before stopping

In [9]:
# Dataset root, resolved against the current working directory.
# Path.cwd() is used instead of Path(os.getcwd()) because `os` is never imported
# explicitly in this notebook; it was only reachable if a star import leaked it.
DATA_FOLDER = Path.cwd() / 'data' / 'rob_data'

# Training samples come from the 'train' sub-folder; the validation set is read
# from the 'test' sub-folder.
# (Alternative kept for reference: load a single dataset and split it with
#  random_split(dataset_all, [0.8, 0.1, 0.1]) into train/val/test.)
train_set = UR5OptPathDataset(DATA_FOLDER / 'train')
val_set = UR5OptPathDataset(DATA_FOLDER / 'test')

train_loader = get_dataloaders(train_set, batch_size=BATCH_SIZE_TRAININIG)
val_loader = get_dataloaders(val_set, batch_size=1)

# Derive the number of waypoints from the first training sample: the target
# tensor (index 1 of a sample) is assumed to hold 6 values per waypoint
# (presumably one per UR5 joint -- confirm against the dataset definition).
first_sample = train_set[0]
num_waypts = first_sample[1].size(0) // 6  # TODO: check if it can return the right numbers
print(first_sample[0])

tensor([ 2.8449, -1.1345,  0.5934, -1.2043, -1.5883,  2.3387,  0.0042, -1.1519,
         1.3786, -2.1116, -1.5883,  2.3387])




In [10]:
# Build the network and move it to the selected device. The training loop moves
# every batch to `device`, so the model's parameters must live there as well;
# otherwise a CUDA run fails with a device-mismatch error.
model = FCNN(num_control=num_waypts, hidden_layer=NUM_HIDDEN_LAYER).to(device)

# Mean-squared-error loss between the model output and the target coefficients.
criterion = nn.MSELoss()

# Created after the model is moved so the optimizer tracks the on-device parameters.
optimizer = optim.Adam(model.parameters(), lr=LR)

In [None]:
# Train for up to EPOCH epochs. After each epoch the model is evaluated on the
# validation loader, both average losses are logged to TensorBoard, and the
# weights with the lowest validation loss so far are saved to
# model_folder / "model_low_loss.pth".
lowest_val_loss = math.inf
# Initialised up front so the non-improving branch below can never touch an
# unbound name (e.g. if the very first validation loss is NaN, the `<`
# comparison is False and the counter is incremented immediately).
early_stopping_counter = 0

for e in range(EPOCH):
    print(f"--------- Epoch {e + 1} ----------")

    model.train()
    train_losses = []
    val_losses = []

    print("---- TRAINING ----")
    for inputs, coefs in tqdm(train_loader, desc="Training"):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        inputs = inputs.to(device)
        coefs = coefs.to(device)
        # The model's return value exposes its prediction as `.logits`.
        outputs = model(inputs).logits
        train_loss = criterion(outputs, coefs)
        train_losses.append(train_loss.item())

        train_loss.backward()
        optimizer.step()

    # log to tensorboard
    avg_train_loss = sum(train_losses) / len(train_losses)
    writer.add_scalar('Loss/train', avg_train_loss, e)
    print(f'Loss: {avg_train_loss}')

    # Optional debugging aid (disabled): log weight and gradient histograms.
    # for layer_name, param in model.named_parameters():
    #     if 'weight' in layer_name and param.requires_grad:
    #         if param.grad is None:
    #             raise TypeError(f"The gradient of {layer_name} is a NoneType Object!")
    #         writer.add_histogram(f'{layer_name}/weights', param.data, e)
    #         writer.add_histogram(f'{layer_name}/gradients', param.grad, e)

    # validation loop
    print("--- VALIDATION ---")
    model.eval()
    with torch.no_grad():
        for inputs, coefs in tqdm(val_loader, desc="Validation"):
            inputs = inputs.to(device)
            # Use the targets actually unpacked from the loader; the previous
            # code referenced undefined names (`eef_pos`, `labels`) here.
            coefs = coefs.to(device)
            outputs = model(inputs).logits
            val_loss = criterion(outputs, coefs)
            val_losses.append(val_loss.item())

    # log to tensorboard
    avg_val_loss = sum(val_losses) / len(val_losses)
    writer.add_scalar('Loss/val', avg_val_loss, e)
    print(f'Loss: {round(avg_val_loss, 4)}')

    # save the model with the lowest val loss
    if avg_val_loss < lowest_val_loss:
        lowest_val_loss = avg_val_loss
        torch.save(model.state_dict(), model_folder / "model_low_loss.pth")
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1

    # `>=` rather than `==` so the stop condition cannot be stepped over.
    if EARLY_STOP and early_stopping_counter >= ES_THRESHOLD:
        print('Early Stopping')
        break

# Make sure all pending scalars are written to the event file.
writer.flush()
