# **SETUP**

In [1]:

!pip install torch-geometric lightning wandb gymnasium
!pip install -U plotly
!pip install -U scikit-learn
!pip install -U kaleido
!pip install "notebook>=5.3" "ipywidgets>=7.5"
!mkdir Data
!mkdir Plots

mkdir: cannot create directory ‘Data’: File exists
mkdir: cannot create directory ‘Plots’: File exists


In [2]:
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data
import lightning as L
import torch
import torch.nn as nn
import wandb as wndb
from torch_geometric.nn import GATConv
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
import numpy as np
from sklearn.model_selection import train_test_split
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
import gymnasium as gym
from gymnasium.envs.registration import register
import pdb
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# **MODEL AND RELATED STUFF**

In [3]:
class GreedyWorm(nn.Module):

    def __init__(self):
        super(GreedyWorm, self).__init__()

    def forward(self, data):
        graphs, positions = data
        return [graphs[i].x[positions[i]] for i in range(len(positions))]


In [4]:

class WormUpExamplesDataset(Dataset):

    def __init__(self, graphs: list[Data], actions: list[int], rewards: list[int]):
        self.data = list(zip(graphs, actions, rewards))

    def __getitem__(self, idx: int):
        return self.data[idx]

    def __len__(self):
        return len(self.data)

    def collate(self, data: list):
        graphs = []
        actions = []
        rewards = []
        for el in data:
            graphs.append(el[0])
            actions.append(el[1])
            rewards.append(el[2])
        return graphs, actions, torch.tensor(rewards, dtype=torch.float)

    def get_dataloader(self, batch_size: int, shuffle: bool = False):
        return DataLoader(self, batch_size=batch_size, shuffle=shuffle, collate_fn=self.collate)




In [5]:
class GraphNN(nn.Module):

    def __init__(self, in_size, out_size, h_size, deep, activation, device="cpu"):
        super(GraphNN, self).__init__()
        self.activation = activation
        if deep == 1:
            self.layers = [GATConv(in_size, out_size)]  #.to(device)]
        else:
            self.layers = [GATConv(in_size, h_size)]  #.to(device)]
            for _ in range(deep - 2):
                self.layers.append(GATConv(h_size, h_size))  #.to(device))
            self.layers.append(GATConv(h_size, out_size))  #.to(device))

    def forward(self, data):
        edge_index = data[1]
        x = data[0]
        for layer in self.layers[:-1]:
            x = self.activation(layer(x, edge_index))
        #breakpoint()
        return self.layers[-1](x, edge_index)



In [6]:
class LinearNN(nn.Module):
    def __init__(self, in_size, out_size, h_size, deep, activation):
        super(LinearNN, self).__init__()
        if deep == 1:
            layers = [nn.Linear(in_size, out_size), activation]
        else:
            layers = [nn.Linear(in_size, h_size), activation]
            for _ in range(deep - 2):
                layers.append(nn.Linear(h_size, h_size))
                layers.append(activation)
            layers.append(nn.Linear(h_size, out_size))
        self.linear = nn.Sequential(*layers)

    def forward(self, data):
        return self.linear(data)

In [7]:

class IntelligentWorm(L.LightningModule):

    def __init__(self, linear: nn.Module, gnn: nn.Module, lr: float = 1e-3):
        super(IntelligentWorm, self).__init__()
        self.encoder = gnn
        self.decoder = linear
        self.loss = nn.MSELoss()
        self.validation_predictions = []
        self.validation_targets = []
        self.validation_loss = []
        self.train_loss = []
        self.best_val_loss = 100000000
        self.best_mae = 100000000
        self.best_rmse = 1000000000
        self.best_r2 = -10000000
        self.best_model = 0
        self.lr = lr

    def update_best_stats(self, val_loss, mae, rmse, r2):
        self.best_val_loss = val_loss
        self.best_mae = mae
        self.best_rmse = rmse
        self.best_r2 = r2

    def forward(self, data):
        actions = data[1]
        graphs = data[0]
        embeddings = []
        for i in range(len(graphs)):
            g = graphs[i]
            x = self.encoder((g.x, g.edge_index))
            embeddings.append(x[actions[i]])
        embeddings = torch.stack(embeddings)
        pred = self.decoder(embeddings)
        return pred.squeeze()

    def training_step(self, batch, batch_idx):
        graphs, actions, rewards = batch
        predictions = self.forward((graphs, actions))
        train_loss = self.loss(predictions, rewards)
        self.train_loss.append(train_loss)

        return train_loss

    def validation_step(self, batch, batch_idx):
        graphs, actions, rewards = batch
        #breakpoint()
        self.validation_targets.append(rewards)
        predictions = self.forward((graphs, actions))
        validation_loss = self.loss(predictions, rewards)
        self.validation_predictions.append(predictions)
        self.validation_loss.append(validation_loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer



In [17]:
class WormCallback(L.Callback):

    def on_train_epoch_end(self, trainer, pl_module):

        epoch_mean = float(torch.stack(pl_module.train_loss).mean())
        print("training_epoch_mean loss = ", epoch_mean)
        # free up the memory
        pl_module.train_loss.clear()
        try:
            wndb.log({"train_loss": epoch_mean})
        except:
            pass

    def on_validation_epoch_end(self, trainer, pl_module: IntelligentWorm):
        #breakpoint()
        predictions = torch.cat(pl_module.validation_predictions, dim=0).squeeze()
        targets = torch.cat(pl_module.validation_targets, dim=0).squeeze()
        #
        r2 = r2_score(predictions, targets)
        mae = mean_absolute_error(predictions, targets)
        rmse = root_mean_squared_error(predictions, targets)
        mean_loss = float(torch.stack(pl_module.validation_loss).mean())

        pl_module.validation_loss.clear()
        pl_module.validation_predictions.clear()
        pl_module.validation_targets.clear()

        print("val_loss = ", mean_loss)
        print("mean_absolute_error = ", mae)
        print("root_mean_squared_error = ", rmse)
        print("r2 = ", r2)
        count = 0
        count += 1 if mean_loss < pl_module.best_val_loss else 0
        count += 1 if mae < pl_module.best_mae else 0
        count += 1 if rmse < pl_module.best_rmse else 0
        count += 1 if r2 > pl_module.best_r2 else 0
        if count >= 3 or count == 2 and mean_loss < pl_module.best_val_loss:
            pl_module.update_best_stats(mean_loss, mae, rmse, r2)
            pl_module.best_model -= 1
            pl_module.log("best_model", pl_module.best_model)
            gnn_state_dict = pl_module.encoder.state_dict()
            linear_state_dict = pl_module.decoder.state_dict()

            # Salva gli state dict su file
            torch.save(gnn_state_dict, "Model/gnn_checkpoint.pth")
            torch.save(linear_state_dict, "Model/linear_checkpoint.pth")
        else:
            pl_module.log("best_model", pl_module.best_model + 1)

        wndb.log({"val_loss": mean_loss, "mean_absolute_error": mae, "root_mean_squared_error": rmse, "r2": r2})




# **AGENT**

In [9]:
MODEL_VERSION = "model_V1_"
USE_SINGLE_WORM = False
USE_WORMHOLES = False

WORMHOLES = "wormholes_enabled_" if USE_WORMHOLES else "wormholes_disabled_"
WORMS = "single_worm_" if USE_SINGLE_WORM else "multi_worms_"



class WormsMasterAgent:

    def __init__(
            self,
            linear_part: nn.Module,
            gnn_part: nn.Module,
            initial_epsilon: float,
            epsilon_decay: float,
            final_epsilon: float,
            learning_rate: float = 1,
            discount_factor: float = 0.95,
            decay_after: int = 1,
            #trainer params
            batch_size: int = 8,
            episodes_for_batch: int = 20,
            trainer_deterministic: bool = True,
            trainer_max_epochs: int = 20,
            trainer_accelerator: str = "cpu"

    ):
        self.batch_size = batch_size
        self.episodes_for_batch = episodes_for_batch
        self.episode = 0
        self.linear = linear_part
        self.gnn = gnn_part
        self.learning_model = IntelligentWorm(linear_part, gnn_part)
        self.model = GreedyWorm()
        self.lr = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = initial_epsilon
        self.epsilon_decay = epsilon_decay
        self.final_epsilon = final_epsilon
        self.decay_after = decay_after
        self.checkpoint_callback = ModelCheckpoint(dirpath="Model/", filename="worms_model", save_top_k=1,
                                                   mode='min', monitor='best_model')
        self.worm_callback = WormCallback()
        self.early_stopping_callback = EarlyStopping(monitor='best_model', mode='min', patience=3)
        self.trainer = L.Trainer(deterministic=trainer_deterministic,
                                 max_epochs=trainer_max_epochs, accelerator=trainer_accelerator,
                                 callbacks=[self.checkpoint_callback, self.worm_callback, self.early_stopping_callback])
        self.actual_rewards = np.array([])
        self.actual_observations = []
        self.actual_actions = []
        self.model_training_data = {
            "actions": [],
            "observations": [],
            "rewards": []
        }

    def get_action(self, observation, available_actions: list[int]) -> int:
        graph = observation["field"]
        worms = observation["worms"]

        self.actual_rewards = np.append(self.actual_rewards, 0)
        # with probability epsilon return a random action to explore the environment
        if np.random.random() < self.epsilon:
            i = np.random.randint(0, high=len(available_actions))
            return available_actions[i]

        # with probability (1 - epsilon) act greedily (exploit)
        else:
            self.model.eval()
            with torch.no_grad():
                idx = np.argmax(self.model(([graph] * len(available_actions), available_actions)))
                return available_actions[idx]

    def update(self, observation, action: int, reward: int, terminated: bool):
        #breakpoint()
        graph = observation["field"]
        worms = observation["worms"]
        self.actual_observations.append(graph)
        self.actual_actions.append(action)
        self.actual_rewards += self.lr * reward
        episode_reward = 0.0
        len_worms_placed = 0.0
        if terminated:
            episode_reward = float(self.actual_rewards[0])
            len_worms_placed = len(self.actual_actions)
            print(f"total reward = {episode_reward}, actions list = {self.actual_actions}")
            self.episode += 1
            self.model_training_data["actions"] += self.actual_actions.copy()
            self.actual_actions = []
            self.model_training_data["observations"] += self.actual_observations.copy()
            self.actual_observations = []
            self.model_training_data["rewards"] += self.actual_rewards.tolist()
            self.actual_rewards = np.array([])
            if self.episode % self.decay_after == 0:
                self.decay_epsilon()
            print(f"starting episode {self.episode}, epsilon = {self.epsilon}")
            if self.episode % self.episodes_for_batch == 0:
                self.train_model()
                self.model = self.learning_model
        return episode_reward, len_worms_placed

    def decay_epsilon(self):
        self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)

    def prepare_data(self):
        #breakpoint()
        print("preparing data to train")
        x = list(zip(self.model_training_data["observations"], self.model_training_data["actions"]))
        y = self.model_training_data["rewards"]
        x_train, x_val, train_rewards, val_rewards = train_test_split(x, y, test_size=0.2)
        del x, y
        train_graphs = []
        train_actions = []
        for el in x_train:
            train_graphs.append(el[0])
            train_actions.append(el[1])
        val_graphs = []
        val_actions = []
        for el in x_val:
            val_graphs.append(el[0])
            val_actions.append(el[1])
        train_dataset = WormUpExamplesDataset(train_graphs, train_actions, train_rewards)
        val_dataset = WormUpExamplesDataset(val_graphs, val_actions, val_rewards)
        train_dataloader = train_dataset.get_dataloader(self.batch_size, shuffle=True)
        val_dataloader = val_dataset.get_dataloader(self.batch_size, shuffle=False)
        return train_dataloader, val_dataloader

    def train_model(self):
        print("begin training")
        train_dataloader, val_dataloader = self.prepare_data()
        wndb.init(
            # set the wandb project where this run will be logged
            project="WormsWarmingUp",
            name= MODEL_VERSION + WORMHOLES + WORMS +  "training_ep_" + str(self.episode),
            # track hyperparameters and run metadata
            config={
                "learning_rate": self.learning_model.lr,
                "architecture": str(self.learning_model),
                "batch": self.episode // self.episodes_for_batch
            }
        )
        self.trainer.fit(self.learning_model, train_dataloader, val_dataloader)
        wndb.finish()
        gnn_state_dict = torch.load("Model/gnn_checkpoint.pth")
        linear_state_dict = torch.load("Model/linear_checkpoint.pth")
        self.linear.load_state_dict(linear_state_dict)
        self.gnn.load_state_dict(gnn_state_dict)
        self.learning_model = IntelligentWorm.load_from_checkpoint(checkpoint_path="Model/worms_model.ckpt", linear=self.linear, gnn=self.gnn)
        print("finished training")


# **INITIALIZE MODEL, AGENT AND ENVIRONMENT**

In [18]:

# device = "gpu" if
gnn_part = GraphNN(1,64,0,1,nn.ReLU())
linear_part = LinearNN(64,1,128,2, nn.ReLU())




In [19]:

agent = WormsMasterAgent(linear_part, gnn_part,0.95,0.005,0.15)


INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [20]:
TABLE="00-example.txt"

register(
    id="worms_env",
    entry_point="worms_env:WormsEnv",
    max_episode_steps=300,
)
environment = gym.make('worms_env', env_file="Data/" + TABLE, render_mode="human", use_single_worm=USE_SINGLE_WORM, enable_wormholes=USE_WORMHOLES)



  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [13]:
wndb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmonteleone-1883922[0m ([33mmonteleone[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [21]:
!rm -r Model
!mkdir Model

In [25]:
def compute_mean_rewards_to_print(rewards):
  #for x in rewards:
    # if x < 0:
    #   return 0
  return sum(rewards) / len(rewards)


# **TRAINING**

In [22]:
MAX_EPISODES = 200

ACCUMULATE = 5
accumulate_reward_means = []
accumulate_worm_len_means = []
last_rewards = [0 for _ in range(ACCUMULATE)]
last_len_worms_placed = [0 for _ in range(ACCUMULATE)]
done = False
obs, info = environment.reset()
print(f"starting episode {agent.episode}, epsilon = {agent.epsilon}")
# play one episode
while agent.episode < MAX_EPISODES:

    action = agent.get_action(info, environment.available_movements)
    obs, reward, terminated, truncated, info = environment.step(action)
    episode_reward, len_worms_placed = agent.update(info, action, reward, terminated)

    if terminated:
        obs, info = environment.reset()
        last_rewards[agent.episode % ACCUMULATE] = episode_reward
        last_len_worms_placed[agent.episode % ACCUMULATE] = len_worms_placed
        if agent.episode > ACCUMULATE:

          mean_reward = compute_mean_rewards_to_print(last_rewards)
          mean_len_worms_placed = sum(last_len_worms_placed) / len(last_len_worms_placed)
          accumulate_reward_means.append(mean_reward)
          accumulate_worm_len_means.append(mean_len_worms_placed)



  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


starting episode 0, epsilon = 0.95


  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


total reward = 81.0, actions list = [40, 30, 20, 10, 0, 1, 25, 26, 36, 35, 34, 33, 23, 55, 45, 44, 43, 53, 18, 17, 27, 5, 4, 3]
starting episode 1, epsilon = 0.945
total reward = 122.0, actions list = [39, 49, 48, 38, 28, 27, 51, 52, 42, 32, 31, 21, 11, 35, 25, 26, 36, 37, 58, 57, 47, 10, 20, 30]
starting episode 2, epsilon = 0.94
total reward = 100.0, actions list = [54, 53, 43, 44, 34, 24, 26, 25, 15, 16, 17, 27, 28, 50, 51, 41, 40, 30, 5, 6, 7, 47, 48, 58]
starting episode 3, epsilon = 0.9349999999999999
i got  -192
total reward = -134.0, actions list = [5, 4, 14, 15, 16, 6, 50, 40, 41, 42, 52, 51]
starting episode 4, epsilon = 0.9299999999999999
total reward = 105.0, actions list = [5, 15, 14, 13, 23, 33, 55, 56, 57, 58, 59, 49, 39, 30, 20, 10, 0, 1, 22, 32, 42, 26, 16, 17]
starting episode 5, epsilon = 0.9249999999999999
total reward = 91.0, actions list = [54, 44, 45, 55, 56, 46, 22, 23, 24, 25, 15, 5, 4, 9, 19, 18, 8, 7, 34, 33, 32, 38, 37, 27]
starting episode 6, epsilon = 0.91

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,72.10651
r2,-142.91454
root_mean_squared_error,88.85382
val_loss,7895.00244


INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  7870.56640625
mean_absolute_error =  77.90738
root_mean_squared_error =  88.71621
r2 =  -1434164.3523421003


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  6461.45947265625
mean_absolute_error =  61.48914
root_mean_squared_error =  80.40482
r2 =  -97.31975619354728
training_epoch_mean loss =  6370.92919921875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  6160.43115234375
mean_absolute_error =  57.567703
root_mean_squared_error =  78.526596
r2 =  -193.35524980605473
training_epoch_mean loss =  5759.14013671875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  6012.21875
mean_absolute_error =  54.834507
root_mean_squared_error =  77.59668
r2 =  -96.90268016863396
training_epoch_mean loss =  5492.310546875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  5992.62158203125
mean_absolute_error =  54.497593
root_mean_squared_error =  77.48539
r2 =  -61.55585880145337
training_epoch_mean loss =  5539.14404296875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  6044.13134765625
mean_absolute_error =  53.573208
root_mean_squared_error =  77.802475
r2 =  -54.62879547466068
training_epoch_mean loss =  5434.033203125


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  6032.26123046875
mean_absolute_error =  53.83906
root_mean_squared_error =  77.72356
r2 =  -64.58774167927457
training_epoch_mean loss =  5690.23291015625


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  6034.794921875
mean_absolute_error =  53.800564
root_mean_squared_error =  77.73856
r2 =  -65.27083130654042
training_epoch_mean loss =  5381.29345703125


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded\r'), FloatProgress(value=0.10666414951859543, max=1.…

0,1
mean_absolute_error,█▃▂▁▁▁▁▁
r2,▁███████
root_mean_squared_error,█▃▂▁▁▁▁▁
train_loss,█▄▂▂▁▃▁
val_loss,█▃▂▁▁▁▁▁

0,1
mean_absolute_error,53.80056
r2,-65.27083
root_mean_squared_error,77.73856
train_loss,5381.29346
val_loss,6034.79492


finished training


  logger.warn(


total reward = 124.0, actions list = [38, 28, 29, 19, 9, 8, 42, 52, 53, 43, 33, 34, 44, 15, 16, 6, 7, 17, 39, 49, 48, 30, 20, 10]
starting episode 21, epsilon = 0.8449999999999999
total reward = 102.0, actions list = [45, 55, 54, 44, 34, 24, 40, 41, 31, 30, 20, 21, 22, 18, 19, 9, 8, 7, 29, 39, 49, 10, 0, 1]
starting episode 22, epsilon = 0.8399999999999999
total reward = 114.0, actions list = [48, 49, 39, 29, 28, 38, 25, 15, 16, 17, 27, 26, 36, 35, 34, 44, 43, 42, 14, 24, 23, 6, 5, 4]
starting episode 23, epsilon = 0.8349999999999999
total reward = 107.0, actions list = [2, 1, 11, 21, 31, 32, 56, 46, 47, 37, 36, 26, 27, 9, 8, 7, 17, 18, 54, 55, 45, 16, 6, 5]
starting episode 24, epsilon = 0.8299999999999998
total reward = 119.0, actions list = [23, 22, 32, 42, 41, 51, 9, 19, 18, 28, 38, 39, 29, 40, 30, 20, 21, 11, 52, 53, 43, 59, 49, 48]
starting episode 25, epsilon = 0.8249999999999998
total reward = 116.0, actions list = [39, 29, 28, 38, 37, 27, 47, 46, 56, 57, 58, 59, 49, 24, 25, 35

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  7507.98095703125
mean_absolute_error =  58.045208
root_mean_squared_error =  86.648605
r2 =  -78.46078196741392


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,58.04521
r2,-78.46078
root_mean_squared_error,86.64861
val_loss,7507.98096


finished training


  logger.warn(


total reward = 120.0, actions list = [32, 42, 52, 53, 54, 55, 1, 2, 12, 11, 21, 20, 10, 29, 28, 27, 37, 36, 39, 49, 59, 9, 8, 7]
starting episode 41, epsilon = 0.7449999999999998
total reward = 104.0, actions list = [56, 57, 47, 48, 58, 59, 19, 18, 28, 29, 39, 38, 37, 15, 14, 4, 5, 6, 43, 44, 34, 0, 1, 11]
starting episode 42, epsilon = 0.7399999999999998
total reward = 112.0, actions list = [9, 19, 29, 39, 49, 48, 34, 33, 32, 31, 30, 40, 50, 27, 28, 18, 17, 16, 59, 58, 57, 10, 11, 12]
starting episode 43, epsilon = 0.7349999999999998
i got  -192
total reward = -104.0, actions list = [9, 19, 18, 28, 29, 39, 31, 21, 22, 12, 13, 23, 33, 52, 42, 41, 40, 50, 51]
starting episode 44, epsilon = 0.7299999999999998
total reward = 110.0, actions list = [9, 8, 18, 28, 27, 26, 57, 56, 46, 45, 44, 43, 42, 17, 16, 15, 25, 24, 0, 10, 20, 19, 29, 39]
starting episode 45, epsilon = 0.7249999999999998
i got  -192
total reward = -118.0, actions list = [7, 8, 18, 28, 29, 19, 15, 25, 24, 34, 35, 36, 46, 0

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  4812.3515625
mean_absolute_error =  60.553642
root_mean_squared_error =  69.37112
r2 =  -45.481510992300805


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,60.55364
r2,-45.48151
root_mean_squared_error,69.37112
val_loss,4812.35156


finished training


  logger.warn(


total reward = 124.0, actions list = [9, 19, 18, 17, 7, 8, 52, 51, 50, 40, 41, 31, 32, 55, 56, 57, 58, 59, 26, 16, 6, 39, 49, 48]
starting episode 61, epsilon = 0.6449999999999997
total reward = 102.0, actions list = [31, 41, 42, 52, 51, 50, 46, 56, 55, 45, 35, 25, 15, 16, 26, 27, 17, 18, 4, 3, 13, 5, 6, 7]
starting episode 62, epsilon = 0.6399999999999997
i got  -192
total reward = -104.0, actions list = [9, 19, 18, 28, 29, 39, 52, 53, 43, 42, 41, 51, 50, 37, 47, 48, 38]
starting episode 63, epsilon = 0.6349999999999997
total reward = 128.0, actions list = [9, 8, 18, 28, 29, 19, 52, 53, 43, 42, 41, 51, 50, 17, 27, 26, 25, 35, 37, 36, 46, 16, 15, 5]
starting episode 64, epsilon = 0.6299999999999997
i got  -192
total reward = -108.0, actions list = [38, 28, 29, 39, 49, 59, 45, 44, 43, 33, 23, 13, 12, 51, 52, 42, 32, 31, 40, 41]
starting episode 65, epsilon = 0.6249999999999997
total reward = 121.0, actions list = [9, 19, 29, 39, 38, 48, 27, 17, 18, 8, 7, 6, 5, 4, 14, 24, 25, 26, 36, 37,

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  5159.677734375
mean_absolute_error =  54.05352
root_mean_squared_error =  71.830894
r2 =  -42.053898445349056


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,54.05352
r2,-42.0539
root_mean_squared_error,71.83089
val_loss,5159.67773


finished training


  logger.warn(


total reward = 123.0, actions list = [11, 1, 0, 10, 20, 21, 9, 19, 29, 39, 49, 48, 47, 42, 43, 53, 54, 44, 52, 51, 41, 15, 14, 13]
starting episode 81, epsilon = 0.5449999999999996
total reward = 117.0, actions list = [9, 19, 29, 39, 38, 48, 40, 30, 31, 21, 11, 1, 0, 34, 33, 32, 22, 23, 42, 52, 51, 49, 59, 58]
starting episode 82, epsilon = 0.5399999999999996
total reward = 109.0, actions list = [57, 56, 55, 45, 46, 47, 26, 25, 15, 16, 6, 5, 4, 1, 11, 12, 13, 3, 27, 37, 36, 9, 19, 18]
starting episode 83, epsilon = 0.5349999999999996
total reward = 136.0, actions list = [54, 53, 43, 42, 52, 51, 3, 2, 12, 11, 10, 20, 30, 9, 19, 18, 28, 29, 39, 49, 59, 26, 27, 17]
starting episode 84, epsilon = 0.5299999999999996
i got  -192
total reward = -139.0, actions list = [36, 26, 16, 15, 5, 4, 9, 8, 18, 17, 7, 6]
starting episode 85, epsilon = 0.5249999999999996
total reward = 121.0, actions list = [9, 19, 29, 39, 49, 59, 54, 53, 43, 42, 52, 51, 41, 26, 16, 15, 5, 4, 50, 40, 30, 45, 55, 56]
start

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  6251.3583984375
mean_absolute_error =  59.10254
root_mean_squared_error =  79.06553
r2 =  -44.0301628121251


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,59.10254
r2,-44.03016
root_mean_squared_error,79.06553
val_loss,6251.3584


finished training


  logger.warn(


i got  -192
total reward = -159.0, actions list = [9, 19, 18, 28, 38, 39, 29]
starting episode 101, epsilon = 0.4449999999999995
total reward = 117.0, actions list = [25, 24, 14, 15, 5, 4, 9, 8, 18, 19, 29, 39, 49, 0, 10, 11, 21, 22, 38, 28, 27, 52, 53, 54]
starting episode 102, epsilon = 0.4399999999999995
total reward = 132.0, actions list = [9, 19, 29, 39, 38, 48, 52, 51, 50, 40, 41, 42, 32, 49, 59, 58, 57, 56, 15, 5, 6, 16, 26, 36]
starting episode 103, epsilon = 0.4349999999999995
total reward = 125.0, actions list = [56, 57, 58, 48, 47, 46, 3, 13, 12, 11, 10, 0, 1, 9, 19, 29, 39, 49, 52, 42, 43, 15, 16, 26]
starting episode 104, epsilon = 0.4299999999999995
total reward = 132.0, actions list = [9, 8, 18, 19, 29, 39, 52, 51, 41, 40, 30, 31, 21, 59, 49, 48, 38, 37, 15, 5, 6, 42, 32, 22]
starting episode 105, epsilon = 0.4249999999999995
total reward = 137.0, actions list = [9, 8, 18, 19, 29, 39, 50, 51, 52, 42, 32, 31, 41, 59, 49, 48, 58, 57, 15, 14, 4, 5, 6, 16]
starting episode 1

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  4431.23681640625
mean_absolute_error =  48.74517
root_mean_squared_error =  66.56754
r2 =  -61.366593337051945


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,48.74517
r2,-61.36659
root_mean_squared_error,66.56754
val_loss,4431.23682


finished training


  logger.warn(


total reward = 124.0, actions list = [43, 44, 45, 46, 56, 55, 9, 19, 29, 39, 49, 59, 58, 52, 51, 50, 40, 41, 42, 32, 31, 26, 27, 37]
starting episode 121, epsilon = 0.3449999999999994
total reward = 127.0, actions list = [9, 19, 29, 39, 49, 48, 52, 51, 41, 42, 32, 33, 43, 15, 5, 6, 16, 26, 12, 13, 3, 47, 37, 27]
starting episode 122, epsilon = 0.3399999999999994
total reward = 133.0, actions list = [38, 28, 18, 19, 29, 39, 9, 8, 7, 17, 16, 6, 5, 52, 42, 32, 31, 30, 59, 49, 48, 51, 50, 40]
starting episode 123, epsilon = 0.3349999999999994
total reward = 125.0, actions list = [9, 8, 18, 19, 29, 28, 52, 51, 41, 42, 32, 33, 23, 13, 12, 11, 1, 2, 59, 49, 39, 15, 25, 26]
starting episode 124, epsilon = 0.3299999999999994
total reward = 136.0, actions list = [9, 8, 18, 19, 29, 39, 14, 15, 5, 6, 16, 26, 36, 10, 11, 12, 22, 32, 2, 3, 4, 52, 42, 41]
starting episode 125, epsilon = 0.3249999999999994
total reward = 138.0, actions list = [9, 8, 18, 19, 29, 39, 52, 51, 41, 31, 32, 42, 43, 59, 49, 

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  5839.8271484375
mean_absolute_error =  51.47076
root_mean_squared_error =  76.41876
r2 =  -50.32803119746918


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,51.47076
r2,-50.32803
root_mean_squared_error,76.41876
val_loss,5839.82715


finished training


  logger.warn(


total reward = 133.0, actions list = [9, 19, 18, 28, 29, 39, 52, 51, 41, 40, 30, 31, 32, 38, 48, 49, 59, 58, 26, 27, 17, 15, 16, 6]
starting episode 141, epsilon = 0.24499999999999933
total reward = 138.0, actions list = [9, 19, 29, 39, 49, 59, 22, 32, 42, 52, 51, 41, 31, 15, 5, 6, 16, 26, 53, 43, 44, 56, 57, 58]
starting episode 142, epsilon = 0.23999999999999932
i got  -192
total reward = -74.0, actions list = [9, 19, 29, 39, 38, 48, 5, 15, 16, 26, 36, 37, 47, 42, 52, 53, 43, 44, 56, 57, 58, 59, 49]
starting episode 143, epsilon = 0.23499999999999932
total reward = 126.0, actions list = [9, 19, 29, 39, 38, 48, 52, 42, 43, 53, 54, 55, 56, 49, 59, 58, 57, 47, 15, 5, 6, 26, 25, 24]
starting episode 144, epsilon = 0.22999999999999932
total reward = 128.0, actions list = [9, 8, 18, 28, 29, 19, 52, 51, 50, 40, 41, 42, 32, 59, 58, 57, 56, 46, 49, 39, 38, 15, 5, 4]
starting episode 145, epsilon = 0.2249999999999993
total reward = 132.0, actions list = [9, 19, 29, 28, 27, 26, 42, 52, 53, 43, 

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  5900.794921875
mean_absolute_error =  47.57679
root_mean_squared_error =  76.816635
r2 =  -101.94997324625841


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,47.57679
r2,-101.94997
root_mean_squared_error,76.81664
val_loss,5900.79492


finished training


  logger.warn(


i got  -192
total reward = -113.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 31, 32, 33, 43, 42]
starting episode 161, epsilon = 0.15
total reward = 137.0, actions list = [9, 19, 29, 28, 27, 26, 52, 42, 41, 51, 50, 40, 30, 59, 49, 39, 38, 48, 15, 16, 6, 5, 4, 3]
starting episode 162, epsilon = 0.15
total reward = 131.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 42, 32, 31, 21, 15, 16, 6, 5, 4, 26, 36, 37, 56, 57, 47]
starting episode 163, epsilon = 0.15
total reward = 131.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 42, 32, 31, 21, 15, 5, 6, 16, 26, 11, 10, 20, 56, 55, 45]
starting episode 164, epsilon = 0.15
total reward = 137.0, actions list = [9, 19, 18, 28, 29, 39, 42, 52, 53, 43, 33, 32, 31, 59, 49, 48, 58, 57, 15, 5, 6, 51, 41, 40]
starting episode 165, epsilon = 0.15
total reward = 139.0, actions list = [9, 8, 18, 19, 29, 28, 42, 52, 51, 41, 31, 32, 22, 26, 27, 37, 38, 39, 59, 49, 48, 15, 5, 6]
starting episode 166, epsilon = 0.15
total reward = 136.0,

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  8259.25
mean_absolute_error =  68.768394
root_mean_squared_error =  90.88042
r2 =  -79.50465792317242


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,68.76839
r2,-79.50466
root_mean_squared_error,90.88042
val_loss,8259.25


finished training


  logger.warn(


total reward = 124.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 42, 32, 31, 21, 2, 12, 11, 10, 0, 15, 5, 6, 34, 35, 45]
starting episode 181, epsilon = 0.15
total reward = 137.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 42, 32, 22, 23, 26, 27, 28, 18, 17, 15, 16, 6, 5, 4, 3]
starting episode 182, epsilon = 0.15
total reward = 126.0, actions list = [9, 19, 29, 39, 49, 48, 52, 42, 41, 51, 50, 40, 30, 15, 5, 6, 16, 26, 59, 58, 57, 56, 55, 45]
starting episode 183, epsilon = 0.15
total reward = 133.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 42, 32, 31, 30, 15, 5, 6, 16, 26, 56, 46, 47, 4, 3, 2]
starting episode 184, epsilon = 0.15
total reward = 142.0, actions list = [9, 19, 29, 39, 49, 59, 42, 52, 51, 41, 31, 32, 22, 15, 5, 6, 16, 26, 53, 43, 44, 48, 58, 57]
starting episode 185, epsilon = 0.15
total reward = 135.0, actions list = [9, 19, 29, 39, 49, 59, 42, 52, 53, 43, 44, 45, 55, 15, 16, 6, 5, 4, 13, 12, 11, 51, 41, 31]
starting episode 186, epsilon = 0.15


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  3706.081298828125
mean_absolute_error =  42.061497
root_mean_squared_error =  60.87759
r2 =  -35.55147048759867


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,42.0615
r2,-35.55147
root_mean_squared_error,60.87759
val_loss,3706.0813


finished training


# **PLOT RESULTS OF TRAINING**

In [26]:


trace1 = go.Scatter(x=[i for i in range(len(accumulate_reward_means))], y=accumulate_reward_means, mode='lines', name="mean_rewards")


trace2 = go.Scatter(x=[i for i in range(len(accumulate_worm_len_means))], y=accumulate_worm_len_means, mode='lines', name="mean_lengths")

fig1 = go.Figure(trace1)
fig2 = go.Figure(trace2)






# Aggiunta del layout (opzionale)
fig1.update_layout(title='reward results', xaxis_title='X', yaxis_title='Y')



# Aggiunta del layout (opzionale)
fig2.update_layout(title='episode lengths results', xaxis_title='X', yaxis_title='Y')

fig1.write_image("Plots/rewards_" + MODEL_VERSION + WORMHOLES + WORMS[:-1] + ".png")
fig1.write_image("Plots/ep_lengths_" + MODEL_VERSION + WORMHOLES + WORMS[:-1] + ".png")

# Visualizzazione del grafico
fig1.show()

fig2.show()