# **SETUP**

In [20]:

!pip install torch-geometric lightning wandb gymnasium
!pip install -U plotly
!pip install -U scikit-learn
!pip install -U kaleido
!pip install "notebook>=5.3" "ipywidgets>=7.5"
!mkdir Data
!mkdir Plots

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1
mkdir: cannot create directory ‘Data’: File exists
mkdir: cannot create directory ‘Plots’: File exists


In [2]:
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data
import lightning as L
import torch
import torch.nn as nn
import wandb as wndb
from torch_geometric.nn import GATConv
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
import numpy as np
from sklearn.model_selection import train_test_split
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
import gymnasium as gym
from gymnasium.envs.registration import register
import pdb
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# **MODEL AND RELATED STUFF**

In [3]:
class GreedyWorm(nn.Module):

    def __init__(self):
        super(GreedyWorm, self).__init__()

    def forward(self, data):
        graphs, positions = data
        return [graphs[i].x[positions[i]] for i in range(len(positions))]


In [4]:

class WormUpExamplesDataset(Dataset):

    def __init__(self, graphs: list[Data], actions: list[int], rewards: list[int]):
        self.data = list(zip(graphs, actions, rewards))

    def __getitem__(self, idx: int):
        return self.data[idx]

    def __len__(self):
        return len(self.data)

    def collate(self, data: list):
        graphs = []
        actions = []
        rewards = []
        for el in data:
            graphs.append(el[0])
            actions.append(el[1])
            rewards.append(el[2])
        return graphs, actions, torch.tensor(rewards, dtype=torch.float)

    def get_dataloader(self, batch_size: int, shuffle: bool = False):
        return DataLoader(self, batch_size=batch_size, shuffle=shuffle, collate_fn=self.collate)




In [5]:
class GraphNN(nn.Module):

    def __init__(self, in_size, out_size, h_size, deep, activation, device="cpu"):
        super(GraphNN, self).__init__()
        self.activation = activation
        if deep == 1:
            self.layers = [GATConv(in_size, out_size)]  #.to(device)]
        else:
            self.layers = [GATConv(in_size, h_size)]  #.to(device)]
            for _ in range(deep - 2):
                self.layers.append(GATConv(h_size, h_size))  #.to(device))
            self.layers.append(GATConv(h_size, out_size))  #.to(device))

    def forward(self, data):
        edge_index = data[1]
        x = data[0]
        for layer in self.layers[:-1]:
            x = self.activation(layer(x, edge_index))
        #breakpoint()
        return self.layers[-1](x, edge_index)



In [6]:
class LinearNN(nn.Module):
    def __init__(self, in_size, out_size, h_size, deep, activation):
        super(LinearNN, self).__init__()
        if deep == 1:
            layers = [nn.Linear(in_size, out_size), activation]
        else:
            layers = [nn.Linear(in_size, h_size), activation]
            for _ in range(deep - 2):
                layers.append(nn.Linear(h_size, h_size))
                layers.append(activation)
            layers.append(nn.Linear(h_size, out_size))
        self.linear = nn.Sequential(*layers)

    def forward(self, data):
        return self.linear(data)

In [7]:

class IntelligentWorm(L.LightningModule):

    def __init__(self, linear: nn.Module, gnn: nn.Module, lr: float = 1e-3):
        super(IntelligentWorm, self).__init__()
        self.encoder = gnn
        self.decoder = linear
        self.loss = nn.MSELoss()
        self.validation_predictions = []
        self.validation_targets = []
        self.validation_loss = []
        self.train_loss = []
        self.best_val_loss = 100000000
        self.best_mae = 100000000
        self.best_rmse = 1000000000
        self.best_r2 = -1
        self.best_model = 0
        self.lr = lr

    def update_best_stats(self, val_loss, mae, rmse, r2):
        self.best_val_loss = val_loss
        self.best_mae = mae
        self.best_rmse = rmse
        self.best_r2 = r2

    def forward(self, data):
        actions = data[1]
        graphs = data[0]
        embeddings = []
        for i in range(len(graphs)):
            g = graphs[i]
            x = self.encoder((g.x, g.edge_index))
            embeddings.append(x[actions[i]])
        embeddings = torch.stack(embeddings)
        pred = self.decoder(embeddings)
        return pred.squeeze()

    def training_step(self, batch, batch_idx):
        graphs, actions, rewards = batch
        predictions = self.forward((graphs, actions))
        train_loss = self.loss(predictions, rewards)
        self.train_loss.append(train_loss)

        return train_loss

    def validation_step(self, batch, batch_idx):
        graphs, actions, rewards = batch
        #breakpoint()
        self.validation_targets.append(rewards)
        predictions = self.forward((graphs, actions))
        validation_loss = self.loss(predictions, rewards)
        self.validation_predictions.append(predictions)
        self.validation_loss.append(validation_loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer



In [8]:
class WormCallback(L.Callback):

    def on_train_epoch_end(self, trainer, pl_module):

        epoch_mean = float(torch.stack(pl_module.train_loss).mean())
        print("training_epoch_mean loss = ", epoch_mean)
        # free up the memory
        pl_module.train_loss.clear()
        try:
            wndb.log({"train_loss": epoch_mean})
        except:
            pass

    def on_validation_epoch_end(self, trainer, pl_module: IntelligentWorm):
        predictions = torch.stack(pl_module.validation_predictions).squeeze()
        targets = torch.stack(pl_module.validation_targets).squeeze()
        #breakpoint()
        r2 = r2_score(predictions, targets)
        mae = mean_absolute_error(predictions, targets)
        rmse = root_mean_squared_error(predictions, targets)
        mean_loss = float(torch.stack(pl_module.validation_loss).mean())

        pl_module.validation_loss.clear()
        pl_module.validation_predictions.clear()
        pl_module.validation_targets.clear()

        print("val_loss = ", mean_loss)
        print("mean_absolute_error = ", mae)
        print("root_mean_squared_error = ", rmse)
        print("r2 = ", r2)
        count = 0
        count += 1 if mean_loss < pl_module.best_val_loss else 0
        count += 1 if mae < pl_module.best_mae else 0
        count += 1 if rmse < pl_module.best_rmse else 0
        count += 1 if r2 > pl_module.best_r2 else 0
        if count >= 3 or count == 2 and mean_loss < pl_module.best_val_loss:
            pl_module.update_best_stats(mean_loss, mae, rmse, r2)
            pl_module.best_model -= 1
            pl_module.log("best_model", pl_module.best_model)
            gnn_state_dict = pl_module.encoder.state_dict()
            linear_state_dict = pl_module.decoder.state_dict()

            # Salva gli state dict su file
            torch.save(gnn_state_dict, "Model/gnn_checkpoint.pth")
            torch.save(linear_state_dict, "Model/linear_checkpoint.pth")
        else:
            pl_module.log("best_model", pl_module.best_model + 1)

        wndb.log({"val_loss": mean_loss, "mean_absolute_error": mae, "root_mean_squared_error": rmse, "r2": r2})




# **AGENT**

In [26]:
MODEL_VERSION = "model_V1_"
USE_SINGLE_WORM = False
USE_WORMHOLES = False

WORMHOLES = "wormholes_enabled_" if USE_WORMHOLES else "wormholes_disabled_"
WORMS = "single_worm_" if USE_SINGLE_WORM else "multi_worms_"



class WormsMasterAgent:

    def __init__(
            self,
            linear_part: nn.Module,
            gnn_part: nn.Module,
            initial_epsilon: float,
            epsilon_decay: float,
            final_epsilon: float,
            learning_rate: float = 1,
            discount_factor: float = 0.95,
            decay_after: int = 1,
            #trainer params
            batch_size: int = 8,
            episodes_for_batch: int = 20,
            trainer_deterministic: bool = True,
            trainer_max_epochs: int = 20,
            trainer_accelerator: str = "cpu"

    ):
        self.batch_size = batch_size
        self.episodes_for_batch = episodes_for_batch
        self.episode = 0
        self.linear = linear_part
        self.gnn = gnn_part
        self.learning_model = IntelligentWorm(linear_part, gnn_part)
        self.model = GreedyWorm()
        self.lr = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = initial_epsilon
        self.epsilon_decay = epsilon_decay
        self.final_epsilon = final_epsilon
        self.decay_after = decay_after
        self.checkpoint_callback = ModelCheckpoint(dirpath="Model/", filename="worms_model", save_top_k=1,
                                                   mode='min', monitor='best_model')
        self.worm_callback = WormCallback()
        self.early_stopping_callback = EarlyStopping(monitor='best_model', mode='min', patience=3)
        self.trainer = L.Trainer(deterministic=trainer_deterministic,
                                 max_epochs=trainer_max_epochs, accelerator=trainer_accelerator,
                                 callbacks=[self.checkpoint_callback, self.worm_callback, self.early_stopping_callback])
        self.actual_rewards = np.array([])
        self.actual_observations = []
        self.actual_actions = []
        self.model_training_data = {
            "actions": [],
            "observations": [],
            "rewards": []
        }

    def get_action(self, observation, available_actions: list[int]) -> int:
        graph = observation["field"]
        worms = observation["worms"]

        self.actual_rewards = np.append(self.actual_rewards, 0)
        # with probability epsilon return a random action to explore the environment
        if np.random.random() < self.epsilon:
            i = np.random.randint(0, high=len(available_actions))
            return available_actions[i]

        # with probability (1 - epsilon) act greedily (exploit)
        else:
            self.model.eval()
            with torch.no_grad():
                idx = np.argmax(self.model(([graph] * len(available_actions), available_actions)))
                return available_actions[idx]

    def update(self, observation, action: int, reward: int, terminated: bool):
        #breakpoint()
        graph = observation["field"]
        worms = observation["worms"]
        self.actual_observations.append(graph)
        self.actual_actions.append(action)
        self.actual_rewards += self.lr * reward
        episode_reward = 0.0
        len_worms_placed = 0.0
        if terminated:
            episode_reward = float(self.actual_rewards[0])
            len_worms_placed = len(self.actual_actions)
            print(f"total reward = {self.actual_rewards[0]}, actions list = {self.actual_actions}")
            self.episode += 1
            self.model_training_data["actions"] += self.actual_actions.copy()
            self.actual_actions = []
            self.model_training_data["observations"] += self.actual_observations.copy()
            self.actual_observations = []
            self.model_training_data["rewards"] += self.actual_rewards.tolist()
            self.actual_rewards = np.array([])
            if self.episode % self.decay_after == 0:
                self.decay_epsilon()
            print(f"starting episode {self.episode}, epsilon = {self.epsilon}")
            if self.episode % self.episodes_for_batch == 0:
                self.train_model()
                self.model = self.learning_model
        return episode_reward, len_worms_placed

    def decay_epsilon(self):
        self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)

    def prepare_data(self):
        #breakpoint()
        print("preparing data to train")
        x = list(zip(self.model_training_data["observations"], self.model_training_data["actions"]))
        y = self.model_training_data["rewards"]
        x_train, x_val, train_rewards, val_rewards = train_test_split(x, y, test_size=0.2)
        del x, y
        train_graphs = []
        train_actions = []
        for el in x_train:
            train_graphs.append(el[0])
            train_actions.append(el[1])
        val_graphs = []
        val_actions = []
        for el in x_val:
            val_graphs.append(el[0])
            val_actions.append(el[1])
        train_dataset = WormUpExamplesDataset(train_graphs, train_actions, train_rewards)
        val_dataset = WormUpExamplesDataset(val_graphs, val_actions, val_rewards)
        train_dataloader = train_dataset.get_dataloader(self.batch_size, shuffle=True)
        val_dataloader = val_dataset.get_dataloader(self.batch_size, shuffle=False)
        return train_dataloader, val_dataloader

    def train_model(self):
        print("begin training")
        train_dataloader, val_dataloader = self.prepare_data()
        wndb.init(
            # set the wandb project where this run will be logged
            project="WormsWarmingUp",
            name= MODEL_VERSION + WORMHOLES + WORMS +  "training_ep_" + str(self.episode),
            # track hyperparameters and run metadata
            config={
                "learning_rate": self.learning_model.lr,
                "architecture": str(self.learning_model),
                "batch": self.episode // self.episodes_for_batch
            }
        )
        self.trainer.fit(self.learning_model, train_dataloader, val_dataloader)
        wndb.finish()
        gnn_state_dict = torch.load("Model/gnn_checkpoint.pth")
        linear_state_dict = torch.load("Model/linear_checkpoint.pth")
        self.linear.load_state_dict(linear_state_dict)
        self.gnn.load_state_dict(gnn_state_dict)
        self.learning_model = IntelligentWorm.load_from_checkpoint(checkpoint_path="Model/worms_model.ckpt", linear=self.linear, gnn=self.gnn)
        print("finished training")


# **INITIALIZE MODEL, AGENT AND ENVIRONMENT**

In [11]:

# device = "gpu" if
gnn_part = GraphNN(1,64,0,1,nn.ReLU())
linear_part = LinearNN(64,1,128,2, nn.ReLU())




In [12]:

agent = WormsMasterAgent(linear_part, gnn_part,0.95,0.005,0.15)


INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [13]:
TABLE="00-example.txt"

register(
    id="worms_env",
    entry_point="worms_env:WormsEnv",
    max_episode_steps=300,
)
environment = gym.make('worms_env', env_file="Data/" + TABLE, render_mode="human", use_single_worm=USE_SINGLE_WORM, enable_wormholes=USE_WORMHOLES)



In [14]:
wndb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [15]:
!rm -r Model
!mkdir Model

rm: cannot remove 'Model': No such file or directory


In [16]:
def compute_mean_rewards_to_print(rewards):
  for x in rewards:
    if x < 0:
      return 0
  return sum(rewards) / len(rewards)


# **TRAINING**

In [17]:
MAX_EPISODES = 200

ACCUMULATE = 5
accumulate_reward_means = []
accumulate_worm_len_means = []
last_rewards = [0 for _ in range(ACCUMULATE)]
last_len_worms_placed = [0 for _ in range(ACCUMULATE)]
done = False
obs, info = environment.reset()
print(f"starting episode {agent.episode}, epsilon = {agent.epsilon}")
# play one episode
while agent.episode < MAX_EPISODES:

    action = agent.get_action(info, environment.available_movements)
    obs, reward, terminated, truncated, info = environment.step(action)
    episode_reward, len_worms_placed = agent.update(info, action, reward, terminated)

    if terminated:
        obs, info = environment.reset()
        last_rewards[agent.episode % ACCUMULATE] = episode_reward
        last_len_worms_placed[agent.episode % ACCUMULATE] = len_worms_placed
        if agent.episode > ACCUMULATE:

          mean_reward = compute_mean_rewards_to_print(last_rewards)
          mean_len_worms_placed = sum(last_len_worms_placed) / len(last_len_worms_placed)
          accumulate_reward_means.append(mean_reward)
          accumulate_worm_len_means.append(mean_len_worms_placed)



  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


starting episode 0, epsilon = 0.95


  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


total reward = 32.0, actions list = [55, 54, 53, 52, 51, 50]
starting episode 1, epsilon = 0.945
total reward = 26.0, actions list = [59, 58, 57, 56, 46, 36]
starting episode 2, epsilon = 0.94
total reward = 31.0, actions list = [17, 18, 19, 29, 28, 38]
starting episode 3, epsilon = 0.9349999999999999
total reward = 23.0, actions list = [17, 27, 28, 29, 30, 20]
starting episode 4, epsilon = 0.9299999999999999
total reward = 22.0, actions list = [26, 25, 24, 23, 22, 21]
starting episode 5, epsilon = 0.9249999999999999
total reward = 28.0, actions list = [25, 24, 14, 4, 5, 15]
starting episode 6, epsilon = 0.9199999999999999
total reward = 26.0, actions list = [2, 3, 4, 14, 24, 25]
starting episode 7, epsilon = 0.9149999999999999
total reward = 21.0, actions list = [0, 1, 11, 12, 22, 32]
starting episode 8, epsilon = 0.9099999999999999
total reward = 31.0, actions list = [22, 32, 42, 43, 53, 52]
starting episode 9, epsilon = 0.9049999999999999
total reward = 33.0, actions list = [21, 11,

[34m[1mwandb[0m: Currently logged in as: [33mmonteleone-1883922[0m ([33mmonteleone[0m). Use [1m`wandb login --relogin`[0m to force relogin


total reward = 26.0, actions list = [47, 37, 38, 39, 40, 41]
starting episode 20, epsilon = 0.8499999999999999
begin training
preparing data to train


INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  412879.625
mean_absolute_error =  178.89937
root_mean_squared_error =  244.49265
r2 =  -100656.20048147168


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (12) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  2702.4580078125
mean_absolute_error =  27.022497
root_mean_squared_error =  33.54637
r2 =  -1615.356282840229
training_epoch_mean loss =  175910.75


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  745918.5
mean_absolute_error =  192.0932
root_mean_squared_error =  320.41644
r2 =  -302.435257514152
training_epoch_mean loss =  166868.171875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  683194.0625
mean_absolute_error =  182.95837
root_mean_squared_error =  305.92355
r2 =  -88.2199047934508
training_epoch_mean loss =  410780.625


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  28000.654296875
mean_absolute_error =  46.5879
root_mean_squared_error =  71.43642
r2 =  -33.411299364325195
training_epoch_mean loss =  200970.4375


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▇▁██▂
r2,▁████
root_mean_squared_error,▆▁██▂
train_loss,▁▁█▂
val_loss,▅▁█▇▁

0,1
mean_absolute_error,46.5879
r2,-33.4113
root_mean_squared_error,71.43642
train_loss,200970.4375
val_loss,28000.6543


finished training


  logger.warn(


total reward = 24.0, actions list = [5, 15, 14, 24, 23, 22]
starting episode 21, epsilon = 0.8449999999999999
total reward = 28.0, actions list = [46, 56, 55, 54, 53, 52]
starting episode 22, epsilon = 0.8399999999999999
total reward = 32.0, actions list = [15, 16, 6, 5, 4, 3]
starting episode 23, epsilon = 0.8349999999999999
total reward = 33.0, actions list = [9, 10, 11, 21, 22, 32]
starting episode 24, epsilon = 0.8299999999999998
total reward = 29.0, actions list = [36, 26, 25, 15, 5, 4]
starting episode 25, epsilon = 0.8249999999999998
total reward = 36.0, actions list = [9, 8, 18, 19, 29, 28]
starting episode 26, epsilon = 0.8199999999999998
total reward = 35.0, actions list = [29, 19, 9, 10, 0, 1]
starting episode 27, epsilon = 0.8149999999999998
total reward = 23.0, actions list = [37, 27, 28, 29, 30, 31]
starting episode 28, epsilon = 0.8099999999999998
total reward = 31.0, actions list = [8, 18, 17, 16, 15, 5]
starting episode 29, epsilon = 0.8049999999999998
total reward = 1

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (24) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


val_loss =  3632.458251953125
mean_absolute_error =  29.63372
root_mean_squared_error =  35.85539
r2 =  -5744.519358691892


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,29.63372
r2,-5744.51936
root_mean_squared_error,35.85539
val_loss,3632.45825


finished training


  logger.warn(


total reward = 29.0, actions list = [18, 28, 29, 39, 38, 48]
starting episode 41, epsilon = 0.7449999999999998
total reward = 31.0, actions list = [21, 31, 30, 20, 19, 9]
starting episode 42, epsilon = 0.7399999999999998
total reward = 32.0, actions list = [9, 10, 20, 21, 31, 41]
starting episode 43, epsilon = 0.7349999999999998
total reward = 39.0, actions list = [31, 41, 51, 52, 42, 32]
starting episode 44, epsilon = 0.7299999999999998
total reward = 17.0, actions list = [23, 22, 12, 2, 3, 4]
starting episode 45, epsilon = 0.7249999999999998
total reward = 26.0, actions list = [7, 17, 16, 26, 36, 35]
starting episode 46, epsilon = 0.7199999999999998
total reward = 31.0, actions list = [5, 6, 16, 15, 14, 4]
starting episode 47, epsilon = 0.7149999999999997
total reward = 31.0, actions list = [5, 15, 16, 6, 7, 8]
starting episode 48, epsilon = 0.7099999999999997
total reward = 23.0, actions list = [7, 17, 18, 19, 20, 30]
starting episode 49, epsilon = 0.7049999999999997
total reward = 

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  264.076904296875
mean_absolute_error =  14.111439
root_mean_squared_error =  14.967973
r2 =  -65255.70510814085


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (36) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,14.11144
r2,-65255.70511
root_mean_squared_error,14.96797
val_loss,264.0769


finished training


  logger.warn(


total reward = 36.0, actions list = [9, 19, 29, 28, 18, 8]
starting episode 61, epsilon = 0.6449999999999997
total reward = 39.0, actions list = [38, 39, 29, 19, 9, 10]
starting episode 62, epsilon = 0.6399999999999997
total reward = 32.0, actions list = [20, 30, 29, 39, 49, 59]
starting episode 63, epsilon = 0.6349999999999997
total reward = 29.0, actions list = [40, 50, 51, 52, 53, 54]
starting episode 64, epsilon = 0.6299999999999997
total reward = 33.0, actions list = [41, 42, 32, 31, 21, 22]
starting episode 65, epsilon = 0.6249999999999997
total reward = 25.0, actions list = [27, 26, 16, 15, 5, 4]
starting episode 66, epsilon = 0.6199999999999997
total reward = 33.0, actions list = [15, 5, 6, 16, 26, 25]
starting episode 67, epsilon = 0.6149999999999997
total reward = 23.0, actions list = [15, 5, 4, 3, 2, 12]
starting episode 68, epsilon = 0.6099999999999997
total reward = 34.0, actions list = [9, 19, 20, 21, 31, 41]
starting episode 69, epsilon = 0.6049999999999996
total reward 

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  7514.81005859375
mean_absolute_error =  43.481796
root_mean_squared_error =  55.005634
r2 =  -1740.2934306589946


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (48) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,43.4818
r2,-1740.29343
root_mean_squared_error,55.00563
val_loss,7514.81006


finished training


  logger.warn(


total reward = 30.0, actions list = [9, 8, 18, 19, 20, 30]
starting episode 81, epsilon = 0.5449999999999996
total reward = 33.0, actions list = [9, 19, 18, 28, 29, 30]
starting episode 82, epsilon = 0.5399999999999996
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 83, epsilon = 0.5349999999999996
total reward = 31.0, actions list = [29, 39, 40, 41, 31, 21]
starting episode 84, epsilon = 0.5299999999999996
total reward = 39.0, actions list = [31, 32, 42, 52, 51, 41]
starting episode 85, epsilon = 0.5249999999999996
total reward = 30.0, actions list = [9, 8, 18, 19, 20, 30]
starting episode 86, epsilon = 0.5199999999999996
total reward = 34.0, actions list = [11, 21, 31, 41, 51, 52]
starting episode 87, epsilon = 0.5149999999999996
total reward = 31.0, actions list = [52, 51, 41, 42, 43, 33]
starting episode 88, epsilon = 0.5099999999999996
total reward = 30.0, actions list = [9, 10, 11, 21, 31, 30]
starting episode 89, epsilon = 0.5049999999999996
total re

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  4084.673583984375
mean_absolute_error =  29.847843
root_mean_squared_error =  34.975685
r2 =  -1584.571468720732


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,29.84784
r2,-1584.57147
root_mean_squared_error,34.97569
val_loss,4084.67358


finished training


  logger.warn(


total reward = 32.0, actions list = [11, 1, 0, 10, 9, 19]
starting episode 101, epsilon = 0.4449999999999995
total reward = 34.0, actions list = [49, 39, 40, 30, 29, 19]
starting episode 102, epsilon = 0.4399999999999995
total reward = 33.0, actions list = [38, 39, 49, 59, 58, 48]
starting episode 103, epsilon = 0.4349999999999995
total reward = 26.0, actions list = [9, 10, 11, 1, 2, 12]
starting episode 104, epsilon = 0.4299999999999995
total reward = 25.0, actions list = [56, 46, 47, 57, 58, 59]
starting episode 105, epsilon = 0.4249999999999995
total reward = 34.0, actions list = [31, 41, 51, 50, 49, 39]
starting episode 106, epsilon = 0.4199999999999995
total reward = 33.0, actions list = [6, 5, 15, 25, 26, 16]
starting episode 107, epsilon = 0.4149999999999995
total reward = 36.0, actions list = [9, 19, 18, 28, 29, 39]
starting episode 108, epsilon = 0.4099999999999995
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 109, epsilon = 0.40499999999999947
t

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  3779.802001953125
mean_absolute_error =  32.696037
root_mean_squared_error =  38.961662
r2 =  -3734.0614738884474


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,32.69604
r2,-3734.06147
root_mean_squared_error,38.96166
val_loss,3779.802


finished training


  logger.warn(


total reward = 33.0, actions list = [9, 19, 29, 30, 20, 21]
starting episode 121, epsilon = 0.3449999999999994
total reward = 35.0, actions list = [9, 19, 29, 28, 18, 17]
starting episode 122, epsilon = 0.3399999999999994
total reward = 35.0, actions list = [9, 19, 29, 30, 40, 41]
starting episode 123, epsilon = 0.3349999999999994
total reward = 35.0, actions list = [57, 58, 59, 49, 39, 29]
starting episode 124, epsilon = 0.3299999999999994
total reward = 33.0, actions list = [9, 19, 20, 10, 11, 21]
starting episode 125, epsilon = 0.3249999999999994
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 126, epsilon = 0.3199999999999994
total reward = 39.0, actions list = [9, 19, 29, 39, 49, 50]
starting episode 127, epsilon = 0.3149999999999994
total reward = 34.0, actions list = [9, 19, 20, 21, 31, 41]
starting episode 128, epsilon = 0.3099999999999994
total reward = 31.0, actions list = [18, 19, 9, 8, 7, 6]
starting episode 129, epsilon = 0.3049999999999994
tot

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  377.06097412109375
mean_absolute_error =  16.248169
root_mean_squared_error =  17.273169
r2 =  -1138.62161559873


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,16.24817
r2,-1138.62162
root_mean_squared_error,17.27317
val_loss,377.06097


finished training


  logger.warn(


total reward = 33.0, actions list = [9, 19, 20, 30, 31, 41]
starting episode 141, epsilon = 0.24499999999999933
total reward = 37.0, actions list = [9, 19, 29, 30, 31, 41]
starting episode 142, epsilon = 0.23999999999999932
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 143, epsilon = 0.23499999999999932
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 144, epsilon = 0.22999999999999932
total reward = 41.0, actions list = [9, 19, 29, 39, 49, 48]
starting episode 145, epsilon = 0.2249999999999993
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 146, epsilon = 0.2199999999999993
total reward = 34.0, actions list = [9, 19, 20, 21, 31, 41]
starting episode 147, epsilon = 0.2149999999999993
total reward = 37.0, actions list = [21, 31, 41, 51, 52, 42]
starting episode 148, epsilon = 0.2099999999999993
total reward = 41.0, actions list = [9, 19, 29, 39, 49, 48]
starting episode 149, epsilon = 0.2049999999999

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  3856.5302734375
mean_absolute_error =  33.395832
root_mean_squared_error =  39.275116
r2 =  -10832836.964217167


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,33.39583
r2,-10832836.96422
root_mean_squared_error,39.27512
val_loss,3856.53027


finished training


  logger.warn(


total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 161, epsilon = 0.15
total reward = 23.0, actions list = [12, 2, 3, 4, 5, 15]
starting episode 162, epsilon = 0.15
total reward = 41.0, actions list = [9, 19, 29, 39, 49, 48]
starting episode 163, epsilon = 0.15
total reward = 33.0, actions list = [25, 15, 5, 6, 16, 26]
starting episode 164, epsilon = 0.15
total reward = 32.0, actions list = [9, 19, 29, 30, 40, 50]
starting episode 165, epsilon = 0.15
total reward = 31.0, actions list = [14, 15, 16, 6, 5, 4]
starting episode 166, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 167, epsilon = 0.15
total reward = 37.0, actions list = [9, 8, 18, 19, 29, 39]
starting episode 168, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 169, epsilon = 0.15
total reward = 37.0, actions list = [9, 19, 29, 30, 31, 41]
starting episode 170, epsilon = 0.15
total reward = 42.0, actions list = [

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  432.0791015625
mean_absolute_error =  16.494253
root_mean_squared_error =  17.981972
r2 =  -15677.913700687463


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,16.49425
r2,-15677.9137
root_mean_squared_error,17.98197
val_loss,432.0791


finished training


  logger.warn(


total reward = 36.0, actions list = [9, 19, 18, 28, 29, 39]
starting episode 181, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 182, epsilon = 0.15
total reward = 36.0, actions list = [9, 19, 18, 28, 29, 39]
starting episode 183, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 184, epsilon = 0.15
total reward = 38.0, actions list = [9, 19, 29, 39, 38, 48]
starting episode 185, epsilon = 0.15
total reward = 31.0, actions list = [9, 8, 7, 6, 16, 15]
starting episode 186, epsilon = 0.15
total reward = 29.0, actions list = [7, 6, 5, 15, 16, 26]
starting episode 187, epsilon = 0.15
total reward = 41.0, actions list = [9, 19, 29, 39, 49, 48]
starting episode 188, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 189, epsilon = 0.15
total reward = 35.0, actions list = [9, 19, 29, 39, 40, 30]
starting episode 190, epsilon = 0.15
total reward = 42.0, actions list =

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  11364.267578125
mean_absolute_error =  61.358852
root_mean_squared_error =  66.21193
r2 =  -56276679426.898636


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,61.35885
r2,-56276679426.89864
root_mean_squared_error,66.21193
val_loss,11364.26758


finished training


# **PLOT RESULTS OF TRAINING**

In [25]:


trace1 = go.Scatter(x=[i for i in range(len(accumulate_reward_means))], y=accumulate_reward_means, mode='lines', name="mean_rewards")


trace2 = go.Scatter(x=[i for i in range(len(accumulate_worm_len_means))], y=accumulate_worm_len_means, mode='lines', name="mean_lengths")

fig1 = go.Figure(trace1)
fig2 = go.Figure(trace2)


# Aggiunta del layout (opzionale)
fig1.update_layout(title='reward results', xaxis_title='X', yaxis_title='Y')

# Aggiunta del layout (opzionale)
fig2.update_layout(title='episode lengths results', xaxis_title='X', yaxis_title='Y')
# Visualizzazione del grafico
fig1.show()

fig2.show()