# **SETUP**

In [1]:

!pip install torch-geometric lightning wandb gymnasium plotly
!pip install -U scikit-learn
!pip install "notebook>=5.3" "ipywidgets>=7.5"




In [2]:
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data
import lightning as L
import torch
import torch.nn as nn
import wandb as wndb
from torch_geometric.nn import GATConv
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
import numpy as np
from sklearn.model_selection import train_test_split
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
import gymnasium as gym
from gymnasium.envs.registration import register
import pdb
import plotly.graph_objects as go
import plotly.express as px

# **MODEL AND RELATED STUFF**

In [3]:
class GreedyWorm(nn.Module):

    def __init__(self):
        super(GreedyWorm, self).__init__()

    def forward(self, data):
        graphs, positions = data
        return [graphs[i].x[positions[i]] for i in range(len(positions))]


In [4]:

class WormUpExamplesDataset(Dataset):

    def __init__(self, graphs: list[Data], actions: list[int], rewards: list[int]):
        self.data = list(zip(graphs, actions, rewards))

    def __getitem__(self, idx: int):
        return self.data[idx]

    def __len__(self):
        return len(self.data)

    def collate(self, data: list):
        graphs = []
        actions = []
        rewards = []
        for el in data:
            graphs.append(el[0])
            actions.append(el[1])
            rewards.append(el[2])
        return graphs, actions, torch.tensor(rewards, dtype=torch.float)

    def get_dataloader(self, batch_size: int, shuffle: bool = False):
        return DataLoader(self, batch_size=batch_size, shuffle=shuffle, collate_fn=self.collate)




In [5]:
class GraphNN(nn.Module):

    def __init__(self, in_size, out_size, h_size, deep, activation, device="cpu"):
        super(GraphNN, self).__init__()
        self.activation = activation
        if deep == 1:
            self.layers = [GATConv(in_size, out_size)]  #.to(device)]
        else:
            self.layers = [GATConv(in_size, h_size)]  #.to(device)]
            for _ in range(deep - 2):
                self.layers.append(GATConv(h_size, h_size))  #.to(device))
            self.layers.append(GATConv(h_size, out_size))  #.to(device))

    def forward(self, data):
        edge_index = data[1]
        x = data[0]
        for layer in self.layers[:-1]:
            x = self.activation(layer(x, edge_index))
        #breakpoint()
        return self.layers[-1](x, edge_index)



In [6]:
class LinearNN(nn.Module):
    def __init__(self, in_size, out_size, h_size, deep, activation):
        super(LinearNN, self).__init__()
        if deep == 1:
            layers = [nn.Linear(in_size, out_size), activation]
        else:
            layers = [nn.Linear(in_size, h_size), activation]
            for _ in range(deep - 2):
                layers.append(nn.Linear(h_size, h_size))
                layers.append(activation)
            layers.append(nn.Linear(h_size, out_size))
        self.linear = nn.Sequential(*layers)

    def forward(self, data):
        return self.linear(data)

In [7]:

class IntelligentWorm(L.LightningModule):

    def __init__(self, linear: nn.Module, gnn: nn.Module, lr: float = 1e-3):
        super(IntelligentWorm, self).__init__()
        self.encoder = gnn
        self.decoder = linear
        self.loss = nn.MSELoss()
        self.validation_predictions = []
        self.validation_targets = []
        self.validation_loss = []
        self.train_loss = []
        self.best_val_loss = 100000000
        self.best_mae = 100000000
        self.best_rmse = 1000000000
        self.best_r2 = -1
        self.best_model = 0
        self.lr = lr

    def update_best_stats(self, val_loss, mae, rmse, r2):
        self.best_val_loss = val_loss
        self.best_mae = mae
        self.best_rmse = rmse
        self.best_r2 = r2

    def forward(self, data):
        actions = data[1]
        graphs = data[0]
        embeddings = []
        for i in range(len(graphs)):
            g = graphs[i]
            x = self.encoder((g.x, g.edge_index))
            embeddings.append(x[actions[i]])
        embeddings = torch.stack(embeddings)
        pred = self.decoder(embeddings)
        return pred.squeeze()

    def training_step(self, batch, batch_idx):
        graphs, actions, rewards = batch
        predictions = self.forward((graphs, actions))
        train_loss = self.loss(predictions, rewards)
        self.train_loss.append(train_loss)

        return train_loss

    def validation_step(self, batch, batch_idx):
        graphs, actions, rewards = batch
        #breakpoint()
        self.validation_targets.append(rewards)
        predictions = self.forward((graphs, actions))
        validation_loss = self.loss(predictions, rewards)
        self.validation_predictions.append(predictions)
        self.validation_loss.append(validation_loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer



In [8]:
class WormCallback(L.Callback):

    def on_train_epoch_end(self, trainer, pl_module):

        epoch_mean = float(torch.stack(pl_module.train_loss).mean())
        print("training_epoch_mean loss = ", epoch_mean)
        # free up the memory
        pl_module.train_loss.clear()
        try:
            wndb.log({"train_loss": epoch_mean})
        except:
            pass

    def on_validation_epoch_end(self, trainer, pl_module: IntelligentWorm):
        predictions = torch.stack(pl_module.validation_predictions).squeeze()
        targets = torch.stack(pl_module.validation_targets).squeeze()
        #breakpoint()
        r2 = r2_score(predictions, targets)
        mae = mean_absolute_error(predictions, targets)
        rmse = root_mean_squared_error(predictions, targets)
        mean_loss = float(torch.stack(pl_module.validation_loss).mean())

        pl_module.validation_loss.clear()
        pl_module.validation_predictions.clear()
        pl_module.validation_targets.clear()

        print("val_loss = ", mean_loss)
        print("mean_absolute_error = ", mae)
        print("root_mean_squared_error = ", rmse)
        print("r2 = ", r2)
        count = 0
        count += 1 if mean_loss < pl_module.best_val_loss else 0
        count += 1 if mae < pl_module.best_mae else 0
        count += 1 if rmse < pl_module.best_rmse else 0
        count += 1 if r2 > pl_module.best_r2 else 0
        if count >= 3 or count == 2 and mean_loss < pl_module.best_val_loss:
            pl_module.update_best_stats(mean_loss, mae, rmse, r2)
            pl_module.best_model -= 1
            pl_module.log("best_model", pl_module.best_model)
            gnn_state_dict = pl_module.encoder.state_dict()
            linear_state_dict = pl_module.decoder.state_dict()

            # Salva gli state dict su file
            torch.save(gnn_state_dict, "Model/gnn_checkpoint.pth")
            torch.save(linear_state_dict, "Model/linear_checkpoint.pth")
        else:
            pl_module.log("best_model", pl_module.best_model + 1)

        wndb.log({"val_loss": mean_loss, "mean_absolute_error": mae, "root_mean_squared_error": rmse, "r2": r2})




# **AGENT**

In [9]:


class WormsMasterAgent:

    def __init__(
            self,
            linear_part: nn.Module,
            gnn_part: nn.Module,
            initial_epsilon: float,
            epsilon_decay: float,
            final_epsilon: float,
            learning_rate: float = 1,
            discount_factor: float = 0.95,
            decay_after: int = 1,
            #trainer params
            batch_size: int = 8,
            episodes_for_batch: int = 20,
            trainer_deterministic: bool = True,
            trainer_max_epochs: int = 20,
            trainer_accelerator: str = "cpu"

    ):
        self.batch_size = batch_size
        self.episodes_for_batch = episodes_for_batch
        self.episode = 0
        self.linear = linear_part
        self.gnn = gnn_part
        self.learning_model = IntelligentWorm(linear_part, gnn_part)
        self.model = GreedyWorm()
        self.lr = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = initial_epsilon
        self.epsilon_decay = epsilon_decay
        self.final_epsilon = final_epsilon
        self.decay_after = decay_after
        self.checkpoint_callback = ModelCheckpoint(dirpath="Model/", filename="worms_model", save_top_k=1,
                                                   mode='min', monitor='best_model')
        self.worm_callback = WormCallback()
        self.early_stopping_callback = EarlyStopping(monitor='best_model', mode='min', patience=3)
        self.trainer = L.Trainer(deterministic=trainer_deterministic,
                                 max_epochs=trainer_max_epochs, accelerator=trainer_accelerator,
                                 callbacks=[self.checkpoint_callback, self.worm_callback, self.early_stopping_callback])
        self.actual_rewards = np.array([])
        self.actual_observations = []
        self.actual_actions = []
        self.model_training_data = {
            "actions": [],
            "observations": [],
            "rewards": []
        }

    def get_action(self, observation, available_actions: list[int]) -> int:
        graph = observation["field"]
        worms = observation["worms"]

        self.actual_rewards = np.append(self.actual_rewards, 0)
        # with probability epsilon return a random action to explore the environment
        if np.random.random() < self.epsilon:
            i = np.random.randint(0, high=len(available_actions))
            return available_actions[i]

        # with probability (1 - epsilon) act greedily (exploit)
        else:
            self.model.eval()
            with torch.no_grad():
                idx = np.argmax(self.model(([graph] * len(available_actions), available_actions)))
                return available_actions[idx]

    def update(self, observation, action: int, reward: int, terminated: bool):
        #breakpoint()
        graph = observation["field"]
        worms = observation["worms"]
        self.actual_observations.append(graph)
        self.actual_actions.append(action)
        self.actual_rewards += self.lr * reward
        episode_reward = 0.0
        len_worms_placed = 0.0
        if terminated:
            episode_reward = float(self.actual_rewards[0])
            len_worms_placed = len(self.actual_actions)
            print(f"total reward = {self.actual_rewards[0]}, actions list = {self.actual_actions}")
            self.episode += 1
            self.model_training_data["actions"] += self.actual_actions.copy()
            self.actual_actions = []
            self.model_training_data["observations"] += self.actual_observations.copy()
            self.actual_observations = []
            self.model_training_data["rewards"] += self.actual_rewards.tolist()
            self.actual_rewards = np.array([])
            if self.episode % self.decay_after == 0:
                self.decay_epsilon()
            print(f"starting episode {self.episode}, epsilon = {self.epsilon}")
            if self.episode % self.episodes_for_batch == 0:
                self.train_model()
                self.model = self.learning_model
        return episode_reward, len_worms_placed

    def decay_epsilon(self):
        self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)

    def prepare_data(self):
        #breakpoint()
        print("preparing data to train")
        x = list(zip(self.model_training_data["observations"], self.model_training_data["actions"]))
        y = self.model_training_data["rewards"]
        x_train, x_val, train_rewards, val_rewards = train_test_split(x, y, test_size=0.2)
        del x, y
        train_graphs = []
        train_actions = []
        for el in x_train:
            train_graphs.append(el[0])
            train_actions.append(el[1])
        val_graphs = []
        val_actions = []
        for el in x_val:
            val_graphs.append(el[0])
            val_actions.append(el[1])
        train_dataset = WormUpExamplesDataset(train_graphs, train_actions, train_rewards)
        val_dataset = WormUpExamplesDataset(val_graphs, val_actions, val_rewards)
        train_dataloader = train_dataset.get_dataloader(self.batch_size, shuffle=True)
        val_dataloader = val_dataset.get_dataloader(self.batch_size, shuffle=False)
        return train_dataloader, val_dataloader

    def train_model(self):
        print("begin training")
        train_dataloader, val_dataloader = self.prepare_data()
        wndb.init(
            # set the wandb project where this run will be logged
            project="WormsWarmingUp",

            # track hyperparameters and run metadata
            config={
                "learning_rate": self.learning_model.lr,
                "architecture": str(self.learning_model),
                "batch": self.episode // self.episodes_for_batch
            }
        )
        self.trainer.fit(self.learning_model, train_dataloader, val_dataloader)
        wndb.finish()
        gnn_state_dict = torch.load("Model/gnn_checkpoint.pth")
        linear_state_dict = torch.load("Model/linear_checkpoint.pth")
        self.linear.load_state_dict(linear_state_dict)
        self.gnn.load_state_dict(gnn_state_dict)
        self.learning_model = IntelligentWorm.load_from_checkpoint(checkpoint_path="Model/worms_model.ckpt", linear=self.linear, gnn=self.gnn)
        print("finished training")


# **INITIALIZE MODEL, AGENT AND ENVIRONMENT**

In [10]:

# device = "gpu" if
gnn_part = GraphNN(1,64,0,1,nn.ReLU())
linear_part = LinearNN(64,1,128,2, nn.ReLU())




In [11]:

agent = WormsMasterAgent(linear_part, gnn_part,0.95,0.005,0.15)


INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [12]:

register(
    id="worms_env",
    entry_point="worms_env:WormsEnv",
    max_episode_steps=300,
)
environment = gym.make('worms_env', env_file="Data/00-example.txt", render_mode="human")



In [13]:
wndb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmonteleone-1883922[0m ([33mmonteleone[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [14]:
!rm -r Model
!mkdir Model

In [15]:
def compute_mean_rewards_to_print(rewards):
  for x in rewards:
    if x < 0:
      return 0
  return sum(rewards) / len(rewards)


# **TRAINING**

In [16]:
MAX_EPISODES = 200

ACCUMULATE = 5
accumulate_reward_means = []
accumulate_worm_len_means = []
last_rewards = [0 for _ in range(ACCUMULATE)]
last_len_worms_placed = [0 for _ in range(ACCUMULATE)]
done = False
obs, info = environment.reset()
print(f"starting episode {agent.episode}, epsilon = {agent.epsilon}")
# play one episode
while agent.episode < MAX_EPISODES:

    action = agent.get_action(info, environment.available_movements)
    obs, reward, terminated, truncated, info = environment.step(action)
    episode_reward, len_worms_placed = agent.update(info, action, reward, terminated)

    if terminated:
        obs, info = environment.reset()
        last_rewards[agent.episode % ACCUMULATE] = episode_reward
        last_len_worms_placed[agent.episode % ACCUMULATE] = len_worms_placed
        if agent.episode > ACCUMULATE:

          mean_reward = compute_mean_rewards_to_print(last_rewards)
          mean_len_worms_placed = sum(last_len_worms_placed) / len(last_len_worms_placed)
          accumulate_reward_means.append(mean_reward)
          accumulate_worm_len_means.append(mean_len_worms_placed)



  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


starting episode 0, epsilon = 0.95


  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


total reward = 29.0, actions list = [49, 48, 47, 37, 38, 39]
starting episode 1, epsilon = 0.945
total reward = 26.0, actions list = [33, 43, 53, 52, 51, 50]
starting episode 2, epsilon = 0.94
total reward = 25.0, actions list = [11, 10, 20, 21, 31, 30]
starting episode 3, epsilon = 0.9349999999999999
total reward = 15.0, actions list = [34, 33, 23, 24, 25, 15]
starting episode 4, epsilon = 0.9299999999999999
total reward = 17.0, actions list = [13, 12, 2, 3, 4, 14]
starting episode 5, epsilon = 0.9249999999999999
total reward = 21.0, actions list = [36, 26, 25, 24, 23, 33]
starting episode 6, epsilon = 0.9199999999999999
total reward = 22.0, actions list = [1, 2, 3, 4, 14, 13]
starting episode 7, epsilon = 0.9149999999999999
total reward = 17.0, actions list = [34, 35, 45, 55, 54, 53]
starting episode 8, epsilon = 0.9099999999999999
total reward = 20.0, actions list = [46, 36, 35, 34, 44, 54]
starting episode 9, epsilon = 0.9049999999999999
total reward = 34.0, actions list = [16, 17,

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (12) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


val_loss =  263.6229553222656
mean_absolute_error =  13.628512
root_mean_squared_error =  14.266096
r2 =  -255302.87809638045


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  233.251708984375
mean_absolute_error =  12.269467
root_mean_squared_error =  13.785998
r2 =  -1431.7002907365643
training_epoch_mean loss =  25586.4765625


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  204.6767578125
mean_absolute_error =  11.040971
root_mean_squared_error =  12.187254
r2 =  -124.75984064187494
training_epoch_mean loss =  438845.03125


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  175.81968688964844
mean_absolute_error =  9.913454
root_mean_squared_error =  11.591068
r2 =  -60.217200294495825
training_epoch_mean loss =  341355.71875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  150.60348510742188
mean_absolute_error =  9.074605
root_mean_squared_error =  11.543114
r2 =  -32.30079709422269
training_epoch_mean loss =  458856.46875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  138.16021728515625
mean_absolute_error =  9.211816
root_mean_squared_error =  11.434214
r2 =  -12.842894511715802
training_epoch_mean loss =  139747.515625


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  137.75636291503906
mean_absolute_error =  9.729332
root_mean_squared_error =  10.838366
r2 =  -40.05316062403425
training_epoch_mean loss =  46784.01953125


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  142.3435821533203
mean_absolute_error =  10.143568
root_mean_squared_error =  11.003672
r2 =  -6.129290870773058
training_epoch_mean loss =  29120.873046875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  146.0303497314453
mean_absolute_error =  10.359103
root_mean_squared_error =  11.611755
r2 =  -7.284173469011771
training_epoch_mean loss =  13255.640625


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  144.5819854736328
mean_absolute_error =  10.286989
root_mean_squared_error =  11.002955
r2 =  -9.005134553360358
training_epoch_mean loss =  51516.5


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,█▆▄▂▁▁▂▃▃▃
r2,▁█████████
root_mean_squared_error,█▇▄▃▂▂▁▁▃▁
train_loss,▁█▆█▃▂▁▁▂
val_loss,█▆▅▃▂▁▁▁▁▁

0,1
mean_absolute_error,10.28699
r2,-9.00513
root_mean_squared_error,11.00296
train_loss,51516.5
val_loss,144.58199


finished training


  logger.warn(


total reward = 28.0, actions list = [35, 25, 26, 16, 15, 14]
starting episode 21, epsilon = 0.8449999999999999
total reward = 21.0, actions list = [16, 15, 14, 24, 34, 44]
starting episode 22, epsilon = 0.8399999999999999
total reward = 28.0, actions list = [17, 18, 8, 7, 6, 5]
starting episode 23, epsilon = 0.8349999999999999
total reward = 30.0, actions list = [5, 15, 16, 26, 36, 37]
starting episode 24, epsilon = 0.8299999999999998
total reward = 18.0, actions list = [34, 24, 25, 35, 45, 46]
starting episode 25, epsilon = 0.8249999999999998
total reward = 29.0, actions list = [42, 52, 53, 43, 33, 32]
starting episode 26, epsilon = 0.8199999999999998
total reward = 24.0, actions list = [33, 43, 42, 32, 31, 30]
starting episode 27, epsilon = 0.8149999999999998
total reward = 35.0, actions list = [9, 10, 20, 30, 29, 19]
starting episode 28, epsilon = 0.8099999999999998
total reward = 28.0, actions list = [4, 5, 15, 16, 6, 7]
starting episode 29, epsilon = 0.8049999999999998
total rewar

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (24) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


val_loss =  2110.0107421875
mean_absolute_error =  18.173725
root_mean_squared_error =  22.881384
r2 =  -39.04407336440411


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,18.17373
r2,-39.04407
root_mean_squared_error,22.88138
val_loss,2110.01074


finished training


  logger.warn(


total reward = 36.0, actions list = [59, 58, 48, 49, 39, 29]
starting episode 41, epsilon = 0.7449999999999998
total reward = 33.0, actions list = [53, 52, 51, 41, 42, 43]
starting episode 42, epsilon = 0.7399999999999998
total reward = 26.0, actions list = [50, 40, 41, 31, 32, 33]
starting episode 43, epsilon = 0.7349999999999998
total reward = 28.0, actions list = [37, 47, 57, 58, 48, 49]
starting episode 44, epsilon = 0.7299999999999998
total reward = 31.0, actions list = [0, 10, 20, 19, 29, 39]
starting episode 45, epsilon = 0.7249999999999998
total reward = 27.0, actions list = [39, 40, 41, 31, 21, 20]
starting episode 46, epsilon = 0.7199999999999998
total reward = 26.0, actions list = [9, 10, 20, 21, 22, 23]
starting episode 47, epsilon = 0.7149999999999997
total reward = 24.0, actions list = [36, 26, 25, 35, 45, 46]
starting episode 48, epsilon = 0.7099999999999997
total reward = 38.0, actions list = [1, 11, 10, 9, 19, 29]
starting episode 49, epsilon = 0.7049999999999997
total

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (36) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


val_loss =  1955.866455078125
mean_absolute_error =  16.99466
root_mean_squared_error =  21.827692
r2 =  -71.87649843745314


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,16.99466
r2,-71.8765
root_mean_squared_error,21.82769
val_loss,1955.86646


finished training


  logger.warn(


total reward = 34.0, actions list = [49, 39, 40, 30, 29, 19]
starting episode 61, epsilon = 0.6449999999999997
total reward = 36.0, actions list = [9, 19, 18, 28, 29, 39]
starting episode 62, epsilon = 0.6399999999999997
total reward = 37.0, actions list = [32, 42, 52, 51, 41, 40]
starting episode 63, epsilon = 0.6349999999999997
total reward = 36.0, actions list = [51, 52, 42, 32, 31, 30]
starting episode 64, epsilon = 0.6299999999999997
total reward = 30.0, actions list = [7, 8, 9, 10, 11, 21]
starting episode 65, epsilon = 0.6249999999999997
total reward = 30.0, actions list = [9, 8, 18, 19, 20, 30]
starting episode 66, epsilon = 0.6199999999999997
total reward = 37.0, actions list = [9, 19, 29, 28, 38, 48]
starting episode 67, epsilon = 0.6149999999999997
total reward = 37.0, actions list = [9, 19, 29, 30, 31, 41]
starting episode 68, epsilon = 0.6099999999999997
total reward = 32.0, actions list = [9, 10, 11, 1, 2, 3]
starting episode 69, epsilon = 0.6049999999999996
total reward 

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (48) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


val_loss =  5912.0185546875
mean_absolute_error =  39.68194
root_mean_squared_error =  52.77365
r2 =  -83.20733489442608


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,39.68194
r2,-83.20733
root_mean_squared_error,52.77365
val_loss,5912.01855


finished training


  logger.warn(


total reward = 34.0, actions list = [50, 40, 41, 51, 52, 42]
starting episode 81, epsilon = 0.5449999999999996
total reward = 28.0, actions list = [36, 26, 16, 15, 25, 35]
starting episode 82, epsilon = 0.5399999999999996
total reward = 31.0, actions list = [9, 8, 7, 17, 18, 19]
starting episode 83, epsilon = 0.5349999999999996
total reward = 35.0, actions list = [17, 18, 28, 29, 19, 9]
starting episode 84, epsilon = 0.5299999999999996
total reward = 29.0, actions list = [9, 10, 0, 1, 2, 3]
starting episode 85, epsilon = 0.5249999999999996
total reward = 32.0, actions list = [53, 52, 42, 41, 40, 30]
starting episode 86, epsilon = 0.5199999999999996
total reward = 41.0, actions list = [9, 19, 29, 39, 49, 48]
starting episode 87, epsilon = 0.5149999999999996
total reward = 36.0, actions list = [48, 58, 59, 49, 39, 29]
starting episode 88, epsilon = 0.5099999999999996
total reward = 39.0, actions list = [31, 41, 51, 52, 42, 32]
starting episode 89, epsilon = 0.5049999999999996
total rewar

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


val_loss =  2015.452392578125
mean_absolute_error =  21.43805
root_mean_squared_error =  26.613934
r2 =  -49.35293945700647


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,21.43805
r2,-49.35294
root_mean_squared_error,26.61393
val_loss,2015.45239


finished training


  logger.warn(


total reward = 39.0, actions list = [38, 39, 29, 19, 9, 10]
starting episode 101, epsilon = 0.4449999999999995
total reward = 35.0, actions list = [9, 19, 29, 30, 31, 21]
starting episode 102, epsilon = 0.4399999999999995
total reward = 20.0, actions list = [45, 46, 56, 55, 54, 53]
starting episode 103, epsilon = 0.4349999999999995
total reward = 30.0, actions list = [54, 53, 43, 42, 52, 51]
starting episode 104, epsilon = 0.4299999999999995
total reward = 32.0, actions list = [38, 39, 29, 28, 18, 19]
starting episode 105, epsilon = 0.4249999999999995
total reward = 24.0, actions list = [58, 57, 56, 46, 47, 48]
starting episode 106, epsilon = 0.4199999999999995
total reward = 36.0, actions list = [9, 19, 29, 39, 38, 37]
starting episode 107, epsilon = 0.4149999999999995
total reward = 33.0, actions list = [46, 36, 26, 16, 15, 5]
starting episode 108, epsilon = 0.4099999999999995
total reward = 33.0, actions list = [9, 10, 11, 21, 31, 41]
starting episode 109, epsilon = 0.40499999999999

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114472455559129, max=1.0…

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


val_loss =  142.77499389648438
mean_absolute_error =  10.454962
root_mean_squared_error =  11.321288
r2 =  -83.37193607601225


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,10.45496
r2,-83.37194
root_mean_squared_error,11.32129
val_loss,142.77499


finished training


  logger.warn(


total reward = 39.0, actions list = [9, 19, 29, 39, 49, 50]
starting episode 121, epsilon = 0.3449999999999994
total reward = 31.0, actions list = [9, 8, 18, 17, 16, 15]
starting episode 122, epsilon = 0.3399999999999994
total reward = 38.0, actions list = [9, 19, 29, 39, 40, 41]
starting episode 123, epsilon = 0.3349999999999994
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 124, epsilon = 0.3299999999999994
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 125, epsilon = 0.3249999999999994
total reward = 39.0, actions list = [9, 19, 29, 39, 49, 50]
starting episode 126, epsilon = 0.3199999999999994
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 127, epsilon = 0.3149999999999994
total reward = 25.0, actions list = [47, 46, 56, 57, 58, 59]
starting episode 128, epsilon = 0.3099999999999994
total reward = 37.0, actions list = [39, 40, 41, 51, 52, 42]
starting episode 129, epsilon = 0.3049999999999994


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


val_loss =  5180.12890625
mean_absolute_error =  37.339657
root_mean_squared_error =  41.547447
r2 =  -65.32116873097951


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,37.33966
r2,-65.32117
root_mean_squared_error,41.54745
val_loss,5180.12891


finished training


  logger.warn(


total reward = 38.0, actions list = [9, 19, 29, 39, 38, 48]
starting episode 141, epsilon = 0.24499999999999933
total reward = 36.0, actions list = [9, 8, 18, 28, 29, 19]
starting episode 142, epsilon = 0.23999999999999932
total reward = 27.0, actions list = [17, 18, 19, 20, 21, 31]
starting episode 143, epsilon = 0.23499999999999932
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 144, epsilon = 0.22999999999999932
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 145, epsilon = 0.2249999999999993
total reward = 34.0, actions list = [9, 19, 20, 21, 31, 41]
starting episode 146, epsilon = 0.2199999999999993
total reward = 39.0, actions list = [10, 9, 19, 29, 39, 38]
starting episode 147, epsilon = 0.2149999999999993
total reward = 32.0, actions list = [9, 8, 7, 6, 5, 15]
starting episode 148, epsilon = 0.2099999999999993
total reward = 33.0, actions list = [9, 10, 11, 21, 31, 41]
starting episode 149, epsilon = 0.2049999999999993
t

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


val_loss =  192.361328125
mean_absolute_error =  10.811497
root_mean_squared_error =  11.815353
r2 =  -11823.721473440726


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,10.8115
r2,-11823.72147
root_mean_squared_error,11.81535
val_loss,192.36133


finished training


  logger.warn(


total reward = 37.0, actions list = [9, 19, 29, 30, 31, 41]
starting episode 161, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 162, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 163, epsilon = 0.15
total reward = 34.0, actions list = [9, 19, 20, 21, 31, 41]
starting episode 164, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 165, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 166, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 167, epsilon = 0.15
total reward = 41.0, actions list = [9, 19, 29, 39, 49, 48]
starting episode 168, epsilon = 0.15
total reward = 22.0, actions list = [34, 35, 36, 26, 16, 15]
starting episode 169, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 170, epsilon = 0.15
total reward = 42.0, actions 

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


val_loss =  1849.6005859375
mean_absolute_error =  19.740242
root_mean_squared_error =  23.974468
r2 =  -6645.423776121258


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,19.74024
r2,-6645.42378
root_mean_squared_error,23.97447
val_loss,1849.60059


finished training


  logger.warn(


total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 181, epsilon = 0.15
total reward = 18.0, actions list = [46, 56, 55, 54, 44, 43]
starting episode 182, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 183, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 184, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 185, epsilon = 0.15
total reward = 35.0, actions list = [9, 19, 29, 28, 18, 17]
starting episode 186, epsilon = 0.15
total reward = 37.0, actions list = [9, 8, 18, 19, 29, 39]
starting episode 187, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 188, epsilon = 0.15
total reward = 37.0, actions list = [33, 32, 42, 52, 51, 41]
starting episode 189, epsilon = 0.15
total reward = 42.0, actions list = [9, 19, 29, 39, 49, 59]
starting episode 190, epsilon = 0.15
total reward = 39.0, actions 

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /content/Model exists and is not empty.
INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


val_loss =  3827.97509765625
mean_absolute_error =  30.139393
root_mean_squared_error =  39.08167
r2 =  -1298.6326355847805


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,30.13939
r2,-1298.63264
root_mean_squared_error,39.08167
val_loss,3827.9751


finished training


In [18]:
from plotly.subplots import make_subplots

trace1 = go.Scatter(x=[i for i in range(len(accumulate_reward_means))], y=accumulate_reward_means, mode='lines', name="mean_rewards")


trace2 = go.Scatter(x=[i for i in range(len(accumulate_worm_len_means))], y=accumulate_worm_len_means, mode='lines', name="mean_lengths")
# Creazione della figura
fig = make_subplots(rows=1, cols=2)

# Aggiunta delle tracce alla figura
fig.add_trace(trace1, row=1, col=1)
fig.add_trace(trace2, row=1, col=2)

# Aggiunta del layout (opzionale)
fig.update_layout(title='training results', xaxis_title='X', yaxis_title='Y')

# Visualizzazione del grafico
fig.show()