# **SETUP**

In [1]:

!pip install torch-geometric lightning wandb gymnasium
!pip install -U plotly
!pip install -U scikit-learn
!pip install -U kaleido
!pip install "notebook>=5.3" "ipywidgets>=7.5"
!mkdir Data
!mkdir Plots

Collecting torch-geometric
  Downloading torch_geometric-2.5.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lightning
  Downloading lightning-2.2.0.post0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wandb
  Downloading wandb-0.16.3-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.8.0 (from lightning)
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  D

In [2]:
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data
import lightning as L
import torch
import torch.nn as nn
import wandb as wndb
from torch_geometric.nn import GATConv
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
import numpy as np
from sklearn.model_selection import train_test_split
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
import gymnasium as gym
from gymnasium.envs.registration import register
import pdb
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# **MODEL AND RELATED STUFF**

In [3]:
class GreedyWorm(nn.Module):

    def __init__(self):
        super(GreedyWorm, self).__init__()

    def forward(self, data):
        graphs, positions = data
        return [graphs[i].x[positions[i]] for i in range(len(positions))]


In [4]:

class WormUpExamplesDataset(Dataset):

    def __init__(self, graphs: list[Data], actions: list[int], rewards: list[int]):
        self.data = list(zip(graphs, actions, rewards))

    def __getitem__(self, idx: int):
        return self.data[idx]

    def __len__(self):
        return len(self.data)

    def collate(self, data: list):
        graphs = []
        actions = []
        rewards = []
        for el in data:
            graphs.append(el[0])
            actions.append(el[1])
            rewards.append(el[2])
        return graphs, actions, torch.tensor(rewards, dtype=torch.float)

    def get_dataloader(self, batch_size: int, shuffle: bool = False):
        return DataLoader(self, batch_size=batch_size, shuffle=shuffle, collate_fn=self.collate)




In [5]:
class GraphNN(nn.Module):

    def __init__(self, in_size, out_size, h_size, deep, activation, device="cpu"):
        super(GraphNN, self).__init__()
        self.activation = activation
        if deep == 1:
            self.layers = [GATConv(in_size, out_size)]  #.to(device)]
        else:
            self.layers = [GATConv(in_size, h_size)]  #.to(device)]
            for _ in range(deep - 2):
                self.layers.append(GATConv(h_size, h_size))  #.to(device))
            self.layers.append(GATConv(h_size, out_size))  #.to(device))

    def forward(self, data):
        edge_index = data[1]
        x = data[0]
        for layer in self.layers[:-1]:
            x = self.activation(layer(x, edge_index))
        #breakpoint()
        return self.layers[-1](x, edge_index)



In [6]:
class LinearNN(nn.Module):
    def __init__(self, in_size, out_size, h_size, deep, activation):
        super(LinearNN, self).__init__()
        if deep == 1:
            layers = [nn.Linear(in_size, out_size), activation]
        else:
            layers = [nn.Linear(in_size, h_size), activation]
            for _ in range(deep - 2):
                layers.append(nn.Linear(h_size, h_size))
                layers.append(activation)
            layers.append(nn.Linear(h_size, out_size))
        self.linear = nn.Sequential(*layers)

    def forward(self, data):
        return self.linear(data)

In [17]:

class IntelligentWorm(L.LightningModule):

    def __init__(self, linear: nn.Module, gnn: nn.Module, lr: float = 1e-3):
        super(IntelligentWorm, self).__init__()
        self.encoder = gnn
        self.decoder = linear
        self.loss = nn.MSELoss()
        self.validation_predictions = []
        self.validation_targets = []
        self.validation_loss = []
        self.train_loss = []
        self.best_val_loss = 100000000
        self.best_mae = 100000000
        self.best_rmse = 1000000000
        self.best_r2 = -10000000
        self.best_model = 0
        self.lr = lr

    def update_best_stats(self, val_loss, mae, rmse, r2):
        self.best_val_loss = val_loss
        self.best_mae = mae
        self.best_rmse = rmse
        self.best_r2 = r2

    def forward(self, data):
        actions = data[1]
        graphs = data[0]
        embeddings = []
        for i in range(len(graphs)):
            g = graphs[i]
            x = self.encoder((g.x, g.edge_index))
            embeddings.append(x[actions[i]])
        embeddings = torch.stack(embeddings)
        pred = self.decoder(embeddings)
        return pred.squeeze()

    def training_step(self, batch, batch_idx):
        graphs, actions, rewards = batch
        predictions = self.forward((graphs, actions))
        train_loss = self.loss(predictions, rewards)
        self.train_loss.append(train_loss)

        return train_loss

    def validation_step(self, batch, batch_idx):
        graphs, actions, rewards = batch
        #breakpoint()
        if rewards == []:
          print("ERROR!!!!!!")
        self.validation_targets.append(rewards)
        predictions = self.forward((graphs, actions))
        validation_loss = self.loss(predictions, rewards)
        self.validation_predictions.append(predictions)
        self.validation_loss.append(validation_loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer



In [8]:
class WormCallback(L.Callback):

    def on_train_epoch_end(self, trainer, pl_module):

        epoch_mean = float(torch.stack(pl_module.train_loss).mean())
        print("training_epoch_mean loss = ", epoch_mean)
        # free up the memory
        pl_module.train_loss.clear()
        try:
            wndb.log({"train_loss": epoch_mean})
        except:
            pass

    def on_validation_epoch_end(self, trainer, pl_module: IntelligentWorm):
        #breakpoint()
        predictions = torch.cat(pl_module.validation_predictions, dim=0).squeeze()
        targets = torch.cat(pl_module.validation_targets, dim=0).squeeze()
        #
        r2 = r2_score(predictions, targets)
        mae = mean_absolute_error(predictions, targets)
        rmse = root_mean_squared_error(predictions, targets)
        mean_loss = float(torch.stack(pl_module.validation_loss).mean())

        pl_module.validation_loss.clear()
        pl_module.validation_predictions.clear()
        pl_module.validation_targets.clear()

        print("val_loss = ", mean_loss)
        print("mean_absolute_error = ", mae)
        print("root_mean_squared_error = ", rmse)
        print("r2 = ", r2)
        count = 0
        count += 1 if mean_loss < pl_module.best_val_loss else 0
        count += 1 if mae < pl_module.best_mae else 0
        count += 1 if rmse < pl_module.best_rmse else 0
        count += 1 if r2 > pl_module.best_r2 else 0
        if count >= 3 or count == 2 and mean_loss < pl_module.best_val_loss:
            pl_module.update_best_stats(mean_loss, mae, rmse, r2)
            pl_module.best_model -= 1
            pl_module.log("best_model", pl_module.best_model)
            gnn_state_dict = pl_module.encoder.state_dict()
            linear_state_dict = pl_module.decoder.state_dict()

            # Salva gli state dict su file
            torch.save(gnn_state_dict, "Model/gnn_checkpoint.pth")
            torch.save(linear_state_dict, "Model/linear_checkpoint.pth")
        else:
            pl_module.log("best_model", pl_module.best_model + 1)

        wndb.log({"val_loss": mean_loss, "mean_absolute_error": mae, "root_mean_squared_error": rmse, "r2": r2})




# **AGENT**

In [18]:
MODEL_VERSION = "model_V1_"
USE_SINGLE_WORM = False
USE_WORMHOLES = False

WORMHOLES = "wormholes_enabled_" if USE_WORMHOLES else "wormholes_disabled_"
WORMS = "single_worm_" if USE_SINGLE_WORM else "multi_worms_"



class WormsMasterAgent:

    def __init__(
            self,
            linear_part: nn.Module,
            gnn_part: nn.Module,
            initial_epsilon: float,
            epsilon_decay: float,
            final_epsilon: float,
            learning_rate: float = 1,
            discount_factor: float = 0.95,
            decay_after: int = 1,
            #trainer params
            batch_size: int = 8,
            episodes_for_batch: int = 20,
            trainer_deterministic: bool = True,
            trainer_max_epochs: int = 20,
            trainer_accelerator: str = "cpu"

    ):
        self.batch_size = batch_size
        self.episodes_for_batch = episodes_for_batch
        self.episode = 0
        self.linear = linear_part
        self.gnn = gnn_part
        self.learning_model = IntelligentWorm(linear_part, gnn_part)
        self.model = GreedyWorm()
        self.lr = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = initial_epsilon
        self.epsilon_decay = epsilon_decay
        self.final_epsilon = final_epsilon
        self.decay_after = decay_after
        self.checkpoint_callback = ModelCheckpoint(dirpath="Model/", filename="worms_model", save_top_k=1,
                                                   mode='min', monitor='best_model')
        self.worm_callback = WormCallback()
        self.early_stopping_callback = EarlyStopping(monitor='best_model', mode='min', patience=3)
        self.trainer = L.Trainer(deterministic=trainer_deterministic,
                                 max_epochs=trainer_max_epochs, accelerator=trainer_accelerator,
                                 callbacks=[self.checkpoint_callback, self.worm_callback, self.early_stopping_callback])
        self.actual_rewards = np.array([])
        self.actual_observations = []
        self.actual_actions = []
        self.model_training_data = {
            "actions": [],
            "observations": [],
            "rewards": []
        }

    def get_action(self, observation, available_actions: list[int]) -> int:
        graph = observation["field"]
        worms = observation["worms"]

        self.actual_rewards = np.append(self.actual_rewards, 0)
        if len(available_actions) == 1:
          return available_actions[0]
        # with probability epsilon return a random action to explore the environment
        if np.random.random() < self.epsilon:
            i = np.random.randint(0, high=len(available_actions))
            return available_actions[i]

        # with probability (1 - epsilon) act greedily (exploit)
        else:
            self.model.eval()
            with torch.no_grad():
                idx = np.argmax(self.model(([graph] * len(available_actions), available_actions)))
                return available_actions[idx]

    def update(self, observation, action: int, reward: int, terminated: bool):
        #breakpoint()
        graph = observation["field"]
        worms = observation["worms"]
        self.actual_observations.append(graph)
        self.actual_actions.append(action)
        self.actual_rewards += self.lr * reward
        episode_reward = 0.0
        len_worms_placed = 0.0
        if terminated:
            episode_reward = float(self.actual_rewards[0])
            len_worms_placed = len(self.actual_actions)
            print(f"total reward = {episode_reward}, actions list = {self.actual_actions}")
            self.episode += 1
            self.model_training_data["actions"] += self.actual_actions.copy()
            self.actual_actions = []
            self.model_training_data["observations"] += self.actual_observations.copy()
            self.actual_observations = []
            self.model_training_data["rewards"] += self.actual_rewards.tolist()
            self.actual_rewards = np.array([])
            if self.episode % self.decay_after == 0:
                self.decay_epsilon()
            print(f"starting episode {self.episode}, epsilon = {self.epsilon}")
            if self.episode % self.episodes_for_batch == 0:
                self.train_model()
                self.model = self.learning_model
        return episode_reward, len_worms_placed

    def decay_epsilon(self):
        self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)

    def prepare_data(self):
        #breakpoint()
        print("preparing data to train")
        x = list(zip(self.model_training_data["observations"], self.model_training_data["actions"]))
        y = self.model_training_data["rewards"]
        x_train, x_val, train_rewards, val_rewards = train_test_split(x, y, test_size=0.2)
        del x, y
        train_graphs = []
        train_actions = []
        for el in x_train:
            train_graphs.append(el[0])
            train_actions.append(el[1])
        val_graphs = []
        val_actions = []
        for el in x_val:
            val_graphs.append(el[0])
            val_actions.append(el[1])
        train_dataset = WormUpExamplesDataset(train_graphs, train_actions, train_rewards)
        val_dataset = WormUpExamplesDataset(val_graphs, val_actions, val_rewards)
        train_dataloader = train_dataset.get_dataloader(self.batch_size, shuffle=True)
        val_dataloader = val_dataset.get_dataloader(self.batch_size, shuffle=False)
        return train_dataloader, val_dataloader

    def train_model(self):
        print("begin training")
        train_dataloader, val_dataloader = self.prepare_data()
        wndb.init(
            # set the wandb project where this run will be logged
            project="WormsWarmingUp",
            name= MODEL_VERSION + WORMHOLES + WORMS +  "training_ep_" + str(self.episode),
            # track hyperparameters and run metadata
            config={
                "learning_rate": self.learning_model.lr,
                "architecture": str(self.learning_model),
                "batch": self.episode // self.episodes_for_batch
            }
        )
        self.trainer.fit(self.learning_model, train_dataloader, val_dataloader)
        wndb.finish()
        gnn_state_dict = torch.load("Model/gnn_checkpoint.pth")
        linear_state_dict = torch.load("Model/linear_checkpoint.pth")
        self.linear.load_state_dict(linear_state_dict)
        self.gnn.load_state_dict(gnn_state_dict)
        self.learning_model = IntelligentWorm.load_from_checkpoint(checkpoint_path="Model/worms_model.ckpt", linear=self.linear, gnn=self.gnn)
        print("finished training")


# **INITIALIZE MODEL, AGENT AND ENVIRONMENT**

In [25]:

# device = "gpu" if
gnn_part = GraphNN(1,64,0,1,nn.ReLU())
linear_part = LinearNN(64,1,128,2, nn.ReLU())




In [26]:

agent = WormsMasterAgent(linear_part, gnn_part,0.95,0.005,0.1)


INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [27]:
TABLE="00-example.txt"

register(
    id="worms_env",
    entry_point="worms_env:WormsEnv",
    max_episode_steps=300,
)
environment = gym.make('worms_env', env_file="Data/" + TABLE, render_mode="human", use_single_worm=USE_SINGLE_WORM, enable_wormholes=USE_WORMHOLES)




[33mWARN: Overriding environment worms_env already in registry.[0m



In [13]:
wndb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [28]:
!rm -r Model
!mkdir Model

In [15]:
def compute_mean_rewards_to_print(rewards):
  #for x in rewards:
    # if x < 0:
    #   return 0
  return sum(rewards) / len(rewards)


# **TRAINING**

In [29]:
MAX_EPISODES = 200

ACCUMULATE = 5
accumulate_reward_means = []
accumulate_worm_len_means = []
last_rewards = [0 for _ in range(ACCUMULATE)]
last_len_worms_placed = [0 for _ in range(ACCUMULATE)]
done = False
obs, info = environment.reset()
print(f"starting episode {agent.episode}, epsilon = {agent.epsilon}")
# play one episode
while agent.episode < MAX_EPISODES:

    action = agent.get_action(info, environment.available_movements)
    obs, reward, terminated, truncated, info = environment.step(action)
    episode_reward, len_worms_placed = agent.update(info, action, reward, terminated)

    if terminated:
        obs, info = environment.reset()
        last_rewards[agent.episode % ACCUMULATE] = episode_reward
        last_len_worms_placed[agent.episode % ACCUMULATE] = len_worms_placed
        if agent.episode > ACCUMULATE:

          mean_reward = compute_mean_rewards_to_print(last_rewards)
          mean_len_worms_placed = sum(last_len_worms_placed) / len(last_len_worms_placed)
          accumulate_reward_means.append(mean_reward)
          accumulate_worm_len_means.append(mean_len_worms_placed)




[33mWARN: The obs returned by the `reset()` method was expecting a numpy array, actual type: <class 'gymnasium.spaces.box.Box'>[0m


[33mWARN: Casting input x to numpy array.[0m


[33mWARN: The obs returned by the `reset()` method is not within the observation space.[0m



starting episode 0, epsilon = 0.95



[33mWARN: The obs returned by the `step()` method was expecting a numpy array, actual type: <class 'gymnasium.spaces.box.Box'>[0m


[33mWARN: The obs returned by the `step()` method is not within the observation space.[0m



total reward = 116.0, actions list = [2, 3, 4, 5, 6, 7, 47, 57, 58, 59, 49, 48, 38, 39, 29, 28, 27, 37, 19, 9, 8, 0, 1, 11]
starting episode 1, epsilon = 0.945
total reward = 111.0, actions list = [31, 32, 22, 23, 24, 14, 28, 18, 19, 9, 8, 7, 17, 50, 40, 30, 20, 10, 3, 2, 12, 6, 5, 15]
starting episode 2, epsilon = 0.94
total reward = 111.0, actions list = [15, 5, 4, 14, 24, 34, 37, 36, 46, 47, 57, 56, 55, 32, 31, 21, 22, 12, 29, 19, 9, 44, 43, 42]
starting episode 3, epsilon = 0.9349999999999999
total reward = 101.0, actions list = [39, 38, 28, 29, 19, 9, 35, 45, 44, 34, 33, 32, 42, 50, 51, 52, 53, 54, 55, 56, 57, 22, 23, 13]
starting episode 4, epsilon = 0.9299999999999999
total reward = 108.0, actions list = [17, 18, 8, 9, 19, 29, 26, 16, 15, 14, 24, 34, 33, 5, 4, 3, 13, 23, 2, 12, 22, 48, 49, 59]
starting episode 5, epsilon = 0.9249999999999999
total reward = -128.0, actions list = [23, 22, 12, 2, 1, 11, 28, 29, 39, 38, 37, 36, 46, 10, 0]
starting episode 6, epsilon = 0.91999999999

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]


The number of training batches (46) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.



val_loss =  2846.92431640625
mean_absolute_error =  41.49134
root_mean_squared_error =  53.35658
r2 =  -198350.01911724702


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  6190.19580078125
mean_absolute_error =  62.348373
root_mean_squared_error =  78.11247
r2 =  -84.1594716330309
training_epoch_mean loss =  5741.07421875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  5851.158203125
mean_absolute_error =  57.43225
root_mean_squared_error =  76.26617
r2 =  -24.42462300255729
training_epoch_mean loss =  5382.31396484375


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  5323.349609375
mean_absolute_error =  53.901108
root_mean_squared_error =  72.52229
r2 =  -74.29461310579826
training_epoch_mean loss =  5172.1044921875


Validation: |          | 0/? [00:00<?, ?it/s]

val_loss =  5204.271484375
mean_absolute_error =  52.08854
root_mean_squared_error =  71.85873
r2 =  -49.43483209811927
training_epoch_mean loss =  5143.31884765625


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁█▆▅▅
r2,▁████
root_mean_squared_error,▁█▇▆▆
train_loss,█▄▁▁
val_loss,▁█▇▆▆

0,1
mean_absolute_error,52.08854
r2,-49.43483
root_mean_squared_error,71.85873
train_loss,5143.31885
val_loss,5204.27148


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = 122.0, actions list = [43, 53, 52, 42, 32, 22, 3, 13, 12, 2, 1, 0, 10, 8, 9, 19, 18, 28, 26, 27, 17, 50, 51, 41]
starting episode 21, epsilon = 0.8449999999999999
total reward = -81.0, actions list = [50, 51, 52, 42, 43, 33, 58, 59, 49, 39, 29, 28, 38, 54, 55, 45, 35, 36, 19, 9, 8, 53]
starting episode 22, epsilon = 0.8399999999999999
total reward = 114.0, actions list = [31, 41, 51, 52, 42, 32, 48, 49, 59, 58, 57, 56, 46, 43, 33, 23, 24, 14, 9, 8, 18, 20, 21, 22]
starting episode 23, epsilon = 0.8349999999999999
total reward = -109.0, actions list = [48, 58, 57, 47, 37, 36, 46, 56, 55, 54, 53, 43, 42, 51, 41, 31, 21, 22, 52]
starting episode 24, epsilon = 0.8299999999999998
total reward = 114.0, actions list = [28, 29, 19, 18, 8, 7, 17, 16, 26, 27, 37, 47, 57, 48, 58, 59, 49, 39, 2, 12, 22, 4, 3, 13]
starting episode 25, epsilon = 0.8249999999999998
total reward = -156.0, actions list = [3, 4, 14, 13, 12, 2, 21, 20, 10, 0, 1, 11]
starting episode 26, epsilon = 0.8199999


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  8548.6865234375
mean_absolute_error =  74.985725
root_mean_squared_error =  92.45911
r2 =  -124.92848259936673


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,74.98573
r2,-124.92848
root_mean_squared_error,92.45911
val_loss,8548.68652


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = 108.0, actions list = [3, 4, 5, 15, 16, 17, 44, 43, 33, 34, 35, 36, 26, 52, 53, 54, 55, 45, 51, 41, 31, 56, 46, 47]
starting episode 41, epsilon = 0.7449999999999998
total reward = 109.0, actions list = [9, 8, 7, 6, 16, 15, 20, 10, 11, 12, 22, 23, 33, 47, 46, 56, 55, 54, 49, 39, 29, 38, 48, 58]
starting episode 42, epsilon = 0.7399999999999998
total reward = 114.0, actions list = [5, 15, 14, 13, 23, 24, 52, 51, 41, 42, 32, 33, 43, 37, 38, 39, 29, 19, 35, 45, 55, 6, 16, 26]
starting episode 43, epsilon = 0.7349999999999998
total reward = 103.0, actions list = [9, 19, 18, 17, 27, 28, 35, 45, 44, 43, 53, 52, 51, 22, 23, 13, 3, 2, 4, 5, 15, 38, 39, 49]
starting episode 44, epsilon = 0.7299999999999998
total reward = 116.0, actions list = [22, 12, 2, 3, 4, 14, 9, 8, 18, 19, 29, 28, 27, 45, 35, 34, 33, 32, 52, 51, 50, 5, 15, 16]
starting episode 45, epsilon = 0.7249999999999998
total reward = 134.0, actions list = [37, 27, 28, 38, 39, 29, 19, 9, 8, 7, 6, 16, 26, 25, 15, 5, 4, 


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  11714.6171875
mean_absolute_error =  87.32051
root_mean_squared_error =  108.234085
r2 =  -115.10893342749638


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,87.32051
r2,-115.10893
root_mean_squared_error,108.23409
val_loss,11714.61719


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = -113.0, actions list = [9, 8, 18, 17, 16, 15, 19, 29, 28, 38, 39, 49, 59, 6, 7]
starting episode 61, epsilon = 0.6449999999999997
total reward = 109.0, actions list = [42, 52, 51, 50, 40, 30, 9, 19, 18, 28, 38, 39, 49, 23, 22, 12, 2, 3, 4, 14, 24, 6, 7, 17]
starting episode 62, epsilon = 0.6399999999999997
total reward = 111.0, actions list = [9, 19, 18, 17, 16, 26, 43, 44, 45, 35, 36, 46, 47, 6, 5, 4, 3, 2, 34, 33, 32, 42, 52, 51]
starting episode 63, epsilon = 0.6349999999999997
total reward = 128.0, actions list = [42, 43, 53, 52, 51, 41, 10, 0, 1, 11, 12, 22, 23, 39, 49, 59, 58, 48, 9, 8, 7, 19, 18, 17]
starting episode 64, epsilon = 0.6299999999999997
total reward = 127.0, actions list = [9, 19, 18, 28, 29, 39, 52, 42, 41, 51, 50, 40, 30, 6, 5, 15, 16, 26, 58, 48, 47, 44, 43, 33]
starting episode 65, epsilon = 0.6249999999999997
total reward = 129.0, actions list = [9, 19, 29, 28, 27, 26, 17, 7, 6, 5, 4, 3, 2, 40, 41, 31, 30, 20, 51, 52, 42, 16, 15, 14]
starting epi


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  6325.8017578125
mean_absolute_error =  58.946167
root_mean_squared_error =  79.53491
r2 =  -93.30464333353048


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,58.94617
r2,-93.30464
root_mean_squared_error,79.53491
val_loss,6325.80176


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = -122.0, actions list = [0, 10, 20, 30, 31, 41, 9, 8, 18, 28, 27, 17, 16, 39, 29, 19]
starting episode 81, epsilon = 0.5449999999999996
total reward = 123.0, actions list = [9, 19, 29, 39, 38, 37, 1, 0, 10, 11, 21, 22, 32, 51, 41, 31, 30, 20, 52, 42, 43, 3, 13, 12]
starting episode 82, epsilon = 0.5399999999999996
total reward = 85.0, actions list = [15, 5, 4, 3, 2, 12, 31, 21, 22, 23, 24, 34, 33, 28, 38, 48, 47, 46, 58, 57, 56, 27, 26, 36]
starting episode 83, epsilon = 0.5349999999999996
total reward = 116.0, actions list = [46, 56, 55, 54, 53, 52, 9, 19, 29, 39, 49, 59, 58, 35, 25, 15, 16, 6, 26, 27, 37, 20, 10, 0]
starting episode 84, epsilon = 0.5299999999999996
total reward = 122.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 42, 43, 33, 32, 47, 37, 27, 26, 16, 6, 7, 8, 24, 25, 15]
starting episode 85, epsilon = 0.5249999999999996
total reward = 133.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 50, 40, 41, 42, 32, 6, 5, 15, 16, 26, 25, 24, 34, 47, 46, 36]


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  13190.3515625
mean_absolute_error =  97.76258
root_mean_squared_error =  114.84926
r2 =  -190.87261400574033


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,97.76258
r2,-190.87261
root_mean_squared_error,114.84926
val_loss,13190.35156


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = 123.0, actions list = [19, 9, 8, 7, 6, 5, 38, 39, 49, 59, 58, 48, 47, 35, 34, 24, 14, 15, 52, 51, 41, 29, 28, 18]
starting episode 101, epsilon = 0.4449999999999995
total reward = 120.0, actions list = [44, 43, 33, 34, 35, 45, 9, 19, 29, 39, 49, 59, 58, 52, 42, 41, 51, 50, 11, 10, 0, 6, 5, 15]
starting episode 102, epsilon = 0.4399999999999995
total reward = 121.0, actions list = [51, 52, 53, 54, 55, 56, 9, 19, 29, 28, 27, 26, 16, 43, 44, 45, 46, 36, 50, 40, 41, 17, 18, 8]
starting episode 103, epsilon = 0.4349999999999995
total reward = 128.0, actions list = [9, 19, 29, 39, 38, 37, 58, 59, 49, 48, 47, 46, 56, 52, 51, 50, 40, 41, 6, 5, 15, 17, 16, 26]
starting episode 104, epsilon = 0.4299999999999995
total reward = 133.0, actions list = [9, 19, 29, 39, 38, 48, 52, 51, 41, 31, 30, 20, 10, 6, 7, 17, 16, 15, 5, 4, 3, 49, 59, 58]
starting episode 105, epsilon = 0.4249999999999995
total reward = 138.0, actions list = [9, 19, 29, 39, 49, 48, 52, 53, 43, 42, 41, 51, 50, 6, 5, 


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  7417.86669921875
mean_absolute_error =  75.465576
root_mean_squared_error =  86.12704
r2 =  -106.99189675793109


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,75.46558
r2,-106.9919
root_mean_squared_error,86.12704
val_loss,7417.8667


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = 128.0, actions list = [8, 9, 19, 18, 17, 16, 57, 56, 46, 47, 37, 38, 39, 6, 5, 15, 25, 26, 48, 49, 59, 41, 51, 52]
starting episode 121, epsilon = 0.3449999999999994
total reward = 113.0, actions list = [9, 8, 18, 19, 29, 39, 52, 51, 41, 31, 21, 22, 32, 47, 37, 38, 28, 27, 45, 44, 43, 34, 35, 36]
starting episode 122, epsilon = 0.3399999999999994
total reward = 142.0, actions list = [9, 19, 29, 28, 27, 26, 5, 6, 16, 17, 7, 8, 18, 52, 51, 41, 42, 32, 59, 58, 48, 49, 39, 38]
starting episode 123, epsilon = 0.3349999999999994
total reward = 126.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 40, 30, 31, 32, 11, 12, 2, 1, 0, 6, 5, 15, 48, 38, 28]
starting episode 124, epsilon = 0.3299999999999994
total reward = 131.0, actions list = [9, 8, 18, 19, 29, 39, 49, 59, 58, 48, 47, 46, 56, 52, 51, 50, 40, 41, 6, 5, 15, 42, 32, 31]
starting episode 125, epsilon = 0.3249999999999994
total reward = 119.0, actions list = [26, 36, 37, 38, 28, 29, 19, 9, 8, 7, 6, 5, 4, 14, 24, 23, 


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  8026.4482421875
mean_absolute_error =  73.19032
root_mean_squared_error =  89.59045
r2 =  -91.28323626418708


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,73.19032
r2,-91.28324
root_mean_squared_error,89.59045
val_loss,8026.44824


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = 129.0, actions list = [8, 9, 19, 29, 39, 38, 53, 52, 51, 50, 40, 41, 42, 43, 33, 32, 22, 12, 6, 5, 15, 44, 45, 35]
starting episode 141, epsilon = 0.24499999999999933
total reward = 132.0, actions list = [9, 19, 18, 17, 16, 6, 5, 4, 3, 2, 1, 0, 10, 11, 21, 31, 41, 51, 52, 42, 32, 39, 29, 28]
starting episode 142, epsilon = 0.23999999999999932
total reward = 129.0, actions list = [9, 19, 29, 28, 18, 17, 52, 42, 41, 51, 50, 40, 30, 27, 26, 16, 6, 5, 59, 49, 39, 56, 46, 45]
starting episode 143, epsilon = 0.23499999999999932
total reward = 142.0, actions list = [9, 19, 29, 39, 49, 59, 13, 12, 2, 3, 4, 5, 6, 52, 51, 41, 42, 32, 17, 27, 26, 16, 15, 25]
starting episode 144, epsilon = 0.22999999999999932
total reward = 131.0, actions list = [9, 19, 29, 39, 38, 48, 41, 51, 52, 53, 43, 42, 32, 6, 5, 15, 16, 26, 49, 59, 58, 56, 46, 45]
starting episode 145, epsilon = 0.2249999999999993
total reward = 134.0, actions list = [9, 19, 29, 39, 49, 48, 52, 51, 41, 42, 32, 22, 12, 6, 5, 


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  11553.9765625
mean_absolute_error =  87.68582
root_mean_squared_error =  107.48942
r2 =  -133.57982155700108


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,87.68582
r2,-133.57982
root_mean_squared_error,107.48942
val_loss,11553.97656


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = 128.0, actions list = [9, 19, 29, 39, 49, 59, 52, 53, 43, 42, 41, 51, 50, 6, 5, 15, 16, 26, 14, 4, 3, 56, 46, 45]
starting episode 161, epsilon = 0.14499999999999924
total reward = -72.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 42, 32, 31, 21, 18, 17, 16, 6, 5, 56, 46, 45, 7, 8]
starting episode 162, epsilon = 0.13999999999999924
total reward = 136.0, actions list = [9, 19, 29, 39, 49, 59, 52, 42, 41, 51, 50, 40, 30, 6, 5, 15, 16, 26, 53, 43, 33, 17, 27, 37]
starting episode 163, epsilon = 0.13499999999999923
total reward = 134.0, actions list = [9, 8, 18, 19, 29, 39, 52, 51, 41, 31, 32, 22, 21, 6, 5, 4, 3, 2, 26, 16, 15, 42, 43, 53]
starting episode 164, epsilon = 0.12999999999999923
total reward = 141.0, actions list = [9, 19, 29, 39, 49, 59, 53, 52, 42, 43, 33, 32, 22, 6, 5, 4, 3, 2, 51, 41, 31, 15, 16, 26]
starting episode 165, epsilon = 0.12499999999999922
total reward = 125.0, actions list = [9, 19, 29, 39, 49, 59, 52, 42, 41, 51, 50, 40, 30, 6, 16, 15, 

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113632655552565, max=1.0…


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  3309.328125
mean_absolute_error =  41.076973
root_mean_squared_error =  57.52676
r2 =  -49.41100215619215


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,41.07697
r2,-49.411
root_mean_squared_error,57.52676
val_loss,3309.32812


finished training



[33mWARN: env.available_movements to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.available_movements` for environment variables or `env.get_wrapper_attr('available_movements')` that will search the reminding wrappers.[0m



total reward = 139.0, actions list = [9, 19, 29, 39, 49, 59, 44, 43, 53, 52, 42, 41, 51, 32, 31, 21, 11, 10, 3, 4, 5, 6, 16, 15]
starting episode 181, epsilon = 0.1
total reward = 135.0, actions list = [9, 19, 29, 39, 49, 59, 52, 42, 32, 33, 43, 53, 54, 6, 5, 15, 16, 26, 51, 41, 31, 13, 3, 2]
starting episode 182, epsilon = 0.1
total reward = 132.0, actions list = [9, 19, 29, 39, 38, 48, 52, 51, 41, 42, 32, 31, 21, 6, 5, 15, 16, 26, 49, 59, 58, 56, 46, 45]
starting episode 183, epsilon = 0.1
total reward = 128.0, actions list = [9, 19, 29, 39, 49, 59, 13, 3, 4, 5, 6, 16, 15, 52, 51, 41, 42, 32, 56, 46, 45, 55, 54, 53]
starting episode 184, epsilon = 0.1
total reward = 147.0, actions list = [9, 19, 29, 28, 27, 26, 52, 51, 41, 42, 32, 22, 12, 6, 5, 4, 3, 2, 59, 49, 39, 16, 15, 25]
starting episode 185, epsilon = 0.1
total reward = 131.0, actions list = [9, 19, 29, 39, 49, 59, 52, 51, 41, 42, 32, 22, 21, 6, 5, 15, 25, 26, 16, 17, 7, 56, 46, 45]
starting episode 186, epsilon = 0.1
total re


Checkpoint directory /content/Model exists and is not empty.

INFO: 
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type     | Params
-------------------------------------
0 | encoder | GraphNN  | 0     
1 | decoder | LinearNN | 8.4 K 
2 | loss    | MSELoss  | 0     
-------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val_loss =  3385.259765625
mean_absolute_error =  44.44587
root_mean_squared_error =  58.182983
r2 =  -40.733637339631024


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_absolute_error,▁
r2,▁
root_mean_squared_error,▁
val_loss,▁

0,1
mean_absolute_error,44.44587
r2,-40.73364
root_mean_squared_error,58.18298
val_loss,3385.25977


finished training


# **PLOT RESULTS OF TRAINING**

In [30]:


trace1 = go.Scatter(x=[i for i in range(len(accumulate_reward_means))], y=accumulate_reward_means, mode='lines', name="mean_rewards")


trace2 = go.Scatter(x=[i for i in range(len(accumulate_worm_len_means))], y=accumulate_worm_len_means, mode='lines', name="mean_lengths")

fig1 = go.Figure(trace1)
fig2 = go.Figure(trace2)






# Aggiunta del layout (opzionale)
fig1.update_layout(title='reward results', xaxis_title='X', yaxis_title='Y')



# Aggiunta del layout (opzionale)
fig2.update_layout(title='episode lengths results', xaxis_title='X', yaxis_title='Y')

fig1.write_image("Plots/rewards_" + MODEL_VERSION + WORMHOLES + WORMS[:-1] + ".png")
fig2.write_image("Plots/ep_lengths_" + MODEL_VERSION + WORMHOLES + WORMS[:-1] + ".png")

# Visualizzazione del grafico
fig1.show()

fig2.show()