In [1]:
%load_ext autoreload
%autoreload 2

import thread_the_needle as ttn


from vi_ppo.actor_critic import ActorCritic
from vi_ppo.nets.mlp import Mlp
from vi_ppo.nets.cnn import Cnn
from vi_ppo.modules import ThreadTheNeedleModule
import lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger


In [2]:
# Seed the Python, NumPy and PyTorch global RNGs before anything is created so
# that weight initialisation and sampling repeat across "Restart & Run All".
# NOTE(review): if the environment keeps its own RNG it may also need a seed
# passed to `env.reset(...)` — confirm against thread_the_needle.
SEED = 0
pl.seed_everything(SEED)

# Initialise the environment
env = ttn.make("thread_the_needle")

# Sizes needed to build the actor-critic model
d = env.observation_space.shape  # observation shape reported by the env
n_a = env.action_space.n  # number of discrete actions
hidden_dims = 16  # passed as `hidden_dims` to the actor and critic MLPs

print("Observation space: ", d)
print("Action space: ", n_a)

Observation space:  (1, 64, 64)
Action space:  4


In [3]:
# Reset the environment; the returned (observation, info) tuple is displayed as the cell output.
env.reset()

(array([[[ 0,  0,  1, ..., 24, 22, 19],
         [ 0,  0,  1, ..., 25, 22, 20],
         [ 0,  0,  1, ..., 26, 23, 20],
         ...,
         [ 4,  5,  6, ..., 16, 15, 14],
         [ 3,  4,  5, ..., 15, 14, 13],
         [ 3,  3,  4, ..., 13, 12, 11]]], shape=(1, 64, 64)),
 {})

In [4]:
# CNN feature-extractor configuration. The channel count and the input shape
# are read from the environment's observation shape `d` rather than being
# hard-coded, so this cell keeps working if the observation size changes.
feature_extractor_config = Cnn.config_cls(
    input_channels=d[0],  # observations are channel-first: (channels, height, width)
    channels=[8, 16, 1],
    kernel_sizes=[8, 4, 1],
    strides=[2, 2, 1],
    padding=[0, 0, 0],
    flatten_output=True,
    activation="silu",
)

# Alternative (larger) configuration, kept for reference only:
# feature_extractor_config = Cnn.config_cls(
#     input_channels=1,
#     channels=[32,32,64,1],
#     kernel_sizes=[8,4,3,1],
#     strides=[4,2,1,1],
#     padding=[1,1,1,1],
#     flatten_output=True,
#     activation="elu",
#     )

# Sanity check: display the extractor's output shape for one observation.
Cnn(feature_extractor_config).calculate_output_shape(input_shape=d)

torch.Size([1, 169])

In [None]:

# Feature extractor passed to the ActorCritic below.
feature_extractor = Cnn(feature_extractor_config)

# Width of the flattened features: second entry of the reported output shape.
# The input shape comes from the environment (`d`) instead of a literal.
embedding_dims = feature_extractor.calculate_output_shape(input_shape=d)[1]

# Settings shared by the actor and critic heads; defining them once keeps the
# two networks from drifting apart when one value is edited.
head_kwargs = dict(
    input_dims=embedding_dims,
    hidden_dims=hidden_dims,
    n_layers=1,
    activation="silu",
)
actor_config = Mlp.config_cls(output_dims=n_a, **head_kwargs)  # one output per action
critic_config = Mlp.config_cls(output_dims=1, **head_kwargs)  # single output

ac_config = ActorCritic.config_cls(
    clip_epsilon=0.2,
    value_coeff=0.5,
    entropy_coeff=0.01,
)

model = ActorCritic(
    ac_config,
    actor_net=Mlp(actor_config),
    critic=Mlp(critic_config),
    feature_extractor=feature_extractor,
)


# Training module

In [6]:

# Wrap the actor-critic and the environment in the Lightning module.
config = ThreadTheNeedleModule.config_class(lr=3e-4)
module = ThreadTheNeedleModule(
    config=config,
    env=env,
    actor_critic=model,
)

# TensorBoard logger rooted at ../lightning_logs with run name "thread_the_needle".
logger = TensorBoardLogger(save_dir="../lightning_logs", name="thread_the_needle")

# Train for at most 100 epochs.
trainer = pl.Trainer(logger=logger, max_epochs=100)
trainer.fit(module)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type        | Params | Mode 
-----------------------------------------------------
0 | actor_critic | ActorCritic | 8.7 K  | train
-----------------------------------------------------
8.7 K     Trainable params
0         Non-trainable params
8.7 K     Total params
0.035     Total estimated model params size (MB)
24        Modules in train mode
0         Modules in eval mode
/Users/nicholasfranklin/miniconda3/envs/vi_ppo/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/nicholasfranklin/miniconda3/envs/vi_ppo/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (1

Training: |          | 0/? [00:00<?, ?it/s]

  return torch.tensor(obs, dtype=torch.float32).to(self.device)

Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined