This notebook is an exercise for getting familiar with the `ReinforcementLearning.jl` library.

## 1. A gentle example of using ReinforcementLearning.jl

In [1]:
import Pkg;
# uncomment the following if you have not installed them
# Pkg.add("ReinforcementLearning");
# Pkg.add("Flux");
# Pkg.add("StableRNGs");
# Pkg.add("Distributions");
using Flux: InvDecay;
using ReinforcementLearning;
using StableRNGs;
using Flux;
using Flux.Losses;
using Distributions;

In [2]:
# Build the 1-D random-walk environment that the following cells interact with.
env = RandomWalk1D()

# RandomWalk1D

## Traits

| Trait Type        |                Value |
|:----------------- | --------------------:|
| NumAgentStyle     |        SingleAgent() |
| DynamicStyle      |         Sequential() |
| InformationStyle  | PerfectInformation() |
| ChanceStyle       |      Deterministic() |
| RewardStyle       |     TerminalReward() |
| UtilityStyle      |         GeneralSum() |
| ActionStyle       |   MinimalActionSet() |
| StateStyle        | Observation{Int64}() |
| DefaultStateStyle | Observation{Int64}() |

## Is Environment Terminated?

No

## State Space

`Base.OneTo(7)`

## Action Space

`Base.OneTo(2)`

## Current State

```
4
```


### random policy

In [3]:
# Step `env` with uniformly sampled actions until the episode terminates.
A = action_space(env)
while true
    env(rand(A))
    if is_terminated(env)
        break
    end
end

In [4]:
# Roll out a uniformly random policy on a newly constructed random walk for ten
# episodes, recording the total reward of each episode.
let walker = RandomPolicy(), world = RandomWalk1D()
    run(walker, world, StopAfterEpisode(10), TotalRewardPerEpisode())
end

            ⠀⠀⠀⠀⠀⠀⠀⠀⠀[97;1mTotal reward per episode[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀ 
            [38;5;8m┌────────────────────────────────────────┐[0m 
          [38;5;8m1[0m [38;5;8m│[0m⠀⠀⠀⠀[38;5;2m⣷[0m⠀⠀⠀⠀⠀⠀⠀⠀[38;5;2m⣇[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;2m⣸[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀[38;5;2m⢠[0m[38;5;2m⢻[0m⠀⠀⠀⠀⠀⠀⠀[38;5;2m⢰[0m[38;5;2m⢹[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;2m⡏[0m[38;5;2m⡆[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀[38;5;2m⢸[0m⠀[38;5;2m⡇[0m⠀⠀⠀⠀⠀⠀[38;5;2m⢸[0m[38;5;2m⠈[0m[38;5;2m⡆[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;2m⢰[0m[38;5;2m⠁[0m[38;5;2m⡇[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀[38;5;2m⡎[0m⠀[38;5;2m⢇[0m⠀⠀⠀⠀⠀⠀[38;5;2m⡇[0m⠀[38;5;2m⡇[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;2m⢸[0m⠀[38;5;2m⢸[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀[38;5;2m⡇[0m⠀[38;5;2m⢸[0m⠀⠀⠀⠀⠀[38;5;2m⢀[0m[38;5;2m⠇[0m⠀[38;5;2m⢸[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38

TotalRewardPerEpisode([-1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, -1.0, -1.0, -1.0], 0.0, true)

### tabular policy

In [5]:
# Create a tabular policy that selects action 2 in every state.
S = state_space(env);
A = action_space(env);
# NA must be the *number* of actions, not the action space itself: it is later
# passed as `n_action` when sizing the Q-table.
NS, NA = length(S), length(A);
tabular_policy = TabularPolicy(; table = Dict(s => 2 for s in 1:NS));

In [6]:
# Run the fixed tabular policy on a newly constructed random walk for ten
# episodes and collect the per-episode total reward.
run(tabular_policy, RandomWalk1D(), StopAfterEpisode(10), TotalRewardPerEpisode())

           ⠀⠀⠀⠀⠀⠀⠀⠀⠀[97;1mTotal reward per episode[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀ 
           [38;5;8m┌────────────────────────────────────────┐[0m 
         [38;5;8m2[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
          [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
          [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
          [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
          [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
          [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
          [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
   Score  [38;5;8m[0m [38;5;8m│[0m[38;5;2m⠤[0m[38;5;2m⠤[0m[38;5;2m⠤[0m[38;5;2m⠤[0m[38;5;2m⠤[0m[38;5;2m

TotalRewardPerEpisode([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.0, true)

### `QBasedPolicy`

In [8]:
# Q-value-based policy: a Monte Carlo learner over a tabular Q-function,
# paired with an epsilon-greedy explorer (epsilon = 0.1).
policy = let
    q_table = TabularQApproximator(; n_state = NS, n_action = NA, opt = InvDecay(1.0))
    QBasedPolicy(;
        learner = MonteCarloLearner(; approximator = q_table),
        explorer = EpsilonGreedyExplorer(0.1),
    )
end

typename(QBasedPolicy)
├─ learner => typename(MonteCarloLearner)
│  ├─ approximator => typename(TabularApproximator)
│  │  ├─ table => 2×7 Matrix{Float64}
│  │  └─ optimizer => typename(InvDecay)
│  │     ├─ gamma => 1.0
│  │     └─ state => typename(IdDict)
│  ├─ γ => 1.0
│  ├─ kind => typename(ReinforcementLearningZoo.FirstVisit)
│  └─ sampling => typename(ReinforcementLearningZoo.NoSampling)
└─ explorer => typename(EpsilonGreedyExplorer)
   ├─ ϵ_stable => 0.1
   ├─ ϵ_init => 1.0
   ├─ warmup_steps => 0
   ├─ decay_steps => 0
   ├─ step => 1
   ├─ rng => typename(Random._GLOBAL_RNG)
   └─ is_training => true


In [9]:
# Run the Q-based policy on a newly constructed random walk for ten episodes,
# tracking the total reward of each episode.
run(policy, RandomWalk1D(), StopAfterEpisode(10), TotalRewardPerEpisode())

            ⠀⠀⠀⠀⠀⠀⠀⠀⠀[97;1mTotal reward per episode[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀ 
            [38;5;8m┌────────────────────────────────────────┐[0m 
          [38;5;8m0[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
   Score   [38;5;8m[0m [38;5;8m│[0m[38;5;2m⠤[0m[38;5;2m⠤[0m[38;5;2m⠤[0m[38;5;2m⠤[0m[38;5;2m⠤[0

TotalRewardPerEpisode([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0], 0.0, true)

### wrap the policy + trajectory into the 'agent'

In [10]:
# Bundle the policy with a state/action/reward/terminal trajectory into an Agent.
agent = Agent(;
    policy = policy,
    trajectory = VectorSARTTrajectory(),
)

typename(Agent)
├─ policy => typename(QBasedPolicy)
│  ├─ learner => typename(MonteCarloLearner)
│  │  ├─ approximator => typename(TabularApproximator)
│  │  │  ├─ table => 2×7 Matrix{Float64}
│  │  │  └─ optimizer => typename(InvDecay)
│  │  │     ├─ gamma => 1.0
│  │  │     └─ state => typename(IdDict)
│  │  ├─ γ => 1.0
│  │  ├─ kind => typename(ReinforcementLearningZoo.FirstVisit)
│  │  └─ sampling => typename(ReinforcementLearningZoo.NoSampling)
│  └─ explorer => typename(EpsilonGreedyExplorer)
│     ├─ ϵ_stable => 0.1
│     ├─ ϵ_init => 1.0
│     ├─ warmup_steps => 0
│     ├─ decay_steps => 0
│     ├─ step => 31
│     ├─ rng => typename(Random._GLOBAL_RNG)
│     └─ is_training => true
└─ trajectory => typename(Trajectory)
   └─ traces => typename(NamedTuple)
      ├─ state => 0-element Vector{Int64}
      ├─ action => 0-element Vector{Int64}
      ├─ reward => 0-element Vector{Float32}
      └─ terminal => 0-element Vector{Bool}


In [11]:
# Run the agent on `env` for ten episodes, recording each episode's total reward.
run(
    agent,
    env,
    StopAfterEpisode(10),
    TotalRewardPerEpisode(),
)

            ⠀⠀⠀⠀⠀⠀⠀⠀⠀[97;1mTotal reward per episode[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀ 
            [38;5;8m┌────────────────────────────────────────┐[0m 
          [38;5;8m1[0m [38;5;8m│[0m⠀⠀⠀⠀[38;5;2m⡏[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;2m⠉[0m[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀[38;5;2m⢠[0m[38;5;2m⠃[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀[38;5;2m⢸[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
           [38;5;8m[0m [38;5;8

TotalRewardPerEpisode([-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.0, true)

## 2. PPO algorithm for pendulum problem (built-in experiment)

In [12]:

"""
    RL.Experiment(::Val{:JuliaRL}, ::Val{:PPO}, ::Val{:Pendulum}, ::Nothing;
                  save_dir = nothing, seed = 123)

Build the "Play Pendulum with PPO" experiment.

The experiment consists of:
- a `MultiThreadEnv` of `N_ENV = 8` `PendulumEnv`s, each wrapped so that the
  incoming action is doubled and then clamped to the pendulum's action bounds;
- an `Agent` holding a `PPOPolicy` (actor-critic with a Gaussian actor) and a
  `PPOTrajectory` whose capacity equals the policy's update frequency;
- a `StopAfterStep(50_000)` stop condition and a
  `TotalBatchRewardPerEpisode(N_ENV)` hook.

# Keywords
- `save_dir`: accepted for signature compatibility; not used by this method.
- `seed`: seeds the `StableRNG` shared by the policy and the weight initializer,
  and (through `hash(seed + i)`) the RNG of each of the parallel environments.

# Returns
An `Experiment` wrapping the agent, environment, stop condition and hook.
"""
function RL.Experiment(
    ::Val{:JuliaRL},
    ::Val{:PPO},
    ::Val{:Pendulum},
    ::Nothing;
    save_dir = nothing,
    seed = 123,
)
    rng = StableRNG(seed)

    # A single environment instance is used to read the action bounds and the
    # length of the state vector.
    inner_env = PendulumEnv(T = Float32, rng = rng)
    A = action_space(inner_env)
    low = A.left
    high = A.right
    ns = length(state(inner_env))

    N_ENV = 8
    UPDATE_FREQ = 2048

    # Each parallel environment gets its own RNG. Actions are doubled and then
    # clamped to [low, high] before reaching the pendulum.
    env = MultiThreadEnv([
        ActionTransformedEnv(
            PendulumEnv(T = Float32, rng = StableRNG(hash(seed + i)));
            action_mapping = x -> clamp(x * 2, low, high),
        ) for i in 1:N_ENV
    ])

    # One initializer bound to `rng`, shared by every layer (previously this
    # local was created but the same expression was repeated at each call site).
    init = glorot_uniform(rng)

    agent = Agent(
        policy = PPOPolicy(
            approximator = ActorCritic(
                actor = GaussianNetwork(
                    pre = Chain(
                        Dense(ns, 64, relu; init = init),
                        Dense(64, 64, relu; init = init),
                    ),
                    μ = Chain(Dense(64, 1, tanh; init = init), vec),
                    logσ = Chain(Dense(64, 1; init = init), vec),
                ),
                critic = Chain(
                    Dense(ns, 64, relu; init = init),
                    Dense(64, 64, relu; init = init),
                    Dense(64, 1; init = init),
                ),
                optimizer = ADAM(3e-4),
            ) |> gpu,  # Flux falls back to the CPU when no GPU is accessible
            γ = 0.99f0,
            λ = 0.95f0,
            clip_range = 0.2f0,
            max_grad_norm = 0.5f0,
            n_epochs = 10,
            n_microbatches = 32,
            actor_loss_weight = 1.0f0,
            critic_loss_weight = 0.5f0,
            entropy_loss_weight = 0.00f0,
            dist = Normal,
            rng = rng,
            update_freq = UPDATE_FREQ,
        ),
        trajectory = PPOTrajectory(;
            capacity = UPDATE_FREQ,
            state = Matrix{Float32} => (ns, N_ENV),
            action = Vector{Float32} => (N_ENV,),
            action_log_prob = Vector{Float32} => (N_ENV,),
            reward = Vector{Float32} => (N_ENV,),
            terminal = Vector{Bool} => (N_ENV,),
        ),
    )

    # The progress bar is suppressed when the `CI` environment variable is set.
    stop_condition = StopAfterStep(50_000, is_show_progress=!haskey(ENV, "CI"))
    hook = TotalBatchRewardPerEpisode(N_ENV)
    return Experiment(agent, env, stop_condition, hook, "# Play Pendulum with PPO")
end

[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.7/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.7/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.7/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.7/Manifest.toml`


In [13]:
# Install Plots on demand; it is only needed for the optional plot at the bottom.
# (A stray `end` had been fused onto this line, producing the undefined name `endPkg`.)
Pkg.add("Plots")
using Plots
using Statistics
# Instantiate the PPO/Pendulum experiment defined above and run it.
ex = E`JuliaRL_PPO_Pendulum`
run(ex)
# Optional: plot the mean episode reward across environments with a std ribbon.
# n = minimum(map(length, ex.hook.rewards))
# m = mean([@view(x[1:n]) for x in ex.hook.rewards])
# s = std([@view(x[1:n]) for x in ex.hook.rewards])
# plot(m,ribbon=s)

[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.7/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.7/Manifest.toml`
┌ Info: The GPU function is being called but the GPU is not accessible. 
│ Defaulting back to the CPU. (No action is required if you want to run on the CPU).
└ @ Flux /home/richard/.julia/packages/Flux/7nTyc/src/functor.jl:187


# Play Pendulum with PPO


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:50[39m9:52[39m


               ⠀⠀⠀⠀⠀⠀⠀[97;1mAvg total reward per episode[0m⠀⠀⠀⠀⠀⠀⠀ 
               [38;5;8m┌────────────────────────────────────────┐[0m 
             [38;5;8m0[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;1m⢠[0m[38;5;1m⣾[0m[38;5;1m⣷[0m[38;5;1m⡿[0m[38;5;1m⣶[0m[38;5;1m⣿[0m[38;5;1m⣿[0m[38;5;1m⣴[0m[38;5;1m⣷[0m[38;5;1m⠃[0m⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
              [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;1m⢠[0m[38;5;1m⣴[0m[38;5;1m⣸[0m[38;5;1m⠟[0m[38;5;3m⣯[0m[38;5;7m⣧[0m[38;5;6m⣿[0m[38;5;3m⣿[0m[38;5;6m⣶[0m[38;5;7m⣾[0m[38;5;3m⣸[0m[38;5;2m⡆[0m⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
              [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;1m⣼[0m[38;5;1m⣿[0m[38;5;3m⣿[0m[38;5;2m⣿[0m[38;5;6m⣿[0m[38;5;6m⣧[0m[38;5;6m⣿[0m[38;5;6m⣿[0m[38;5;6m⣿[0m[38;5;6m⣿[0m[38;5;6m⢻[0m[38;5;4m⡆[0m⠀⠀⠀⠀⠀⠀[38;5;8m│[0m [38;5;8m[0m
              [38;5;8m[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;1m⢀[0m[38;5;1

# Play Pendulum with PPO


typename(Experiment)
├─ policy => typename(Agent)
│  ├─ policy => typename(PPOPolicy)
│  │  ├─ approximator => typename(ActorCritic)
│  │  │  ├─ actor => typename(GaussianNetwork)
│  │  │  │  ├─ pre => typename(Chain)
│  │  │  │  │  └─ layers
│  │  │  │  │     ├─ 1
│  │  │  │  │     │  └─ typename(Dense)
│  │  │  │  │     │     ├─ weight => 64×3 Matrix{Float32}
│  │  │  │  │     │     ├─ bias => 64-element Vector{Float32}
│  │  │  │  │     │     └─ σ => typename(typeof(relu))
│  │  │  │  │     └─ 2
│  │  │  │  │        └─ typename(Dense)
│  │  │  │  │           ├─ weight => 64×64 Matrix{Float32}
│  │  │  │  │           ├─ bias => 64-element Vector{Float32}
│  │  │  │  │           └─ σ => typename(typeof(relu))
│  │  │  │  ├─ μ => typename(Chain)
│  │  │  │  │  └─ layers
│  │  │  │  │     ├─ 1
│  │  │  │  │     │  └─ typename(Dense)
│  │  │  │  │     │     ├─ weight => 1×64 Matrix{Float32}
│  │  │  │  │     │     ├─ bias => 1-element Vector{Float32}
│  │  │  │  │     │     └─ σ => typen