### Notatnik do weryfikacji liczby parametrów danych architektur

In [45]:
BASE_PATH=r'C:\initial_model\historic_synthetic_dnnevo\RANDOM\FINAL_MODELS'

In [6]:
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.env_util import make_vec_env
import torch as th
import torch.nn as nn



def params_analysis(model):
    def count_params(module):
        return sum(p.numel() for p in module.parameters())

    policy = model.policy
    print("=== TOTAL POLICY PARAMS ===")
    total_policy_params = count_params(policy)
    print(f"Total policy params: {total_policy_params}\n")

    # Keep track of which parameter names we've already counted
    counted_names = set()


    def add_group(name, module):
        """Print param count for a module and mark its params as counted."""
        if module is None:
            print(f"{name}: <None>")
            return
        n = count_params(module)
        print(f"{name:20s}: {n:8d} params")
        for pname, _ in module.named_parameters():
            counted_names.add(pname)


    print("=== MAIN GROUPS ===")
    # 1) Feature extractor
    add_group("features_extractor", policy.features_extractor)

    # 2) LSTMs (actor / critic / shared)
    if hasattr(policy, "lstm_actor") and policy.lstm_actor is not None:
        add_group("lstm_actor", policy.lstm_actor)
    if hasattr(policy, "lstm_critic") and policy.lstm_critic is not None:
        add_group("lstm_critic", policy.lstm_critic)
    if hasattr(policy, "lstm") and policy.lstm is not None:
        add_group("lstm_shared", policy.lstm)

    # 3) MLP extractor (policy + value nets inside)
    add_group("mlp_extractor", policy.mlp_extractor)

    # 4) Heads: action + value
    add_group("action_net", policy.action_net)
    add_group("value_net", policy.value_net)

    # 5) Distribution-specific params (e.g., log_std for continuous actions)
    if hasattr(policy.action_dist, "log_std") and isinstance(
        getattr(policy.action_dist, "log_std", None), th.nn.Parameter
    ):
        print("=== ACTION DIST PARAMS ===")
        print(f"log_std: {policy.action_dist.log_std.numel():8d} params")
        counted_names.add("action_dist.log_std")


    print("\n=== BREAKDOWN INSIDE MLP EXTRACTOR ===")
    # Optional: split MLP extractor into policy and value parts
    if hasattr(policy.mlp_extractor, "policy_net"):
        add_group("mlp_policy_net", policy.mlp_extractor.policy_net)
    if hasattr(policy.mlp_extractor, "value_net"):
        add_group("mlp_value_net", policy.mlp_extractor.value_net)


    # ---------------------------------------------------------------------
    # NEW: print feature extractor configuration
    # ---------------------------------------------------------------------
    fe = policy.features_extractor

    print("\n=== FEATURE EXTRACTOR CONFIG ===")
    print(f"Class: {fe.__class__.__name__}")
    print("\nPyTorch module repr:\n")
    print(fe)  # this prints the full nn.Module structure

    print("\nNon-module attributes (likely hyperparameters):")
    for attr, val in fe.__dict__.items():
        # skip internal / framework stuff
        if attr.startswith("_") or attr in ("training",):
            continue
        # skip submodules and parameters to avoid huge prints
        if isinstance(val, (th.nn.Module, th.nn.Parameter)):
            continue
        print(f"  {attr}: {val!r}")


    print("\n=== UNACCOUNTED POLICY PARAMS (by name) ===")
    # Find any params we didn't assign to a group
    unaccounted = []
    for pname, p in policy.named_parameters():
        if pname not in counted_names:
            unaccounted.append((pname, p.numel()))

    if not unaccounted:
        print("All parameters accounted for in the groups above.")
    else:
        for pname, numel in unaccounted:
            print(f"{pname:40s}: {numel:8d} params")

    print("\n=== CHECK SUM ===")
    group_sum = 0
    for pname, p in policy.named_parameters():
        if pname in counted_names:
            group_sum += p.numel()
    group_sum += sum(numel for _, numel in unaccounted)
    print(f"Sum of groups + leftovers: {group_sum}")
    print(f"Total policy params       : {total_policy_params}")


    # Raw policy_kwargs as stored by the algorithm
    print("policy_kwargs from model:")
    print(model.policy_kwargs)



#### 
Model 1 - model z wszystkimi metrykami wejściowymi

In [None]:
MODEL_PATH =r'.\FINAL_MODELS\ATTENTION\attention_MlpLstmPolicy_mlplstm_attention_contrib_metric.zip'

In [8]:
from sb3_contrib import RecurrentPPO
model = RecurrentPPO.load(MODEL_PATH)

In [9]:
params_analysis(model)

=== TOTAL POLICY PARAMS ===
Total policy params: 15159

=== MAIN GROUPS ===
features_extractor  :      175 params
lstm_actor          :     5248 params
lstm_critic         :     5248 params
mlp_extractor       :     4224 params
action_net          :      231 params
value_net           :       33 params

=== BREAKDOWN INSIDE MLP EXTRACTOR ===
mlp_policy_net      :     2112 params
mlp_value_net       :     2112 params

=== FEATURE EXTRACTOR CONFIG ===
Class: AdaptiveAttentionFeatureExtractor

PyTorch module repr:

AdaptiveAttentionFeatureExtractor(
  (ln_e): LayerNorm((7,), eps=1e-05, elementwise_affine=True)
  (Wq_c): Linear(in_features=7, out_features=4, bias=False)
  (Wk_c): Linear(in_features=7, out_features=4, bias=False)
  (out_act): Tanh()
)

Non-module attributes (likely hyperparameters):
  n_metrics: 7
  d_embed: 7
  d_k: 4
  n_heads: 1
  head_agg: 'mean'
  final_out_dim: 7
  mode: 'generalized'
  attn_norm: 'row_softmax'
  attn_temp: 1.2
  qk_mode: 'hybrid'
  use_posenc: True
 

Model 2 - model z 3-ema metrykami wejściowymi

In [None]:
MODEL_PATH_2=r'c:\initial_model\recurrentppo\MlpLstmPolicy\recurrentppo_MlpLstmPolicy_mlplstm.zip'

In [43]:
from sb3_contrib import RecurrentPPO
model = RecurrentPPO.load(MODEL_PATH_2)

In [44]:
params_analysis(model)

=== TOTAL POLICY PARAMS ===
Total policy params: 38088

=== MAIN GROUPS ===
features_extractor  :        0 params
lstm_actor          :    17664 params
lstm_critic         :    17664 params
mlp_extractor       :     2624 params
action_net          :      119 params
value_net           :       17 params

=== BREAKDOWN INSIDE MLP EXTRACTOR ===
mlp_policy_net      :     1312 params
mlp_value_net       :     1312 params

=== FEATURE EXTRACTOR CONFIG ===
Class: FlattenExtractor

PyTorch module repr:

FlattenExtractor(
  (flatten): Flatten(start_dim=1, end_dim=-1)
)

Non-module attributes (likely hyperparameters):

=== UNACCOUNTED POLICY PARAMS (by name) ===
mlp_extractor.policy_net.0.weight       :     1024 params
mlp_extractor.policy_net.0.bias         :       16 params
mlp_extractor.policy_net.2.weight       :      256 params
mlp_extractor.policy_net.2.bias         :       16 params
mlp_extractor.value_net.0.weight        :     1024 params
mlp_extractor.value_net.0.bias          :       1

### Liczba parametrów architektury Attention

In [None]:
MODEL_PATH_ATT=rf'{BASE_PATH}\ATTENTION\attention_MlpLstmPolicy_mlplstm_att_tunedtolstm.zip'

In [48]:
from sb3_contrib import RecurrentPPO
model = RecurrentPPO.load(MODEL_PATH_ATT)


In [49]:
params_analysis(model)

=== TOTAL POLICY PARAMS ===
Total policy params: 40311

=== MAIN GROUPS ===
features_extractor  :      175 params
lstm_actor          :    18688 params
lstm_critic         :    18688 params
mlp_extractor       :     2624 params
action_net          :      119 params
value_net           :       17 params

=== BREAKDOWN INSIDE MLP EXTRACTOR ===
mlp_policy_net      :     1312 params
mlp_value_net       :     1312 params

=== FEATURE EXTRACTOR CONFIG ===
Class: AdaptiveAttentionFeatureExtractor

PyTorch module repr:

AdaptiveAttentionFeatureExtractor(
  (ln_e): LayerNorm((7,), eps=1e-05, elementwise_affine=True)
  (Wq_c): Linear(in_features=7, out_features=4, bias=False)
  (Wk_c): Linear(in_features=7, out_features=4, bias=False)
  (out_act): Tanh()
)

Non-module attributes (likely hyperparameters):
  n_metrics: 7
  d_embed: 7
  d_k: 4
  n_heads: 1
  head_agg: 'mean'
  final_out_dim: 7
  mode: 'diagonal'
  attn_norm: 'diag_softmax'
  attn_temp: 1.2
  qk_mode: 'hybrid'
  use_posenc: True
  u