In [None]:
import os
import sys

import torch
from stable_baselines3 import PPO

# Make the sibling `src` directory importable.
# The previous string concatenation (os.getcwd() + '../src/') had no path
# separator, yielding e.g. '/repo/notebooks../src/', which is never the
# intended directory. Build the path with os.path instead.
src_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'src'))
if src_dir not in sys.path:  # keep the cell idempotent when it is re-run
    sys.path.append(src_dir)
%load_ext autoreload
%autoreload 2
from sparse_autoencoder import AdaptiveSparseAE_FeatureExtractor, AdaptiveAETrainer, pretrain_ae, train_sparse_ae_model
from run import build_env

# Location of a saved model archive, relative to the current working directory.
# NOTE(review): `path` is not referenced by any cell visible here — confirm it
# is still needed, or pass it to the model-loading cell.
path = 'models/sparse_autoencoder_MlpPolicy_model.zip'


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Build the environment with two empty dicts as arguments (what each argument
# configures is defined in `run.build_env`, which is not visible here).
env = build_env({}, {})
# Instantiate the feature extractor for this observation space, leaving every
# other constructor argument at its default.
fe = AdaptiveSparseAE_FeatureExtractor(env.observation_space)
# Direct handle on the extractor's `autoencoder` attribute.
ae = fe.autoencoder

In [None]:
# 2. Rebuild pretrained autoencoder
# (Use same architecture as during training)
# The architecture must be identical for the standalone autoencoder and for the
# feature extractor kwargs below, so it is defined once here.
AE_HIDDEN_DIMS = [128, 64]
AE_MAX_LATENT_DIM = 8

# NOTE(review): `Autoencoder` is not imported in any cell visible here;
# presumably it lives in `sparse_autoencoder` — add it to that import, otherwise
# this cell raises NameError on a fresh kernel.
pretrained_ae = Autoencoder(
    input_dim=env.observation_space.shape[0],
    hidden_dims=list(AE_HIDDEN_DIMS),  # copy so the two consumers never share one list
    max_latent_dim=AE_MAX_LATENT_DIM
)
# map_location="cpu" lets weights that were saved from a GPU be read on a
# CPU-only machine; load_state_dict then copies the values into the module's
# own parameters. (`torch` is imported in the notebook's first cell.)
pretrained_ae.load_state_dict(
    torch.load("pretrained_ae_weights.pth", map_location="cpu")
)

# 3. Prepare policy_kwargs with reconstructed autoencoder
policy_kwargs = {
    "features_extractor_class": AdaptiveSparseAE_FeatureExtractor,
    "features_extractor_kwargs": {
        "max_latent_dim": AE_MAX_LATENT_DIM,
        "hidden_dims": list(AE_HIDDEN_DIMS),
        "sparsity_coeff": 0.1,
        "threshold": 0.2,
        "pretrained_ae": pretrained_ae
    },
    # NOTE(review): newer Stable-Baselines3 releases appear to prefer a plain
    # dict(pi=..., vf=...) over this list-wrapped form — confirm against the
    # installed version before changing it.
    "net_arch": [dict(pi=[64, 64], vf=[64, 64])]
}

# 4. Load model with custom objects
# The `policy_kwargs` entry in custom_objects replaces the value stored in the
# archive, so the policy is rebuilt with the extractor configuration above.
model = PPO.load(
    "ppo_custom_model.zip",
    env=env,
    custom_objects={'policy_kwargs': policy_kwargs}
)

In [5]:
import os

import pandas as pd
import plotly.graph_objects as go
import plotly.subplots as sp

# Directory holding the evaluation CSV files. Set the EVAL_RESULTS_DIR
# environment variable to point at your own checkout; the fallback is the
# original hard-coded location so existing setups keep working.
base_path = os.environ.get(
    'EVAL_RESULTS_DIR',
    r'C:\Users\ultramarine\Desktop\ppo_magisterka\trainloop_driver_official\trainloop-driver',
)

# One entry per evaluated variant. Each name is both the legend label and the
# suffix of its CSV file (evaluation_results_<name>.csv); colors are matched
# to names by position.
names = ['Attention', 'PCA', 'Sparse_autoencoder', 'CnnPolicy', 'Sparse_autoencoder_temporal_spatial', 'PPO']
colors = ['blue', 'red', 'green', 'black', 'orange', 'yellow']

# Read CSV files, in the same order as `names`.
frames = [
    pd.read_csv(os.path.join(base_path, f'evaluation_results_{name}.csv'))
    for name in names
]
# Numbered aliases kept for any other cell that still refers to them.
df1, df2, df3, df4, df5, df6 = frames

# Create subplots for rewards and episode lengths
fig = sp.make_subplots(
    rows=2, cols=1,
    subplot_titles=('Rewards per Episode', 'Episode Lengths'),
    vertical_spacing=0.1
)

# (subplot row, CSV column, legend suffix) for each panel.
# 'episode_lenghts' is the column name exactly as it is read from the CSVs;
# do not "correct" the spelling here without renaming it in the files as well.
panels = [
    (1, 'rewards', 'Rewards'),
    (2, 'episode_lenghts', 'Lengths'),
]
# Panels form the outer loop so that all reward traces are added before any
# length trace, which keeps the legend order stable.
for row, column, label in panels:
    for name, color, frame in zip(names, colors, frames):
        fig.add_trace(
            go.Scatter(x=frame.index, y=frame[column], name=f'{name}: {label}', line=dict(color=color)),
            row=row, col=1
        )

# Update layout
fig.update_layout(
    height=800,
    title_text='Training Metrics Comparison',
    showlegend=True
)
fig.update_xaxes(title_text='Episode', row=1, col=1)
fig.update_yaxes(title_text='Total rewards', row=1, col=1)
fig.update_xaxes(title_text='Episode', row=2, col=1)
fig.update_yaxes(title_text='Steps per Episode', row=2, col=1)

fig.show()