In [2]:
import torch
import numpy as np

In [3]:
import gymnasium as gym
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
import os
import sys
sys.path.append("/home/thatblueboy/DOP")

from env.wrapper import DreamWrapper
# Load the trained model
model = PPO.load("/home/thatblueboy/DOP/logs/Ant-v5_PPO_42/models/dreamer_5_steps/model.zip", device='cpu')

# Create a new environment for testing
env = gym.make("Ant-v5", render_mode="human")
wrapped_env = DreamWrapper(env, n_future_steps = 5, n_steps=1024)  # Use the trained model to augment future observations

# Wrap in DummyVecEnv for stable_baselines3 compatibility
# wrapped_env = DummyVecEnv([lambda: wrapped_env])
hyperparams = {
    "policy": "MlpPolicy",
    "n_steps": 512,
    "batch_size": 32,
    "gamma": 0.98,
    "learning_rate": 1.90609e-05,
    "ent_coef": 4.9646e-07,
    "clip_range": 0.1,
    "n_epochs": 10,
    "gae_lambda": 0.8,
    "max_grad_norm": 0.6,
    "vf_coef": 0.677239,
    "verbose": 1,
    "device":"cpu",
    "tensorboard_log": "./ppo_ant_tensorboard/"
}

In [4]:
dreamer = wrapped_env.dreamer
weights = torch.load("/home/thatblueboy/DOP/logs/Ant-v5_PPO_42/dreamers/dreamer_5_steps/dreamer_state_dict.pth")  # Load weights

for key in weights.keys():
    weights[key] = weights[key].float()
    # print(key
random_weights = {}
# for key, tensor in weights.items():
#     random_weights[key] = torch.zeros_like(tensor)  # Random weights with the same shap
wrapped_env.dreamer.load_state_dict(weights)

  weights = torch.load("/home/thatblueboy/DOP/logs/Ant-v5_PPO_42/dreamers/dreamer_5_steps/dreamer_state_dict.pth")  # Load weights


<All keys matched successfully>

In [25]:
states = []
actions = []
next_states = []

In [26]:
obs, _ = wrapped_env.reset()
states.append(obs)

# Run the trained agent in the environment
for _ in range(1000):  # Run for 1000 steps
    action, _ = model.predict(obs, deterministic=True)  # Use deterministic actions
    print(action)
    actions.append(action)
    obs, reward, done, _, info = wrapped_env.step(action)
    next_states.append(obs)
    states.append(obs)

    if done:
        break

[ 0.02994049  0.22676495 -0.08598981  0.01590433 -0.18607633 -0.3208894
  0.16990478 -0.01990563]
[ 1.         -0.7168041   0.12665467 -0.28723574 -0.7823367   0.3505749
  0.19557256  0.09911212]
[ 0.410409   -0.67329776  0.2873574  -0.5738215  -0.15150408  0.41514623
  0.06822074  0.20874651]
[ 0.11371136 -0.63019174  0.23504941 -0.48934776 -0.03535377  0.42789477
 -0.11918036  0.2858947 ]
[ 0.17492789 -0.21449529 -0.19740291 -0.19903122 -0.03054493  0.33057404
 -0.06494433  0.3025145 ]
[ 0.50788754 -0.12959231 -0.11703054 -0.16324322 -0.14607023  0.16492668
  0.05007003  0.15929487]
[ 0.8222175  -0.07953218 -0.07344233 -0.03737906 -0.25386238  0.21968414
  0.17472011  0.13456325]
[-0.17872515 -0.16858323 -0.25650707  0.01801855  0.45343053  0.26555207
 -0.07157341  0.05163095]
[-0.5220651   0.17454104  0.03516813 -0.34198186  0.3199504  -0.21205254
 -0.13864237  0.05088202]
[-0.7189444   0.45578432  0.0667368  -0.19644277  0.5812134  -0.24806012
  0.1557281  -0.0215752 ]
[-0.21986322

In [27]:
print(len(states))
print(len(actions))
print(len(next_states))

1001
1000
1000


In [28]:
states = states[:-1]

In [29]:
print(len(states))

1000


In [30]:
predictor_p = dreamer.dreamer_p
predictor_d = dreamer.dreamer_d

In [32]:
states = torch.tensor(np.array(states)).float()

In [33]:
actions =torch.Tensor(np.array(actions)).float()

In [46]:
next_states = torch.Tensor(np.array(next_states)).float()

In [34]:
states.dtype

torch.float32

In [35]:
print(states.shape)

torch.Size([1000, 630])


In [18]:
predicted_actions = predictor_p(states[..., :105])

In [36]:
mse_loss = torch.nn.functional.mse_loss(predicted_actions, actions).item()
print(mse_loss)

0.06582626700401306


In [45]:
print(torch.min((actions)))

tensor(-1.)


In [38]:
mae_loss = torch.nn.functional.l1_loss(predicted_actions, actions).item()
print(mae_loss)


0.19309592247009277


In [49]:
predicted_next_states = predictor_d(torch.cat([actions, states[..., :105]], dim=-1))

In [50]:
mse_loss = torch.nn.functional.mse_loss(predicted_next_states, next_states[..., :105]).item()
print(mse_loss)

0.06090717390179634


In [51]:
print(torch.min((next_states)))

tensor(-17.2192)


In [52]:
print(torch.max((next_states)))

tensor(16.9729)


In [53]:
mae_loss = torch.nn.functional.l1_loss(predicted_next_states, next_states[..., :105]).item()
print(mae_loss)

0.09332173317670822
