In [None]:
import gym
import gymnasium

from TD3.ContinuousCartPole import *
from TD3.ContinuousPendulum import *
from TD3.ContinuousMountainCar import *
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

In [None]:
# Evaluate the trained TD3 agent on the continuous Mountain Car environment.
# Alternative environment constructors, kept for reference:
# env = gym.make("MountainCarContinuous-v0")
# env= gymnasium.make("MountainCarContinuous-v0")
env = MountainCarContinuousEnv()

# model = TD3.load("models/td3-ContinuousMountainCar.zip", env=env)
model = TD3.load("models/pretrained/MountainCarContinuous-v0.zip", env=env)
print("\n=== Continuous Mountain Car TD3 Evaluation ===")
mcc_actions = []  # one list of scalar actions per evaluation episode
for ep in range(10):
    # NOTE(review): reset() is treated here as returning the observation alone,
    # while the CartPole cell unpacks `obs, _ = env.reset()` — confirm that the
    # two environment classes really differ in this respect.
    obs = env.reset()
    done = False
    trunc = False
    total_reward = 0.0
    actions = []
    velocities = []  # obs[1] at every step; presumably the car velocity — TODO confirm
    while not done and not trunc:
        velocities.append(obs[1])
        action, _ = model.predict(obs, deterministic=False)
        # .item() pulls out the single action component; calling float() directly
        # on an array with ndim > 0 is deprecated since NumPy 1.25.
        actions.append(float(np.asarray(action).item()))
        obs, reward, done, trunc, _ = env.step(action)
        total_reward += reward
    mcc_actions.append(actions)
    print(f"Episode {ep+1}: total_reward={total_reward:.8f}, actions={actions[:10]}")

In [None]:
# Evaluate the trained TD3 agent on the continuous Pendulum environment.
env = PendulumEnv()
model = TD3.load("models/td3-ContinuousPendulum.zip", env=env)
print("\n=== Continuous Pendulum TD3 Evaluation ===")
penc_actions = []  # one list of scalar actions per evaluation episode
for ep in range(10):
    # NOTE(review): reset() is treated here as returning the observation alone,
    # while the CartPole cell unpacks `obs, _ = env.reset()` — confirm that the
    # two environment classes really differ in this respect.
    obs = env.reset()
    done = False
    trunc = False
    total_reward = 0.0
    actions = []
    while not done and not trunc:
        action, _ = model.predict(obs, deterministic=True)
        # .item() pulls out the single action component; calling float() directly
        # on an array with ndim > 0 is deprecated since NumPy 1.25.
        actions.append(float(np.asarray(action).item()))
        obs, reward, done, trunc, _ = env.step(action)
        total_reward += reward
    penc_actions.append(actions)
    print(f"Episode {ep+1}: total_reward={total_reward:.2f}, actions={actions[:10]}, length={len(actions)}")


In [None]:
# Evaluate the trained TD3 agent on the continuous CartPole environment.
env = ContinuousCartPoleEnv()
model = TD3.load("models/td3-ContinuousCartPole", env=env)
print("\n=== Continuous CartPole TD3 Evaluation ===")
cpc_actions = []  # one list of scalar actions per evaluation episode
for ep in range(10):
    # reset() is unpacked as a pair here; the second element is discarded.
    obs, _ = env.reset()
    done = False
    trunc = False
    total_reward = 0.0
    actions = []
    while not done and not trunc:
        action, _ = model.predict(obs, deterministic=False)
        # .item() pulls out the single action component; calling float() directly
        # on an array with ndim > 0 is deprecated since NumPy 1.25.
        actions.append(float(np.asarray(action).item()))
        obs, reward, done, trunc, _ = env.step(action)
        total_reward += reward
    cpc_actions.append(actions)

    print(f"Episode {ep+1}: total_reward={total_reward:.2f}, actions={actions[:10]}")

In [None]:
# Flatten the per-episode action lists into one sample and bin it by hand.
# Outputs used by later cells: `flat`, `data`, `s_data`; `counts` and
# `bin_edges` hold the manual histogram.
all_actions = mcc_actions
flat = [action for episode in all_actions for action in episode]
# print(len(flat))
data = np.array(flat)
if data.size == 0:
    raise ValueError("no actions collected; run an evaluation cell first")

bins = 100
total = 100  # not read in this cell; kept in case another cell relies on it
max_value = max(data)
min_value = min(data)

# bins + 1 equally spaced edges spanning [min_value, max_value].
bin_edges = np.linspace(min_value, max_value, bins + 1)
s_data = sorted(data)

counts = [0] * bins
bin_idx = 0
for val in s_data:
    # Advance past every bin whose upper edge lies below `val`. A single
    # `i += 1` step (as before) mis-files values whenever a bin is empty,
    # because the value may belong several bins further to the right.
    while bin_idx < bins - 1 and val > bin_edges[bin_idx + 1]:
        bin_idx += 1
    counts[bin_idx] += 1

In [None]:
# Quick-look histogram of every collected action (matplotlib's default binning).
plt.hist(flat)
plt.xlabel("action")
plt.ylabel("count")
# plt.show() renders the figure; the previous bare plt.plot() drew nothing and
# only echoed an empty list as the cell output.
plt.show()

In [None]:
# Empirical CDF of the sorted actions: the k-th smallest value (k starting at 0)
# is paired with the fraction k / n.
n_samples = len(s_data)
x = list(s_data)
y = [rank / n_samples for rank in range(n_samples)]

# Print the quantiles at levels 0.0, 0.1, ..., 0.9.
for level in np.arange(0, 1, 0.1):
    print(np.quantile(s_data, q=level))

plt.plot(x, y)
plt.show()

In [None]:
# Draw several small action sets whose mean and standard deviation are close to
# those of the full sample `data` (rejection sampling with replacement).
K = 8  # number of actions per set
N_SETS = 5  # how many representative sets to draw
MEAN_TOL = 0.25  # max allowed |mean(set) - mean(data)|
STD_TOL = 0.05  # max allowed |std(set) - std(data)|
MAX_ATTEMPTS = 1_000_000  # per set; stops the cell from hanging the kernel
SEED = 0  # change to obtain a different, still reproducible, selection

sample_mean = data.mean()
sample_std = data.std()
rng = random.Random(SEED)  # local generator: leaves the global `random` state alone

result_Acts = []
for _ in range(N_SETS):
    # Sample until representable, giving up after MAX_ATTEMPTS draws.
    for _attempt in range(MAX_ATTEMPTS):
        actions = rng.choices(data, k=K)
        diff = np.mean(actions) - sample_mean
        sd_dif = np.std(actions) - sample_std
        if abs(diff) <= MEAN_TOL and abs(sd_dif) <= STD_TOL:
            break
    else:
        raise RuntimeError(
            f"no set of {K} actions matched the sample mean/std within tolerance "
            f"after {MAX_ATTEMPTS} attempts; loosen MEAN_TOL/STD_TOL or increase K"
        )
    result_Acts.append(actions)

In [None]:
# Show each selected action set on its own line. A plain loop avoids building
# (and echoing as cell output) a throwaway list of None values.
for action_set in result_Acts:
    print(action_set)

In [None]:
# Shell-command templates, one per environment. Placeholders, in order:
# run index (echo label), space-separated action values, run index (output file name).
run_cpc = "echo 'exp2b cp {}' \n./target/release/broccoli --env cpc --depth 2 --num-nodes 3 --num-iters 10000 --predicate-increment 0.1 0.1 0.05 0.1 --initial-state-values -0.05 0.05 0.05 0.05 --predicate-reasoning 1 --actions {} > TD3_acts_cpc_s={}.txt"
run_penc = "echo 'exp2a penc {}' \n./target/release/broccoli --env penc --depth 2 --num-nodes 3 --num-iters 100  --predicate-increment 0.1 0.1 --initial-state-values 0.5 0 --predicate-reasoning 1 --actions {}  > TD3_acts_penc_s={}.txt"
run_mcc = "echo 'exp2b mcc {}' \n./target/release/broccoli --env mcc --depth 2 --num-nodes 3 --num-iters 1000 --predicate-increment 0.1 0.014 --initial-state-values -0.5 0.0 --predicate-reasoning 1 --actions {} > TD3_acts_mcc_s={}.txt"

# NOTE(review): the CartPole template and file name are selected here, while the
# action data is built from `mcc_actions` in an earlier cell — confirm the pairing.
run = run_cpc
with open("action_exp/exp4_cpc.sh", "w") as script_file:
    for run_idx, action_set in enumerate(result_Acts):
        action_set.sort()  # in place: the lists inside result_Acts end up sorted
        # Every value is followed by one space, so the joined text ends with a space.
        string = "".join("{:.6f} ".format(value) for value in action_set)
        print(string)
        script_file.write(run.format(run_idx, string, run_idx))
        script_file.write("\n")