In [1]:
import os
# Relative paths used below assume the working directory is the one named
# 'koopman-rl'; step up one level when the notebook was started elsewhere.
# Both path separators are checked so the test works on Windows and POSIX.
_cwd = os.getcwd()
if 'koopman-rl' not in (_cwd.split('\\')[-1], _cwd.split('/')[-1]):
    os.chdir('../')

In [None]:
import numpy as np
import pandas as pd

from movies.env_enum import EnvEnum

In [None]:
class Args:
    """Plain container for the notebook's run settings.

    Attributes:
        data_folder: Folder the ``.npy`` inputs are read from and the CSV is
            written to.
        ma_window_size: Window length handed to the moving-average helper.
    """

    def __init__(self, data_folder: str, ma_window_size: int):
        self.data_folder, self.ma_window_size = data_folder, ma_window_size

    def __str__(self):
        # Human-readable summary used when the settings are printed.
        return "data_folder: {}, ma_window_size: {}".format(
            self.data_folder, self.ma_window_size
        )

# Folder holding the saved arrays for each environment, keyed by EnvEnum member.
data_folder_paths = {
    EnvEnum.LinearSystem: "./video_frames/LinearSystem-v0_1733955895",
    EnvEnum.FluidFlow: "./video_frames/FluidFlow-v0_1733955905",
    EnvEnum.Lorenz: "./video_frames/Lorenz-v0_1733955911",
    EnvEnum.DoubleWell: "./video_frames/DoubleWell-v0_1733955917",
}

# Choose the environment to export by leaving exactly one assignment active.
# selected_env = EnvEnum.LinearSystem
# selected_env = EnvEnum.FluidFlow
# selected_env = EnvEnum.Lorenz
selected_env = EnvEnum.DoubleWell

args = Args(
    data_folder=data_folder_paths[selected_env],

    # ma_window_size=20,
    ma_window_size=200,
)

# Echo the active settings so the cell output records what was used.
print(args)

In [None]:
def _load_array(file_name):
    """Read one ``.npy`` file from the folder selected in ``args``."""
    return np.load(f"{args.data_folder}/{file_name}")


# Main policy: trajectories and costs
main_policy_trajectories = _load_array("main_policy_trajectories.npy")
main_policy_costs = _load_array("main_policy_costs.npy")

# Baseline policy: trajectories and costs
baseline_trajectories = _load_array("baseline_policy_trajectories.npy")
baseline_policy_costs = _load_array("baseline_policy_costs.npy")

# Zero policy: trajectories and costs
zero_trajectories = _load_array("zero_policy_trajectories.npy")
zero_costs = _load_array("zero_policy_costs.npy")

# Metadata is stored as a pickled object inside a 0-d array; .item() unwraps it.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# only point this at metadata files from a trusted source.
metadata = np.load(f"{args.data_folder}/metadata.npy", allow_pickle=True).item()

# The environment id is reused later to name the exported CSV.
env_id = metadata['env_id']
print(env_id)

In [5]:
# Moving average over a fixed window, optionally preserving the first n - 1 values
def moving_average(a, n, keep_first=False):
    """Return the moving average of ``a`` over windows of ``n`` consecutive samples.

    The window sums are derived from a single cumulative sum, so the cost is
    linear in ``len(a)`` regardless of the window length.

    Args:
        a: 1-D sequence or array of numbers.
        n: Window length; must be at least 1.
        keep_first: If True, the first ``n - 1`` raw values of ``a`` are
            prepended to the averages, so the result has ``len(a)`` entries
            whenever ``len(a) >= n``. If False, only the ``len(a) - n + 1``
            full-window averages are returned.

    Returns:
        A NumPy array of averages (preceded by the raw prefix when
        ``keep_first`` is True). If ``n`` exceeds ``len(a)`` there are no
        full windows and the averages part is empty.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # Negative windows would silently slice from the wrong end of the
        # array, and zero would divide by zero; reject both up front.
        raise ValueError(f"n must be a positive integer, got {n}")

    a = np.asarray(a)
    window_sums = np.cumsum(a, dtype=float)
    # Subtracting the cumulative sum shifted by n leaves the sum of each
    # n-sample window at the position of the window's last element.
    window_sums[n:] = window_sums[n:] - window_sums[:-n]
    moving_avg = window_sums[n - 1:] / n

    if keep_first:
        return np.concatenate((a[:n - 1], moving_avg))
    return moving_avg

# Element-wise log of main-policy cost over baseline-policy cost.
# NOTE(review): index 0 presumably selects the first recorded rollout — confirm.
log_cost_ratio = np.log(main_policy_costs[0] / baseline_policy_costs[0])
ma_values = moving_average(log_cost_ratio, args.ma_window_size, keep_first=False)

# Left-pad with NaN so the smoothed series lines up with log_cost_ratio.
# NaN (rather than None) keeps the array float-typed whether or not padding
# is needed, and pandas still writes the padded entries as empty CSV fields.
n_missing = len(log_cost_ratio) - len(ma_values)
ma_log_cost_ratio = np.concatenate((np.full(n_missing, np.nan), ma_values))

In [None]:
# Build one table with the first three state components of each policy's
# rollout (index 0 on the leading axis) next to the cost-ratio series.
policy_rollouts = {
    "main": main_policy_trajectories,
    "baseline": baseline_trajectories,
    "uncontrolled": zero_trajectories,
}

# Column order: main_x0..x2, baseline_x0..x2, uncontrolled_x0..x2.
columns = {
    f"{label}_x{dim}": rollout[0, :, dim]
    for label, rollout in policy_rollouts.items()
    for dim in range(3)
}
columns["log_cost_ratio"] = log_cost_ratio
columns["ma_log_cost_ratio"] = ma_log_cost_ratio

df = pd.DataFrame(columns, index=range(len(main_policy_trajectories[0])))

df

In [7]:
# Save the table in the same folder the input arrays came from, with the
# environment id as the file-name prefix.
output_csv_path = f"{args.data_folder}/{env_id}_trajectory_data.csv"
df.to_csv(output_csv_path)