In [None]:
import pathlib

In [None]:
import numpy as np
import torch

from residual_actions import ResidualActionsSettings, ResidualActionsLearner

# Read data

In [None]:
# TODO: more training data
# Keep one frame out of every FRAME_STRIDE; with the 60 fps recordings this
# yields an effective 15 fps.
FRAME_STRIDE = 4

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# so these files must come from a trusted source. If they only hold plain
# numeric arrays the flag can probably be dropped -- confirm before changing it.
with open('datasets/pendulum_expert_data/observations.npy', 'rb') as f:
    states = np.load(f, allow_pickle=True)
with open('datasets/pendulum_expert_data/actions.npy', 'rb') as f:
    actions = np.load(f, allow_pickle=True)

# Skip close frames, essentially going from 60 fps to 15 fps. The same stride
# is applied to both arrays so every state stays paired with its action.
states = states[::FRAME_STRIDE, ...]
actions = actions[::FRAME_STRIDE, ...]

# Set config

In [None]:
# TODO: LR scheduler
# Hyper-parameters for the learner, grouped by purpose. The grouping is based
# on the field names only -- see residual_actions for the exact semantics.
SETTINGS = ResidualActionsSettings(
    # Network shape
    history_size=16,
    hidden_channels_memory=128,
    hidden_channels_behavior=256,
    # Optimisation
    batch_size=32,
    optim_learning_rate=1e-3,
    # Epoch budget
    min_epochs=100,
    grace_epochs_after_min_epochs=20,
    max_epochs=900,
    # Stopping criteria
    target_loss=1e-7,
    force_stop_at_plateau_epochs=20,
    # Loss tracking and logging
    running_loss_window_size=10,
    train_log_frequency=20,
)

In [None]:
# The learner's input/output sizes are read from the last axis of the
# training arrays.
state_dim = states.shape[-1]
action_dim = actions.shape[-1]

learner = ResidualActionsLearner(
    state_space_size=state_dim,
    action_space_size=action_dim,
    settings=SETTINGS,
    device='cuda',
)

# Train

In [None]:
# Convert both training arrays to float32 CUDA tensors, then run the full
# preprocessing + training routine of the learner.
states_gpu, actions_gpu = (
    torch.from_numpy(array).float().cuda() for array in (states, actions)
)
learner.process_and_train_full(states_train=states_gpu, actions_train=actions_gpu)

In [None]:
# Persist the trained learner. torch.save does not create missing parent
# folders, so make sure the checkpoint directory exists first.
# NOTE(review): assumes SETTINGS.learner_state_path is a str or path-like -- confirm.
pathlib.Path(SETTINGS.learner_state_path).parent.mkdir(parents=True, exist_ok=True)
torch.save(learner.get_state_dicts(), SETTINGS.learner_state_path)

# Evaluate

In [None]:
import gym

In [None]:
# Read the checkpoint stored at SETTINGS.learner_state_path and hand it back
# to the learner.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
saved_state_dicts = torch.load(SETTINGS.learner_state_path)
learner.set_state_dicts(saved_state_dicts)

In [None]:
# The evaluation loop below feeds observations as plain CPU tensors (no device
# transfer), so the learner is switched to the CPU first.
# NOTE(review): `to_inplace` presumably moves the learner's modules in place --
# confirm in residual_actions.
learner.to_inplace('cpu')

In [None]:
# Open a Pendulum environment with on-screen rendering and start an episode.
env = gym.make('Pendulum-v1', render_mode='human')
obs, info = env.reset()
done = False

# Buffers for the rollout. They use a `rollout_` prefix so they do not
# overwrite the training array `actions`, which the learner construction and
# training cells still read when they are re-run.
rollout_observations = []
rollout_actions = []

In [None]:
# Roll the learner out in the environment for at most `max_steps` steps,
# collecting the per-step reward.
max_steps = 200
step_counter = 0
rewards = []
try:
    while not done and step_counter < max_steps:
        action = learner.act_and_step(torch.from_numpy(obs).to(torch.float32))
        # The 5-tuple step API returns (obs, reward, terminated, truncated, info);
        # the episode is over as soon as either flag is set.
        obs, reward, terminated, truncated, info = env.step(np.array([action]))
        done = terminated or truncated
        rewards.append(reward)
        # NOTE(review): with render_mode='human' the environment may already
        # render inside step(); this explicit call could be redundant -- confirm.
        env.render()
        step_counter += 1
        if step_counter % 20 == 0:
            print(f'step: {step_counter}')
finally:
    # Release the render window even if a step raises.
    env.close()

In [None]:
# Turn the list of per-step rewards into an array for plotting and statistics.
rewards_np = np.asarray(rewards)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Reward obtained at each step of the evaluation rollout. The explicit
# figure/axes pair and the labels let the plot be read on its own.
fig, ax = plt.subplots()
ax.plot(rewards_np)
ax.set(title='Pendulum-v1 evaluation rollout', xlabel='step', ylabel='reward');

In [None]:
# Largest and smallest single-step reward seen during the rollout.
np.max(rewards_np), np.min(rewards_np)