In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Set IPython's `ast_node_interactivity` option to "last". This option selects
# which top-level nodes of a cell are run interactively (i.e. may display a value).
InteractiveShell.ast_node_interactivity = "last"

import argparse
import os

import gym
import numpy as np
from livelossplot import PlotLosses

from ddpg_torch.ddpg_torch import Agent
from lifting_rl.linkage_env import LinkageEnv

In [None]:
# Settings bundle handed to LinkageEnv when the environment is constructed.
# All array-valued entries are float32.
# NOTE(review): units and the meaning of the individual PARAM_VALS entries are
# not visible in this notebook — confirm against LinkageEnv.
params = dict(
    N_LINKS=2,
    # The goal and the initial state are currently the same vector.
    GOAL_POS=np.asarray((0.25 * np.pi, 0.25 * np.pi, 0.0, 0.0), dtype=np.float32),
    INIT_STATE=np.asarray((0.25 * np.pi, 0.25 * np.pi, 0.0, 0.0), dtype=np.float32),
    PARAM_VALS=np.asarray((9.81, 0.4, 1.0, 0.4, 1.0), dtype=np.float32),
    # Observation bounds are symmetric: the lower bound is the negated upper bound.
    OBS_LOW=-np.asarray((np.pi, np.pi, 8.0 * np.pi, 8.0 * np.pi), dtype=np.float32),
    OBS_HIGH=np.asarray((np.pi, np.pi, 8.0 * np.pi, 8.0 * np.pi), dtype=np.float32),
    # Action bounds are symmetric as well.
    ACT_LOW=-30,
    ACT_HIGH=30,
    TIME_STEP=0.01,
    VIDEO_FPS=30,
)


In [None]:
# Path to the skeleton-angles CSV handed to LinkageEnv.
# The location can be overridden with the SKELETON_ANGLES_FILE environment
# variable so the notebook is usable on machines other than the author's; when
# the variable is unset the original hard-coded path is used unchanged.
angles_file = os.environ.get(
    "SKELETON_ANGLES_FILE",
    "/home/mans/git/human-body-model-dynamics/data/skeleton_angles.csv",
)

In [None]:
# Build the environment from the angles CSV path and the `params` dictionary.
# NOTE(review): `verbose=0` presumably silences environment logging — confirm
# against LinkageEnv.
env = LinkageEnv(angles_file, params, verbose=0)

In [None]:
# Construct the DDPG agent that will be trained on `env`.
# NOTE(review): input_dims=[8] and n_actions=2 are hard-coded here; they
# presumably have to match the environment's observation/action sizes — confirm.
agent = Agent(
    env=env,
    # Problem dimensions.
    input_dims=[8],
    n_actions=2,
    # Hidden layer widths.
    layer1_size=400,
    layer2_size=300,
    # Optimisation settings: the actor's learning rate is 10x smaller than the critic's.
    lr_actor=2.5e-5,
    lr_critic=2.5e-4,
    tau=1e-3,
    batch_size=64,
)

In [None]:
# Seed NumPy's global random number generator.
# NOTE(review): this runs after the environment and the agent have already been
# constructed, so any randomness consumed during their construction is not
# covered by this seed. Only NumPy's global generator is seeded here — confirm
# whether other RNGs (e.g. the framework used inside ddpg_torch) need seeding too.
np.random.seed(0)

In [None]:
# Live-plot helper; the training loop feeds it one metrics update per episode.
liveloss = PlotLosses()

In [None]:
# Accumulated reward of every finished episode, appended in order by the training loop.
score_history = []

In [None]:
# Training loop: run episodes against `env`, storing every transition in the
# agent and calling `agent.learn()` after each step, then report the episode
# score and the mean score over the most recent 100 episodes.
#
# The loop is wrapped in try/finally so that `env.close()` also runs when the
# run is interrupted (e.g. a kernel interrupt) or a step raises; otherwise the
# environment is never closed. After an interrupted run, re-create `env`
# before running this cell again.
N_EPISODES = 100000

try:
    for i in range(N_EPISODES):
        done = False
        score = 0
        obs = env.reset()
        # Reset the agent's noise object at the start of each episode.
        agent.noise.reset()
        while not done:
            env.render()
            act = agent.choose_action(obs)
            new_state, reward, done, info = env.step(act)
            # The done flag is stored as an int (0/1) alongside the transition.
            agent.remember(obs, act, reward, new_state, int(done))
            agent.learn()
            score += reward
            obs = new_state

        score_history.append(score)

        # Compute the rolling mean once and reuse it for the plot and the log line.
        avg_score = np.mean(score_history[-100:])

        metrics = {"score_history": avg_score}
        liveloss.update(metrics)
        liveloss.send()
        print(
            "episode",
            i,
            "score %.2f" % score,
            "100 game average %.2f" % avg_score,
        )
finally:
    env.close()