In [1]:
import torch
import sys
import os
import random
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
sys.path.append('../')
from common import make_env, perform_action, const_disturb_five, const_jitter_force, get_frame_skip_and_timestep
from evals import *
sys.path.append('../../')
import TD3
import pandas as pd

In [2]:
response_times = [0.16]
reflex_thresholds = [0.15, 0.17, 0.19]
reflex_response_rates = [0.02, 0.04, 0.08, 0.01]
reflex_force_scales = [1.0, 0.5, 1.5, -0.5, 2.0, .1]
g_forces = [5]
df = pd.DataFrame(columns=['seed', 'g_force', 'reflex_threshold', 'reflex_response_rate', 'reflex_force_scale', 'response_rate', 'reward', 'angle'])
default_timestep = 0.02
default_frame_skip = 2
jit_duration = 0.02
env_name = 'InvertedPendulum-v2'
delayed_env = True
for response_rate in response_times:
    for reflex_response_rate in reflex_response_rates:
        print(reflex_response_rate)
        for reflex_threshold in reflex_thresholds:
            for reflex_force_scale in reflex_force_scales:
                for g_force in g_forces:
                    for seed in range(1):
                        states = []
                        force = g_force * 9.81
                        arguments = ['TD3', 'reflex', env_name, seed, jit_duration, float(g_force), response_rate, 1.0, reflex_response_rate, reflex_threshold, reflex_force_scale, 'best']
                        file_name = '_'.join([str(x) for x in arguments])
                        frame_skip, timestep, jit_frames = get_frame_skip_and_timestep(jit_duration, response_rate, reflex_response_rate)
                        time_change_factor = (default_timestep * default_frame_skip) / (timestep * frame_skip)
                        eval_env = make_env(env_name, seed, time_change_factor, timestep, frame_skip, delayed_env)
                        eval_env.env.env._max_episode_steps = 100000
                        state_dim = eval_env.env.env.observation_space.shape[0]
                        action_dim = eval_env.env.env.action_space.shape[0]
                        max_action = float(eval_env.action_space.high[0])
                        reflex_frames = int(reflex_response_rate/timestep)
                        kwargs = {
                            "state_dim": state_dim,
                            "action_dim": action_dim,
                            "observation_space": eval_env.observation_space,
                            "max_action": max_action,
                            "discount": 0.99,
                            "tau": 0.005,
                            "delayed_env": True,
                            "reflex": True,
                            "threshold": reflex_threshold,
                            "reflex_force_scale": reflex_force_scale
                        }
                        kwargs["policy_noise"] = 2 * max_action
                        kwargs["noise_clip"] = 0.5 * max_action
                        kwargs["policy_freq"] = 2
                        policy = TD3.TD3(**kwargs)
                        policy_file = file_name 
                        if os.path.exists('../reflex/models/'+policy_file+"_critic"):
                            policy.load(f"../reflex/models/{policy_file}")
                            avg_reward, avg_angle, jerk, actions = eval_policy_increasing_force(policy, env_name, 10, time_change_factor, timestep, frame_skip, jit_frames, response_rate, delayed_env, reflex_frames)
                            df.loc[len(df.index)] = [seed, g_force, reflex_threshold, reflex_response_rate, reflex_force_scale, response_rate, avg_reward, avg_angle]
                        else:
                            print("File not found ", file_name)


0.02
0.04
0.08
0.01


In [3]:

df['reward'] = df['reward'] * df['response_rate']
df.sort_values(by=['reward'])

Unnamed: 0,seed,g_force,reflex_threshold,reflex_response_rate,reflex_force_scale,response_rate,reward,angle
24,0.0,5.0,0.17,0.04,1.0,0.16,4.896,1.312747
18,0.0,5.0,0.15,0.04,1.0,0.16,18.384,2.634884
12,0.0,5.0,0.19,0.02,1.0,0.16,18.688,3.799997
9,0.0,5.0,0.17,0.02,-0.5,0.16,22.688,4.814603
22,0.0,5.0,0.15,0.04,2.0,0.16,24.160,3.999941
...,...,...,...,...,...,...,...,...
62,0.0,5.0,0.17,0.01,1.5,0.16,87.216,12.877046
29,0.0,5.0,0.17,0.04,0.1,0.16,89.328,10.048989
53,0.0,5.0,0.19,0.08,0.1,0.16,95.680,12.878386
5,0.0,5.0,0.15,0.02,0.1,0.16,102.464,17.917592


In [11]:
df.loc[(df['reflex_response_rate'] == 0.01) & (df['reflex_force_scale'] == 2)]

Unnamed: 0,seed,g_force,reflex_threshold,reflex_response_rate,reflex_force_scale,response_rate,reward,angle
58,0.0,5.0,0.15,0.01,2.0,0.16,3.776,1.561384
64,0.0,5.0,0.17,0.01,2.0,0.16,5.792,2.64915
70,0.0,5.0,0.19,0.01,2.0,0.16,28.448,1.753076


In [7]:
response_times = [0.16]
reflex_thresholds = [0.15, 0.17]
reflex_response_rates = [0.02, 0.04, 0.08, 0.01]
reflex_force_scales = [0.5, .1]
g_forces = [6]
df = pd.DataFrame(columns=['seed', 'g_force', 'reflex_threshold', 'reflex_response_rate', 'reflex_force_scale', 'response_rate', 'reward', 'angle'])
default_timestep = 0.02
default_frame_skip = 2
jit_duration = 0.02
env_name = 'InvertedPendulum-v2'
delayed_env = True

for response_rate in response_times:
    for reflex_response_rate in reflex_response_rates:
        for reflex_threshold in reflex_thresholds:
            for reflex_force_scale in reflex_force_scales:
                for g_force in g_forces:
                    for seed in range(5):
                        arguments = ['TD3', 'reflex', env_name, seed, jit_duration, float(g_force), response_rate, 1.0, reflex_response_rate, reflex_threshold, reflex_force_scale, 'final']
                        file_name = '_'.join([str(x) for x in arguments])
                        if not os.path.isfile(f"../reflex/models/{file_name}"):
                            print (response_rate,reflex_response_rate, reflex_threshold,reflex_force_scale, g_force, seed)
for response_rate in response_times:
    for reflex_response_rate in reflex_response_rates:
        print(reflex_response_rate)
        for reflex_threshold in reflex_thresholds:
            for reflex_force_scale in reflex_force_scales:
                for g_force in g_forces:
                    for seed in range(5):
                        states = []
                        force = g_force * 9.81
                        arguments = ['TD3', 'reflex', env_name, seed, jit_duration, float(g_force), response_rate, 1.0, reflex_response_rate, reflex_threshold, reflex_force_scale, 'best']
                        file_name = '_'.join([str(x) for x in arguments])
                        frame_skip, timestep, jit_frames = get_frame_skip_and_timestep(jit_duration, response_rate, reflex_response_rate)
                        time_change_factor = (default_timestep * default_frame_skip) / (timestep * frame_skip)
                        eval_env = make_env(env_name, seed, time_change_factor, timestep, frame_skip, delayed_env)
                        eval_env.env.env._max_episode_steps = 100000
                        state_dim = eval_env.env.env.observation_space.shape[0]
                        action_dim = eval_env.env.env.action_space.shape[0]
                        max_action = float(eval_env.action_space.high[0])
                        reflex_frames = int(reflex_response_rate/timestep)
                        kwargs = {
                            "state_dim": state_dim,
                            "action_dim": action_dim,
                            "observation_space": eval_env.observation_space,
                            "max_action": max_action,
                            "discount": 0.99,
                            "tau": 0.005,
                            "delayed_env": True,
                            "reflex": True,
                            "threshold": reflex_threshold,
                            "reflex_force_scale": reflex_force_scale
                        }
                        kwargs["policy_noise"] = 2 * max_action
                        kwargs["noise_clip"] = 0.5 * max_action
                        kwargs["policy_freq"] = 2
                        policy = TD3.TD3(**kwargs)
                        policy_file = file_name 
                        if os.path.exists('../reflex/models/'+policy_file+"_critic"):
                            policy.load(f"../reflex/models/{policy_file}")
                            avg_reward, avg_angle, jerk, actions = eval_policy_increasing_force(policy, env_name, 10, time_change_factor, timestep, frame_skip, jit_frames, response_rate, delayed_env, reflex_frames)
                            df.loc[len(df.index)] = [seed, g_force, reflex_threshold, reflex_response_rate, reflex_force_scale, response_rate, avg_reward, avg_angle]
                        else:
                            print("File not found ", file_name)


0.16 0.02 0.15 0.5 6 0
0.16 0.02 0.15 0.5 6 1
0.16 0.02 0.15 0.5 6 2
0.16 0.02 0.15 0.5 6 3
0.16 0.02 0.15 0.5 6 4
0.16 0.02 0.15 0.1 6 0
0.16 0.02 0.15 0.1 6 1
0.16 0.02 0.15 0.1 6 2
0.16 0.02 0.15 0.1 6 3
0.16 0.02 0.15 0.1 6 4
0.16 0.02 0.17 0.5 6 0
0.16 0.02 0.17 0.5 6 1
0.16 0.02 0.17 0.5 6 2
0.16 0.02 0.17 0.5 6 3
0.16 0.02 0.17 0.5 6 4
0.16 0.02 0.17 0.1 6 0
0.16 0.02 0.17 0.1 6 1
0.16 0.02 0.17 0.1 6 2
0.16 0.02 0.17 0.1 6 3
0.16 0.02 0.17 0.1 6 4
0.16 0.04 0.15 0.5 6 0
0.16 0.04 0.15 0.5 6 1
0.16 0.04 0.15 0.5 6 2
0.16 0.04 0.15 0.5 6 3
0.16 0.04 0.15 0.5 6 4
0.16 0.04 0.15 0.1 6 0
0.16 0.04 0.15 0.1 6 1
0.16 0.04 0.15 0.1 6 2
0.16 0.04 0.15 0.1 6 3
0.16 0.04 0.15 0.1 6 4
0.16 0.04 0.17 0.5 6 0
0.16 0.04 0.17 0.5 6 1
0.16 0.04 0.17 0.5 6 2
0.16 0.04 0.17 0.5 6 3
0.16 0.04 0.17 0.5 6 4
0.16 0.04 0.17 0.1 6 0
0.16 0.04 0.17 0.1 6 1
0.16 0.04 0.17 0.1 6 2
0.16 0.04 0.17 0.1 6 3
0.16 0.04 0.17 0.1 6 4
0.16 0.08 0.15 0.5 6 0
0.16 0.08 0.15 0.5 6 1
0.16 0.08 0.15 0.5 6 2
0.16 0.08 0

In [13]:
states

[]

In [None]:
14665 * 0.08