In [1]:
import gym
import torch
import sys
import os
import random
import numpy as np
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
sys.path.append('../')
from common import make_env
sys.path.append('../../')
import TD3

import pandas as pd

if not os.path.exists("images"):
    os.mkdir("images")
    
if not os.path.exists("images2"):
    os.mkdir("images2")

In [2]:
response_times = [ 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28]

In [3]:
for rt in response_times:
    for g_force in range(21):
        for seed in range(5):
            if not os.path.isfile('../models/TD3_InvertedPendulum-v2_'+str(seed)+'_0.02_'+str(float(g_force))+'_'+str(rt)+'_1.0_False_final_actor'):
                print (rt, g_force, seed)

In [4]:
df = pd.DataFrame(columns=['seed', 'g_force', 'response_rate', 'reward', 'angle', 'jerk'])
# df = torch.load('dataframe')
default_timestep = 0.02
default_frame_skip = 2
jit_duration = 0.02
env_name = 'InvertedPendulum-v2'
for response_rate in response_times:
    for g_force in range(21):
        print(response_rate, g_force)
        for seed in range(5):
            states = []
            force = g_force * 9.81
            arguments = ['TD3', env_name, seed, jit_duration, float(g_force), response_rate, 1.0, False,'final']
                # Target policy smoothing is scaled wrt the action scale
            file_name = '_'.join([str(x) for x in arguments])
            if response_rate % default_timestep == 0:
                frame_skip = response_rate / default_timestep
                timestep = default_timestep
            elif jit_duration < response_rate:
                timestep = jit_duration
                frame_skip = response_rate / timestep
            else:
                timestep = response_rate
                frame_skip = 1
            jit_frames = 0  # How many frames the horizontal jitter force lasts each time
            if jit_duration:
                if jit_duration % timestep == 0:
                    jit_frames = int(jit_duration / timestep)
                else:
                    raise ValueError(
                        "jit_duration should be a multiple of the timestep: " + str(timestep))
                    
            time_change_factor = (default_timestep * default_frame_skip) / (timestep * frame_skip)
            eval_env = make_env(env_name, seed, time_change_factor, timestep, frame_skip, False)
            eval_env._max_episode_steps = 100000
            state_dim = eval_env.observation_space.shape[0]
            action_dim = eval_env.action_space.shape[0]
            max_action = float(eval_env.action_space.high[0])
            kwargs = {
                "state_dim": state_dim,
                "action_dim": action_dim,
                "observation_space": eval_env.observation_space,
                "max_action": max_action,
                "discount": 0.99,
                "tau": 0.005,
            }
            kwargs["policy_noise"] = 2 * max_action
            kwargs["noise_clip"] = 0.5 * max_action
            kwargs["policy_freq"] = 2
            policy = TD3.TD3(**kwargs)
            policy_file = file_name 
            if os.path.exists('../models/'+policy_file+"_critic"):
                policy.load(f"../models/{policy_file}")
                avg_reward = 0.
                avg_angle = 0.
                steps = 0

                t = 0
                forces = []
                force_times = []
                for _ in range(1):
                    state, done = eval_env.reset(), False
                    eval_env.model.opt.gravity[0] = 0
                    counter = 0
                    disturb = 5
                    jittering = False
                    force = 0.25
                    prev_action = None
                    jerk = 0
                    while not done:
                        action = policy.select_action(np.array(state))
                        # Perform action
                        if not jittering and round(disturb - counter, 3) >= response_rate:  # Not during the frames when jitter force keeps existing
                            next_state, reward, done, _ = eval_env.step(action)
                            counter += response_rate
                        elif not jittering and round(disturb - counter, 3) < response_rate:
                            forces.append(force)
                            force_times.append(t)
                            jitter_force = force * 9.81 * (2 * (np.random.random() > 0.5) - 1)  # Jitter force strength w/ direction
                            next_state, reward, done, _ = eval_env.jitter_step_start(action, jitter_force,
                                                                                     (disturb - counter) / timestep,
                                                                                     frame_skip - ((disturb - counter) / timestep),
                                                                                     jit_frames)
                            jittered_frames = frame_skip - ((disturb - counter) / timestep)
                            if jittered_frames >= jit_frames:
                                jittered_frames = 0
                                jittering = False
                                eval_env.model.opt.gravity[0] = 0
                                counter = 0
                                force += 0.25

                            else:
                                jittering = True
                                eval_env.model.opt.gravity[0] = jitter_force
                                counter += response_rate
                        elif jit_frames - jittered_frames < frame_skip:  # Jitter force will dispear from now!
                            next_state, reward, done, _ = eval_env.jitter_step_end(
                                action, jitter_force, jit_frames - jittered_frames, frame_skip - (jit_frames - jittered_frames))
                            jittering = False  # Stop jittering now
                            eval_env.model.opt.gravity[0] = 0
                            counter = 0
                            force += 0.25
                        else:  # Jitter force keeps existing now!
                            next_state, reward, done, _ = eval_env.step(action)
                            jittered_frames += frame_skip
                            counter += response_rate
                            if jittered_frames == jit_frames:
                                jittering = False
                                eval_env.model.opt.gravity[0] = 0
                                counter = 0
                                force += 0.25

                        avg_reward += reward
                        avg_angle += abs(next_state[1])
                        state = next_state
                        counter = round(counter, 3)
                        if jit_duration:
                            if counter == disturb:
                                forces.append(force)
                                force_times.append(t)
                                jitter_force = force * 9.81 * (2 * (random.random() > 0.5) - 1)
                                eval_env.model.opt.gravity[0] = jitter_force
                                jittering = True
                                jittered_frames = 0

                        t += 1
                        if prev_action:
                            jerk += abs(action[0] - prev_action)
                        prev_action = action[0]
                        states.append(state)
                states = np.array(states)
                fig = make_subplots(rows=2, cols=1)
                x = [i for i in range(t)]
                for index, f in enumerate(force_times):
                    fig.add_shape(go.layout.Shape(type="line",
                                            x0=f,
                                            y0=-100,
                                            x1=f,
                                            y1=100,
                                            ),row=1,col=1)
                    fig.add_annotation(x=f,
                                       y=0,
                                       text=str(forces[index]), 
                                       showarrow=False,
                                       row=1, col=1)
                fig.add_trace(go.Scatter(x=x, y=states[:,0], mode='lines', name='pos'), row=1, col=1)
                fig.add_trace(go.Scatter(x=x, y=states[:,1], mode='lines', name='angle'), row=2, col=1)
    #             fig.add_trace(go.Scatter(x=x, y=states[:,2], mode='lines', name='vel'))
    #             fig.add_trace(go.Scatter(x=x, y=states[:,3], mode='lines', name='angular vel'))
                fig.update_layout(xaxis_title="Frames")

                fig.write_html("images2/"+file_name+'.html')
        

                avg_reward /= 1
                avg_angle /= 1
                jerk /= avg_reward
                df.loc[len(df.index)] = [seed, g_force, response_rate, avg_reward, avg_angle, jerk]
                

0.01 0
running build_ext
building 'mujoco_py.cymj' extension
creating c:\users\devdh\appdata\local\programs\python\python38\lib\site-packages\mujoco_py\generated\_pyxbld_2.1.2.14_38_windowsextensionbuilder
creating c:\users\devdh\appdata\local\programs\python\python38\lib\site-packages\mujoco_py\generated\_pyxbld_2.1.2.14_38_windowsextensionbuilder\temp.win-amd64-3.8
creating c:\users\devdh\appdata\local\programs\python\python38\lib\site-packages\mujoco_py\generated\_pyxbld_2.1.2.14_38_windowsextensionbuilder\temp.win-amd64-3.8\Release
creating c:\users\devdh\appdata\local\programs\python\python38\lib\site-packages\mujoco_py\generated\_pyxbld_2.1.2.14_38_windowsextensionbuilder\temp.win-amd64-3.8\Release\users
creating c:\users\devdh\appdata\local\programs\python\python38\lib\site-packages\mujoco_py\generated\_pyxbld_2.1.2.14_38_windowsextensionbuilder\temp.win-amd64-3.8\Release\users\devdh
creating c:\users\devdh\appdata\local\programs\python\python38\lib\site-packages\mujoco_py\gener

LinkError: command 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Tools\\MSVC\\14.30.30705\\bin\\HostX86\\x64\\link.exe' failed with exit status 1104

In [18]:
fig = go.Figure()

    
for force in range(11):
    rewards = []
    for rt in response_times:
        temp_df = df.loc[(df['response_rate'] == rt) & (df['g_force'] == force)]
#         print (rt, force, np.mean(temp_df['reward'])* rt)
        rewards.append( np.mean(temp_df['reward'])* rt)
    
    fig.add_trace(go.Scatter(x=response_times, y=rewards, mode='lines', name=str(force)))
    
fig.update_xaxes(type="log")
fig.update_layout(title='Learning curve for Delayed vs Non-delayed envrionment',
                   xaxis_title='Response Rate',
                   yaxis_title='Normalized reward')

fig.show()

In [42]:
df

Unnamed: 0,seed,g_force,response_rate,reward,angle,jerk
0,0.0,0.0,0.01,2293.0,47.214931,0.577094
1,1.0,0.0,0.01,120.0,17.372309,2.128509
2,2.0,0.0,0.01,6553.0,144.442322,0.517706
3,3.0,0.0,0.01,6081.0,108.854726,1.500307
4,4.0,0.0,0.01,8046.0,177.474073,3.593683
...,...,...,...,...,...,...
835,0.0,20.0,1.28,1.0,1.573276,0.000000
836,1.0,20.0,1.28,1.0,1.573276,0.000000
837,2.0,20.0,1.28,1.0,1.573276,0.000000
838,3.0,20.0,1.28,1.0,1.573276,0.000000


In [43]:
torch.save(df, 'dataframe')

In [9]:
df = torch.load('dataframe')

In [10]:
df['reward'] = df['reward'] * df['response_rate']
df['jerk'] = df['jerk'] / df['response_rate']
rewards = pd.crosstab(df['g_force'], df['response_rate'], values=df['reward'], aggfunc='mean')
jerks = pd.crosstab(df['g_force'], df['response_rate'], values=df['jerk'], aggfunc='mean')

In [46]:
rewards

response_rate,0.01,0.02,0.04,0.08,0.16,0.32,0.64,1.28
g_force,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0,46.186,79.136,82.64,93.824,9.216,0.32,0.64,1.28
1.0,82.884,77.46,83.776,63.152,28.032,0.32,0.64,1.28
2.0,71.912,87.864,83.464,100.48,14.752,0.32,0.64,1.28
3.0,99.924,110.232,90.56,82.4,44.096,0.32,0.64,1.28
4.0,84.118,130.924,100.008,88.56,24.832,0.32,0.64,1.28
5.0,123.1,143.604,113.52,135.008,19.136,0.32,0.64,1.28
6.0,135.182,147.388,120.984,82.416,38.272,0.32,0.64,1.28
7.0,154.53,160.188,127.984,72.576,34.944,0.32,0.64,1.28
8.0,141.448,157.676,124.824,117.024,7.584,0.32,0.64,1.28
9.0,152.618,169.616,114.536,92.784,67.68,0.32,0.64,1.28


In [28]:
fig = go.Figure(data=[go.Surface(z=rewards, y=df['g_force'].unique(), x=df['response_rate'].unique())])

fig.update_layout(title='Evaluation of agents with different response times trained in envrionments of varying max. pertubation',
                  width=1000, height=1000)
fig.update_layout(
    scene = dict(
        xaxis = dict( type='log', title='Response Time', tickvals=df['response_rate'].unique()),
        yaxis = dict( title='Max. Pertubation during training'),
        zaxis = dict( title='Avg. Seconds during evaluation')))

In [8]:
fig = go.Figure(data=[go.Surface(z=rewards, y=df['g_force'].unique(), x=df['response_rate'].unique())])

fig.update_layout(title='Evaluation of agents with different response times trained in envrionments of varying max. pertubation',
                  width=1000, height=1000, scene_camera_eye=dict(x=1.87, y=0.88, z=-0.64))
fig.update_layout(
    scene = dict(
        xaxis = dict( type='log', title='Response Rate', tickvals=df['response_rate'].unique()),
        yaxis = dict( title='Max. Pertubation during training'),
        zaxis = dict( title='Avg. Seconds during evaluation')))
fig.update_traces(contours_z=dict(show=True, usecolormap=True,
                                  highlightcolor="black", project_z=True))

In [35]:
fig = go.Figure(data=go.Heatmap(
                    z=rewards, x=df['response_rate'].unique().astype('U'),
                   y=df['g_force'].unique().astype('U')))

fig.update_layout(
    title='Heatmap for with response times vs max. pertubation during training',
    xaxis_title="Response Rate",
    yaxis_title="Max Pertubation during training",
)

In [15]:
fig = ff.create_annotated_heatmap(z=rewards,y=df['g_force'].unique().astype('U').tolist(), x=[str(i) + 's' for i in df['response_rate'].unique().tolist()],  annotation_text=np.around(rewards, decimals=2), colorscale='greens')

fig.update_layout(
    title='Average evaluation seconds for response times vs max. pertubation during training',
    xaxis_title="Response Time",
    yaxis_title="Max Pertubation during training",
    xaxis_side='bottom',
    font=dict(size=18),
)
fig.show()

KeyError: 0.0

In [11]:
fig = ff.create_annotated_heatmap(z=rewards.to_numpy(),y=[str(int(i))+'g' for i in df['g_force'].unique().tolist()], 
                                  x=[str(i) + 's' for i in df['response_rate'].unique().tolist()],  
                                  annotation_text=np.around(rewards.to_numpy(), decimals=2), colorscale='blues', showscale=True,
                                 reversescale =False)

fig.update_layout(
#     title='Average evaluation seconds for response times vs max. pertubation during training',
    xaxis_title="Response Time",
    yaxis_title="Max Pertubation during training",
    xaxis_side='bottom',
    font=dict(size=15),
    height=700,
)
fig.show()



In [12]:
fig = ff.create_annotated_heatmap(z=jerks.to_numpy(),y=[str(int(i))+'g' for i in df['g_force'].unique().tolist()], 
                                  x=[str(i) + 's' for i in df['response_rate'].unique().tolist()],  
                                  annotation_text=np.around(jerks.to_numpy(), decimals=2), colorscale='blues', showscale=True,
                                 reversescale =False)

fig.update_layout(
#     title='Average evaluation seconds for response times vs max. pertubation during training',
    xaxis_title="Response Time",
    yaxis_title="Max Pertubation during training",
    xaxis_side='bottom',
    font=dict(size=15),
    height=700,
)
fig.show()



# Jerk

In [26]:
df = pd.DataFrame(columns=['seed', 'g_force', 'response_rate', 'reward', 'angle', 'jerk'])
# df = torch.load('dataframe')
default_timestep = 0.02
default_frame_skip = 2
jit_duration = 0.02
env_name = 'InvertedPendulum-v2'
for response_rate in response_times:
    for g_force in [16, 17, 18, 19, 20]:
        print(response_rate, g_force)
        for seed in range(5):
            states = []
            force = g_force * 9.81
            arguments = ['TD3', env_name, seed, jit_duration, float(g_force), response_rate, 1.0, False,'final']
                # Target policy smoothing is scaled wrt the action scale
            file_name = '_'.join([str(x) for x in arguments])
            if response_rate % default_timestep == 0:
                frame_skip = response_rate / default_timestep
                timestep = default_timestep
            elif jit_duration < response_rate:
                timestep = jit_duration
                frame_skip = response_rate / timestep
            else:
                timestep = response_rate
                frame_skip = 1
            jit_frames = 0  # How many frames the horizontal jitter force lasts each time
            if jit_duration:
                if jit_duration % timestep == 0:
                    jit_frames = int(jit_duration / timestep)
                else:
                    raise ValueError(
                        "jit_duration should be a multiple of the timestep: " + str(timestep))
                    
            time_change_factor = (default_timestep * default_frame_skip) / (timestep * frame_skip)
            eval_env = make_env(env_name, seed, time_change_factor, timestep, frame_skip, False)
            eval_env._max_episode_steps = 100000
            state_dim = eval_env.observation_space.shape[0]
            action_dim = eval_env.action_space.shape[0]
            max_action = float(eval_env.action_space.high[0])
            kwargs = {
                "state_dim": state_dim,
                "action_dim": action_dim,
                "observation_space": eval_env.observation_space,
                "max_action": max_action,
                "discount": 0.99,
                "tau": 0.005,
            }
            kwargs["policy_noise"] = 2 * max_action
            kwargs["noise_clip"] = 0.5 * max_action
            kwargs["policy_freq"] = 2
            policy = TD3.TD3(**kwargs)
            policy_file = file_name 
            if os.path.exists('../models/'+policy_file+"_critic"):
                policy.load(f"../models/{policy_file}")
                avg_reward = 0.
                avg_angle = 0.
                steps = 0

                t = 0
                forces = []
                force_times = []
                for _ in range(1):
                    state, done = eval_env.reset(), False
                    eval_env.model.opt.gravity[0] = 0
                    counter = 0
                    disturb = 5
                    jittering = False
                    force = 0.25
                    prev_action = None
                    jerk = 0
                    while not done:
                        action = policy.select_action(np.array(state))
                        # Perform action
                        if not jittering and round(disturb - counter, 3) >= response_rate:  # Not during the frames when jitter force keeps existing
                            next_state, reward, done, _ = eval_env.step(action)
                            counter += response_rate
                        elif not jittering and round(disturb - counter, 3) < response_rate:
                            forces.append(force)
                            force_times.append(t)
                            jitter_force = force * 9.81 * (2 * (np.random.random() > 0.5) - 1)  # Jitter force strength w/ direction
                            next_state, reward, done, _ = eval_env.jitter_step_start(action, jitter_force,
                                                                                     (disturb - counter) / timestep,
                                                                                     frame_skip - ((disturb - counter) / timestep),
                                                                                     jit_frames)
                            jittered_frames = frame_skip - ((disturb - counter) / timestep)
                            if jittered_frames >= jit_frames:
                                jittered_frames = 0
                                jittering = False
                                eval_env.model.opt.gravity[0] = 0
                                counter = 0
                                force += 0.25

                            else:
                                jittering = True
                                eval_env.model.opt.gravity[0] = jitter_force
                                counter += response_rate
                        elif jit_frames - jittered_frames < frame_skip:  # Jitter force will dispear from now!
                            next_state, reward, done, _ = eval_env.jitter_step_end(
                                action, jitter_force, jit_frames - jittered_frames, frame_skip - (jit_frames - jittered_frames))
                            jittering = False  # Stop jittering now
                            eval_env.model.opt.gravity[0] = 0
                            counter = 0
                            force += 0.25
                        else:  # Jitter force keeps existing now!
                            next_state, reward, done, _ = eval_env.step(action)
                            jittered_frames += frame_skip
                            counter += response_rate
                            if jittered_frames == jit_frames:
                                jittering = False
                                eval_env.model.opt.gravity[0] = 0
                                counter = 0
                                force += 0.25

                        avg_reward += reward
                        avg_angle += abs(next_state[1])
                        state = next_state
                        counter = round(counter, 3)
                        if jit_duration:
                            if counter == disturb:
                                forces.append(force)
                                force_times.append(t)
                                jitter_force = force * 9.81 * (2 * (random.random() > 0.5) - 1)
                                eval_env.model.opt.gravity[0] = jitter_force
                                jittering = True
                                jittered_frames = 0

                        t += 1
                        if prev_action:
                            jerk += action - prev_action
                        prev_action = action
                        states.append(state)
                states = np.array(states)
                fig = make_subplots(rows=2, cols=1)
                x = [i for i in range(t)]
                for index, f in enumerate(force_times):
                    fig.add_shape(go.layout.Shape(type="line",
                                            x0=f,
                                            y0=-100,
                                            x1=f,
                                            y1=100,
                                            ),row=1,col=1)
                    fig.add_annotation(x=f,
                                       y=0,
                                       text=str(forces[index]), 
                                       showarrow=False,
                                       row=1, col=1)
                fig.add_trace(go.Scatter(x=x, y=states[:,0], mode='lines', name='pos'), row=1, col=1)
                fig.add_trace(go.Scatter(x=x, y=states[:,1], mode='lines', name='angle'), row=2, col=1)
    #             fig.add_trace(go.Scatter(x=x, y=states[:,2], mode='lines', name='vel'))
    #             fig.add_trace(go.Scatter(x=x, y=states[:,3], mode='lines', name='angular vel'))
                fig.update_layout(xaxis_title="Frames")

                fig.write_html("images2/"+file_name+'.html')
        

                avg_reward /= 1
                avg_angle /= 1
                jerk /= avg_reward
                df.loc[len(df.index)] = [seed, g_force, response_rate, avg_reward, avg_angle, jerk]
                

[ 43.3  43.3  43.3  43.3  43.3  43.3 104.3 199.6 182.5  70.1 187.  117.5
 195.2 237.8 130.2 351.4 285.8 361.3 234.5 337.1 368.1 242.5 189.3 329.4
 350.  355.4 332.5 378.7 408.5 325.1 348.4 281.  273.3 179.9  59.5 293.6
 418.8 423.  329.  354.9 317.4 349.6 424.4 348.4 294.2 298.4 485.5 285.5
 474.4 329.2 382.5 304.9 445.3 304.5 200.2 437.4 258.3 251.5 248.8 355.2
 404.8 359.1 377.6 330.9  96.4 302.8 335.  456.8 267.9 364.4 254.3 302.9
 274.8 156.8 321.2 274.7 200.3 452.4 412.1 448.7 286.4]
[ 39.4  39.4  39.4  39.4  39.4  39.4 140.5 152.7 187.8 214.8 142.5 265.6
 138.8 230.  222.9 316.4 349.9 294.7 291.7 267.3 208.8 173.1 305.6 215.7
  78.9 126.8 254.7 226.2 399.8 329.3 285.7 368.6 479.5 251.7 353.1  55.2
 447.8 352.4 246.8 261.3 236.7 426.5 290.  351.2 344.  102.6 217.1 224.5
 290.9 232.8 386.2 290.3 264.2 199.5 191.4 378.5 268.4 402.7 431.3 367.2
 476.9 295.8 352.2 398.7 394.1 345.  268.5 312.7 240.  344.1 372.6 339.
 350.6 387.9 376.9 274.9 271.  340.3 390.7 445.2 308.6]
[ 25.5  25.5 

[  3.    3.    3.    3.    3.    3.    1.    1.    7.   24.7  23.6  36.7
  38.8  36.5  31.   26.5  29.8  34.   38.3  92.7  31.7 104.3  59.4 106.1
  48.8  55.1  55.6  41.7  23.9  24.8  35.1  28.2  34.   31.2  29.9  26.1
  28.7  24.3  34.7  56.9  26.6  80.2  83.1  87.9  21.3 104.3  93.5 120.5
  75.4 116.5 137.7 149.3 136.8 118.9 140.5 114.9 115.4 112.1 107.8  98.7
 101.  121.6 122.6 106.2 107.8 150.6  87.9 105.3 136.2 120.2  80.8 150.4
 143.  144.   95.3 130.8 101.  116.7 115.4  99.3  81.4]
[  3.    3.    3.    3.    3.    3.    1.    1.    3.   13.2  24.7  38.3
  21.2  36.8  61.2  37.6  89.7  76.1  81.9  34.1  61.4  79.9  97.6  60.3
  54.9  35.   76.   80.7  31.6  30.   46.8  37.2  26.4  30.7  27.2  85.4
  81.2  22.9 118.6 120.8  89.7 104.2  83.7  96.1  22.1 114.9 117.5  95.8
 125.3 106.9 111.7 147.9 154.7 101.  109.8 154.1 103.3 134.1 126.5  97.9
 115.3  74.9 114.5 104.9 101.9  76.5 105.6  91.2  82.8 100.7  89.9  80.5
  53.2  79.8  90.1  85.8 109.5  86.7  85.1 109.4  89. ]
[  2.    2. 

20

In [72]:
reflex.bias

Parameter containing:
tensor([ 0.3864, -0.4175], requires_grad=True)

In [22]:
range(15,20)

range(15, 20)