In [1]:
import torch
import sys
import os
import random
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
sys.path.append('../')
from common import make_env
sys.path.append('../../')
import TD3
import pandas as pd

if not os.path.exists("images"):
    os.mkdir("images")
    
if not os.path.exists("images2"):
    os.mkdir("images2")

C:\ProgramData\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
C:\ProgramData\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
  stacklevel=1)


In [2]:
response_times = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28]

In [3]:
for rt in response_times:
    for g_force in range(6):
        for seed in range(5):
            if not os.path.isfile('../models2/TD3_InvertedPendulum-v2_'+str(seed)+'_0.02_'+str(float(g_force))+'_'+str(rt)+'_1.0_final_actor'):
                print (rt, g_force, seed)

0.01 1 1
0.01 4 3
0.32 0 1


In [5]:
response_times = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28]
df = pd.DataFrame(columns=['seed', 'g_force', 'response_rate', 'reward', 'angle'])
default_timestep = 0.02
default_frame_skip = 2
jit_duration = 0.02
env_name = 'InvertedPendulum-v2'
for response_rate in response_times:
    for g_force in range(6):
        for seed in range(5):
            states = []
            force = g_force * 9.81
            arguments = ['TD3', env_name, seed, jit_duration, float(g_force), response_rate, 1.0, 'final']
                # Target policy smoothing is scaled wrt the action scale
            file_name = '_'.join([str(x) for x in arguments])
            if response_rate % default_timestep == 0:
                frame_skip = response_rate / default_timestep
                timestep = default_timestep
            elif jit_duration < response_rate:
                timestep = jit_duration
                frame_skip = response_rate / timestep
            else:
                timestep = response_rate
                frame_skip = 1
            jit_frames = 0  # How many frames the horizontal jitter force lasts each time
            if jit_duration:
                if jit_duration % timestep == 0:
                    jit_frames = int(jit_duration / timestep)
                else:
                    raise ValueError(
                        "jit_duration should be a multiple of the timestep: " + str(timestep))

            time_change_factor = (default_timestep * default_frame_skip) / (timestep * frame_skip)
            eval_env = make_env(env_name, seed, time_change_factor, timestep, frame_skip)
            eval_env._max_episode_steps = 100000
            state_dim = eval_env.observation_space.shape[0]
            action_dim = eval_env.action_space.shape[0]
            max_action = float(eval_env.action_space.high[0])
            kwargs = {
                "state_dim": state_dim,
                "action_dim": action_dim,
                "max_action": max_action,
                "discount": 0.99,
                "tau": 0.005,
            }
            kwargs["policy_noise"] = 2 * max_action
            kwargs["noise_clip"] = 0.5 * max_action
            kwargs["policy_freq"] = 2
            policy = TD3.TD3(**kwargs)
            policy_file = file_name 
            if os.path.exists('../models/'+policy_file+"_critic"):
                policy.load(f"../models/{policy_file}")
                avg_reward = 0.
                avg_angle = 0.
                steps = 0

                t = 0
                forces = []
                force_times = []
                for _ in range(10):
                    state, done = eval_env.reset(), False
                    counter = 0
                    disturb = 5
                    jittering = False
                    force = 0.25
                    while not done:
                        action = policy.select_action(np.array(state))
                        # Perform action
                        if not jittering and round(disturb - counter, 2) >= response_rate:  # Not during the frames when jitter force keeps existing
                            next_state, reward, done, _ = eval_env.step(action)
                            counter += response_rate
                        elif not jittering and round(disturb - counter, 2) < response_rate:
                            forces.append(force)
                            force_times.append(t)
                            jitter_force = force * 9.81 * (2 * (np.random.random() > 0.5) - 1)  # Jitter force strength w/ direction
                            next_state, reward, done, _ = eval_env.jitter_step_start(action, jitter_force,
                                                                                     (disturb - counter) / timestep,
                                                                                     frame_skip - ((disturb - counter) / timestep),
                                                                                     jit_frames)
                            jittered_frames = frame_skip - ((disturb - counter) / timestep)
                            if jittered_frames >= jit_frames:
                                jittered_frames = 0
                                jittering = False
                                eval_env.model.opt.gravity[0] = 0
                                counter = 0
                                force += 0.25

                            else:
                                jittering = True
                                eval_env.model.opt.gravity[0] = jitter_force
                                counter += response_rate
                        elif jit_frames - jittered_frames < frame_skip:  # Jitter force will dispear from now!
                                next_state, reward, done, _ = eval_env.jitter_step_end(
                                    action, jitter_force, jit_frames - jittered_frames, frame_skip - (jit_frames - jittered_frames))
                                jittering = False  # Stop jittering now
                                eval_env.model.opt.gravity[0] = 0
                                counter = 0
                                force += 0.25
                        else:  # Jitter force keeps existing now!
                                next_state, reward, done, _ = eval_env.step(action)
                                jittered_frames += frame_skip
                                counter += response_rate
                                if jittered_frames == jit_frames:
                                    jittering = False
                                    eval_env.model.opt.gravity[0] = 0
                                    counter = 0
                                    force += 0.25

                        avg_reward += reward
                        avg_angle += abs(next_state[1])
                        state = next_state
                        counter = round(counter, 2)
                        if jit_duration:
                            if counter == disturb:
                                forces.append(force)
                                force_times.append(t)
                                jitter_force = force * 9.81 * (2 * (random.random() > 0.5) - 1)
                                eval_env.model.opt.gravity[0] = jitter_force
                                jittering = True
                                jittered_frames = 0

                        t += 1
                        states.append(state)
                states = np.array(states)
                fig = make_subplots(rows=2, cols=1)
                x = [i for i in range(t)]
                for index, f in enumerate(force_times):
                    fig.add_shape(go.layout.Shape(type="line",
                                            x0=f,
                                            y0=-100,
                                            x1=f,
                                            y1=100,
                                            ),row=1,col=1)
                    fig.add_annotation(x=f,
                                       y=0,
                                       text=str(forces[index]), 
                                       showarrow=False,
                                       row=1, col=1)
                fig.add_trace(go.Scatter(x=x, y=states[:,0], mode='lines', name='pos'), row=1, col=1)
                fig.add_trace(go.Scatter(x=x, y=states[:,1], mode='lines', name='angle'), row=2, col=1)
    #             fig.add_trace(go.Scatter(x=x, y=states[:,2], mode='lines', name='vel'))
    #             fig.add_trace(go.Scatter(x=x, y=states[:,3], mode='lines', name='angular vel'))
                fig.write_html("images/"+file_name+'.html')

                avg_reward /= 10
                avg_angle /= 10
                df.loc[len(df.index)] = [seed, g_force, response_rate, avg_reward, avg_angle]
                print(avg_reward)

4048.2
5075.1
4006.7
4222.1
2215.0
5651.4
5181.0
4675.1
4021.7
3334.4
4724.8
8518.6
5904.9
1581.3
4573.4
3429.8
5645.1
4194.7
6165.6
4496.9
7065.3
6526.3
4546.8
358.3
7080.5
4382.2
2891.6
3362.5
1580.9
4498.1
3157.0
2062.7
2446.6
1439.7
1346.5
1918.3
3126.2
2821.9
1739.5
1894.1
2392.4
2331.7
1439.5
2572.5
1075.3
3381.6
2479.9
1967.3
1283.1
2941.3
2589.2
1762.9
2094.9
2704.1
1983.6
2318.9
2616.6
1444.0
3212.2
1115.0
1856.2
1281.2
2260.4
2372.6
2757.2
2658.2
734.6
1780.6
1958.3
2942.9
2424.5
1588.1
1735.2
1286.8
2310.2
2995.4
1556.2
2730.8
2217.6
1245.3
2031.8
2213.6
1995.8
1497.7
3427.7
3308.0
1902.5
1465.5
1544.0
925.6
981.9
1018.2
677.2
980.5
1195.1
1199.0
412.6
1033.4
1268.4
1432.0
1186.0
1049.8
1194.2
1207.0
915.6
881.8
483.5
146.0
1381.7
711.3
1474.6
1020.3
1554.2
1340.2
529.6
1566.9
988.0
792.1
1727.9
437.6
490.1
332.1
399.1
399.0
481.4
381.8
399.3
457.6
303.6
445.0
431.3
504.1
465.1
557.3
548.1
444.8
622.6
537.2
389.3
583.0
420.2
456.7
630.7
634.6
480.4
442.0
578.7
493.8
415.2
35

In [None]:
response_times = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28]
df = pd.DataFrame(columns=['seed', 'g_force', 'response_rate', 'reward', 'angle'])
default_timestep = 0.02
default_frame_skip = 2
jit_duration = 0.02
env_name = 'InvertedPendulum-v2'
for response_rate in response_times:
    for g_force in range(6):
        for seed in range(5):
            states = []
            force = g_force * 9.81
            arguments = ['TD3', env_name, seed, jit_duration, float(g_force), response_rate, 1.0, 'final']
                # Target policy smoothing is scaled wrt the action scale
            file_name = '_'.join([str(x) for x in arguments])
            if response_rate % default_timestep == 0:
                frame_skip = response_rate / default_timestep
                timestep = default_timestep
            elif jit_duration < response_rate:
                timestep = jit_duration
                frame_skip = response_rate / timestep
            else:
                timestep = response_rate
                frame_skip = 1
            jit_frames = 0  # How many frames the horizontal jitter force lasts each time
            if jit_duration:
                if jit_duration % timestep == 0:
                    jit_frames = int(jit_duration / timestep)
                else:
                    raise ValueError(
                        "jit_duration should be a multiple of the timestep: " + str(timestep))

            time_change_factor = (default_timestep * default_frame_skip) / (timestep * frame_skip)
            eval_env = make_env(env_name, seed, time_change_factor, timestep, frame_skip)
            eval_env._max_episode_steps = 100000
            state_dim = eval_env.observation_space.shape[0]
            action_dim = eval_env.action_space.shape[0]
            max_action = float(eval_env.action_space.high[0])
            kwargs = {
                "state_dim": state_dim,
                "action_dim": action_dim,
                "max_action": max_action,
                "discount": 0.99,
                "tau": 0.005,
            }
            kwargs["policy_noise"] = 2 * max_action
            kwargs["noise_clip"] = 0.5 * max_action
            kwargs["policy_freq"] = 2
            policy = TD3.TD3(**kwargs)
            policy_file = file_name 
            if os.path.exists('../models2/'+policy_file+"_critic"):
                policy.load(f"../models2/{policy_file}")
                avg_reward = 0.
                avg_angle = 0.
                steps = 0

                t = 0
                forces = []
                force_times = []
                for _ in range(10):
                    state, done = eval_env.reset(), False
                    counter = 0
                    disturb = 5
                    jittering = False
                    force = 0.25
                    while not done:
                        action = policy.select_action(np.array(state))
                        # Perform action
                        if not jittering and round(disturb - counter, 2) >= response_rate:  # Not during the frames when jitter force keeps existing
                            next_state, reward, done, _ = eval_env.step(action)
                            counter += response_rate
                        elif not jittering and round(disturb - counter, 2) < response_rate:
                            forces.append(force)
                            force_times.append(t)
                            jitter_force = force * 9.81 * (2 * (np.random.random() > 0.5) - 1)  # Jitter force strength w/ direction
                            next_state, reward, done, _ = eval_env.jitter_step_start(action, jitter_force,
                                                                                     (disturb - counter) / timestep,
                                                                                     frame_skip - ((disturb - counter) / timestep),
                                                                                     jit_frames)
                            jittered_frames = frame_skip - ((disturb - counter) / timestep)
                            if jittered_frames >= jit_frames:
                                jittered_frames = 0
                                jittering = False
                                eval_env.model.opt.gravity[0] = 0
                                counter = 0
                                force += 0.25

                            else:
                                jittering = True
                                eval_env.model.opt.gravity[0] = jitter_force
                                counter += response_rate
                        elif jit_frames - jittered_frames < frame_skip:  # Jitter force will dispear from now!
                                next_state, reward, done, _ = eval_env.jitter_step_end(
                                    action, jitter_force, jit_frames - jittered_frames, frame_skip - (jit_frames - jittered_frames))
                                jittering = False  # Stop jittering now
                                eval_env.model.opt.gravity[0] = 0
                                counter = 0
                                force += 0.25
                        else:  # Jitter force keeps existing now!
                                next_state, reward, done, _ = eval_env.step(action)
                                jittered_frames += frame_skip
                                counter += response_rate
                                if jittered_frames == jit_frames:
                                    jittering = False
                                    eval_env.model.opt.gravity[0] = 0
                                    counter = 0
                                    force += 0.25

                        avg_reward += reward
                        avg_angle += abs(next_state[1])
                        state = next_state
                        counter = round(counter, 2)
                        if jit_duration:
                            if counter == disturb:
                                forces.append(force)
                                force_times.append(t)
                                jitter_force = force * 9.81 * (2 * (random.random() > 0.5) - 1)
                                eval_env.model.opt.gravity[0] = jitter_force
                                jittering = True
                                jittered_frames = 0

                        t += 1
                        states.append(state)
                states = np.array(states)
                fig = make_subplots(rows=2, cols=1)
                x = [i for i in range(t)]
                for index, f in enumerate(force_times):
                    fig.add_shape(go.layout.Shape(type="line",
                                            x0=f,
                                            y0=-100,
                                            x1=f,
                                            y1=100,
                                            ),row=1,col=1)
                    fig.add_annotation(x=f,
                                       y=0,
                                       text=str(forces[index]), 
                                       showarrow=False,
                                       row=1, col=1)
                fig.add_trace(go.Scatter(x=x, y=states[:,0], mode='lines', name='pos'), row=1, col=1)
                fig.add_trace(go.Scatter(x=x, y=states[:,1], mode='lines', name='angle'), row=2, col=1)
    #             fig.add_trace(go.Scatter(x=x, y=states[:,2], mode='lines', name='vel'))
    #             fig.add_trace(go.Scatter(x=x, y=states[:,3], mode='lines', name='angular vel'))
                fig.write_html("images2/"+file_name+'.html')

                avg_reward /= 10
                avg_angle /= 10
                df.loc[len(df.index)] = [seed, g_force, response_rate, avg_reward, avg_angle]
                print(avg_reward)

5679.4
4125.7
147.1


In [23]:
fig = go.Figure()

    
for force in range(6):
    rewards = []
    for rt in response_times:
        temp_df = df.loc[(df['response_rate'] == rt) & (df['g_force'] == force)]
        print (rt, force, np.mean(temp_df['reward'])* rt)
        rewards.append( np.mean(temp_df['reward'])* rt)
    
    fig.add_trace(go.Scatter(x=response_times, y=rewards, mode='lines', name=str(force)))
fig.show()

0.01 0 39.7948
0.02 0 45.1284
0.04 0 83.6496
0.08 0 85.15039999999999
0.16 0 66.67519999999999
0.32 0 21.976
0.64 0 8.8064
1.28 0 5.632000000000001
0.01 1 42.38475
0.02 1 46.558800000000005
0.04 1 97.46560000000001
0.08 1 92.38880000000002
0.16 1 57.7024
0.32 1 17.4784
0.64 1 6.656000000000001
1.28 1 5.708799999999999
0.01 2 54.7218
0.02 2 49.704
0.04 2 96.47760000000001
0.08 2 90.93599999999999
0.16 2 73.1808
0.32 2 22.9696
0.64 2 8.601600000000001
1.28 2 6.630400000000001
0.01 3 73.564
0.02 3 53.5508
0.04 3 93.6584
0.08 3 91.11679999999998
0.16 3 79.45280000000001
0.32 3 15.1936
0.64 3 9.728
1.28 3 5.299200000000001
0.01 4 83.37549999999999
0.02 4 61.11359999999999
0.04 4 106.0416
0.08 4 112.97120000000001
0.16 4 89.37920000000003
0.32 4 19.916800000000002
0.64 4 8.8064
1.28 4 6.656000000000001
0.01 5 73.71539999999999
0.02 5 74.63959999999999
0.04 5 110.1592
0.08 5 142.48160000000001
0.16 5 58.34879999999999
0.32 5 21.068800000000003
0.64 5 9.0752
1.28 5 5.5296


In [16]:
df

Unnamed: 0,seed,g_force,response_rate,reward,angle
0,0.0,0.0,0.01,5683.7,88.433887
1,1.0,0.0,0.01,4785.7,47.933353
2,2.0,0.0,0.01,147.1,10.677895
3,3.0,0.0,0.01,4605.1,76.335801
4,4.0,0.0,0.01,4675.8,116.574278
...,...,...,...,...,...
232,0.0,5.0,1.28,4.4,0.958583
233,1.0,5.0,1.28,4.0,0.718849
234,2.0,5.0,1.28,4.4,0.631078
235,3.0,5.0,1.28,4.4,0.651751
