In [1]:
import gym
import numpy as np
import torch
import torchkit.pytorch_utils as ptu
import torchsde
from torch.nn import functional as F
import random as rnd
import copy as cp
# import environments
import envs.pomdp
import pdb
# import recurrent model-free RL (separate architecture)
from policies.models.policy_rnn import ModelFreeOffPolicy_Separate_RNN as Policy_RNN
from policies.models.policy_rnn_shared import ModelFreeOffPolicy_Shared_RNN as Policy_Shared_RNN
from policies.models.policy_mlp import ModelFreeOffPolicy_MLP as Policy_MLP
from tqdm import tqdm
# import the replay buffer
from buffers.seq_replay_buffer_vanilla import SeqReplayBuffer
from buffers.simple_replay_buffer import SimpleReplayBuffer 
from utils import helpers as utl
from typing import Sequence
from read_ini import read_ini
conf =read_ini("C:/Users/alexander.vasilyev/pomdp-baselines-main/configfile.ini")

  logger.warn(
  from collections import OrderedDict, Set
  if not hasattr(tensorboard, "__version__") or LooseVersion(


## Build a POMDP environment: Pendulum-V (only observe the velocity)

In [2]:
cuda_id = 0  # -1 if using cpu
ptu.set_gpu_mode(torch.cuda.is_available() and cuda_id >= 0, cuda_id)

env = gym.make(conf["env_name"])
max_trajectory_len = env._max_episode_steps
act_dim = env.action_space.shape[0]
obs_dim = env.observation_space.shape[0]

shared = False
markov = False

if markov:
    agent = Policy_MLP(
        obs_dim=obs_dim,
        action_dim=act_dim,
        algo_name=conf["algo_name"],
        dqn_layers=[128, 128],
        policy_layers=[128, 128],
        lr=3e-4,
        gamma=0.99,
        tau=5e-3,
    ).to(ptu.device)
    encoder="Nan"
else:
    if shared:
        agent = Policy_Shared_RNN(
            obs_dim=obs_dim,
            action_dim=act_dim,
            encoder=conf["encoder"],
            algo_name=conf["algo_name"],
            action_embedding_size=int(conf["action_embedding_size"]),
            observ_embedding_size=int(conf["observ_embedding_size"]),
            reward_embedding_size=int(conf["reward_embedding_size"]),
            rnn_hidden_size=int(conf["hidden_size"]),
            dqn_layers=[128, 128],
            policy_layers=[128, 128],
            lr=float(conf["lr"]),
            gamma=0.9,
            tau=0.005,
            embed=True,
        ).to(ptu.device)
    else: 
        agent = Policy_RNN(
            obs_dim=obs_dim,
            action_dim=act_dim,
            encoder=conf["encoder"],
            algo_name=conf["algo_name"],
            action_embedding_size=int(conf["action_embedding_size"]),
            observ_embedding_size=int(conf["observ_embedding_size"]),
            reward_embedding_size=int(conf["reward_embedding_size"]),
            rnn_hidden_size=int(conf["hidden_size"]),
            dqn_layers=[128, 128],
            policy_layers=[128, 128],
            lr=float(conf["lr"]),
            gamma=0.9,
            tau=0.005,
            radii=40,
            embed=True,
            activation = conf["activation"],
        ).to(ptu.device)
    
print(agent)
lr=float(conf["lr"])
encoder=conf["encoder"]
num_updates_per_iter = int(conf["num_updates_per_iter"])  # training frequency
sampled_seq_len = int(conf["sampled_seq_len"])  # context length
buffer_size = int(float(conf["buffer_size"]))
batch_size = int(conf["batch_size"])
dropout_rate=float(conf["dropout_rate"])
num_iters = int(conf["num_iters"])
num_init_rollouts_pool = int(conf["num_init_rollouts_pool"])
num_rollouts_per_iter = int(conf["num_rollouts_per_iter"])
total_rollouts = num_init_rollouts_pool + num_iters * num_rollouts_per_iter
n_env_steps_total = max_trajectory_len * total_rollouts
_n_env_steps_total = 0
print("total env episodes", total_rollouts, "total env steps", n_env_steps_total)


ModelFreeOffPolicy_Separate_RNN(
  (critic): Critic_RNN(
    (observ_embedder): FeatureExtractor(
      (fc): Linear(in_features=1, out_features=32, bias=True)
    )
    (action_embedder): FeatureExtractor(
      (fc): Linear(in_features=1, out_features=8, bias=True)
    )
    (reward_embedder): FeatureExtractor(
      (fc): Linear(in_features=1, out_features=8, bias=True)
    )
    (rnn): NeuralCDE(
      (func): CDEFunc(
        (linear0): Linear(in_features=72, out_features=72, bias=True)
        (linear1): Linear(in_features=72, out_features=72, bias=True)
        (linear2): Linear(in_features=72, out_features=3528, bias=True)
      )
      (initial): Linear(in_features=49, out_features=72, bias=True)
      (readout): Linear(in_features=72, out_features=72, bias=True)
    )
    (current_shortcut_embedder): FeatureExtractor(
      (fc): Linear(in_features=2, out_features=48, bias=True)
    )
    (qf1): FlattenMlp(
      (fc0): Linear(in_features=120, out_features=128, bias=True)
   

## Build a recurent model-free RL agent: separate architecture, `lstm` encoder, `oar` policy input space, `td3` RL algorithm (context length set later)

## Define other training parameters such as context length and training frequency

## Define key functions: collect rollouts and policy update

In [3]:
def create_ncde_row(obs, next_obs, prev_action, action, prev_reward, reward, steps,init):
    
    if init:
        obs_row= obs
        rew_row = prev_reward
        act_row = prev_action
    else:
        obs_row=torch.cat((obs, next_obs),0)
        rew_row=torch.cat((prev_reward, reward),0)
        act_row=torch.cat((prev_action, action),0)
 
    if shared: 
        obs_row=agent.observ_embedder(obs_row)
        rew_row=agent.reward_embedder(rew_row)
        act_row=agent.action_embedder(act_row)
    else: 
        obs_row=agent.actor.observ_embedder(obs_row)
        rew_row=agent.actor.reward_embedder(rew_row)
        act_row=agent.actor.action_embedder(act_row)
    
    if init:
        time_tensor=torch.tensor([[steps]]).to(ptu.device)
    else:
        time_tensor=torch.tensor([[steps],[steps+1]]).to(ptu.device)

    ncde_row=torch.cat((time_tensor,act_row,obs_row,rew_row),1)
    ncde_row=ncde_row[None,:]
    
    return ncde_row



@torch.no_grad()
def collect_rollouts(
    num_rollouts, random_actions=False, deterministic=True, train_mode=True
):
    """collect num_rollouts of trajectories in task and save into policy buffer
    :param
        random_actions: whether to use policy to sample actions, or randomly sample action space
        deterministic: deterministic action selection?
        train_mode: whether to train (stored to buffer) or test
    """
    if not train_mode:
        assert random_actions == False and deterministic == True

    total_steps = 0
    total_rewards = 0.0
    trewards =[]
    for idx in range(num_rollouts):
        steps = 0
        rewards = 0.0
        energy = 0.0
        print(env.reset())
        obs = ptu.from_numpy(env.reset())
        obs = obs.reshape(1, obs.shape[-1])
        done_rollout = False
        init=True
        # get hidden state at timestep=0, None for mlp
        
        if not markov:
            action, reward, internal_state = agent.get_initial_info()

            if encoder == "ncde":
                internal_state= None
                ncde_row= create_ncde_row(obs, obs, action, action, reward, reward, steps,init)
                prev_action= action.clone()
                prev_reward= reward.clone()
                next_obs= obs.clone()
        
        
        if train_mode:
            # temporary storage
            obs_list, act_list, rew_list, next_obs_list, term_list = (
                [],
                [],
                [],
                [],
                [],
            )
                           

        while not done_rollout:
            if markov: 
                action = agent.act(obs=obs, deterministic=deterministic)[0]
            else:
                if encoder == "ncde":
                    (action,_,_,_), internal_state= agent.ncde_act(ncde_row=ncde_row, prev_internal_state=internal_state, obs=obs,  deterministic=deterministic)
                else:
                    (action, _, _, _), internal_state = agent.act(
                        prev_internal_state=internal_state,
                        prev_action=action,
                        reward=reward,
                        obs=obs,
                        deterministic=deterministic,
                    )
            # observe reward and next obs (B=1, dim)
            #pdb.set_trace()
        
            #print(torch.norm(internal_state))
            next_obs, reward, done, info = utl.env_step(env, action.squeeze(dim=0))
            done_rollout = False if ptu.get_numpy(done[0][0]) == 0.0 else True
            init=False
            
            if not markov:
                if encoder == "ncde":
   
                    ncde_row= create_ncde_row(obs, next_obs, prev_action, action, prev_reward, reward, steps,init)
            
            #switch on/off dropouts
            #drop_trigger=rnd.uniform(0,1)
            #if drop_trigger<dropout_rate:
            #    next_obs=cp.deepcopy(obs)
            # update statistics
           
            rewards += reward.item()
            energy += action*action
           
            # early stopping env: such as rmdp, pomdp, generalize tasks. term ignores timeout
            term = (
                False
                if "TimeLimit.truncated" in info or steps >= max_trajectory_len
                else done_rollout
            )

            if train_mode:
                # append tensors to temporary storage
                obs_list.append(obs)  # (1, dim)
                act_list.append(action)  # (1, dim)
                rew_list.append(reward)  # (1, dim)
                term_list.append(term)  # bool
                next_obs_list.append(next_obs)  # (1, dim)
            steps += 1
            # set: obs <- next_obs
            obs = next_obs.clone()
            prev_reward= reward.clone()
            prev_action= action.clone()
        if train_mode:
            # add collected sequence to buffer
            policy_storage.add_episode(
                observations=ptu.get_numpy(torch.cat(obs_list, dim=0)),  # (L, dim)
                actions=ptu.get_numpy(torch.cat(act_list, dim=0)),  # (L, dim)
                rewards=ptu.get_numpy(torch.cat(rew_list, dim=0)),  # (L, dim)
                terminals=np.array(term_list).reshape(-1, 1),  # (L, 1)
                next_observations=ptu.get_numpy(
                    torch.cat(next_obs_list, dim=0)
                ),  # (L, dim)
            )
        print(
            "Mode:",
            "Train" if train_mode else "Test",
            "env_steps",
            steps,
            "total rewards",
            rewards,
            "total energy",
            energy,
        )
        total_steps += steps
        total_rewards += rewards
        trewards.append(rewards)
    if train_mode:
        return total_steps
    else:
        return total_rewards / num_rollouts, np.std(trewards)


def update(num_updates, factor):
    rl_losses_agg = {}
    # print(num_updates)
    for update in tqdm(range(num_updates), leave=True):
        # sample random RL batch: in transitions
        batch = ptu.np_to_pytorch_batch(policy_storage.random_episodes(batch_size))
        # RL update
        
        rl_losses = agent.update(batch, factor)

        for k, v in rl_losses.items():
            if update == 0:  # first iterate - create list
                rl_losses_agg[k] = [v]
            else:  # append values
                rl_losses_agg[k].append(v)
    # statistics
    for k in rl_losses_agg:
        rl_losses_agg[k] = np.mean(rl_losses_agg[k])
    return rl_losses_agg

## Train and Evaluate the agent: only costs < 20 min

In [None]:
policy_storage = SeqReplayBuffer(
    max_replay_buffer_size=buffer_size,
    observation_dim=obs_dim,
    action_dim=act_dim,
    sampled_seq_len=sampled_seq_len,
    sample_weight_baseline=0.0,
)

env_steps = collect_rollouts(
    num_rollouts=num_init_rollouts_pool, random_actions=False, train_mode=True
)
_n_env_steps_total += env_steps

# evaluation parameters
last_eval_num_iters = 10
log_interval = 5
eval_num_rollouts = 10
learning_curve = {
    "x": [],
    "y": [],
    "z": [],
}
epoch=0
lambda_pat = 0.65

while _n_env_steps_total < n_env_steps_total:

    env_steps = collect_rollouts(num_rollouts=num_rollouts_per_iter, train_mode=True)
    _n_env_steps_total += env_steps

    #train_stats = update(int(num_updates_per_iter * env_steps))
    factor= lambda_pat **(epoch )
    #train_stats = update(int(num_updates_per_iter * env_steps))
    train_stats = update(25, lr)
    
    epoch += 1
    current_num_iters = _n_env_steps_total // (
        num_rollouts_per_iter * max_trajectory_len
    )
    if (
        current_num_iters != last_eval_num_iters
        and current_num_iters % log_interval == 0
    ):
        last_eval_num_iters = current_num_iters
        average_returns, std_returns = collect_rollouts(
            num_rollouts=eval_num_rollouts,
            train_mode=False,
            random_actions=False,
            deterministic=True,
        )
        learning_curve["x"].append(_n_env_steps_total)
        learning_curve["y"].append(average_returns)
        learning_curve["z"].append(std_returns)
        print(_n_env_steps_total, average_returns)

buffer RAM usage: 0.02 GB
[0.39930123]
angle
tensor(0.1133)
angle
tensor(0.1140)
angle
tensor(0.1146)
angle
tensor(0.1147)
angle
tensor(0.1266)
angle
tensor(0.1272)
angle
tensor(0.1277)
angle
tensor(0.1280)
angle
tensor(0.1258)
angle
tensor(0.1268)
angle
tensor(0.1275)
angle
tensor(0.1278)
angle
tensor(0.1339)
angle
tensor(0.1341)
angle
tensor(0.1340)
angle
tensor(0.1334)
angle
tensor(0.1438)
angle
tensor(0.1427)
angle
tensor(0.1407)
angle
tensor(0.1380)
angle
tensor(0.1525)
angle
tensor(0.1491)
angle
tensor(0.1456)
angle
tensor(0.1414)
angle
tensor(0.1563)
angle
tensor(0.1510)
angle
tensor(0.1460)
angle
tensor(0.1401)
angle
tensor(0.1536)
angle
tensor(0.1470)
angle
tensor(0.1410)
angle
tensor(0.1341)
angle
tensor(0.1472)
angle
tensor(0.1398)
angle
tensor(0.1341)
angle
tensor(0.1269)
angle
tensor(0.1380)
angle
tensor(0.1314)
angle
tensor(0.1264)
angle
tensor(0.1209)
angle
tensor(0.1297)
angle
tensor(0.1247)
angle
tensor(0.1201)
angle
tensor(0.1159)
angle
tensor(0.1229)
angle
tensor(0.1



tensor(0.1363)
angle
tensor(0.1347)
angle
tensor(0.1333)
angle
tensor(0.1309)
angle
tensor(0.1191)
angle
tensor(0.1166)
angle
tensor(0.1144)
angle
tensor(0.1122)
angle
tensor(0.1027)
angle
tensor(0.1005)
angle
tensor(0.0987)
angle
tensor(0.0968)
angle
tensor(0.0961)
angle
tensor(0.0946)
angle
tensor(0.0935)
angle
tensor(0.0921)
angle
tensor(0.0930)
angle
tensor(0.0918)
angle
tensor(0.0906)
angle
tensor(0.0893)
angle
tensor(0.0802)
angle
tensor(0.0788)
angle
tensor(0.0774)
angle
tensor(0.0758)
angle
tensor(0.0801)
angle
tensor(0.0789)
angle
tensor(0.0777)
angle
tensor(0.0763)
angle
tensor(0.0924)
angle
tensor(0.0907)
angle
tensor(0.0891)
angle
tensor(0.0872)
angle
tensor(0.0855)
angle
tensor(0.0835)
angle
tensor(0.0817)
angle
tensor(0.0796)
angle
tensor(0.0857)
angle
tensor(0.0835)
angle
tensor(0.0815)
angle
tensor(0.0793)
angle
tensor(0.0890)
angle
tensor(0.0868)
angle
tensor(0.0846)
angle
tensor(0.0825)
angle
tensor(0.0967)
angle
tensor(0.0942)
angle
tensor(0.0916)
angle
tensor(0.0889

angle
tensor(0.0952)
angle
tensor(0.0933)
angle
tensor(0.0913)
angle
tensor(0.0890)
angle
tensor(0.0832)
angle
tensor(0.0814)
angle
tensor(0.0797)
angle
tensor(0.0779)
angle
tensor(0.0718)
angle
tensor(0.0706)
angle
tensor(0.0693)
angle
tensor(0.0680)
angle
tensor(0.0807)
angle
tensor(0.0790)
angle
tensor(0.0771)
angle
tensor(0.0752)
angle
tensor(0.0958)
angle
tensor(0.0932)
angle
tensor(0.0908)
angle
tensor(0.0881)
angle
tensor(0.0984)
angle
tensor(0.0963)
angle
tensor(0.0942)
angle
tensor(0.0920)
angle
tensor(0.1054)
angle
tensor(0.1029)
angle
tensor(0.1003)
angle
tensor(0.0976)
angle
tensor(0.1114)
angle
tensor(0.1079)
angle
tensor(0.1047)
angle
tensor(0.1010)
angle
tensor(0.1126)
angle
tensor(0.1084)
angle
tensor(0.1047)
angle
tensor(0.1005)
angle
tensor(0.1132)
angle
tensor(0.1085)
angle
tensor(0.1043)
angle
tensor(0.0996)
angle
tensor(0.1125)
angle
tensor(0.1076)
angle
tensor(0.1033)
angle
tensor(0.0987)
angle
tensor(0.1120)
angle
tensor(0.1075)
angle
tensor(0.1038)
angle
tensor(

tensor(0.0563)
angle
tensor(0.0549)
angle
tensor(0.0642)
angle
tensor(0.0628)
angle
tensor(0.0614)
angle
tensor(0.0599)
angle
tensor(0.0748)
angle
tensor(0.0727)
angle
tensor(0.0708)
angle
tensor(0.0685)
angle
tensor(0.0915)
angle
tensor(0.0886)
angle
tensor(0.0860)
angle
tensor(0.0830)
angle
tensor(0.0897)
angle
tensor(0.0871)
angle
tensor(0.0846)
angle
tensor(0.0819)
angle
tensor(0.0926)
angle
tensor(0.0895)
angle
tensor(0.0865)
angle
tensor(0.0833)
angle
tensor(0.0992)
angle
tensor(0.0954)
angle
tensor(0.0919)
angle
tensor(0.0880)
angle
tensor(0.1002)
angle
tensor(0.0960)
angle
tensor(0.0923)
angle
tensor(0.0881)
angle
tensor(0.1024)
angle
tensor(0.0979)
angle
tensor(0.0937)
angle
tensor(0.0890)
angle
tensor(0.1034)
angle
tensor(0.0988)
angle
tensor(0.0951)
angle
tensor(0.0911)
angle
tensor(0.1065)
angle
tensor(0.1024)
angle
tensor(0.0988)
angle
tensor(0.0952)
angle
tensor(0.1115)
angle
tensor(0.1076)
angle
tensor(0.1041)
angle
tensor(0.1007)
angle
tensor(0.1182)
angle
tensor(0.1143

angle
tensor(0.1599)
angle
tensor(0.1628)
angle
tensor(0.1664)
angle
tensor(0.1695)
angle
tensor(0.1715)
angle
tensor(0.1740)
angle
tensor(0.1761)
angle
tensor(0.1768)
angle
tensor(0.1698)
angle
tensor(0.1695)
angle
tensor(0.1688)
angle
tensor(0.1672)
Mode: Train env_steps 200 total rewards -965.5735505819321 total energy tensor([[0.0006]])
[-0.30620307]
angle
tensor(0.1322)
angle
tensor(0.1335)
angle
tensor(0.1347)
angle
tensor(0.1351)
angle
tensor(0.1413)
angle
tensor(0.1426)
angle
tensor(0.1439)
angle
tensor(0.1446)
angle
tensor(0.1396)
angle
tensor(0.1407)
angle
tensor(0.1417)
angle
tensor(0.1419)
angle
tensor(0.1504)
angle
tensor(0.1505)
angle
tensor(0.1497)
angle
tensor(0.1481)
angle
tensor(0.1593)
angle
tensor(0.1564)
angle
tensor(0.1531)
angle
tensor(0.1492)
angle
tensor(0.1613)
angle
tensor(0.1564)
angle
tensor(0.1515)
angle
tensor(0.1459)
angle
tensor(0.1565)
angle
tensor(0.1504)
angle
tensor(0.1449)
angle
tensor(0.1384)
angle
tensor(0.1480)
angle
tensor(0.1412)
angle
tensor(

tensor(0.1064)
angle
tensor(0.1016)
angle
tensor(0.0965)
angle
tensor(0.1102)
angle
tensor(0.1058)
angle
tensor(0.1021)
angle
tensor(0.0981)
angle
tensor(0.0498)
angle
tensor(0.0490)
angle
tensor(0.0482)
angle
tensor(0.0473)
angle
tensor(0.1310)
angle
tensor(0.1323)
angle
tensor(0.1341)
angle
tensor(0.1355)
angle
tensor(0.1408)
angle
tensor(0.1430)
angle
tensor(0.1460)
angle
tensor(0.1484)
angle
tensor(0.1499)
angle
tensor(0.1525)
angle
tensor(0.1552)
angle
tensor(0.1568)
angle
tensor(0.1522)
angle
tensor(0.1537)
angle
tensor(0.1550)
angle
tensor(0.1553)
angle
tensor(0.1462)
angle
tensor(0.1457)
angle
tensor(0.1443)
angle
tensor(0.1428)
angle
tensor(0.1317)
angle
tensor(0.1300)
angle
tensor(0.1280)
angle
tensor(0.1256)
angle
tensor(0.1157)
angle
tensor(0.1134)
angle
tensor(0.1111)
angle
tensor(0.1085)
angle
tensor(0.1108)
angle
tensor(0.1082)
angle
tensor(0.1058)
angle
tensor(0.1027)
angle
tensor(0.0945)
angle
tensor(0.0918)
angle
tensor(0.0893)
angle
tensor(0.0864)
angle
tensor(0.0818

angle
tensor(0.1079)
angle
tensor(0.1052)
angle
tensor(0.1026)
angle
tensor(0.0998)
angle
tensor(0.1185)
angle
tensor(0.1152)
angle
tensor(0.1122)
angle
tensor(0.1089)
angle
tensor(0.1032)
angle
tensor(0.1002)
angle
tensor(0.0972)
angle
tensor(0.0940)
angle
tensor(0.0998)
angle
tensor(0.0964)
angle
tensor(0.0932)
angle
tensor(0.0897)
angle
tensor(0.0976)
angle
tensor(0.0935)
angle
tensor(0.0899)
angle
tensor(0.0858)
angle
tensor(0.0947)
angle
tensor(0.0907)
angle
tensor(0.0872)
angle
tensor(0.0835)
angle
tensor(0.0920)
angle
tensor(0.0878)
angle
tensor(0.0841)
angle
tensor(0.0802)
angle
tensor(0.0889)
angle
tensor(0.0854)
angle
tensor(0.0826)
angle
tensor(0.0796)
angle
tensor(0.0877)
angle
tensor(0.0853)
angle
tensor(0.0835)
angle
tensor(0.0816)
angle
tensor(0.0584)
angle
tensor(0.0577)
angle
tensor(0.0570)
angle
tensor(0.0564)
angle
tensor(0.1188)
angle
tensor(0.1195)
angle
tensor(0.1200)
angle
tensor(0.1202)
angle
tensor(0.1254)
angle
tensor(0.1256)
angle
tensor(0.1255)
angle
tensor(

angle
tensor(0.1353)
angle
tensor(0.1351)
angle
tensor(0.1345)
angle
tensor(0.1333)
angle
tensor(0.1291)
angle
tensor(0.1276)
angle
tensor(0.1261)
angle
tensor(0.1244)
angle
tensor(0.1198)
angle
tensor(0.1179)
angle
tensor(0.1158)
angle
tensor(0.1134)
angle
tensor(0.1240)
angle
tensor(0.1208)
angle
tensor(0.1178)
angle
tensor(0.1144)
angle
tensor(0.0953)
angle
tensor(0.0930)
angle
tensor(0.0907)
angle
tensor(0.0884)
angle
tensor(0.1040)
angle
tensor(0.1017)
angle
tensor(0.0995)
angle
tensor(0.0973)
angle
tensor(0.1276)
angle
tensor(0.1246)
angle
tensor(0.1216)
angle
tensor(0.1177)
angle
tensor(0.1250)
angle
tensor(0.1217)
angle
tensor(0.1183)
angle
tensor(0.1144)
angle
tensor(0.1249)
angle
tensor(0.1208)
angle
tensor(0.1168)
angle
tensor(0.1121)
angle
tensor(0.1173)
angle
tensor(0.1128)
angle
tensor(0.1086)
angle
tensor(0.1037)
angle
tensor(0.1085)
angle
tensor(0.1041)
angle
tensor(0.1004)
angle
tensor(0.0963)
angle
tensor(0.1021)
angle
tensor(0.0985)
angle
tensor(0.0953)
angle
tensor(

angle
tensor(0.0905)
angle
tensor(0.0901)
angle
tensor(0.0896)
angle
tensor(0.0940)
angle
tensor(0.0934)
angle
tensor(0.0929)
angle
tensor(0.0923)
angle
tensor(0.0959)
angle
tensor(0.0953)
angle
tensor(0.0947)
angle
tensor(0.0940)
angle
tensor(0.0961)
angle
tensor(0.0955)
angle
tensor(0.0950)
angle
tensor(0.0942)
angle
tensor(0.0963)
angle
tensor(0.0954)
angle
tensor(0.0947)
angle
tensor(0.0938)
angle
tensor(0.1080)
angle
tensor(0.1064)
angle
tensor(0.1048)
angle
tensor(0.1028)
angle
tensor(0.0936)
angle
tensor(0.0919)
angle
tensor(0.0903)
angle
tensor(0.0884)
angle
tensor(0.0881)
angle
tensor(0.0869)
angle
tensor(0.0857)
angle
tensor(0.0842)
angle
tensor(0.1092)
angle
tensor(0.1066)
angle
tensor(0.1043)
angle
tensor(0.1018)
angle
tensor(0.1069)
angle
tensor(0.1042)
angle
tensor(0.1014)
angle
tensor(0.0982)
angle
tensor(0.0951)
angle
tensor(0.0925)
angle
tensor(0.0901)
angle
tensor(0.0875)
angle
tensor(0.0897)
angle
tensor(0.0872)
angle
tensor(0.0848)
angle
tensor(0.0821)
angle
tensor(

angle
tensor(0.0809)
angle
tensor(0.0787)
angle
tensor(0.0764)
angle
tensor(0.0799)
angle
tensor(0.0775)
angle
tensor(0.0752)
angle
tensor(0.0726)
angle
tensor(0.0769)
angle
tensor(0.0743)
angle
tensor(0.0719)
angle
tensor(0.0691)
angle
tensor(0.0744)
angle
tensor(0.0720)
angle
tensor(0.0700)
angle
tensor(0.0678)
angle
tensor(0.0744)
angle
tensor(0.0726)
angle
tensor(0.0711)
angle
tensor(0.0692)
angle
tensor(0.0401)
angle
tensor(0.0391)
angle
tensor(0.0381)
angle
tensor(0.0370)
angle
tensor(0.1034)
angle
tensor(0.1027)
angle
tensor(0.1020)
angle
tensor(0.1009)
angle
tensor(0.1054)
angle
tensor(0.1050)
angle
tensor(0.1045)
angle
tensor(0.1039)
angle
tensor(0.1074)
angle
tensor(0.1069)
angle
tensor(0.1063)
angle
tensor(0.1054)
angle
tensor(0.1070)
angle
tensor(0.1062)
angle
tensor(0.1054)
angle
tensor(0.1043)
angle
tensor(0.1039)
angle
tensor(0.1027)
angle
tensor(0.1012)
angle
tensor(0.0996)
angle
tensor(0.1066)
angle
tensor(0.1051)
angle
tensor(0.1037)
angle
tensor(0.1022)
angle
tensor(

angle
tensor(0.0972)
angle
tensor(0.0929)
angle
tensor(0.0892)
angle
tensor(0.0853)
angle
tensor(0.1045)
angle
tensor(0.1000)
angle
tensor(0.0960)
angle
tensor(0.0918)
angle
tensor(0.1119)
angle
tensor(0.1070)
angle
tensor(0.1027)
angle
tensor(0.0982)
angle
tensor(0.1179)
angle
tensor(0.1129)
angle
tensor(0.1086)
angle
tensor(0.1037)
angle
tensor(0.1223)
angle
tensor(0.1178)
angle
tensor(0.1142)
angle
tensor(0.1096)
angle
tensor(0.1270)
angle
tensor(0.1236)
angle
tensor(0.1208)
angle
tensor(0.1171)
angle
tensor(0.1323)
angle
tensor(0.1340)
angle
tensor(0.1353)
angle
tensor(0.1365)
angle
tensor(0.1600)
angle
tensor(0.1630)
angle
tensor(0.1660)
angle
tensor(0.1685)
angle
tensor(0.1717)
angle
tensor(0.1748)
angle
tensor(0.1782)
angle
tensor(0.1807)
angle
tensor(0.1753)
angle
tensor(0.1781)
angle
tensor(0.1810)
angle
tensor(0.1829)
angle
tensor(0.1677)
angle
tensor(0.1689)
angle
tensor(0.1702)
angle
tensor(0.1705)
angle
tensor(0.1492)
angle
tensor(0.1489)
angle
tensor(0.1480)
angle
tensor(

angle
tensor(0.1562)
angle
tensor(0.1573)
angle
tensor(0.1588)
angle
tensor(0.1595)
angle
tensor(0.1461)
angle
tensor(0.1463)
angle
tensor(0.1462)
angle
tensor(0.1459)
angle
tensor(0.1291)
angle
tensor(0.1288)
angle
tensor(0.1284)
angle
tensor(0.1279)
angle
tensor(0.1110)
angle
tensor(0.1103)
angle
tensor(0.1094)
angle
tensor(0.1083)
angle
tensor(0.0932)
angle
tensor(0.0922)
angle
tensor(0.0912)
angle
tensor(0.0901)
angle
tensor(0.0780)
angle
tensor(0.0770)
angle
tensor(0.0758)
angle
tensor(0.0743)
angle
tensor(0.0661)
angle
tensor(0.0647)
angle
tensor(0.0635)
angle
tensor(0.0622)
angle
tensor(0.0572)
angle
tensor(0.0562)
angle
tensor(0.0554)
angle
tensor(0.0546)
angle
tensor(0.0494)
angle
tensor(0.0488)
angle
tensor(0.0483)
angle
tensor(0.0477)
angle
tensor(0.0498)
angle
tensor(0.0492)
angle
tensor(0.0486)
angle
tensor(0.0480)
angle
tensor(0.0437)
angle
tensor(0.0431)
angle
tensor(0.0426)
angle
tensor(0.0420)
angle
tensor(0.0396)
angle
tensor(0.0390)
angle
tensor(0.0384)
angle
tensor(

angle
tensor(0.1205)
angle
tensor(0.1162)
angle
tensor(0.1129)
angle
tensor(0.1088)
angle
tensor(0.1263)
angle
tensor(0.1236)
angle
tensor(0.1211)
angle
tensor(0.1177)
angle
tensor(0.1188)
angle
tensor(0.1196)
angle
tensor(0.1203)
angle
tensor(0.1209)
angle
tensor(0.1572)
angle
tensor(0.1598)
angle
tensor(0.1626)
angle
tensor(0.1650)
angle
tensor(0.1684)
angle
tensor(0.1716)
angle
tensor(0.1751)
angle
tensor(0.1776)
angle
tensor(0.1727)
angle
tensor(0.1761)
angle
tensor(0.1795)
angle
tensor(0.1815)
angle
tensor(0.1670)
angle
tensor(0.1685)
angle
tensor(0.1700)
angle
tensor(0.1705)
angle
tensor(0.1494)
angle
tensor(0.1495)
angle
tensor(0.1491)
angle
tensor(0.1479)
angle
tensor(0.1260)
angle
tensor(0.1245)
angle
tensor(0.1229)
angle
tensor(0.1208)
angle
tensor(0.1015)
angle
tensor(0.0998)
angle
tensor(0.0981)
angle
tensor(0.0962)
angle
tensor(0.0804)
angle
tensor(0.0790)
angle
tensor(0.0777)
angle
tensor(0.0763)
angle
tensor(0.0656)
angle
tensor(0.0645)
angle
tensor(0.0633)
angle
tensor(

angle
tensor(0.0884)
angle
tensor(0.0858)
angle
tensor(0.0834)
angle
tensor(0.0807)
angle
tensor(0.0702)
angle
tensor(0.0685)
angle
tensor(0.0668)
angle
tensor(0.0649)
angle
tensor(0.0770)
angle
tensor(0.0752)
angle
tensor(0.0734)
angle
tensor(0.0715)
angle
tensor(0.0895)
angle
tensor(0.0868)
angle
tensor(0.0843)
angle
tensor(0.0816)
angle
tensor(0.0985)
angle
tensor(0.0956)
angle
tensor(0.0927)
angle
tensor(0.0895)
angle
tensor(0.0999)
angle
tensor(0.0967)
angle
tensor(0.0938)
angle
tensor(0.0905)
angle
tensor(0.1065)
angle
tensor(0.1028)
angle
tensor(0.0992)
angle
tensor(0.0952)
angle
tensor(0.1065)
angle
tensor(0.1022)
angle
tensor(0.0983)
angle
tensor(0.0938)
angle
tensor(0.1069)
angle
tensor(0.1021)
angle
tensor(0.0979)
angle
tensor(0.0930)
angle
tensor(0.1061)
angle
tensor(0.1012)
angle
tensor(0.0970)
angle
tensor(0.0924)
angle
tensor(0.1065)
angle
tensor(0.1021)
angle
tensor(0.0983)
angle
tensor(0.0941)
angle
tensor(0.1096)
angle
tensor(0.1055)
angle
tensor(0.1021)
angle
tensor(

angle
tensor(0.0940)
angle
tensor(0.0922)
angle
tensor(0.0905)
angle
tensor(0.0927)
angle
tensor(0.0916)
angle
tensor(0.0905)
angle
tensor(0.0893)
angle
tensor(0.1114)
angle
tensor(0.1093)
angle
tensor(0.1071)
angle
tensor(0.1047)
angle
tensor(0.1118)
angle
tensor(0.1092)
angle
tensor(0.1066)
angle
tensor(0.1038)
angle
tensor(0.1005)
angle
tensor(0.0981)
angle
tensor(0.0958)
angle
tensor(0.0934)
angle
tensor(0.0929)
angle
tensor(0.0907)
angle
tensor(0.0886)
angle
tensor(0.0865)
angle
tensor(0.0836)
angle
tensor(0.0821)
angle
tensor(0.0804)
angle
tensor(0.0787)
angle
tensor(0.0780)
angle
tensor(0.0766)
angle
tensor(0.0753)
angle
tensor(0.0738)
angle
tensor(0.0766)
angle
tensor(0.0752)
angle
tensor(0.0739)
angle
tensor(0.0725)
angle
tensor(0.0786)
angle
tensor(0.0772)
angle
tensor(0.0758)
angle
tensor(0.0743)
angle
tensor(0.0507)
angle
tensor(0.0499)
angle
tensor(0.0491)
angle
tensor(0.0484)
angle
tensor(0.0840)
angle
tensor(0.0840)
angle
tensor(0.0841)
angle
tensor(0.0840)
angle
tensor(

angle
tensor(0.0919)
angle
tensor(0.1041)
angle
tensor(0.1018)
angle
tensor(0.0998)
angle
tensor(0.0975)
angle
tensor(0.0829)
angle
tensor(0.0813)
angle
tensor(0.0798)
angle
tensor(0.0781)
angle
tensor(0.0820)
angle
tensor(0.0804)
angle
tensor(0.0789)
angle
tensor(0.0773)
angle
tensor(0.0953)
angle
tensor(0.0932)
angle
tensor(0.0913)
angle
tensor(0.0893)
angle
tensor(0.1009)
angle
tensor(0.0985)
angle
tensor(0.0962)
angle
tensor(0.0936)
angle
tensor(0.0926)
angle
tensor(0.0903)
angle
tensor(0.0880)
angle
tensor(0.0854)
angle
tensor(0.0886)
angle
tensor(0.0859)
angle
tensor(0.0833)
angle
tensor(0.0804)
angle
tensor(0.0803)
angle
tensor(0.0777)
angle
tensor(0.0754)
angle
tensor(0.0727)
angle
tensor(0.0733)
angle
tensor(0.0710)
angle
tensor(0.0688)
angle
tensor(0.0664)
angle
tensor(0.0704)
angle
tensor(0.0683)
angle
tensor(0.0664)
angle
tensor(0.0644)
angle
tensor(0.0726)
angle
tensor(0.0710)
angle
tensor(0.0694)
angle
tensor(0.0675)
angle
tensor(0.0390)
angle
tensor(0.0383)
angle
tensor(

angle
tensor(0.1226)
angle
tensor(0.1220)
angle
tensor(0.1211)
angle
tensor(0.1195)
angle
tensor(0.1134)
angle
tensor(0.1116)
angle
tensor(0.1097)
angle
tensor(0.1076)
angle
tensor(0.1134)
angle
tensor(0.1111)
angle
tensor(0.1087)
angle
tensor(0.1058)
angle
tensor(0.0950)
angle
tensor(0.0926)
angle
tensor(0.0904)
angle
tensor(0.0879)
angle
tensor(0.0898)
angle
tensor(0.0880)
angle
tensor(0.0862)
angle
tensor(0.0842)
angle
tensor(0.1033)
angle
tensor(0.1014)
angle
tensor(0.0994)
angle
tensor(0.0973)
angle
tensor(0.0963)
angle
tensor(0.0937)
angle
tensor(0.0912)
angle
tensor(0.0883)
angle
tensor(0.0942)
angle
tensor(0.0912)
angle
tensor(0.0882)
angle
tensor(0.0849)
angle
tensor(0.0955)
angle
tensor(0.0919)
angle
tensor(0.0885)
angle
tensor(0.0847)
angle
tensor(0.0951)
angle
tensor(0.0912)
angle
tensor(0.0877)
angle
tensor(0.0840)
angle
tensor(0.0950)
angle
tensor(0.0908)
angle
tensor(0.0870)
angle
tensor(0.0828)
angle
tensor(0.0945)
angle
tensor(0.0902)
angle
tensor(0.0866)
angle
tensor(

tensor(0.1227)
angle
tensor(0.1194)
angle
tensor(0.1159)
angle
tensor(0.1169)
angle
tensor(0.1196)
angle
tensor(0.1226)
angle
tensor(0.1251)
angle
tensor(0.1390)
angle
tensor(0.1427)
angle
tensor(0.1463)
angle
tensor(0.1492)
angle
tensor(0.1579)
angle
tensor(0.1612)
angle
tensor(0.1649)
angle
tensor(0.1669)
angle
tensor(0.1702)
angle
tensor(0.1722)
angle
tensor(0.1736)
angle
tensor(0.1738)
angle
tensor(0.1699)
angle
tensor(0.1694)
angle
tensor(0.1681)
angle
tensor(0.1660)
angle
tensor(0.1575)
angle
tensor(0.1556)
angle
tensor(0.1538)
angle
tensor(0.1515)
angle
tensor(0.1416)
angle
tensor(0.1391)
angle
tensor(0.1367)
angle
tensor(0.1341)
angle
tensor(0.1224)
angle
tensor(0.1202)
angle
tensor(0.1180)
angle
tensor(0.1155)
angle
tensor(0.1098)
angle
tensor(0.1079)
angle
tensor(0.1063)
angle
tensor(0.1045)
angle
tensor(0.1023)
angle
tensor(0.1011)
angle
tensor(0.1000)
angle
tensor(0.0987)
angle
tensor(0.0870)
angle
tensor(0.0854)
angle
tensor(0.0839)
angle
tensor(0.0822)
angle
tensor(0.0954

  0%|          | 0/25 [00:00<?, ?it/s]

angle
tensor(0.1953)
angle
tensor(0.1927)
angle
tensor(0.1904)
angle
tensor(0.1876)
angle
tensor(0.1171)
angle
tensor(0.1169)
angle
tensor(0.1165)
angle
tensor(0.1158)
angle
tensor(0.1166)
angle
tensor(0.1161)
angle
tensor(0.1155)
angle
tensor(0.1145)
angle
tensor(0.1211)
angle
tensor(0.1200)
angle
tensor(0.1187)
angle
tensor(0.1172)
angle
tensor(0.1196)
angle
tensor(0.1178)
angle
tensor(0.1157)
angle
tensor(0.1135)
angle
tensor(0.1173)
angle
tensor(0.1149)
angle
tensor(0.1124)
angle
tensor(0.1099)
angle
tensor(0.1172)
angle
tensor(0.1144)
angle
tensor(0.1117)
angle
tensor(0.1089)
angle
tensor(0.1163)
angle
tensor(0.1133)
angle
tensor(0.1104)
angle
tensor(0.1074)
angle
tensor(0.1127)
angle
tensor(0.1099)
angle
tensor(0.1076)
angle
tensor(0.1051)
angle
tensor(0.1205)
angle
tensor(0.1181)
angle
tensor(0.1159)
angle
tensor(0.1135)
angle
tensor(0.1197)
angle
tensor(0.1174)
angle
tensor(0.1153)
angle
tensor(0.1131)
angle
tensor(0.1204)
angle
tensor(0.1187)
angle
tensor(0.1169)
angle
tensor(

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if v.dtype == np.bool:


angle
tensor(0.0906)
angle
tensor(0.0888)
angle
tensor(0.0868)
angle
tensor(0.0895)
angle
tensor(0.0873)
angle
tensor(0.0853)
angle
tensor(0.0831)
angle
tensor(0.0835)
angle
tensor(0.0815)
angle
tensor(0.0797)
angle
tensor(0.0778)
angle
tensor(0.0845)
angle
tensor(0.0827)
angle
tensor(0.0811)
angle
tensor(0.0794)
angle
tensor(0.0812)
angle
tensor(0.0797)
angle
tensor(0.0783)
angle
tensor(0.0767)
angle
tensor(0.0989)
angle
tensor(0.0973)
angle
tensor(0.0957)
angle
tensor(0.0939)
angle
tensor(0.1023)
angle
tensor(0.1003)
angle
tensor(0.0983)
angle
tensor(0.0962)
angle
tensor(0.1019)
angle
tensor(0.0998)
angle
tensor(0.0978)
angle
tensor(0.0956)
angle
tensor(0.1014)
angle
tensor(0.0993)
angle
tensor(0.0974)
angle
tensor(0.0954)
angle
tensor(0.0997)
angle
tensor(0.0979)
angle
tensor(0.0963)
angle
tensor(0.0945)
angle
tensor(0.0975)
angle
tensor(0.0959)
angle
tensor(0.0945)
angle
tensor(0.0929)
angle
tensor(0.1127)
angle
tensor(0.1105)
angle
tensor(0.1084)
angle
tensor(0.1062)
angle
tensor(

## Draw the learning curve

In [None]:
import matplotlib.pyplot as plt
print(learning_curve)
plt.plot(learning_curve["x"], learning_curve["y"])
plt.fill_between(np.array(learning_curve["x"]), np.array(learning_curve["y"])-np.array(learning_curve["z"]), np.array(learning_curve["y"])+np.array(learning_curve["z"]))
plt.xlabel("env steps")
plt.ylabel("return")
plt.show()

In [None]:
leaning_curve_ncde_64_rk4 = learning_curve

In [None]:
learning_curve


In [None]:
timess=torch.linspace(0, 65-1, 65)

In [None]:
file1 = open('config.txt', 'w')
file1.write(str(conf))

file1.close()
file2 = open('results.txt', 'w')
file2.write(str(learning_curve))
file2.close()