In [2]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import math
from numpy.linalg import inv
from numpy import cross, matmul, dot
from numpy import linalg as LA
#import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

import os
import gym
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

from keras.utils.vis_utils import plot_model

In [57]:
class quadsim(Env):
    def __init__(self):
        #self.action_space = Box(low=np.array([[300],[300],[300],[300]],dtype=np.float32), high=np.array([[700],[700],[700],[700]],dtype=np.float32))
        self.action_space = Box(low=np.array([0,0,0,0],dtype=np.float32), high=np.array([1,1,1,1],dtype=np.float32))
        self.observation_space = Box(low=np.array([-20.,-20.,-20.,-10.,-10.,-10.,-10.,-10.,-10.,-10.,-10.,-10.0], dtype=np.float32)
                                     ,high=np.array([+20.,+20.,+20.,10.,10.,10.,10.,10.,10.,10.,10.,10.], dtype=np.float32))    
                                                  
        #self.state = np.array([[[1],[1],[0]],[[0],[0],[0]],[[0],[0],[0]],[[0],[0],[0]]] ,dtype=np.float32)
        
        self.target = np.array([1,1,1],dtype=np.float32)
            # Physical constants
        self.g = 10
        self.m = 0.5
        self.L = 0.25
        self.k = 3e-6
        self.b = 0.0245
        self.I = np.diag([5e-2, 5e-2, 10e-2])
        self.kd = 0.25
        self.dt = 0.005
        self.start = 0
        self.end = 10
        self.steps = (self.end - self.start)/self.dt
        self.nest_step = 0
        self.x = np.array([[1],[1],[1]] ,dtype=np.float32)
        self.xdot = np.array([[0],[0],[0]] ,dtype=np.float32)
        self.theta = np.array([[0],[0],[0]] ,dtype=np.float32)
        self.thetadot = np.array([[0],[0],[0]] ,dtype=np.float32)
        self.omega = np.array([[0],[0],[0]] ,dtype=np.float32)
        self.lastdis = 1.732
        self.last_x = np.array([[0],[0],[0]] ,dtype=np.float32)
        self.score = 0
        
    def step(self, action1):
        #self.target = target
        #print('action1',action1)
        action = np.array([[300 + 300 *action1[0]],[300 + 300 *action1[1]],[300 + 300 *action1[2]],[300 + 300 *action1[3]]])
        #print('action1',action1)
        #print('action',action)

        #print(self.nest_step,self.steps)

        self.nest_step += 1
        if (self.nest_step == self.steps):
            done = True
            print(self.score)

        else:
            done = False

        

        info = {}
        
        #print(self.nest_step,self.steps)
        # Compute forces, torques, and accelerations.

        #print(self.state, 'ss',x)

        self.omega = self.thetadot2omega(self.thetadot, self.theta)
        #print("self.omega",self.omega)
        #print(action)
        a = self.acceleration(action, self.theta, self.xdot, self.m, self.g, self.k, self.kd)
        #print("a",a)
        omegadot = self.angular_acceleration(action, self.omega, self.I, self.L, self.b, self.k)
        #print("omegadot:",omegadot)
        # Advance system state.
        #print(omegadot)
        self.omega = self.omega + dot(self.dt , omegadot)
        #print("omega",self.omega)
        self.thetadot = self.omega2thetadot(self.omega, self.theta)
        #print("thetadot: ",thetadot)
        self.thetadot[0,0] = min(self.thetadot[0,0],10)
        self.thetadot[1,0] = min(self.thetadot[1,0],10)
        self.thetadot[2,0] = min(self.thetadot[2,0],10)

        self.thetadot[0,0] = max(self.thetadot[0,0],-10)
        self.thetadot[1,0] = max(self.thetadot[1,0],-10)
        self.thetadot[2,0] = max(self.thetadot[2,0],-10)
        #print(self.thetadot)
        #print("theta: ",self.theta,self.thetadot)
        
        self.theta = self.theta + dot(self.dt , self.thetadot)
        self.xdot = self.xdot + dot(self.dt , a)
        
        #print("xdot:",xdot,"a:",a)
        self.x = self.x + dot(self.dt , self.xdot)
        
        #print(self.theta)
#         self.state[0] = self.x
#         self.state[1] = self.xdot
#         self.state[2] = self.theta
#         self.state[3] = self.thetadot
        #print(self.state,x)
        #print(obs)
        obs = np.array([self.x[0,0],self.x[1,0],self.x[2,0],self.xdot[0,0],self.xdot[1,0],self.xdot[2,0],
                        self.theta[0,0],self.theta[1,0],self.theta[2,0],self.thetadot[0,0],self.thetadot[1,0],
                        self.thetadot[2,0]],dtype=np.float32)
        #print(obs)

        reward = self.reward(self.target)
        #print(obs.type)
        self.score += reward
        return obs, reward, done, info
    
    
    def render(self):
        pass
    
    def reset(self):
        self.score = 0
        self.target = np.array([1,1,1],dtype=np.float32)
        self.nest_step = 0
        #self.state = np.array([[[0],[0],[0]],[[0],[0],[0]],[[0],[0],[0]],[[0],[0],[0]]] ,dtype=np.float32)
        self.x = np.array([[1],[1],[1]] ,dtype=np.float32)
        self.xdot = np.array([[0],[0],[0]] ,dtype=np.float32)
        self.theta = np.array([[0],[0],[0]] ,dtype=np.float32)
        self.thetadot = np.array([[0],[0],[0]] ,dtype=np.float32)
        self.omega = np.array([[0],[0],[0]] ,dtype=np.float32)        
        #self.target = target
        self.nest_step = 0
        obs = np.array([self.x[0,0], self.x[1,0], self.x[2,0], self.xdot[0,0], self.xdot[1,0], self.xdot[2,0],
                        self.theta[0,0], self.theta[1,0], self.theta[2,0], self.thetadot[0,0], self.thetadot[1,0],
                        self.thetadot[2,0]], dtype=np.float32)
        return obs    
    
    def reward(self, target):
        reward = 0.0
        
        target = np.array([[target[0]],[target[1]],[target[2]]])

        #print(target, self.x)
        distance = abs(np.linalg.norm(target - self.x))
        d_dis = self.lastdis - distance
        
#         if(distance < self.lastdis ):
#             reward_1 = 5000*(d_dis)
            
            
        reward_1 = 5000*(d_dis)
        reward_2 = 0
        if (distance < 0.5):
            in_position_reward = 100
            reward_2 = in_position_reward * (self.steps - self.nest_step)/self.steps
#         else:
#             reward_2 = -10* distance
        #print(reward_2)    
            
        #print(target, self.x, self.last_x)
        #print(target[0,0],self.last_x[0,0],self.x[0][1])
        reward_x = (abs(target[0,0] - self.last_x[0,0]) - abs(target[0,0] - self.x[0,0]))*1000
        reward_y = (abs(target[1,0] - self.last_x[1,0]) - abs(target[1,0] - self.x[1,0]))*1000
        reward_z = (abs(target[2,0] - self.last_x[2,0]) - abs(target[2,0] - self.x[2,0]))*1000
        
        #print("reward_z:",reward_z,(target[2,0] - self.last_x[2,0]),(target[2,0] - self.x[2,0]))
        #print("reward:", reward, distance, self.lastdis, d_dis)                
#         if (dist < 0.01):
#             target_reward = 100
#             reward += target_reward * ((self.steps - self.nest_step)/self.steps)

                
#         else:
#             reward += 1 - dist/np.linalg.norm(target )
            
        #print(reward)
        self.last_x = self.x
        self.lastdis = distance
        #reward = reward_1 + reward_2 + reward_x + reward_y + reward_z
        reward = reward_x + reward_y + reward_z
        return float(reward)
    
    
#                 reward = (-LA.norm(self.target - self.x[0]))*1.0
#         if (LA.norm(target - self.state[0]) < 1):
#             reward = 1


    def thetadot2omega(self,thetadot,angles):
        phi = angles[0]
        theta = angles[1]
        psi = angles[2]
        W = np.array([
            [1, 0, -math.sin(theta)],
            [0, math.cos(phi), math.cos(theta)*math.sin(phi)],
            [0, -math.sin(phi), math.cos(theta)*math.cos(phi)]
            ])
        omega = matmul(W,thetadot )
        return omega
        
    def acceleration(self, inputs, angles, vels, m, g, K, kd):
        gravity = np.array([[0, 0, -g]])
        R = self.rotation(angles)
        #print("R:",R)
        #print("self.thrust(inputs, self.k).T:",self.thrust(inputs, self.k).T)
        #print(inputs)
        T = matmul(R, self.thrust(inputs, self.k).T)
        #print("T",T)
        #print("vels",vels)
        Fd = np.dot(-kd , vels)
        #print("Fd:",Fd)
        #print("np.dot((1 / m) , T)",np.dot((1 / m) , T))
        #print("gravity.T",gravity.T)
        a = gravity.T + (np.dot((1 / m) , 10*T)) + Fd
        #print("a = ",a)
        return a
    
    def angular_acceleration(self, inputs, omega, I, L, b, K):
        tau = self.torques(inputs, self.L, self.b, self.k)
        #print("tau",tau)
        #print("cross(omega.T, np.dot(self.I , omega).T):",(tau - cross(omega.T, np.dot(self.I , omega).T).T))
        omegadot = matmul(inv(self.I), (tau - cross(omega.T, np.dot(self.I , omega).T).T)) 
        return omegadot
    
    def omega2thetadot(self, omega, angles):
        #print("omega:",omega)
        phi = angles[0]
        theta = angles[1]
        psi = angles[2]
        W = np.array([
            [1, 0, -math.sin(theta)],
            [0, math.cos(phi), math.cos(theta)*math.sin(phi)],
            [0, -math.sin(phi), math.cos(theta)*math.cos(phi)]
            ])
        thetadot = np.dot(inv(W) , omega)
        #print("thetadot:",thetadot)
        return thetadot
    
    def thrust(self,inputs, k):
        #print("sum(inputs)",sum(inputs)[0])
        T = np.array([[0, 0, dot(100*k , sum(inputs)[0])]])
        #print(T)
        return T
    
    def torques(self,inputs, L, b, k):
        tau = np.array([
            [L * k * (inputs[0,0] - inputs[2,0])],
            [L * k * (inputs[1,0] - inputs[3,0])],
            [b * (inputs[0,0] - inputs[1,0] + inputs[2,0] - inputs[3,0])]
            ])
        #print("inputs[0]",inputs[0,0])
        return tau
    
    def rotation(self,angles):
        ct = math.cos(angles[0])
        cp = math.cos(angles[1])
        cg = math.cos(angles[2])
        st = math.sin(angles[0])
        sp = math.sin(angles[1])
        sg = math.sin(angles[2])
        R_x = np.array([[1,0,0],[0,ct,-st],[0,st,ct]])
        R_y = np.array([[cp,0,sp],[0,1,0],[-sp,0,cp]])
        R_z = np.array([[cg,-sg,0],[sg,cg,0],[0,0,1]])
        R = np.dot(R_z, np.dot( R_y, R_x ))
        return R
    

In [58]:
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
import os 

env = quadsim()
# It will check your custom environment and output additional warnings if needed
check_env(env)
log_path = os.path.join('training_main', 'Logs')

In [59]:
del model

In [60]:
env = quadsim()
env = DummyVecEnv([lambda: env]) 
model = A2C('MlpPolicy', env, verbose = 1, tensorboard_log=log_path, learning_rate = 0.0001)

Using cpu device


In [None]:
model.learn(total_timesteps=2_000 * 10000)

Logging to training_main\Logs\A2C_24
------------------------------------
| time/                 |          |
|    fps                | 627      |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -5.67    |
|    explained_variance | 0.0191   |
|    learning_rate      | 0.0001   |
|    n_updates          | 99       |
|    policy_loss        | -76      |
|    std                | 0.999    |
|    value_loss         | 181      |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 591      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -5.67    |
|    explained_variance | -0.00558 |
|    learning_rate      | 0.0001   |
|    n_updates          | 199      |
|

237.6701700711562
------------------------------------
| time/                 |          |
|    fps                | 570      |
|    iterations         | 1600     |
|    time_elapsed       | 14       |
|    total_timesteps    | 8000     |
| train/                |          |
|    entropy_loss       | -5.7     |
|    explained_variance | 0.000966 |
|    learning_rate      | 0.0001   |
|    n_updates          | 1599     |
|    policy_loss        | -125     |
|    std                | 1.01     |
|    value_loss         | 677      |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 569      |
|    iterations         | 1700     |
|    time_elapsed       | 14       |
|    total_timesteps    | 8500     |
| train/                |          |
|    entropy_loss       | -5.7     |
|    explained_variance | -0.00101 |
|    learning_rate      | 0.0001   |
|    n_updates          | 1699     |
|    policy_loss    

------------------------------------
| time/                 |          |
|    fps                | 570      |
|    iterations         | 3100     |
|    time_elapsed       | 27       |
|    total_timesteps    | 15500    |
| train/                |          |
|    entropy_loss       | -5.72    |
|    explained_variance | 9.78e-06 |
|    learning_rate      | 0.0001   |
|    n_updates          | 3099     |
|    policy_loss        | -125     |
|    std                | 1.01     |
|    value_loss         | 658      |
------------------------------------
-976.3895138078713
------------------------------------
| time/                 |          |
|    fps                | 572      |
|    iterations         | 3200     |
|    time_elapsed       | 27       |
|    total_timesteps    | 16000    |
| train/                |          |
|    entropy_loss       | -5.72    |
|    explained_variance | 2.68e-06 |
|    learning_rate      | 0.0001   |
|    n_updates          | 3199     |
|    policy_loss   

-------------------------------------
| time/                 |           |
|    fps                | 588       |
|    iterations         | 4600      |
|    time_elapsed       | 39        |
|    total_timesteps    | 23000     |
| train/                |           |
|    entropy_loss       | -5.74     |
|    explained_variance | -3.58e-06 |
|    learning_rate      | 0.0001    |
|    n_updates          | 4599      |
|    policy_loss        | -197      |
|    std                | 1.02      |
|    value_loss         | 1.69e+03  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 589      |
|    iterations         | 4700     |
|    time_elapsed       | 39       |
|    total_timesteps    | 23500    |
| train/                |          |
|    entropy_loss       | -5.74    |
|    explained_variance | 4.17e-06 |
|    learning_rate      | 0.0001   |
|    n_updates          | 4699     |
|    policy_loss       

------------------------------------
| time/                 |          |
|    fps                | 600      |
|    iterations         | 6100     |
|    time_elapsed       | 50       |
|    total_timesteps    | 30500    |
| train/                |          |
|    entropy_loss       | -5.75    |
|    explained_variance | 1.48e-05 |
|    learning_rate      | 0.0001   |
|    n_updates          | 6099     |
|    policy_loss        | -139     |
|    std                | 1.02     |
|    value_loss         | 674      |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 600       |
|    iterations         | 6200      |
|    time_elapsed       | 51        |
|    total_timesteps    | 31000     |
| train/                |           |
|    entropy_loss       | -5.76     |
|    explained_variance | -1.06e-05 |
|    learning_rate      | 0.0001    |
|    n_updates          | 6199      |
|    policy_loss        | -

-3312.7961314621657
-------------------------------------
| time/                 |           |
|    fps                | 605       |
|    iterations         | 7600      |
|    time_elapsed       | 62        |
|    total_timesteps    | 38000     |
| train/                |           |
|    entropy_loss       | -5.78     |
|    explained_variance | -3.58e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 7599      |
|    policy_loss        | -94.3     |
|    std                | 1.03      |
|    value_loss         | 288       |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 605      |
|    iterations         | 7700     |
|    time_elapsed       | 63       |
|    total_timesteps    | 38500    |
| train/                |          |
|    entropy_loss       | -5.77    |
|    explained_variance | 0.32     |
|    learning_rate      | 0.0001   |
|    n_updates          | 7699     |
|  

-------------------------------------
| time/                 |           |
|    fps                | 609       |
|    iterations         | 9100      |
|    time_elapsed       | 74        |
|    total_timesteps    | 45500     |
| train/                |           |
|    entropy_loss       | -5.78     |
|    explained_variance | -1.67e-06 |
|    learning_rate      | 0.0001    |
|    n_updates          | 9099      |
|    policy_loss        | -131      |
|    std                | 1.03      |
|    value_loss         | 625       |
-------------------------------------
-2885.1431231267243
-------------------------------------
| time/                 |           |
|    fps                | 610       |
|    iterations         | 9200      |
|    time_elapsed       | 75        |
|    total_timesteps    | 46000     |
| train/                |           |
|    entropy_loss       | -5.79     |
|    explained_variance | -2.38e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 9199

-------------------------------------
| time/                 |           |
|    fps                | 612       |
|    iterations         | 10600     |
|    time_elapsed       | 86        |
|    total_timesteps    | 53000     |
| train/                |           |
|    entropy_loss       | -5.8      |
|    explained_variance | -3.58e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 10599     |
|    policy_loss        | -249      |
|    std                | 1.03      |
|    value_loss         | 2.02e+03  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 612       |
|    iterations         | 10700     |
|    time_elapsed       | 87        |
|    total_timesteps    | 53500     |
| train/                |           |
|    entropy_loss       | -5.8      |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0001    |
|    n_updates          | 10699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 616      |
|    iterations         | 12100    |
|    time_elapsed       | 98       |
|    total_timesteps    | 60500    |
| train/                |          |
|    entropy_loss       | -5.82    |
|    explained_variance | -0.0523  |
|    learning_rate      | 0.0001   |
|    n_updates          | 12099    |
|    policy_loss        | -9.15    |
|    std                | 1.04     |
|    value_loss         | 2.91     |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 616       |
|    iterations         | 12200     |
|    time_elapsed       | 98        |
|    total_timesteps    | 61000     |
| train/                |           |
|    entropy_loss       | -5.82     |
|    explained_variance | -4.77e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 12199     |
|    policy_loss        | -

644.6653264182427
------------------------------------
| time/                 |          |
|    fps                | 619      |
|    iterations         | 13600    |
|    time_elapsed       | 109      |
|    total_timesteps    | 68000    |
| train/                |          |
|    entropy_loss       | -5.83    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 13599    |
|    policy_loss        | -567     |
|    std                | 1.04     |
|    value_loss         | 9.62e+03 |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 619       |
|    iterations         | 13700     |
|    time_elapsed       | 110       |
|    total_timesteps    | 68500     |
| train/                |           |
|    entropy_loss       | -5.83     |
|    explained_variance | -7.55e-05 |
|    learning_rate      | 0.0001    |
|    n_updates          | 13699     |
|    poli

------------------------------------
| time/                 |          |
|    fps                | 621      |
|    iterations         | 15100    |
|    time_elapsed       | 121      |
|    total_timesteps    | 75500    |
| train/                |          |
|    entropy_loss       | -5.86    |
|    explained_variance | 1.79e-07 |
|    learning_rate      | 0.0001   |
|    n_updates          | 15099    |
|    policy_loss        | -334     |
|    std                | 1.05     |
|    value_loss         | 3.28e+03 |
------------------------------------
1477.7229350519076
-------------------------------------
| time/                 |           |
|    fps                | 621       |
|    iterations         | 15200     |
|    time_elapsed       | 122       |
|    total_timesteps    | 76000     |
| train/                |           |
|    entropy_loss       | -5.86     |
|    explained_variance | -8.34e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 15199     |
|    pol

------------------------------------
| time/                 |          |
|    fps                | 623      |
|    iterations         | 16600    |
|    time_elapsed       | 133      |
|    total_timesteps    | 83000    |
| train/                |          |
|    entropy_loss       | -5.88    |
|    explained_variance | 1.07e-06 |
|    learning_rate      | 0.0001   |
|    n_updates          | 16599    |
|    policy_loss        | -170     |
|    std                | 1.05     |
|    value_loss         | 1.14e+03 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 623      |
|    iterations         | 16700    |
|    time_elapsed       | 133      |
|    total_timesteps    | 83500    |
| train/                |          |
|    entropy_loss       | -5.88    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 16699    |
|    policy_loss        | -304     |
|

------------------------------------
| time/                 |          |
|    fps                | 625      |
|    iterations         | 18100    |
|    time_elapsed       | 144      |
|    total_timesteps    | 90500    |
| train/                |          |
|    entropy_loss       | -5.91    |
|    explained_variance | -0.0639  |
|    learning_rate      | 0.0001   |
|    n_updates          | 18099    |
|    policy_loss        | -21.3    |
|    std                | 1.06     |
|    value_loss         | 18       |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 625      |
|    iterations         | 18200    |
|    time_elapsed       | 145      |
|    total_timesteps    | 91000    |
| train/                |          |
|    entropy_loss       | -5.91    |
|    explained_variance | 1.79e-07 |
|    learning_rate      | 0.0001   |
|    n_updates          | 18199    |
|    policy_loss        | -243     |
|

1042.9016568642503
------------------------------------
| time/                 |          |
|    fps                | 626      |
|    iterations         | 19600    |
|    time_elapsed       | 156      |
|    total_timesteps    | 98000    |
| train/                |          |
|    entropy_loss       | -5.93    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 19599    |
|    policy_loss        | -495     |
|    std                | 1.07     |
|    value_loss         | 1.16e+04 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 626      |
|    iterations         | 19700    |
|    time_elapsed       | 157      |
|    total_timesteps    | 98500    |
| train/                |          |
|    entropy_loss       | -5.93    |
|    explained_variance | 0.00516  |
|    learning_rate      | 0.0001   |
|    n_updates          | 19699    |
|    policy_loss   

-------------------------------------
| time/                 |           |
|    fps                | 627       |
|    iterations         | 21100     |
|    time_elapsed       | 168       |
|    total_timesteps    | 105500    |
| train/                |           |
|    entropy_loss       | -5.96     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 21099     |
|    policy_loss        | -394      |
|    std                | 1.07      |
|    value_loss         | 4.83e+03  |
-------------------------------------
-2112.779914470638
------------------------------------
| time/                 |          |
|    fps                | 627      |
|    iterations         | 21200    |
|    time_elapsed       | 168      |
|    total_timesteps    | 106000   |
| train/                |          |
|    entropy_loss       | -5.96    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 21199    |
|   

------------------------------------
| time/                 |          |
|    fps                | 628      |
|    iterations         | 22600    |
|    time_elapsed       | 179      |
|    total_timesteps    | 113000   |
| train/                |          |
|    entropy_loss       | -5.96    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 22599    |
|    policy_loss        | -129     |
|    std                | 1.08     |
|    value_loss         | 634      |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 628       |
|    iterations         | 22700     |
|    time_elapsed       | 180       |
|    total_timesteps    | 113500    |
| train/                |           |
|    entropy_loss       | -5.97     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0001    |
|    n_updates          | 22699     |
|    policy_loss        | -

-------------------------------------
| time/                 |           |
|    fps                | 630       |
|    iterations         | 24100     |
|    time_elapsed       | 191       |
|    total_timesteps    | 120500    |
| train/                |           |
|    entropy_loss       | -5.98     |
|    explained_variance | -3.34e-06 |
|    learning_rate      | 0.0001    |
|    n_updates          | 24099     |
|    policy_loss        | -23.9     |
|    std                | 1.08      |
|    value_loss         | 25        |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 630      |
|    iterations         | 24200    |
|    time_elapsed       | 191      |
|    total_timesteps    | 121000   |
| train/                |          |
|    entropy_loss       | -5.97    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 24199    |
|    policy_loss       

-3237.8089467641857
-------------------------------------
| time/                 |           |
|    fps                | 630       |
|    iterations         | 25600     |
|    time_elapsed       | 202       |
|    total_timesteps    | 128000    |
| train/                |           |
|    entropy_loss       | -6        |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 25599     |
|    policy_loss        | -2.03e+03 |
|    std                | 1.08      |
|    value_loss         | 1.38e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 630       |
|    iterations         | 25700     |
|    time_elapsed       | 203       |
|    total_timesteps    | 128500    |
| train/                |           |
|    entropy_loss       | -6        |
|    explained_variance | -7.87e-06 |
|    learning_rate      | 0.0001    |
|    n_updates          | 2569

-------------------------------------
| time/                 |           |
|    fps                | 631       |
|    iterations         | 27100     |
|    time_elapsed       | 214       |
|    total_timesteps    | 135500    |
| train/                |           |
|    entropy_loss       | -6.01     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 27099     |
|    policy_loss        | -1.37e+03 |
|    std                | 1.09      |
|    value_loss         | 1.08e+05  |
-------------------------------------
-7697.470727104139
------------------------------------
| time/                 |          |
|    fps                | 631      |
|    iterations         | 27200    |
|    time_elapsed       | 215      |
|    total_timesteps    | 136000   |
| train/                |          |
|    entropy_loss       | -6.02    |
|    explained_variance | 5.96e-08 |
|    learning_rate      | 0.0001   |
|    n_updates          | 27199    |
|   

------------------------------------
| time/                 |          |
|    fps                | 631      |
|    iterations         | 28600    |
|    time_elapsed       | 226      |
|    total_timesteps    | 143000   |
| train/                |          |
|    entropy_loss       | -6.02    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 28599    |
|    policy_loss        | -186     |
|    std                | 1.09     |
|    value_loss         | 1.59e+03 |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 631       |
|    iterations         | 28700     |
|    time_elapsed       | 227       |
|    total_timesteps    | 143500    |
| train/                |           |
|    entropy_loss       | -6.03     |
|    explained_variance | 1.79e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 28699     |
|    policy_loss        | -

------------------------------------
| time/                 |          |
|    fps                | 630      |
|    iterations         | 30100    |
|    time_elapsed       | 238      |
|    total_timesteps    | 150500   |
| train/                |          |
|    entropy_loss       | -6.05    |
|    explained_variance | -0.204   |
|    learning_rate      | 0.0001   |
|    n_updates          | 30099    |
|    policy_loss        | -19.2    |
|    std                | 1.1      |
|    value_loss         | 11.5     |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 630       |
|    iterations         | 30200     |
|    time_elapsed       | 239       |
|    total_timesteps    | 151000    |
| train/                |           |
|    entropy_loss       | -6.05     |
|    explained_variance | -0.000238 |
|    learning_rate      | 0.0001    |
|    n_updates          | 30199     |
|    policy_loss        | -

22393.556703905862
-------------------------------------
| time/                 |           |
|    fps                | 629       |
|    iterations         | 31600     |
|    time_elapsed       | 250       |
|    total_timesteps    | 158000    |
| train/                |           |
|    entropy_loss       | -6.07     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 31599     |
|    policy_loss        | -3.77e+03 |
|    std                | 1.1       |
|    value_loss         | 4.9e+05   |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 629      |
|    iterations         | 31700    |
|    time_elapsed       | 251      |
|    total_timesteps    | 158500   |
| train/                |          |
|    entropy_loss       | -6.07    |
|    explained_variance | -0.00044 |
|    learning_rate      | 0.0001   |
|    n_updates          | 31699    |
|   

-------------------------------------
| time/                 |           |
|    fps                | 629       |
|    iterations         | 33100     |
|    time_elapsed       | 262       |
|    total_timesteps    | 165500    |
| train/                |           |
|    entropy_loss       | -6.09     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 33099     |
|    policy_loss        | -3.55e+03 |
|    std                | 1.11      |
|    value_loss         | 4.04e+05  |
-------------------------------------
12804.556726117276
-------------------------------------
| time/                 |           |
|    fps                | 629       |
|    iterations         | 33200     |
|    time_elapsed       | 263       |
|    total_timesteps    | 166000    |
| train/                |           |
|    entropy_loss       | -6.09     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 33199

-------------------------------------
| time/                 |           |
|    fps                | 630       |
|    iterations         | 34600     |
|    time_elapsed       | 274       |
|    total_timesteps    | 173000    |
| train/                |           |
|    entropy_loss       | -6.1      |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 34599     |
|    policy_loss        | -1.23e+03 |
|    std                | 1.11      |
|    value_loss         | 5.46e+04  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 630       |
|    iterations         | 34700     |
|    time_elapsed       | 275       |
|    total_timesteps    | 173500    |
| train/                |           |
|    entropy_loss       | -6.11     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0001    |
|    n_updates          | 34699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 630      |
|    iterations         | 36100    |
|    time_elapsed       | 286      |
|    total_timesteps    | 180500   |
| train/                |          |
|    entropy_loss       | -6.13    |
|    explained_variance | -0.00133 |
|    learning_rate      | 0.0001   |
|    n_updates          | 36099    |
|    policy_loss        | -21.3    |
|    std                | 1.12     |
|    value_loss         | 14.2     |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 630       |
|    iterations         | 36200     |
|    time_elapsed       | 286       |
|    total_timesteps    | 181000    |
| train/                |           |
|    entropy_loss       | -6.14     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 36199     |
|    policy_loss        | -

29179.043019006553
-------------------------------------
| time/                 |           |
|    fps                | 631       |
|    iterations         | 37600     |
|    time_elapsed       | 297       |
|    total_timesteps    | 188000    |
| train/                |           |
|    entropy_loss       | -6.16     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0001    |
|    n_updates          | 37599     |
|    policy_loss        | -2.59e+03 |
|    std                | 1.13      |
|    value_loss         | 2.56e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 631       |
|    iterations         | 37700     |
|    time_elapsed       | 298       |
|    total_timesteps    | 188500    |
| train/                |           |
|    entropy_loss       | -6.16     |
|    explained_variance | -3.06e-05 |
|    learning_rate      | 0.0001    |
|    n_updates          | 37699

-------------------------------------
| time/                 |           |
|    fps                | 631       |
|    iterations         | 39100     |
|    time_elapsed       | 309       |
|    total_timesteps    | 195500    |
| train/                |           |
|    entropy_loss       | -6.18     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 39099     |
|    policy_loss        | -3.57e+03 |
|    std                | 1.14      |
|    value_loss         | 4.42e+05  |
-------------------------------------
-48506.9588485046
-------------------------------------
| time/                 |           |
|    fps                | 631       |
|    iterations         | 39200     |
|    time_elapsed       | 310       |
|    total_timesteps    | 196000    |
| train/                |           |
|    entropy_loss       | -6.19     |
|    explained_variance | -2.38e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 39199 

-------------------------------------
| time/                 |           |
|    fps                | 632       |
|    iterations         | 40600     |
|    time_elapsed       | 320       |
|    total_timesteps    | 203000    |
| train/                |           |
|    entropy_loss       | -6.21     |
|    explained_variance | -2.38e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 40599     |
|    policy_loss        | -1.22e+03 |
|    std                | 1.14      |
|    value_loss         | 4.94e+04  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 632       |
|    iterations         | 40700     |
|    time_elapsed       | 321       |
|    total_timesteps    | 203500    |
| train/                |           |
|    entropy_loss       | -6.22     |
|    explained_variance | 2.38e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 40699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 632      |
|    iterations         | 42100    |
|    time_elapsed       | 332      |
|    total_timesteps    | 210500   |
| train/                |          |
|    entropy_loss       | -6.24    |
|    explained_variance | -0.00128 |
|    learning_rate      | 0.0001   |
|    n_updates          | 42099    |
|    policy_loss        | -4.61    |
|    std                | 1.15     |
|    value_loss         | 0.793    |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 632       |
|    iterations         | 42200     |
|    time_elapsed       | 333       |
|    total_timesteps    | 211000    |
| train/                |           |
|    entropy_loss       | -6.24     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 42199     |
|    policy_loss        | -

72067.10502604273
-------------------------------------
| time/                 |           |
|    fps                | 632       |
|    iterations         | 43600     |
|    time_elapsed       | 344       |
|    total_timesteps    | 218000    |
| train/                |           |
|    entropy_loss       | -6.26     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 43599     |
|    policy_loss        | -2.97e+03 |
|    std                | 1.16      |
|    value_loss         | 2.96e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 632      |
|    iterations         | 43700    |
|    time_elapsed       | 345      |
|    total_timesteps    | 218500   |
| train/                |          |
|    entropy_loss       | -6.26    |
|    explained_variance | -0.0284  |
|    learning_rate      | 0.0001   |
|    n_updates          | 43699    |
|    

-------------------------------------
| time/                 |           |
|    fps                | 633       |
|    iterations         | 45100     |
|    time_elapsed       | 356       |
|    total_timesteps    | 225500    |
| train/                |           |
|    entropy_loss       | -6.28     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 45099     |
|    policy_loss        | -2.71e+03 |
|    std                | 1.16      |
|    value_loss         | 1.97e+05  |
-------------------------------------
21727.589583311143
-------------------------------------
| time/                 |           |
|    fps                | 633       |
|    iterations         | 45200     |
|    time_elapsed       | 357       |
|    total_timesteps    | 226000    |
| train/                |           |
|    entropy_loss       | -6.29     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 45199

-------------------------------------
| time/                 |           |
|    fps                | 633       |
|    iterations         | 46600     |
|    time_elapsed       | 367       |
|    total_timesteps    | 233000    |
| train/                |           |
|    entropy_loss       | -6.31     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0001    |
|    n_updates          | 46599     |
|    policy_loss        | -2.06e+03 |
|    std                | 1.17      |
|    value_loss         | 1.32e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 633       |
|    iterations         | 46700     |
|    time_elapsed       | 368       |
|    total_timesteps    | 233500    |
| train/                |           |
|    entropy_loss       | -6.31     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 46699     |
|    policy_

-------------------------------------
| time/                 |           |
|    fps                | 633       |
|    iterations         | 48100     |
|    time_elapsed       | 379       |
|    total_timesteps    | 240500    |
| train/                |           |
|    entropy_loss       | -6.33     |
|    explained_variance | -4.47e-05 |
|    learning_rate      | 0.0001    |
|    n_updates          | 48099     |
|    policy_loss        | -51.8     |
|    std                | 1.18      |
|    value_loss         | 137       |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 633       |
|    iterations         | 48200     |
|    time_elapsed       | 380       |
|    total_timesteps    | 241000    |
| train/                |           |
|    entropy_loss       | -6.33     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 48199     |
|    policy_

-45788.02609622623
-------------------------------------
| time/                 |           |
|    fps                | 633       |
|    iterations         | 49600     |
|    time_elapsed       | 391       |
|    total_timesteps    | 248000    |
| train/                |           |
|    entropy_loss       | -6.35     |
|    explained_variance | 1.79e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 49599     |
|    policy_loss        | -3.42e+03 |
|    std                | 1.18      |
|    value_loss         | 3.44e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 633       |
|    iterations         | 49700     |
|    time_elapsed       | 391       |
|    total_timesteps    | 248500    |
| train/                |           |
|    entropy_loss       | -6.35     |
|    explained_variance | -2.81e-05 |
|    learning_rate      | 0.0001    |
|    n_updates          | 49699

-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 51100     |
|    time_elapsed       | 402       |
|    total_timesteps    | 255500    |
| train/                |           |
|    entropy_loss       | -6.38     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 51099     |
|    policy_loss        | -2.54e+03 |
|    std                | 1.19      |
|    value_loss         | 2.15e+05  |
-------------------------------------
33108.600104320445
-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 51200     |
|    time_elapsed       | 403       |
|    total_timesteps    | 256000    |
| train/                |           |
|    entropy_loss       | -6.38     |
|    explained_variance | 2.38e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 51199

-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 52600     |
|    time_elapsed       | 414       |
|    total_timesteps    | 263000    |
| train/                |           |
|    entropy_loss       | -6.39     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 52599     |
|    policy_loss        | -2.16e+03 |
|    std                | 1.2       |
|    value_loss         | 1.75e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 52700     |
|    time_elapsed       | 415       |
|    total_timesteps    | 263500    |
| train/                |           |
|    entropy_loss       | -6.38     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 52699     |
|    policy_

-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 54100     |
|    time_elapsed       | 426       |
|    total_timesteps    | 270500    |
| train/                |           |
|    entropy_loss       | -6.4      |
|    explained_variance | -1.91e-06 |
|    learning_rate      | 0.0001    |
|    n_updates          | 54099     |
|    policy_loss        | -130      |
|    std                | 1.2       |
|    value_loss         | 416       |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 634      |
|    iterations         | 54200    |
|    time_elapsed       | 427      |
|    total_timesteps    | 271000   |
| train/                |          |
|    entropy_loss       | -6.4     |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 54199    |
|    policy_loss       

-6896.694935875125
-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 55600     |
|    time_elapsed       | 438       |
|    total_timesteps    | 278000    |
| train/                |           |
|    entropy_loss       | -6.41     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0001    |
|    n_updates          | 55599     |
|    policy_loss        | -4.06e+03 |
|    std                | 1.2       |
|    value_loss         | 5.09e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 634      |
|    iterations         | 55700    |
|    time_elapsed       | 439      |
|    total_timesteps    | 278500   |
| train/                |          |
|    entropy_loss       | -6.41    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 55699    |
|   

-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 57100     |
|    time_elapsed       | 449       |
|    total_timesteps    | 285500    |
| train/                |           |
|    entropy_loss       | -6.43     |
|    explained_variance | 2.38e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 57099     |
|    policy_loss        | -2.83e+03 |
|    std                | 1.21      |
|    value_loss         | 2.54e+05  |
-------------------------------------
16741.942845278463
-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 57200     |
|    time_elapsed       | 450       |
|    total_timesteps    | 286000    |
| train/                |           |
|    entropy_loss       | -6.43     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 57199

-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 58600     |
|    time_elapsed       | 461       |
|    total_timesteps    | 293000    |
| train/                |           |
|    entropy_loss       | -6.46     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 58599     |
|    policy_loss        | -1.47e+03 |
|    std                | 1.22      |
|    value_loss         | 6e+04     |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 58700     |
|    time_elapsed       | 462       |
|    total_timesteps    | 293500    |
| train/                |           |
|    entropy_loss       | -6.46     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 58699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 634      |
|    iterations         | 60100    |
|    time_elapsed       | 473      |
|    total_timesteps    | 300500   |
| train/                |          |
|    entropy_loss       | -6.48    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 60099    |
|    policy_loss        | -80.9    |
|    std                | 1.22     |
|    value_loss         | 155      |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 60200     |
|    time_elapsed       | 474       |
|    total_timesteps    | 301000    |
| train/                |           |
|    entropy_loss       | -6.49     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 60199     |
|    policy_loss        | -

7109.986760227104
-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 61600     |
|    time_elapsed       | 485       |
|    total_timesteps    | 308000    |
| train/                |           |
|    entropy_loss       | -6.5      |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 61599     |
|    policy_loss        | -3.71e+03 |
|    std                | 1.23      |
|    value_loss         | 3.35e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 634      |
|    iterations         | 61700    |
|    time_elapsed       | 486      |
|    total_timesteps    | 308500   |
| train/                |          |
|    entropy_loss       | -6.5     |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 61699    |
|    

-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 63100     |
|    time_elapsed       | 497       |
|    total_timesteps    | 315500    |
| train/                |           |
|    entropy_loss       | -6.51     |
|    explained_variance | 4.17e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 63099     |
|    policy_loss        | -3.46e+03 |
|    std                | 1.23      |
|    value_loss         | 4.07e+05  |
-------------------------------------
-15407.294377654218
-------------------------------------
| time/                 |           |
|    fps                | 634       |
|    iterations         | 63200     |
|    time_elapsed       | 497       |
|    total_timesteps    | 316000    |
| train/                |           |
|    entropy_loss       | -6.51     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 6319

-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 64600     |
|    time_elapsed       | 508       |
|    total_timesteps    | 323000    |
| train/                |           |
|    entropy_loss       | -6.52     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 64599     |
|    policy_loss        | -2.78e+03 |
|    std                | 1.24      |
|    value_loss         | 1.96e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 64700     |
|    time_elapsed       | 509       |
|    total_timesteps    | 323500    |
| train/                |           |
|    entropy_loss       | -6.52     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 64699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 635      |
|    iterations         | 66100    |
|    time_elapsed       | 520      |
|    total_timesteps    | 330500   |
| train/                |          |
|    entropy_loss       | -6.54    |
|    explained_variance | 0.139    |
|    learning_rate      | 0.0001   |
|    n_updates          | 66099    |
|    policy_loss        | 4.52     |
|    std                | 1.24     |
|    value_loss         | 0.466    |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 635      |
|    iterations         | 66200    |
|    time_elapsed       | 521      |
|    total_timesteps    | 331000   |
| train/                |          |
|    entropy_loss       | -6.54    |
|    explained_variance | 1.19e-07 |
|    learning_rate      | 0.0001   |
|    n_updates          | 66199    |
|    policy_loss        | -895     |
|

-19432.640591480565
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 67600     |
|    time_elapsed       | 532       |
|    total_timesteps    | 338000    |
| train/                |           |
|    entropy_loss       | -6.56     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 67599     |
|    policy_loss        | -4.49e+03 |
|    std                | 1.25      |
|    value_loss         | 4.21e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 635      |
|    iterations         | 67700    |
|    time_elapsed       | 532      |
|    total_timesteps    | 338500   |
| train/                |          |
|    entropy_loss       | -6.56    |
|    explained_variance | -0.00018 |
|    learning_rate      | 0.0001   |
|    n_updates          | 67699    |
|  

-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 69100     |
|    time_elapsed       | 543       |
|    total_timesteps    | 345500    |
| train/                |           |
|    entropy_loss       | -6.58     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 69099     |
|    policy_loss        | -3.15e+03 |
|    std                | 1.25      |
|    value_loss         | 3.36e+05  |
-------------------------------------
-48276.86980013601
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 69200     |
|    time_elapsed       | 544       |
|    total_timesteps    | 346000    |
| train/                |           |
|    entropy_loss       | -6.58     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 69199

-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 70600     |
|    time_elapsed       | 555       |
|    total_timesteps    | 353000    |
| train/                |           |
|    entropy_loss       | -6.59     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 70599     |
|    policy_loss        | -1.38e+03 |
|    std                | 1.26      |
|    value_loss         | 5.7e+04   |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 70700     |
|    time_elapsed       | 556       |
|    total_timesteps    | 353500    |
| train/                |           |
|    entropy_loss       | -6.59     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 70699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 635      |
|    iterations         | 72100    |
|    time_elapsed       | 567      |
|    total_timesteps    | 360500   |
| train/                |          |
|    entropy_loss       | -6.6     |
|    explained_variance | 0.0322   |
|    learning_rate      | 0.0001   |
|    n_updates          | 72099    |
|    policy_loss        | -4.06    |
|    std                | 1.26     |
|    value_loss         | 0.488    |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 72200     |
|    time_elapsed       | 567       |
|    total_timesteps    | 361000    |
| train/                |           |
|    entropy_loss       | -6.61     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 72199     |
|    policy_loss        | -

21843.133142182676
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 73600     |
|    time_elapsed       | 579       |
|    total_timesteps    | 368000    |
| train/                |           |
|    entropy_loss       | -6.63     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 73599     |
|    policy_loss        | -3.44e+03 |
|    std                | 1.27      |
|    value_loss         | 3.28e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 73700     |
|    time_elapsed       | 579       |
|    total_timesteps    | 368500    |
| train/                |           |
|    entropy_loss       | -6.63     |
|    explained_variance | -0.000546 |
|    learning_rate      | 0.0001    |
|    n_updates          | 73699

-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 75100     |
|    time_elapsed       | 590       |
|    total_timesteps    | 375500    |
| train/                |           |
|    entropy_loss       | -6.64     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 75099     |
|    policy_loss        | -3.41e+03 |
|    std                | 1.27      |
|    value_loss         | 3.76e+05  |
-------------------------------------
7545.192671738094
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 75200     |
|    time_elapsed       | 591       |
|    total_timesteps    | 376000    |
| train/                |           |
|    entropy_loss       | -6.65     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 75199 

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 76600     |
|    time_elapsed       | 602       |
|    total_timesteps    | 383000    |
| train/                |           |
|    entropy_loss       | -6.67     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 76599     |
|    policy_loss        | -1.24e+03 |
|    std                | 1.28      |
|    value_loss         | 4.06e+04  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 76700     |
|    time_elapsed       | 602       |
|    total_timesteps    | 383500    |
| train/                |           |
|    entropy_loss       | -6.67     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 76699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 78100    |
|    time_elapsed       | 613      |
|    total_timesteps    | 390500   |
| train/                |          |
|    entropy_loss       | -6.69    |
|    explained_variance | 0.953    |
|    learning_rate      | 0.0001   |
|    n_updates          | 78099    |
|    policy_loss        | 2.1      |
|    std                | 1.29     |
|    value_loss         | 0.101    |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 78200     |
|    time_elapsed       | 614       |
|    total_timesteps    | 391000    |
| train/                |           |
|    entropy_loss       | -6.69     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 78199     |
|    policy_loss        | -

98002.21201208461
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 79600     |
|    time_elapsed       | 625       |
|    total_timesteps    | 398000    |
| train/                |           |
|    entropy_loss       | -6.7      |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 79599     |
|    policy_loss        | -4.13e+03 |
|    std                | 1.29      |
|    value_loss         | 4.91e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 79700    |
|    time_elapsed       | 626      |
|    total_timesteps    | 398500   |
| train/                |          |
|    entropy_loss       | -6.7     |
|    explained_variance | 0.786    |
|    learning_rate      | 0.0001   |
|    n_updates          | 79699    |
|    

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 81100     |
|    time_elapsed       | 637       |
|    total_timesteps    | 405500    |
| train/                |           |
|    entropy_loss       | -6.72     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 81099     |
|    policy_loss        | -3.57e+03 |
|    std                | 1.3       |
|    value_loss         | 3.15e+05  |
-------------------------------------
-36842.41843859004
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 81200     |
|    time_elapsed       | 637       |
|    total_timesteps    | 406000    |
| train/                |           |
|    entropy_loss       | -6.72     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 81199

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 82600     |
|    time_elapsed       | 648       |
|    total_timesteps    | 413000    |
| train/                |           |
|    entropy_loss       | -6.72     |
|    explained_variance | -0.173    |
|    learning_rate      | 0.0001    |
|    n_updates          | 82599     |
|    policy_loss        | -1.25e+03 |
|    std                | 1.3       |
|    value_loss         | 3.47e+04  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 82700     |
|    time_elapsed       | 649       |
|    total_timesteps    | 413500    |
| train/                |           |
|    entropy_loss       | -6.73     |
|    explained_variance | 1.79e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 82699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 84100    |
|    time_elapsed       | 660      |
|    total_timesteps    | 420500   |
| train/                |          |
|    entropy_loss       | -6.73    |
|    explained_variance | 0.0277   |
|    learning_rate      | 0.0001   |
|    n_updates          | 84099    |
|    policy_loss        | -5.64    |
|    std                | 1.3      |
|    value_loss         | 0.688    |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 84200     |
|    time_elapsed       | 661       |
|    total_timesteps    | 421000    |
| train/                |           |
|    entropy_loss       | -6.73     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 84199     |
|    policy_loss        | -

88597.34718104391
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 85600     |
|    time_elapsed       | 671       |
|    total_timesteps    | 428000    |
| train/                |           |
|    entropy_loss       | -6.74     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0001    |
|    n_updates          | 85599     |
|    policy_loss        | -3.85e+03 |
|    std                | 1.3       |
|    value_loss         | 3.59e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 637      |
|    iterations         | 85700    |
|    time_elapsed       | 672      |
|    total_timesteps    | 428500   |
| train/                |          |
|    entropy_loss       | -6.74    |
|    explained_variance | -0.00531 |
|    learning_rate      | 0.0001   |
|    n_updates          | 85699    |
|    

-------------------------------------
| time/                 |           |
|    fps                | 637       |
|    iterations         | 87100     |
|    time_elapsed       | 683       |
|    total_timesteps    | 435500    |
| train/                |           |
|    entropy_loss       | -6.75     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 87099     |
|    policy_loss        | -2.39e+03 |
|    std                | 1.31      |
|    value_loss         | 1.27e+05  |
-------------------------------------
64318.66937998009
-------------------------------------
| time/                 |           |
|    fps                | 637       |
|    iterations         | 87200     |
|    time_elapsed       | 684       |
|    total_timesteps    | 436000    |
| train/                |           |
|    entropy_loss       | -6.75     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 87199 

-------------------------------------
| time/                 |           |
|    fps                | 637       |
|    iterations         | 88600     |
|    time_elapsed       | 695       |
|    total_timesteps    | 443000    |
| train/                |           |
|    entropy_loss       | -6.78     |
|    explained_variance | -4.77e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 88599     |
|    policy_loss        | -1.66e+03 |
|    std                | 1.32      |
|    value_loss         | 9.9e+04   |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 637       |
|    iterations         | 88700     |
|    time_elapsed       | 696       |
|    total_timesteps    | 443500    |
| train/                |           |
|    entropy_loss       | -6.78     |
|    explained_variance | 7.69e-06  |
|    learning_rate      | 0.0001    |
|    n_updates          | 88699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 637      |
|    iterations         | 90100    |
|    time_elapsed       | 707      |
|    total_timesteps    | 450500   |
| train/                |          |
|    entropy_loss       | -6.8     |
|    explained_variance | -0.0647  |
|    learning_rate      | 0.0001   |
|    n_updates          | 90099    |
|    policy_loss        | -4.12    |
|    std                | 1.32     |
|    value_loss         | 0.392    |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 637       |
|    iterations         | 90200     |
|    time_elapsed       | 707       |
|    total_timesteps    | 451000    |
| train/                |           |
|    entropy_loss       | -6.8      |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 90199     |
|    policy_loss        | -

26311.91155703107
-------------------------------------
| time/                 |           |
|    fps                | 637       |
|    iterations         | 91600     |
|    time_elapsed       | 718       |
|    total_timesteps    | 458000    |
| train/                |           |
|    entropy_loss       | -6.81     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 91599     |
|    policy_loss        | -3.91e+03 |
|    std                | 1.33      |
|    value_loss         | 4.6e+05   |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 637      |
|    iterations         | 91700    |
|    time_elapsed       | 719      |
|    total_timesteps    | 458500   |
| train/                |          |
|    entropy_loss       | -6.81    |
|    explained_variance | -0.0338  |
|    learning_rate      | 0.0001   |
|    n_updates          | 91699    |
|    

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 93100     |
|    time_elapsed       | 730       |
|    total_timesteps    | 465500    |
| train/                |           |
|    entropy_loss       | -6.82     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 93099     |
|    policy_loss        | -1.72e+03 |
|    std                | 1.33      |
|    value_loss         | 6.92e+04  |
-------------------------------------
-23741.842417471304
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 93200     |
|    time_elapsed       | 731       |
|    total_timesteps    | 466000    |
| train/                |           |
|    entropy_loss       | -6.82     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 9319

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 94600     |
|    time_elapsed       | 742       |
|    total_timesteps    | 473000    |
| train/                |           |
|    entropy_loss       | -6.83     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 94599     |
|    policy_loss        | -1e+03    |
|    std                | 1.34      |
|    value_loss         | 3.51e+04  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 94700     |
|    time_elapsed       | 743       |
|    total_timesteps    | 473500    |
| train/                |           |
|    entropy_loss       | -6.83     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 94699     |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 96100    |
|    time_elapsed       | 754      |
|    total_timesteps    | 480500   |
| train/                |          |
|    entropy_loss       | -6.85    |
|    explained_variance | -0.00921 |
|    learning_rate      | 0.0001   |
|    n_updates          | 96099    |
|    policy_loss        | -46      |
|    std                | 1.34     |
|    value_loss         | 42.9     |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 96200     |
|    time_elapsed       | 755       |
|    total_timesteps    | 481000    |
| train/                |           |
|    entropy_loss       | -6.85     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 96199     |
|    policy_loss        | -

-21338.15048770721
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 97600     |
|    time_elapsed       | 766       |
|    total_timesteps    | 488000    |
| train/                |           |
|    entropy_loss       | -6.87     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 97599     |
|    policy_loss        | -2.74e+03 |
|    std                | 1.35      |
|    value_loss         | 2.9e+05   |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 97700    |
|    time_elapsed       | 767      |
|    total_timesteps    | 488500   |
| train/                |          |
|    entropy_loss       | -6.87    |
|    explained_variance | -0.239   |
|    learning_rate      | 0.0001   |
|    n_updates          | 97699    |
|   

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 99100     |
|    time_elapsed       | 778       |
|    total_timesteps    | 495500    |
| train/                |           |
|    entropy_loss       | -6.89     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0001    |
|    n_updates          | 99099     |
|    policy_loss        | -3.47e+03 |
|    std                | 1.35      |
|    value_loss         | 3.63e+05  |
-------------------------------------
92909.94677438916
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 99200     |
|    time_elapsed       | 778       |
|    total_timesteps    | 496000    |
| train/                |           |
|    entropy_loss       | -6.89     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 99199 

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 100600    |
|    time_elapsed       | 789       |
|    total_timesteps    | 503000    |
| train/                |           |
|    entropy_loss       | -6.92     |
|    explained_variance | -3.58e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 100599    |
|    policy_loss        | -2.6e+03  |
|    std                | 1.37      |
|    value_loss         | 1.51e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 100700    |
|    time_elapsed       | 790       |
|    total_timesteps    | 503500    |
| train/                |           |
|    entropy_loss       | -6.92     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 100699    |
|    policy_

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 102100    |
|    time_elapsed       | 801       |
|    total_timesteps    | 510500    |
| train/                |           |
|    entropy_loss       | -6.93     |
|    explained_variance | -0.000328 |
|    learning_rate      | 0.0001    |
|    n_updates          | 102099    |
|    policy_loss        | -77.7     |
|    std                | 1.37      |
|    value_loss         | 188       |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 102200    |
|    time_elapsed       | 802       |
|    total_timesteps    | 511000    |
| train/                |           |
|    entropy_loss       | -6.94     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 102199    |
|    policy_

-18226.85506815328
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 103600    |
|    time_elapsed       | 813       |
|    total_timesteps    | 518000    |
| train/                |           |
|    entropy_loss       | -6.95     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 103599    |
|    policy_loss        | -4.08e+03 |
|    std                | 1.38      |
|    value_loss         | 3.93e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 103700   |
|    time_elapsed       | 814      |
|    total_timesteps    | 518500   |
| train/                |          |
|    entropy_loss       | -6.95    |
|    explained_variance | 0.613    |
|    learning_rate      | 0.0001   |
|    n_updates          | 103699   |
|   

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 105100    |
|    time_elapsed       | 825       |
|    total_timesteps    | 525500    |
| train/                |           |
|    entropy_loss       | -6.96     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 105099    |
|    policy_loss        | -4.48e+03 |
|    std                | 1.38      |
|    value_loss         | 5.51e+05  |
-------------------------------------
15915.16958144278
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 105200    |
|    time_elapsed       | 826       |
|    total_timesteps    | 526000    |
| train/                |           |
|    entropy_loss       | -6.96     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 105199

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 106600    |
|    time_elapsed       | 837       |
|    total_timesteps    | 533000    |
| train/                |           |
|    entropy_loss       | -6.96     |
|    explained_variance | 4.77e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 106599    |
|    policy_loss        | -1.23e+03 |
|    std                | 1.38      |
|    value_loss         | 3.38e+04  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 106700    |
|    time_elapsed       | 838       |
|    total_timesteps    | 533500    |
| train/                |           |
|    entropy_loss       | -6.96     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 106699    |
|    policy_

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 108100    |
|    time_elapsed       | 849       |
|    total_timesteps    | 540500    |
| train/                |           |
|    entropy_loss       | -6.98     |
|    explained_variance | -0.000113 |
|    learning_rate      | 0.0001    |
|    n_updates          | 108099    |
|    policy_loss        | -70.9     |
|    std                | 1.39      |
|    value_loss         | 106       |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 108200    |
|    time_elapsed       | 850       |
|    total_timesteps    | 541000    |
| train/                |           |
|    entropy_loss       | -6.99     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 108199    |
|    policy_

-5776.286290048458
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 109600    |
|    time_elapsed       | 861       |
|    total_timesteps    | 548000    |
| train/                |           |
|    entropy_loss       | -7        |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 109599    |
|    policy_loss        | -3.41e+03 |
|    std                | 1.39      |
|    value_loss         | 2.65e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 109700    |
|    time_elapsed       | 862       |
|    total_timesteps    | 548500    |
| train/                |           |
|    entropy_loss       | -7        |
|    explained_variance | -2.07e-05 |
|    learning_rate      | 0.0001    |
|    n_updates          | 10969

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 111100    |
|    time_elapsed       | 873       |
|    total_timesteps    | 555500    |
| train/                |           |
|    entropy_loss       | -7.04     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 111099    |
|    policy_loss        | -3.54e+03 |
|    std                | 1.41      |
|    value_loss         | 4.1e+05   |
-------------------------------------
-122558.10444971867
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 111200    |
|    time_elapsed       | 874       |
|    total_timesteps    | 556000    |
| train/                |           |
|    entropy_loss       | -7.05     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 1111

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 112600    |
|    time_elapsed       | 885       |
|    total_timesteps    | 563000    |
| train/                |           |
|    entropy_loss       | -7.07     |
|    explained_variance | -2.03e-06 |
|    learning_rate      | 0.0001    |
|    n_updates          | 112599    |
|    policy_loss        | -305      |
|    std                | 1.42      |
|    value_loss         | 1.71e+03  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 112700    |
|    time_elapsed       | 885       |
|    total_timesteps    | 563500    |
| train/                |           |
|    entropy_loss       | -7.07     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 112699    |
|    policy_

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 114100    |
|    time_elapsed       | 896       |
|    total_timesteps    | 570500    |
| train/                |           |
|    entropy_loss       | -7.1      |
|    explained_variance | -4.65e-06 |
|    learning_rate      | 0.0001    |
|    n_updates          | 114099    |
|    policy_loss        | -105      |
|    std                | 1.43      |
|    value_loss         | 310       |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 114200    |
|    time_elapsed       | 897       |
|    total_timesteps    | 571000    |
| train/                |           |
|    entropy_loss       | -7.1      |
|    explained_variance | -3.58e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 114199    |
|    policy_

-27684.159257830543
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 115600    |
|    time_elapsed       | 908       |
|    total_timesteps    | 578000    |
| train/                |           |
|    entropy_loss       | -7.11     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 115599    |
|    policy_loss        | -3.64e+03 |
|    std                | 1.43      |
|    value_loss         | 3.73e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 115700    |
|    time_elapsed       | 909       |
|    total_timesteps    | 578500    |
| train/                |           |
|    entropy_loss       | -7.11     |
|    explained_variance | -4.32e-05 |
|    learning_rate      | 0.0001    |
|    n_updates          | 1156

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 117100    |
|    time_elapsed       | 920       |
|    total_timesteps    | 585500    |
| train/                |           |
|    entropy_loss       | -7.13     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 117099    |
|    policy_loss        | -2.28e+03 |
|    std                | 1.44      |
|    value_loss         | 7.75e+04  |
-------------------------------------
54768.12537429693
------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 117200   |
|    time_elapsed       | 921      |
|    total_timesteps    | 586000   |
| train/                |          |
|    entropy_loss       | -7.13    |
|    explained_variance | 0        |
|    learning_rate      | 0.0001   |
|    n_updates          | 117199   |
|    

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 118600    |
|    time_elapsed       | 931       |
|    total_timesteps    | 593000    |
| train/                |           |
|    entropy_loss       | -7.15     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 118599    |
|    policy_loss        | -2.16e+03 |
|    std                | 1.44      |
|    value_loss         | 1.32e+05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 118700    |
|    time_elapsed       | 932       |
|    total_timesteps    | 593500    |
| train/                |           |
|    entropy_loss       | -7.14     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 118699    |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 120100   |
|    time_elapsed       | 943      |
|    total_timesteps    | 600500   |
| train/                |          |
|    entropy_loss       | -7.15    |
|    explained_variance | 2.69e-05 |
|    learning_rate      | 0.0001   |
|    n_updates          | 120099   |
|    policy_loss        | -47.2    |
|    std                | 1.45     |
|    value_loss         | 40.4     |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 120200    |
|    time_elapsed       | 944       |
|    total_timesteps    | 601000    |
| train/                |           |
|    entropy_loss       | -7.15     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 120199    |
|    policy_loss        | -

-29818.871618934416
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 121600    |
|    time_elapsed       | 955       |
|    total_timesteps    | 608000    |
| train/                |           |
|    entropy_loss       | -7.16     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 121599    |
|    policy_loss        | -5.51e+03 |
|    std                | 1.45      |
|    value_loss         | 5.58e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 121700   |
|    time_elapsed       | 956      |
|    total_timesteps    | 608500   |
| train/                |          |
|    entropy_loss       | -7.17    |
|    explained_variance | -0.00045 |
|    learning_rate      | 0.0001   |
|    n_updates          | 121699   |
|  

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 123100    |
|    time_elapsed       | 967       |
|    total_timesteps    | 615500    |
| train/                |           |
|    entropy_loss       | -7.19     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 123099    |
|    policy_loss        | -2.58e+03 |
|    std                | 1.46      |
|    value_loss         | 2.05e+05  |
-------------------------------------
-27072.312984007203
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 123200    |
|    time_elapsed       | 968       |
|    total_timesteps    | 616000    |
| train/                |           |
|    entropy_loss       | -7.19     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0001    |
|    n_updates          | 1231

-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 124600    |
|    time_elapsed       | 979       |
|    total_timesteps    | 623000    |
| train/                |           |
|    entropy_loss       | -7.2      |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 124599    |
|    policy_loss        | -1.87e+03 |
|    std                | 1.46      |
|    value_loss         | 6.83e+04  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 124700    |
|    time_elapsed       | 980       |
|    total_timesteps    | 623500    |
| train/                |           |
|    entropy_loss       | -7.2      |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 124699    |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 636      |
|    iterations         | 126100   |
|    time_elapsed       | 991      |
|    total_timesteps    | 630500   |
| train/                |          |
|    entropy_loss       | -7.22    |
|    explained_variance | -2.89    |
|    learning_rate      | 0.0001   |
|    n_updates          | 126099   |
|    policy_loss        | -2.97    |
|    std                | 1.47     |
|    value_loss         | 0.216    |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 636       |
|    iterations         | 126200    |
|    time_elapsed       | 991       |
|    total_timesteps    | 631000    |
| train/                |           |
|    entropy_loss       | -7.23     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0001    |
|    n_updates          | 126199    |
|    policy_loss        | -

-10712.665381371611
-------------------------------------
| time/                 |           |
|    fps                | 635       |
|    iterations         | 127600    |
|    time_elapsed       | 1003      |
|    total_timesteps    | 638000    |
| train/                |           |
|    entropy_loss       | -7.25     |
|    explained_variance | 0         |
|    learning_rate      | 0.0001    |
|    n_updates          | 127599    |
|    policy_loss        | -3.69e+03 |
|    std                | 1.48      |
|    value_loss         | 3.87e+05  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 635      |
|    iterations         | 127700   |
|    time_elapsed       | 1004     |
|    total_timesteps    | 638500   |
| train/                |          |
|    entropy_loss       | -7.25    |
|    explained_variance | 0.2      |
|    learning_rate      | 0.0001   |
|    n_updates          | 127699   |
|  

In [22]:
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    env.target = np.array([[1],[1],[1]], dtype=np.float32)

    done = False
    score = 0
    
    while not done:
        #action = env.action_space.sample()
        action, _state = model.predict(obs, deterministic=True)
        #print(action)
        #print(action[0])
        #action = [[1, 1, 1, 1]]
        #print(action)        
        
        obs, reward, done, info = env.step(action)
        #data = [obs[]]
        #print(n_state)
        score +=reward
        #print("x",n_state[0].T)
        #print(x.T)
        #break
        #print("action", action, "reward", reward)
        print(obs[0][0:3])
        
        
    #print(n_state)
    
    #print("score:",obs, score)
    #print("state",state)

[1.        1.        0.9999347]
[1.       1.       0.999804]
[1.        1.        0.9996082]
[1.        1.        0.9993473]
[1.        1.        0.9990215]
[1.         1.         0.99863064]
[1.       1.       0.998175]
[1.        1.        0.9976547]
[1.        1.        0.9970696]
[1.         1.         0.99641997]
[1.        1.        0.9957059]
[1.         1.         0.99492735]
[1.        1.        0.9940845]
[1.        1.        0.9931774]
[1.        1.        0.9922061]
[1.         1.         0.99117076]
[1.        1.        0.9900714]
[1.        1.        0.9889081]
[1.       1.       0.987681]
[1.         1.         0.98639005]
[1.        1.        0.9850355]
[1.        1.        0.9836173]
[1.         1.         0.98213553]
[1.         1.         0.98059034]
[1.        1.        0.9789818]
[1.         1.         0.97730994]
[1.         1.         0.97557485]
[1.        1.        0.9737766]
[1.         1.         0.97191525]
[1.         1.         0.96999097]
[1.        1.   

[ 0.9999628  1.0002438 -1.417358 ]
[ 0.9999624  1.0002469 -1.4336045]
[ 0.99996203  1.0002501  -1.4499002 ]
[ 0.9999617  1.0002533 -1.4662452]
[ 0.9999613  1.0002565 -1.4826392]
[ 0.9999609  1.0002599 -1.4990824]
[ 0.99996054  1.0002632  -1.5155747 ]
[ 0.9999602  1.0002666 -1.5321158]
[ 0.9999598  1.0002699 -1.5487059]
[ 0.9999594  1.0002732 -1.565345 ]
[ 0.99995905  1.0002767  -1.5820328 ]
[ 0.9999587  1.0002801 -1.5987694]
[ 0.99995834  1.0002836  -1.6155547 ]
[ 0.999958   1.000287  -1.6323886]
[ 0.9999576  1.0002906 -1.649271 ]
[ 0.99995726  1.0002942  -1.6662021 ]
[ 0.9999569  1.0002978 -1.6831815]
[ 0.99995655  1.0003015  -1.7002095 ]
[ 0.9999562  1.000305  -1.7172858]
[ 0.99995583  1.0003088  -1.7344103 ]
[ 0.99995553  1.0003124  -1.7515832 ]
[ 0.9999552  1.0003163 -1.7688042]
[ 0.9999548  1.0003201 -1.7860733]
[ 0.9999545  1.0003239 -1.8033905]
[ 0.99995416  1.0003277  -1.8207557 ]
[ 0.99995387  1.0003315  -1.838169  ]
[ 0.9999535  1.0003355 -1.85563  ]
[ 0.9999532  1.0003394 -1

[ 1.0005864  1.0023315 -8.146438 ]
[ 1.0005925  1.0023404 -8.174962 ]
[ 1.0005987  1.0023493 -8.203522 ]
[ 1.0006047  1.0023581 -8.232115 ]
[ 1.000611   1.0023669 -8.260742 ]
[ 1.0006171  1.0023757 -8.289405 ]
[ 1.0006233  1.0023845 -8.318101 ]
[ 1.0006297  1.0023934 -8.34683  ]
[ 1.0006359  1.0024022 -8.375595 ]
[ 1.0006422  1.002411  -8.404393 ]
[ 1.0006485  1.0024198 -8.433226 ]
[ 1.0006548  1.0024287 -8.462091 ]
[ 1.0006613  1.0024374 -8.490993 ]
[ 1.0006676  1.0024462 -8.519926 ]
[ 1.000674  1.002455 -8.548894]
[ 1.0006804  1.0024637 -8.577896 ]
[ 1.0006869  1.0024725 -8.606931 ]
[ 1.0006933  1.0024812 -8.636001 ]
[ 1.0006999  1.00249   -8.665103 ]
[ 1.0007063  1.0024987 -8.69424  ]
[ 1.0007129  1.0025074 -8.72341  ]
[ 1.0007194  1.0025163 -8.752613 ]
[ 1.000726  1.002525 -8.78185 ]
[ 1.0007325  1.0025337 -8.811121 ]
[ 1.0007392  1.0025424 -8.840425 ]
[ 1.0007458  1.0025511 -8.869761 ]
[ 1.0007524  1.0025598 -8.899133 ]
[ 1.0007591  1.0025685 -8.928536 ]
[ 1.0007658  1.0025772 -8.

[  1.0033199   1.0049556 -18.476555 ]
[  1.0033323   1.0049636 -18.513887 ]
[  1.0033447   1.0049716 -18.551245 ]
[  1.0033572   1.0049795 -18.588623 ]
[  1.0033696   1.0049875 -18.626024 ]
[  1.0033821   1.0049953 -18.663448 ]
[  1.0033946   1.0050032 -18.700895 ]
[  1.0034071   1.0050111 -18.738365 ]
[  1.0034198   1.005019  -18.775856 ]
[  1.0034324   1.0050268 -18.813372 ]
[  1.003445    1.0050347 -18.850908 ]
[  1.0034577   1.0050424 -18.888468 ]
[  1.0034704   1.0050503 -18.92605  ]
[  1.0034832   1.005058  -18.963654 ]
[  1.0034959   1.0050658 -19.001282 ]
[  1.0035088   1.0050735 -19.03893  ]
[  1.0035216   1.0050813 -19.076601 ]
[  1.0035344   1.0050889 -19.114296 ]
[  1.0035474   1.0050967 -19.152012 ]
[  1.0035603   1.0051043 -19.189749 ]
[  1.0035733   1.0051119 -19.22751  ]
[  1.0035863   1.0051196 -19.265293 ]
[  1.0035993   1.0051272 -19.303097 ]
[  1.0036124   1.0051348 -19.340925 ]
[  1.0036255   1.0051423 -19.378773 ]
[  1.0036386   1.0051498 -19.416645 ]
[  1.0036517

[  1.0077007   1.0059403 -29.541464 ]
[  1.0077195   1.0059375 -29.583956 ]
[  1.0077384   1.0059346 -29.626463 ]
[  1.0077573   1.0059317 -29.668985 ]
[  1.0077761   1.0059288 -29.711521 ]
[  1.0077951   1.0059257 -29.754072 ]
[  1.007814    1.0059226 -29.796637 ]
[  1.007833    1.0059195 -29.839216 ]
[  1.0078521   1.0059162 -29.881811 ]
[  1.007871   1.005913 -29.92442 ]
[  1.0078901   1.0059097 -29.967043 ]
[  1.0079093   1.0059063 -30.00968  ]
[  1.0079284   1.0059029 -30.052332 ]
[  1.0079476   1.0058994 -30.094997 ]
[  1.0079668   1.0058959 -30.137678 ]
[  1.007986    1.0058923 -30.180372 ]
[  1.0080053   1.0058886 -30.223082 ]
[  1.0080246   1.0058849 -30.265804 ]
[  1.0080439   1.0058812 -30.30854  ]
[  1.0080632   1.0058774 -30.351292 ]
[  1.0080826   1.0058734 -30.394058 ]
[  1.008102    1.0058695 -30.436836 ]
[  1.0081215   1.0058656 -30.479631 ]
[  1.008141    1.0058615 -30.522438 ]
[  1.0081606   1.0058575 -30.56526  ]
[  1.0081801   1.0058533 -30.608095 ]
[  1.0081997   

[  1.0143256   1.0041486 -41.19418  ]
[  1.0143567   1.0041423 -41.239742 ]
[  1.0143878   1.0041361 -41.285316 ]
[  1.014419    1.0041299 -41.3309   ]
[  1.0144501   1.0041237 -41.376488 ]
[  1.0144813   1.0041176 -41.42209  ]
[  1.0145125   1.0041114 -41.467697 ]
[  1.0145438   1.0041053 -41.513317 ]
[  1.014575    1.0040992 -41.558945 ]
[  1.0146064   1.0040932 -41.604584 ]
[  1.0146377   1.0040872 -41.65023  ]
[  1.0146691   1.0040811 -41.695885 ]
[  1.0147004   1.0040752 -41.74155  ]
[  1.0147319   1.0040692 -41.787224 ]
[  1.0147634   1.0040632 -41.83291  ]
[  1.0147948   1.0040573 -41.8786   ]
[  1.0148263   1.0040513 -41.9243   ]
[  1.0148579   1.0040455 -41.970013 ]
[  1.0148895   1.0040395 -42.01573  ]
[  1.0149211   1.0040337 -42.06146  ]
[  1.0149527   1.0040278 -42.107197 ]
[  1.0149844   1.0040221 -42.152943 ]
[  1.0150161   1.0040163 -42.1987   ]
[  1.0150478   1.0040106 -42.244465 ]
[  1.0150795   1.0040048 -42.290237 ]
[  1.0151113   1.0039991 -42.33602  ]
[  1.0151432

[  1.0234269   1.0039061 -53.60551  ]
[  1.0234618   1.0039123 -53.653156 ]
[  1.0234966   1.0039186 -53.70081  ]
[  1.0235314   1.003925  -53.748466 ]
[  1.0235662   1.0039313 -53.79613  ]
[  1.0236012   1.0039377 -53.843803 ]
[  1.023636    1.0039443 -53.891483 ]
[  1.0236708   1.0039508 -53.939167 ]
[  1.0237056   1.0039574 -53.98686  ]
[  1.0237404   1.0039641 -54.034557 ]
[  1.0237752   1.0039707 -54.08226  ]
[  1.02381     1.0039775 -54.129974 ]
[  1.0238447   1.0039843 -54.17769  ]
[  1.0238795   1.0039912 -54.225414 ]
[  1.0239143   1.0039982 -54.273144 ]
[  1.023949    1.0040051 -54.32088  ]
[  1.0239838   1.0040122 -54.368626 ]
[  1.0240186   1.0040193 -54.416378 ]
[  1.0240533   1.0040264 -54.464134 ]
[  1.0240881   1.0040336 -54.511898 ]
[  1.0241228   1.0040408 -54.559666 ]
[  1.0241575   1.0040482 -54.607445 ]
[  1.0241923   1.0040556 -54.655224 ]
[  1.024227   1.004063 -54.703014]
[  1.0242617   1.0040705 -54.75081  ]
[  1.0242964   1.004078  -54.79861  ]
[  1.0243311   

[  1.0318413   1.0081058 -66.736755 ]
[  1.031864    1.0081297 -66.78595  ]
[  1.0318868   1.0081537 -66.835144 ]
[  1.0319096   1.0081776 -66.884346 ]
[  1.0319322   1.0082016 -66.933556 ]
[  1.0319548   1.0082257 -66.98277  ]
[  1.0319772   1.0082498 -67.03199  ]
[  1.0319996   1.008274  -67.081215 ]
[  1.0320219   1.008298  -67.13044  ]
[  1.0320442   1.0083222 -67.17967  ]
[  1.0320663   1.0083466 -67.22891  ]
[  1.0320884   1.0083708 -67.27815  ]
[  1.0321105   1.0083951 -67.3274   ]
[  1.0321324   1.0084195 -67.37665  ]
[  1.0321542   1.0084438 -67.42591  ]
[  1.032176    1.0084683 -67.47517  ]
[  1.0321977   1.0084927 -67.52444  ]
[  1.0322193   1.0085173 -67.57371  ]
[  1.0322409   1.0085418 -67.622986 ]
[  1.0322623   1.0085664 -67.672264 ]
[  1.0322838   1.0085909 -67.72155  ]
[  1.0323051   1.0086156 -67.77084  ]
[  1.0323263   1.0086403 -67.82014  ]
[  1.0323476   1.0086651 -67.86944  ]
[  1.0323687   1.0086898 -67.91874  ]
[  1.0323896   1.0087146 -67.968056 ]
[  1.0324106

In [41]:
import numpy as np

#from numpy.linalg import norm


a = np.array([[0],[0],[0]])
b = np.array([[0],[1],[1]])
dist = np.linalg.norm(a-b)

print((dist))
#print(a-b)

1.4142135623730951


In [105]:
model_path = os.path.join('Trained','first_MAIN19_')
model.save(model_path)

In [50]:
model_path = os.path.join('Trained','first_MAIN')
model = A2C.load(model_path, env=env)

FileNotFoundError: [Errno 2] No such file or directory: 'Trained2\\first.zip'

In [76]:
import time
#log_path = os.path.join('training', 'Logs')
env = quadsim()
env = DummyVecEnv([lambda: env]) 
model = A2C('MlpPolicy', env)
st = time.time()
model.learn(total_timesteps=10e5)
et = time.time()
#learn(total_timesteps, callback=None, log_interval=100, tb_log_name='run', reset_num_timesteps=True)
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Execution time: 1761.5834798812866 seconds


In [57]:
episodes = 10
for episode in range(1, episodes+1):
    obs = env.reset()
    env.target = np.array([[0],[0],[1]], dtype=np.float32)

    done = False
    score = 0
    
    while not done:
        #action = env.action_space.sample()
        action, _state = model.predict(obs, deterministic=True)
        #print(env.nest_step)
        obs, reward, done, info = env.step(action)
        #data = [obs[]]
        #print(n_state)
        score +=reward
        #print("x",n_state[0].T)
        #print(x.T)
        #break
        
        
    #print(n_state)
    
    print("score:",obs, score)
    #print("state",state)

score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]
score: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [-30903.152]


In [33]:
score = 0
obs = env.reset()
for i in range(1000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    score +=reward
    print(score,env.)
    if done:
      obs = env.reset()

[-17.492851] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-34.985695] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-52.478527] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-69.971344] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-87.46414] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-104.95691] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-122.44965] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-139.94237] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-157.43504] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-174.92767] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv o

[-2133.114] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2150.5852] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2168.0562] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2185.5269] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2202.9973] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2220.4675] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2237.9375] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2255.4075] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2272.8772] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-2290.3467] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv o

[-4421.812] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4439.3076] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4456.804] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4474.302] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4491.8003] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4509.3] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4526.8003] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4544.302] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4561.804] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-4579.3076] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object a

[-6782.5303] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6800.3467] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6818.1675] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6835.992] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6853.8213] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6871.655] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6889.4927] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6907.335] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6925.182] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-6943.0337] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv obje

[-9110.529] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9129.205] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9147.891] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9166.586] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9185.291] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9204.005] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9222.729] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9241.462] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9260.205] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-9278.958] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 

[-11716.373] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11736.751] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11757.1455] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11777.556] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11797.982] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11818.425] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11838.884] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11859.359] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11879.852] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-11900.36] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv 

[-14541.505] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14564.421] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14587.36] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14610.322] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14633.307] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14656.313] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14679.344] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14702.396] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14725.473] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-14748.571] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv o

[-17700.865] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17727.08] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17753.324] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17779.598] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17805.898] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17832.229] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17858.586] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17884.973] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17911.389] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x0000022EB1B34B20>
[-17937.832] <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv o

In [121]:
env = quadsim()
env.observation_space

Box([-20. -20. -20.], [20. 20. 20.], (3,), float32)

In [51]:
episodes = 10
for episode in range(1, episodes+1):
    target = np.array([[0],[0],[0]], dtype=np.float32)
    state = env.reset()
    #print("state",state)
    done = False
    score = 0
    #print(episode)
    while not done:
        #action = env.action_space.sample()
        action = [1,1,1,1]
        obs, reward, done, info = env.step(action)
        #print(done)
        #print(n_state)
        score +=reward
        #print("x",n_state[0].T)
        #print(x.T)
        #break
        
        
    #print(n_state)
    print("score:",score)

score: -47380.81524140312
score: -47380.81524140312
score: -47380.81524140312
score: -47380.81524140312
score: -47380.81524140312
score: -47380.81524140312
score: -47380.81524140312
score: -47380.81524140312
score: -47380.81524140312
score: -47380.81524140312


In [16]:
log_path = os.path.join('training', 'Logs')

In [57]:

env = DummyVecEnv([lambda: env]) tensorboard_log=log_path
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path )

Using cpu device


In [23]:
env1 = gym.make('CartPole-v0')
env

<__main__.quadsim at 0x206a3f2f250>

In [24]:
states = env.observation_space.shape
actions = env.action_space.shape

In [49]:
env.action_space
#env.action_space

Box([300. 300. 300. 300.], [700. 700. 700. 700.], (4,), float32)

In [35]:
#env.reset([[0],[0],[0]])
env.step(np.array([[700],[700],[700],[700]], dtype=np.float32))

(array([[[ 6.6867224e-03],
         [-1.2483347e-02],
         [ 5.0455692e+01]],
 
        [[ 3.2206514e-04],
         [-3.0418320e-03],
         [ 7.6259503e+00]],
 
        [[ 8.5229343e-03],
         [ 2.9264606e-04],
         [ 1.2678874e+02]],
 
        [[ 9.6653357e-02],
         [ 3.4174559e-01],
         [ 2.4449106e+01]]], dtype=float32),
 -50.455696,
 False,
 array([[ 6.68672235e-03],
        [-1.24833469e-02],
        [ 5.04556928e+01]]),
 {})

In [40]:
def build_model(states,actions):
    model = Sequential()
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions[0], activation='linear'))
    return model


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [37]:
model = build_model(states,actions)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 3, 24)             48        
                                                                 
 dense_1 (Dense)             (None, 3, 24)             600       
                                                                 
 dense_2 (Dense)             (None, 3, 4)              100       
                                                                 
Total params: 748
Trainable params: 748
Non-trainable params: 0
_________________________________________________________________


In [117]:
state = np.array([[[1],[0.0],[0.0]],[[0],[0],[0]],[[0],[0],[0]],[[0],[0],[2]]] ,dtype=np.float32)
print(state)
x = state[3]
print(x)


[[[1.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [2.]]]
[[0.]
 [0.]
 [2.]]


In [66]:
t1 = np.array([[0.],[0.],[0.]])
t2 = np.array([[0.],[0.],[0.]])
cross(t1.T,t2.T).T

array([[0.],
       [0.],
       [0.]])

In [33]:
[[0]
 [0]
 [0]].T

TypeError: 'int' object is not subscriptable

In [62]:
np.diag([1,5,0])

array([[1, 0, 0],
       [0, 5, 0],
       [0, 0, 0]])

In [128]:
dot([[1,1],[1,1]],[[1,0],[1,0]])

array([[2, 0],
       [2, 0]])

In [199]:
state = np.array([[1,2,3,0,0,0,0,0,0,0,0,0]])




[[1 2 3]]


In [152]:
state = np.array([[0,0,0,0,0,0,0,0,0,0,0,0]])
state[0,0:3] = [1,1,1]
print(state)

[[1 1 1 0 0 0 0 0 0 0 0 0]]


In [143]:
T = np.array([[0.    ],
 [0.    ],
 [0.0009]])

np.dot((1 / 0.5) , T)

array([[0.    ],
       [0.    ],
       [0.0018]])

In [194]:
a = np.array([[1,1,1]])
LA.norm(a)

1.7320508075688772

In [115]:
random.randint(-20,+20)

-4

In [125]:
env.state[0]

array([[0.e+00],
       [0.e+00],
       [5.e-05]], dtype=float32)

In [126]:
LA.norm(env.target - env.state[0])

13.928385

In [45]:
from scipy.integrate import ode

y0, t0 = 0, 0

def f(y,t):
    return y*t


r = ode(f).set_integrator('vode', method='bdf')
r.set_initial_value(y0, t0).set_f_params(1,r.t)
dt = 1

In [46]:
while r.successful() and r.t < t1:
    print(r.t+dt, r.integrate(r.t+dt))


1 [0.]
2.0 [0.]
3.0 [0.]
4.0 [0.]
5.0 [0.]
6.0 [0.]
7.0 [0.]
8.0 [0.]
9.0 [0.]
10.0 [0.]


In [42]:
import time
import datetime
time = datetime.datetime.now()

In [44]:
print (time)


2022-06-13 20:39:35.949682


In [42]:
import numpy as np
a = np.array([[1],[11],[1]],dtype=np.float32)
b = np.array([[1],[2],[1]],dtype=np.float32)
b - a
print((b-a).mean())

-3.0


In [28]:
print(a[1][0])
print(a[1,0])

11.0
11.0


NameError: name 'self' is not defined

In [65]:
a = max([[3],[2]],[[2],[0]]).all()
print(a)

AttributeError: 'list' object has no attribute 'all'