In [9]:
import argparse
import gym
import gymfc
import numpy as np
from mpi4py import MPI
import math
import os
import time


import nest_asyncio
nest_asyncio.apply()

In [10]:
# Build the path to the Iris configuration relative to the current working
# directory and export it as GYMFC_CONFIG (presumably where gymfc looks up its
# configuration -- confirm against the gymfc documentation).
current_dir = os.getcwd()
config_path = os.path.join(current_dir, "../configs/iris.config")
print(current_dir, config_path, sep="\n")
os.environ["GYMFC_CONFIG"] = config_path

/home/ec2-user/nti/experiments_bezvershenko/controllers
/home/ec2-user/nti/experiments_bezvershenko/controllers/../configs/iris.config


In [11]:
class Policy(object):
    """Minimal controller interface: produce an action and reset between runs.

    Both methods are no-ops that return ``None``; concrete policies override
    them.
    """

    def action(self, state, sim_time=0, desired=np.zeros(3), actual=np.zeros(3)):
        """Return the command for this step. The base class returns ``None``."""
        return None

    def reset(self):
        """Drop any per-run state. The base class keeps none and returns ``None``."""
        return None


class PIDPolicy(Policy):
    """Policy that delegates to a ``PIDController`` and rescales its motor values."""

    def __init__(self):
        # One gain triple per axis, handed to PIDController as
        # pid_roll / pid_pitch / pid_yaw.
        self.r = [2, 10, 0.005]
        self.p = [10, 10, 0.005]
        self.y = [4, 50, 0.0]
        self.controller = PIDController(pid_roll=self.r, pid_pitch=self.p, pid_yaw=self.y)

    def action(self, state, sim_time=0, desired=np.zeros(3), actual=np.zeros(3)):
        """Return motor commands scaled to -1..1 for the given rates.

        ``desired`` and ``actual`` are converted from radians to degrees
        before being passed to the controller; ``state`` is not used.
        """
        target_deg = [math.degrees(rate) for rate in desired]
        measured_deg = [math.degrees(rate) for rate in actual]
        pwm = np.array(self.controller.calculate_motor_values(sim_time, target_deg, measured_deg))
        # Map the 1000..2000 motor range onto -1..1.
        return (pwm - 1000) / 500 - 1

    def reset(self):
        """Replace the controller with a fresh one built from the stored gains."""
        self.controller = PIDController(pid_roll=self.r, pid_pitch=self.p, pid_yaw=self.y)
        


class PIDController(object):
    """Three-axis controller: one ``PID`` per axis feeding a four-motor mixer.

    ``calculate_motor_values`` runs the roll, pitch and yaw PIDs and passes
    their outputs to ``mix``, which returns four integer motor values.
    """

    # Axis indices into ``pid_rpy`` and the rate vectors.
    FD_ROLL = 0
    FD_PITCH = 1
    FD_YAW = 2
    PTERM_SCALE = 0.032029
    ITERM_SCALE = 0.244381
    DTERM_SCALE = 0.000529
    minthrottle = 1070
    maxthrottle = 2000

    def __init__(self, pid_roll=[40, 40, 30], pid_pitch=[58, 50, 35], pid_yaw=[80, 45, 20], itermLimit=150):
        """Create the per-axis PIDs from ``[P, I, D]`` gain triples."""
        axis_gains = (pid_roll, pid_pitch, pid_yaw)

        # Scaled copies of the gains.
        # NOTE(review): the PID objects below are built from the *raw* gains
        # and nothing in this class reads these scaled lists.
        self.Kp = [self.PTERM_SCALE * gains[0] for gains in axis_gains]
        self.Ki = [self.ITERM_SCALE * gains[1] for gains in axis_gains]
        self.Kd = [self.DTERM_SCALE * gains[2] for gains in axis_gains]

        self.itermLimit = itermLimit

        self.previousRateError = [0] * 3
        self.previousTime = 0
        self.previous_motor_values = [self.minthrottle] * 4
        self.pid_rpy = [PID(*gains) for gains in axis_gains]

    def calculate_motor_values(self, current_time, sp_rates, gyro_rates):
        """Update each axis PID with its setpoint and measurement, then mix.

        Returns the list of four integer motor values produced by ``mix``.
        """
        axis_outputs = []
        for axis in (self.FD_ROLL, self.FD_PITCH, self.FD_YAW):
            controller = self.pid_rpy[axis]
            controller.SetPoint = sp_rates[axis]
            controller.update(current_time, gyro_rates[axis])
            axis_outputs.append(controller.output)
        roll_sum, pitch_sum, yaw_sum = axis_outputs
        return self.mix(roll_sum, pitch_sum, yaw_sum)

    def constrainf(self, amt, low, high):
        """Clamp ``amt`` to ``[low, high]`` (from BF src/main/common/maths.h)."""
        if amt < low:
            return low
        if amt > high:
            return high
        return amt

    def mix(self, r, p, y):
        """Combine roll/pitch/yaw PID sums into four motor values.

        Each sum is clamped and divided by 1000, yaw is sign-flipped, and the
        per-motor mix is formed from the mixer table. If the spread of the
        mixes exceeds 1.0 they are normalised and throttle is centred at 0.5;
        otherwise throttle is raised just enough to keep every motor inside
        the range. Outputs are clamped to 1000..2000 and rounded to ints.
        """
        mixer_scaling = 1000.0
        sum_limit = 10000.
        sum_limit_yaw = 100000.
        output_sign = 1
        output_span = self.maxthrottle - self.minthrottle  # throttle max - throttle min
        output_floor = self.minthrottle
        range_min, range_max = 1000, 2000

        # One row per motor; columns are throttle, roll, pitch, yaw.
        mixer_table = [
            [1.0, -1.0, 0.598, -1.0],  # REAR_R
            [1.0, -0.927, -0.598, 1.0],  # FRONT_R
            [1.0, 1.0, 0.598, 1.0],  # REAR_L
            [1.0, 0.927, -0.598, -1.0],  # FRONT_L
        ]
        THROTTLE, ROLL, PITCH, YAW = range(4)

        roll_cmd = self.constrainf(r, -sum_limit, sum_limit) / mixer_scaling
        pitch_cmd = self.constrainf(p, -sum_limit, sum_limit) / mixer_scaling
        # Yaw is clamped and scaled like the others, then negated.
        yaw_cmd = -(self.constrainf(y, -sum_limit_yaw, sum_limit_yaw) / mixer_scaling)

        # Desired roll/pitch/yaw contribution for every motor.
        mixes = [
            roll_cmd * row[ROLL] + pitch_cmd * row[PITCH] + yaw_cmd * row[YAW]
            for row in mixer_table
        ]
        # The extremes start from 0, so the range always includes zero.
        mix_max = max([0] + mixes)
        mix_min = min([0] + mixes)
        mix_range = mix_max - mix_min

        if mix_range > 1.0:
            # Saturated: squeeze the mixes into a unit span and centre the
            # throttle to leave equal headroom on both sides.
            mixes = [value / mix_range for value in mixes]
            throttle = 0.5
        else:
            # No additional throttle is requested (air mode); lift it only as
            # far as needed so the lowest motor does not go below the floor.
            half_range = mix_range / 2.0
            throttle = self.constrainf(0, 0.0 + half_range, 1.0 - half_range)

        outputs = [
            self.constrainf(
                output_floor + (output_span * (output_sign * value + throttle * row[THROTTLE])),
                range_min,
                range_max,
            )
            for value, row in zip(mixes, mixer_table)
        ]
        return [int(rounded) for rounded in np.round(outputs)]

    def is_airmode_active(self):
        """Air mode is always reported as active."""
        return True

    def reset(self):
        """Clear the state of every axis PID."""
        for axis_pid in self.pid_rpy:
            axis_pid.clear()
    
class PID:
    """Single-axis PID controller driven by caller-supplied timestamps.

    Set ``SetPoint``, call ``update(current_time, feedback_value)`` and read
    the result from ``output``. Timestamps may be in any unit as long as the
    caller is consistent.
    """

    def __init__(self, P=0.2, I=0.0, D=0.0):
        """Store the three gains and start from a cleared state at time 0."""
        self.Kp = P
        self.Ki = I
        self.Kd = D

        # Minimum time that must elapse between two effective updates.
        self.sample_time = 0.00
        self.current_time = 0
        self.last_time = self.current_time

        self.clear()

    def clear(self):
        """Reset the setpoint, the P/I/D terms, the last error and the output.

        The gains and ``sample_time`` are kept. ``windup_guard`` is restored
        to 20.0, so a value set through ``setWindup`` must be re-applied after
        clearing. ``last_time`` is not touched here; ``update`` re-anchors it
        if the next timestamp is earlier.
        """
        self.SetPoint = 0.0

        self.PTerm = 0.0
        self.ITerm = 0.0
        self.DTerm = 0.0
        self.last_error = 0.0

        # Windup Guard
        self.int_error = 0.0
        self.windup_guard = 20.0

        self.output = 0.0

    def update(self, current_time, feedback_value):
        r"""Compute the PID output for the given feedback value.

        .. math::
            u(t) = K_p e(t) + K_i \int_{0}^{t} e(t)dt + K_d {de}/{dt}

        The result is stored in ``self.output``; nothing is returned. The
        computation is skipped when less than ``sample_time`` has elapsed
        since the previous effective update.

        If ``current_time`` is earlier than the previous timestamp (for
        example, the simulation clock restarted for a new episode), the time
        base is re-anchored at ``current_time`` and this sample is processed
        with zero elapsed time, instead of leaving the controller frozen until
        the clock catches up with the stale timestamp.
        """
        error = self.SetPoint - feedback_value

        delta_time = current_time - self.last_time
        if delta_time < 0:
            # Clock moved backwards: restart the time base from this sample.
            self.last_time = current_time
            delta_time = 0
        delta_error = error - self.last_error

        if (delta_time >= self.sample_time):
            self.PTerm = self.Kp * error
            self.ITerm += error * delta_time

            # Keep the accumulated integral inside +/- windup_guard.
            if (self.ITerm < -self.windup_guard):
                self.ITerm = -self.windup_guard
            elif (self.ITerm > self.windup_guard):
                self.ITerm = self.windup_guard

            # With no elapsed time the derivative is undefined; use 0.
            self.DTerm = 0.0
            if delta_time > 0:
                self.DTerm = delta_error / delta_time

            # Remember last time and last error for next calculation
            self.last_time = current_time
            self.last_error = error

            self.output = self.PTerm + (self.Ki * self.ITerm) + (self.Kd * self.DTerm)

    def setKp(self, proportional_gain):
        """Set the proportional gain (weight of the current error)."""
        self.Kp = proportional_gain

    def setKi(self, integral_gain):
        """Set the integral gain (weight of the accumulated error)."""
        self.Ki = integral_gain

    def setKd(self, derivative_gain):
        """Set the derivative gain (weight of the error's rate of change)."""
        self.Kd = derivative_gain

    def setWindup(self, windup):
        """Set the bound applied to the accumulated integral term.

        Integral windup is the overshoot caused by the integral term
        accumulating a large error during a big setpoint change and then
        having to unwind it. Clamping the integral to ``+/- windup`` limits
        that excess overshoot.
        """
        self.windup_guard = windup

    def setSampleTime(self, sample_time):
        """Set the minimum interval between effective updates.

        ``update`` returns without recomputing when less than ``sample_time``
        has elapsed since the previous effective update.
        """
        self.sample_time = sample_time


In [14]:
def do(env, pi, verbose=True):
    """Run one episode of ``pi`` in ``env`` and record the tracked rates.

    Args:
        env: environment exposing ``reset()``, ``step(action)``, ``close()``
            and the attributes ``omega_target``, ``omega_actual`` and
            ``sim_time``.
        pi: policy exposing ``reset()`` and
            ``action(ob, sim_time, desired, actual)``.
        verbose: when true (the default), print a status block for every
            tick; pass ``False`` to keep long episodes from flooding the
            output.

    Returns:
        ``(desireds, actuals, ticks)``: the per-tick values of
        ``env.omega_target`` and ``env.omega_actual`` as read *before* each
        step, and the number of steps taken.

    The environment is always closed, including when the policy or the
    environment raises part-way through the episode.
    """
    actuals = []
    desireds = []
    ticks = 0
    try:
        pi.reset()
        ob = env.reset()
        while True:
            # NOTE(review): the objects are stored as returned; if the
            # environment reuses one array in place, every history entry
            # would alias it -- confirm against the env implementation.
            desired = env.omega_target
            actual = env.omega_actual

            ac = pi.action(ob, env.sim_time, desired, actual)

            ob, reward, done, info = env.step(ac)

            if verbose:
                print()
                print('Tick #'+str(ticks))
                print('='*20)
                print('Need:', desired)
                print('Right now:', actual)
                print('Is done:', done)
                print('Reward:', reward)
                print('='*20)
                print()

            actuals.append(actual)
            desireds.append(desired)
            ticks += 1

            if done:
                break
    finally:
        # Release the simulator even if the rollout failed.
        env.close()
    return desireds, actuals, ticks

In [15]:
def main(env_id='AttFC_GyroErr-MotorVel_M4_Ep-v0', seed=17):
    """Create the environment, seed it per MPI rank and run one PID episode.

    Prints the number of ticks the episode lasted and returns ``None``.
    """
    env = gym.make(env_id)

    # Offset the seed by the MPI rank so each worker gets a different seed.
    env.seed(seed + 1000000 * MPI.COMM_WORLD.Get_rank())

    _, _, ticks = do(env, PIDPolicy())

    print('Final ticks', ticks)
    

In [16]:
# Run one episode with the default environment id and seed.
main()

Starting gzserver with process ID= 7521

Tick #0
Need: [2.65125294 2.32801414 4.80871902]
Right now: [ 0. -0. -0.]
Is done: False
Reward: -0.5192675764115963


Tick #1
Need: [2.65125294 2.32801414 4.80871902]
Right now: [ 0. -0. -0.]
Is done: False
Reward: -0.5192675764115963


Tick #2
Need: [2.65125294 2.32801414 4.80871902]
Right now: [ 2.81049352e-18  2.28073058e-18 -2.94862663e-21]
Is done: False
Reward: -0.5148286245170549


Tick #3
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.08870226e-18 2.22031817e-07 8.36722456e-02]
Is done: False
Reward: -0.5104903856165897


Tick #4
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.00712254e-04 4.27950463e-05 1.65303028e-01]
Is done: False
Reward: -0.5062358211519692


Tick #5
Need: [2.65125294 2.32801414 4.80871902]
Right now: [4.94123942e-04 2.08263809e-04 2.44940986e-01]
Is done: False
Reward: -0.5020490023472447


Tick #6
Need: [2.65125294 2.32801414 4.80871902]
Right now: [0.00135811 0.00057085 0.32263428]
Is done: False



Tick #52
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.36785275 0.66338007 2.75855709]
Is done: False
Reward: -0.25982093630153164


Tick #53
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.39295096 0.69072731 2.80678712]
Is done: False
Reward: -0.2540643077196885


Tick #54
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.42056049 0.72403488 2.85438016]
Is done: False
Reward: -0.2479261480956251


Tick #55
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.45451536 0.75854688 2.90161515]
Is done: False
Reward: -0.24157393313579265


Tick #56
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.49707863 0.78916251 2.94817296]
Is done: False
Reward: -0.23532719030147542


Tick #57
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.54637413 0.81173554 2.99405304]
Is done: False
Reward: -0.22951851429576314


Tick #58
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.59720762 0.82520857 3.03923773]
Is done: False
Reward: -0.2242968125758729


Tick #59



Tick #103
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.54795043 1.56174807 4.6029739 ]
Is done: False
Reward: -0.05439161882151497


Tick #104
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.54101202 1.59329833 4.6284155 ]
Is done: False
Reward: -0.05050258489015724


Tick #105
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.56154469 1.62026655 4.65422134]
Is done: False
Reward: -0.046459286767109184


Tick #106
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60071487 1.63078542 4.68074685]
Is done: False
Reward: -0.043428151820053114


Tick #107
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63934816 1.62264908 4.70738558]
Is done: False
Reward: -0.04278708027095822


Tick #108
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65949311 1.60541097 4.73304301]
Is done: False
Reward: -0.04184676366172185


Tick #109
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65563808 1.59505755 4.75726599]
Is done: False
Reward: -0.040612827248697


Tick #152
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.88382974 1.9879688  5.58012406]
Is done: False
Reward: -0.07356288117186588


Tick #153
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.89664056 1.97562735 5.59757551]
Is done: False
Reward: -0.07347072882952405


Tick #154
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.87624617 1.96924162 5.60984714]
Is done: False
Reward: -0.07130293726319267


Tick #155
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.83875656 1.98101859 5.61825171]
Is done: False
Reward: -0.06873714217986317


Tick #156
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.80999412 2.01031861 5.62794995]
Is done: False
Reward: -0.06760251344370255


Tick #157
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.80780818 2.04345823 5.64188821]
Is done: False
Reward: -0.06873240697880065


Tick #158
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.8311689  2.06278068 5.65914799]
Is done: False
Reward: -0.07168746079197048


Is done: False
Reward: -0.0553154066081226


Tick #202
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58545583 2.36974734 5.743862  ]
Is done: False
Reward: -0.056940370446483334


Tick #203
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.57126165 2.39136432 5.73868076]
Is done: False
Reward: -0.0568854616779684


Tick #204
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58272918 2.40859412 5.73188348]
Is done: False
Reward: -0.054986014648460486


Tick #205
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6130258  2.41168062 5.72328979]
Is done: False
Reward: -0.051867446230200814


Tick #206
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6479953  2.39728824 5.7138679 ]
Is done: False
Reward: -0.05090802867691155


Tick #207
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67269591 2.37003679 5.70484938]
Is done: False
Reward: -0.049253748469434686


Tick #208
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67811688 2.3403266  5.6979560

Is done: False
Reward: -0.036394078191537685


Tick #252
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66640258 2.35702933 5.45056802]
Is done: False
Reward: -0.038301962586627665


Tick #253
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69099274 2.37336557 5.44560447]
Is done: False
Reward: -0.04012032664397037


Tick #254
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.72271621 2.38143416 5.44008785]
Is done: False
Reward: -0.04124006146744427


Tick #255
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.75188427 2.37970063 5.43375987]
Is done: False
Reward: -0.04135952566532531


Tick #256
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.76934706 2.37024203 5.42800754]
Is done: False
Reward: -0.04043236271286445


Tick #257
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.76967251 2.35796865 5.42247882]
Is done: False
Reward: -0.03873918527474352


Tick #258
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.75296858 2.3486866  5.4165490


Tick #300
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.78208896 2.40729154 5.24691185]
Is done: False
Reward: -0.034494322521961024


Tick #301
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.79186207 2.40290011 5.24342811]
Is done: False
Reward: -0.03392503796719731


Tick #302
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.79112934 2.39673392 5.23959624]
Is done: False
Reward: -0.03285458395602219


Tick #303
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.7801357  2.39165755 5.23548863]
Is done: False
Reward: -0.03155868661008725


Tick #304
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.76159644 2.39004928 5.231209  ]
Is done: False
Reward: -0.030373994104871658


Tick #305
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.73983329 2.39310856 5.22758189]
Is done: False
Reward: -0.030373994104871658


Tick #306
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.73983329 2.39310856 5.22758189]
Is done: False
Reward: -0.029059018968043


Tick #349
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70932805 2.39714101 5.10953728]
Is done: False
Reward: -0.022730039518295123


Tick #350
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70424053 2.40426393 5.10793379]
Is done: False
Reward: -0.022707401683748272


Tick #351
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69951459 2.41107106 5.10542589]
Is done: False
Reward: -0.022653561385839658


Tick #352
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69554157 2.41661799 5.10283712]
Is done: False
Reward: -0.02245751053685238


Tick #353
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6923823  2.41998418 5.09893472]
Is done: False
Reward: -0.02212537953231893


Tick #354
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68973091 2.42045343 5.09485632]
Is done: False
Reward: -0.021634808462659617


Tick #355
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68701229 2.4176666  5.0911147 ]
Is done: False
Reward: -0.02092933637100


Tick #399
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66359898 2.35985875 5.01136783]
Is done: False
Reward: -0.012589987931819183


Tick #400
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63973188 2.35163087 5.01089746]
Is done: False
Reward: -0.013820594564162242


Tick #401
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61345008 2.34835474 5.01108824]
Is done: False
Reward: -0.015230350049777424


Tick #402
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58934939 2.35048525 5.01143037]
Is done: False
Reward: -0.016495490327250062


Tick #403
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.5717229  2.35714503 5.01099149]
Is done: False
Reward: -0.016495490327250062


Tick #404
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.5717229  2.35714503 5.01099149]
Is done: False
Reward: -0.017598579160448404


Tick #405
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.56700376 2.37559286 5.0086173 ]
Is done: False
Reward: -0.017124372137


Tick #452
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6935132  2.3763203  4.95221556]
Is done: False
Reward: -0.012596774726124884


Tick #453
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70968592 2.36456699 4.95117736]
Is done: False
Reward: -0.01239858831382157


Tick #454
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71796442 2.35342117 4.95030895]
Is done: False
Reward: -0.01189294931763127


Tick #455
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71786526 2.34503311 4.94926507]
Is done: False
Reward: -0.011225699688367698


Tick #456
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71037335 2.34113662 4.94807608]
Is done: False
Reward: -0.010567835795993007


Tick #457
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69768264 2.34273395 4.94676899]
Is done: False
Reward: -0.010099444537792995


Tick #458
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68269556 2.34994161 4.94571943]
Is done: False
Reward: -0.00991922947882


Tick #507
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.72504742 2.42044811 4.90456499]
Is done: False
Reward: -0.013705310754073987


Tick #508
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71778882 2.42397181 4.9045651 ]
Is done: False
Reward: -0.013411876746667235


Tick #509
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70931347 2.42702664 4.90445451]
Is done: False
Reward: -0.01307835422557035


Tick #510
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70027196 2.42967494 4.90456095]
Is done: False
Reward: -0.012743473196667408


Tick #511
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69147829 2.43195738 4.90475981]
Is done: False
Reward: -0.012462959063276963


Tick #512
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68382364 2.43385308 4.90523118]
Is done: False
Reward: -0.012227274698854313


Tick #513
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67817176 2.43523445 4.90505914]
Is done: False
Reward: -0.0120543648708


Tick #562
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.57952498 2.42369736 4.87609148]
Is done: False
Reward: -0.01205296054738215


Tick #563
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58902767 2.42628749 4.87541388]
Is done: False
Reward: -0.011113380582635933


Tick #564
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60298793 2.42325445 4.87469647]
Is done: False
Reward: -0.011113380582635933


Tick #565
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60298793 2.42325445 4.87469647]
Is done: False
Reward: -0.008267576134229907


Tick #566
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63383981 2.40114098 4.87401955]
Is done: False
Reward: -0.006715540088329049


Tick #567
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64562294 2.38455532 4.87313309]
Is done: False
Reward: -0.0055597473940332594


Tick #568
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65276451 2.36682149 4.87319912]
Is done: False
Reward: -0.004769404500


Tick #617
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64542976 2.36233717 4.86321475]
Is done: False
Reward: -0.004910939936482175


Tick #618
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65212284 2.3666062  4.86182632]
Is done: False
Reward: -0.004910939936482175


Tick #619
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65212284 2.3666062  4.86182632]
Is done: False
Reward: -0.005714798575489049


Tick #620
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66064733 2.37627165 4.8587888 ]
Is done: False
Reward: -0.005890910918194161


Tick #621
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66182214 2.38058626 4.85661902]
Is done: False
Reward: -0.005936518549804901


Tick #622
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66066816 2.38401789 4.85520106]
Is done: False
Reward: -0.005835751418411011


Tick #623
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65705336 2.38626322 4.8546711 ]
Is done: False
Reward: -0.005574736496


Tick #673
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71762697 2.42521993 4.84455624]
Is done: False
Reward: -0.009237937051541297


Tick #674
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70642321 2.41107403 4.84462029]
Is done: False
Reward: -0.007739067270246759


Tick #675
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69043886 2.39867028 4.84475529]
Is done: False
Reward: -0.006285834261796544


Tick #676
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67160965 2.3895311  4.84533082]
Is done: False
Reward: -0.005035012663155105


Tick #677
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65232748 2.38462783 4.84593877]
Is done: False
Reward: -0.0058254759811837645


Tick #678
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63515096 2.38426978 4.84616929]
Is done: False
Reward: -0.006730256917935182


Tick #679
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62245064 2.3881213  4.84667221]
Is done: False
Reward: -0.00744796616


Tick #727
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.59096468 2.37824973 4.83665607]
Is done: False
Reward: -0.006432440404376178


Tick #728
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60073776 2.37142599 4.83604091]
Is done: False
Reward: -0.00545101645177065


Tick #729
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61118238 2.36371409 4.83569799]
Is done: False
Reward: -0.004474640982568793


Tick #730
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62078673 2.35557223 4.83503991]
Is done: False
Reward: -0.0036304873164947366


Tick #731
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62804396 2.34771182 4.8342456 ]
Is done: False
Reward: -0.0030916023696306795


Tick #732
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63176549 2.3409984  4.83452277]
Is done: False
Reward: -0.0029234705999751777


Tick #733
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6313002  2.33634186 4.83554481]
Is done: False
Reward: -0.0031331215


Tick #782
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68491269 2.37714152 4.8318352 ]
Is done: False
Reward: -0.004700124630837561


Tick #783
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67394037 2.37055983 4.83208139]
Is done: False
Reward: -0.0035535727033809777


Tick #784
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65759208 2.36528173 4.83209572]
Is done: False
Reward: -0.00374225199031397


Tick #785
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63807697 2.3621643  4.83193284]
Is done: False
Reward: -0.0047482802713070685


Tick #786
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61793132 2.36163801 4.83127671]
Is done: False
Reward: -0.005821347164539374


Tick #787
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.59967927 2.36365969 4.83122987]
Is done: False
Reward: -0.0067965957922294686


Tick #788
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58555394 2.36775606 4.83139121]
Is done: False
Reward: -0.0075031369


Tick #837
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65030522 2.43276356 4.82692546]
Is done: False
Reward: -0.0067813768608172825


Tick #838
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66312136 2.42726928 4.82542171]
Is done: False
Reward: -0.007059537892618652


Tick #839
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67655635 2.41958093 4.82491829]
Is done: False
Reward: -0.00719695544755408


Tick #840
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68860941 2.41028012 4.82475631]
Is done: False
Reward: -0.007177242534302762


Tick #841
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69764342 2.40023768 4.82539316]
Is done: False
Reward: -0.006935722897305177


Tick #842
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70230612 2.39057502 4.82584057]
Is done: False
Reward: -0.006504510021684658


Tick #843
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70183648 2.3825359  4.82622114]
Is done: False
Reward: -0.005948803897


Tick #894
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63419438 2.35983496 4.82653924]
Is done: False
Reward: -0.0037234297461558228


Tick #895
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63347697 2.36308267 4.82605968]
Is done: False
Reward: -0.004012197809881107


Tick #896
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6307129  2.36656071 4.82526073]
Is done: False
Reward: -0.004349216600210773


Tick #897
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62646832 2.36977225 4.82415728]
Is done: False
Reward: -0.004697948640327307


Tick #898
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62131722 2.37218626 4.82316564]
Is done: False
Reward: -0.004991881954374619


Tick #899
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61574978 2.37331016 4.82201482]
Is done: False
Reward: -0.005199094299561923


Tick #900
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61014516 2.37278032 4.8208459 ]
Is done: False
Reward: -0.00532546209


Tick #949
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6218035  2.42580209 4.82225789]
Is done: False
Reward: -0.0063353253036039624


Tick #950
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64134796 2.42395708 4.82228945]
Is done: False
Reward: -0.006101571570794687


Tick #951
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66272269 2.41841097 4.82186463]
Is done: False
Reward: -0.006749068174591689


Tick #952
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68355578 2.40987942 4.82176814]
Is done: False
Reward: -0.007168874887912188


Tick #953
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70161836 2.39947497 4.8220232 ]
Is done: False
Reward: -0.007319466276292743


Tick #954
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71509469 2.38856451 4.82229592]
Is done: False
Reward: -0.007235964800168265


Tick #955
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.72281602 2.3785968  4.82296833]
Is done: False
Reward: -0.00693101691