In [9]:
import argparse
import gym
import gymfc
import numpy as np
from mpi4py import MPI
import math
import os
import time


import nest_asyncio
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop and gymfc starts its own — confirm against gymfc.
nest_asyncio.apply()

In [10]:
# Build the path to the Iris configuration relative to the working directory
# the notebook was started from, show both paths, and export the result
# through the GYMFC_CONFIG environment variable.
# NOTE(review): presumably gymfc reads GYMFC_CONFIG when the environment is
# created — confirm against gymfc.
current_dir = os.getcwd()
config_path = os.path.join(current_dir, "../configs/iris.config")
print(current_dir, config_path, sep="\n")
os.environ["GYMFC_CONFIG"] = config_path

/home/ec2-user/nti/gymfc/examples/controllers
/home/ec2-user/nti/gymfc/examples/controllers/../configs/iris.config


In [11]:
class Policy(object):
    """Minimal controller interface: one action per step, plus a reset hook.

    Both methods are no-ops in this base class and return ``None``;
    subclasses override them with real behaviour.
    """

    def action(self, state, sim_time=0, desired=np.zeros(3), actual=np.zeros(3)):
        """Return the action for the current step (nothing in the base class).

        Args:
            state: Current observation.
            sim_time: Current time stamp.
            desired: Three-element target vector.
            actual: Three-element measured vector.
        """
        return None

    def reset(self):
        """Drop any per-run state (nothing to do in the base class)."""
        return None


class PIDPolicy(Policy):
    """Policy that delegates to a ``PIDController`` with fixed gains."""

    def __init__(self):
        # Gain triples handed to PIDController for roll, pitch and yaw.
        self.r = [2, 10, 0.005]
        self.p = [10, 10, 0.005]
        self.y = [4, 50, 0.0]
        self.controller = PIDController(pid_roll=self.r, pid_pitch=self.p, pid_yaw=self.y)

    def action(self, state, sim_time=0, desired=np.zeros(3), actual=np.zeros(3)):
        """Compute one motor command per motor, scaled into [-1, 1].

        Args:
            state: Current observation (not used by this policy).
            sim_time: Time stamp forwarded to the controller.
            desired: Target values in radians, one per axis.
            actual: Measured values in radians, one per axis.

        Returns:
            numpy array with one scaled command per motor value returned by
            the controller.
        """
        # The controller works in degrees.
        desired_deg = [math.degrees(value) for value in desired]
        actual_deg = [math.degrees(value) for value in actual]
        motor_values = np.array(
            self.controller.calculate_motor_values(sim_time, desired_deg, actual_deg)
        )
        # Map the 1000..2000 command range linearly onto -1..1.
        return (motor_values - 1000) / 500 - 1

    def reset(self):
        """Replace the controller with a fresh one built from the same gains."""
        self.controller = PIDController(pid_roll=self.r, pid_pitch=self.p, pid_yaw=self.y)

In [12]:
class PIDController(object):
    """Runs one PID loop per axis and mixes the three outputs into four motor commands.

    The gain triples are stored twice: multiplied by the ``*_SCALE`` factors
    in ``Kp`` / ``Ki`` / ``Kd``, and unscaled inside the per-axis ``PID``
    objects in ``pid_rpy``. Only ``pid_rpy`` is used by
    ``calculate_motor_values``.
    """

    # Axis indices.
    FD_ROLL = 0
    FD_PITCH = 1
    FD_YAW = 2
    # Multipliers applied to the gains stored in Kp / Ki / Kd.
    PTERM_SCALE = 0.032029
    ITERM_SCALE = 0.244381
    DTERM_SCALE = 0.000529
    # Lower offset and upper end used to derive the motor output span in mix().
    minthrottle = 1070
    maxthrottle = 2000

    def __init__(self, pid_roll=[40, 40, 30], pid_pitch=[58, 50, 35], pid_yaw=[80, 45, 20], itermLimit=150):
        """Store the gains and build one ``PID`` per axis.

        Args:
            pid_roll: Gain triple for roll; unpacked positionally into ``PID``.
            pid_pitch: Gain triple for pitch.
            pid_yaw: Gain triple for yaw.
            itermLimit: Stored on the instance; not read elsewhere in this class.
        """
        gains_by_axis = (pid_roll, pid_pitch, pid_yaw)

        # Scaled copies of the gains, one list per term, ordered roll/pitch/yaw.
        self.Kp = [self.PTERM_SCALE * gains[0] for gains in gains_by_axis]
        self.Ki = [self.ITERM_SCALE * gains[1] for gains in gains_by_axis]
        self.Kd = [self.DTERM_SCALE * gains[2] for gains in gains_by_axis]

        self.itermLimit = itermLimit

        self.previousRateError = [0] * 3
        self.previousTime = 0
        self.previous_motor_values = [self.minthrottle] * 4
        # The PID objects receive the unscaled triples.
        self.pid_rpy = [PID(*gains) for gains in gains_by_axis]

    def calculate_motor_values(self, current_time, sp_rates, gyro_rates):
        """Update the three axis PIDs and return the mixed motor commands.

        Args:
            current_time: Time stamp passed to every ``PID.update`` call.
            sp_rates: Setpoints indexed roll, pitch, yaw.
            gyro_rates: Measurements indexed roll, pitch, yaw.

        Returns:
            The list of four integer motor commands produced by ``mix``.
        """
        axis_outputs = []
        for axis, pid in enumerate(self.pid_rpy):
            pid.SetPoint = sp_rates[axis]
            pid.update(current_time, gyro_rates[axis])
            axis_outputs.append(pid.output)
        return self.mix(*axis_outputs)

    def constrainf(self, amt, low, high):
        """Clamp ``amt`` to the closed interval [low, high]."""
        # From BF src/main/common/maths.h
        if amt < low:
            return low
        if amt > high:
            return high
        return amt

    def mix(self, r, p, y):
        """Turn roll/pitch/yaw PID sums into four motor commands.

        Each sum is clamped and divided by 1000, and the yaw term is negated.
        The per-motor mix is the weighted sum of the three terms. When the
        spread of the mixes exceeds 1.0 they are divided by the spread and the
        throttle is set to 0.5; otherwise the throttle becomes half the
        spread. Outputs are clamped to 1000..2000 and rounded to ints.

        Args:
            r: Roll PID sum.
            p: Pitch PID sum.
            y: Yaw PID sum.

        Returns:
            List of four ints ordered REAR_R, FRONT_R, REAR_L, FRONT_L.
        """
        pid_mixer_scaling = 1000.0
        pid_sum_limit = 10000.  # 500
        pid_sum_limit_yaw = 100000.  # 1000.0#400
        mix_sign = 1
        output_span = self.maxthrottle - self.minthrottle  # throttle max - throttle min
        output_floor = self.minthrottle
        range_min = 1000
        range_max = 2000

        # Per-motor weights; columns are throttle, roll, pitch, yaw.
        mixer = (
            (1.0, -1.0, 0.598, -1.0),  # REAR_R
            (1.0, -0.927, -0.598, 1.0),  # FRONT_R
            (1.0, 1.0, 0.598, 1.0),  # REAR_L
            (1.0, 0.927, -0.598, -1.0),  # FRONT_L
        )

        roll_term = self.constrainf(r, -pid_sum_limit, pid_sum_limit) / pid_mixer_scaling
        pitch_term = self.constrainf(p, -pid_sum_limit, pid_sum_limit) / pid_mixer_scaling
        yaw_term = -(self.constrainf(y, -pid_sum_limit_yaw, pid_sum_limit_yaw) / pid_mixer_scaling)

        # Find roll/pitch/yaw desired output
        motor_mix = [
            roll_term * roll_w + pitch_term * pitch_w + yaw_term * yaw_w
            for _throttle_w, roll_w, pitch_w, yaw_w in mixer
        ]

        # Both extremes start at 0, so the spread always includes zero.
        mix_range = max(0, *motor_mix) - min(0, *motor_mix)

        if mix_range > 1.0:
            motor_mix = [value / mix_range for value in motor_mix]
            # Get the maximum correction by setting offset to center when airmode enabled
            throttle = 0.5
        else:
            # No additional throttle, in air mode: starting from 0, the
            # throttle is only raised enough to keep the mix within range.
            half_range = mix_range / 2.0
            throttle = self.constrainf(0, half_range, 1.0 - half_range)

        motor = [
            self.constrainf(
                output_floor + (output_span * (mix_sign * value + throttle * weights[0])),
                range_min,
                range_max,
            )
            for value, weights in zip(motor_mix, mixer)
        ]

        return [int(command) for command in np.round(motor)]

    def is_airmode_active(self):
        """Always report airmode as active."""
        return True

    def reset(self):
        """Call ``clear()`` on each axis PID."""
        for axis_pid in self.pid_rpy:
            axis_pid.clear()

In [13]:
class PID:
    """Single-axis PID controller driven by caller-supplied time stamps.

    The setpoint is the public attribute ``SetPoint`` and the latest result
    is the public attribute ``output``; ``update`` returns nothing.
    """

    def __init__(self, P=0.2, I=0.0, D=0.0):
        """Store the three gains and initialise all controller state.

        Args:
            P: Proportional gain.
            I: Integral gain.
            D: Derivative gain.
        """
        self.Kp = P
        self.Ki = I
        self.Kd = D

        # Minimum time between two accepted updates; 0 accepts every sample
        # whose time stamp is not older than the previous one.
        self.sample_time = 0.00

        # clear() sets up the time base together with the rest of the state.
        self.clear()

    def clear(self):
        """Return the controller to its freshly-constructed state.

        Resets the setpoint, the P/I/D terms, the last error, the output, the
        windup guard (back to 20.0) and the time base. The gains and
        ``sample_time`` are kept.

        The time base must be reset here. ``update`` ignores any sample whose
        time stamp is older than ``last_time``, so a controller cleared
        between runs of a clock that restarts from zero would otherwise stay
        frozen until the new clock passed the old time stamp.
        """
        self.SetPoint = 0.0

        self.PTerm = 0.0
        self.ITerm = 0.0
        self.DTerm = 0.0
        self.last_error = 0.0

        # Windup Guard
        self.int_error = 0.0
        self.windup_guard = 20.0

        self.output = 0.0

        # Time base, identical to the values a new instance starts with.
        self.current_time = 0
        self.last_time = self.current_time

    def update(self, current_time, feedback_value):
        r"""Calculates PID value for given reference feedback

        .. math::
            u(t) = K_p e(t) + K_i \int_{0}^{t} e(t)dt + K_d {de}/{dt}

        .. figure:: images/pid_1.png
           :align:   center

           Test PID with Kp=1.2, Ki=1, Kd=0.001 (test_pid.py)

        Args:
            current_time: Time stamp of this sample, in the same unit as
                ``sample_time``.
            feedback_value: Measured value to compare against ``SetPoint``.

        The result is stored in ``self.output``. A sample arriving less than
        ``sample_time`` after the previous accepted one is ignored and
        ``output`` keeps its previous value.
        """
        error = self.SetPoint - feedback_value

        delta_time = current_time - self.last_time
        delta_error = error - self.last_error

        if (delta_time >= self.sample_time):
            self.PTerm = self.Kp * error
            self.ITerm += error * delta_time

            # Clamp the accumulated integral to +/- windup_guard.
            if (self.ITerm < -self.windup_guard):
                self.ITerm = -self.windup_guard
            elif (self.ITerm > self.windup_guard):
                self.ITerm = self.windup_guard

            # No derivative on a zero-length step (avoids dividing by zero).
            self.DTerm = 0.0
            if delta_time > 0:
                self.DTerm = delta_error / delta_time

            # Remember last time and last error for next calculation
            self.last_time = current_time
            self.last_error = error

            # print("P=", self.PTerm, " I=", self.ITerm, " D=", self.DTerm)
            self.output = self.PTerm + (self.Ki * self.ITerm) + (self.Kd * self.DTerm)

    def setKp(self, proportional_gain):
        """Determines how aggressively the PID reacts to the current error with setting Proportional Gain"""
        self.Kp = proportional_gain

    def setKi(self, integral_gain):
        """Determines how aggressively the PID reacts to the current error with setting Integral Gain"""
        self.Ki = integral_gain

    def setKd(self, derivative_gain):
        """Determines how aggressively the PID reacts to the current error with setting Derivative Gain"""
        self.Kd = derivative_gain

    def setWindup(self, windup):
        """Integral windup, also known as integrator windup or reset windup,
        refers to the situation in a PID feedback controller where
        a large change in setpoint occurs (say a positive change)
        and the integral terms accumulates a significant error
        during the rise (windup), thus overshooting and continuing
        to increase as this accumulated error is unwound
        (offset by errors in the other direction).
        The specific problem is the excess overshooting.
        """
        self.windup_guard = windup

    def setSampleTime(self, sample_time):
        """PID that should be updated at a regular interval.
        Based on a pre-determined sample time, the PID decides if it should compute or return immediately.
        """
        self.sample_time = sample_time


In [14]:
def do(env, pi, verbose=True):
    """Run the policy in the environment until the environment reports done.

    The environment is always closed before this function returns or raises,
    so a failure during the run does not leave the simulator open.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``, ``close()``
            and the attributes ``omega_target``, ``omega_actual`` and
            ``sim_time``.
        pi: Policy exposing ``reset()`` and
            ``action(ob, sim_time, desired, actual)``.
        verbose: When true (the default), print a status block for every tick.

    Returns:
        Tuple ``(desireds, actuals, ticks)``: the target and measured values
        read before each step, and the number of steps taken.
    """
    actuals = []
    desireds = []
    ticks = 0
    try:
        pi.reset()
        ob = env.reset()
        while True:
            # Read both values before stepping; they are what the policy acted on.
            # NOTE(review): the objects are stored as returned by the env; if the
            # env reuses and mutates the same arrays, copy them here — confirm.
            desired = env.omega_target
            actual = env.omega_actual

            ac = pi.action(ob, env.sim_time, desired, actual)

            ob, reward, done, info = env.step(ac)

            if verbose:
                print()
                print('Tick #'+str(ticks))
                print('='*20)
                print('Need:', desired)
                print('Right now:', actual)
                print('Is done:', done)
                print('Reward:', reward)
                print('='*20)
                print()

            actuals.append(actual)
            desireds.append(desired)
            ticks += 1

            if done:
                break
    finally:
        env.close()
    return desireds, actuals, ticks

In [15]:
def main(env_id='AttFC_GyroErr-MotorVel_M4_Ep-v0', seed=17):
    """Create the environment, seed it, run one PID-controlled run and print the tick count.

    Args:
        env_id: Gym environment id passed to ``gym.make``.
        seed: Base seed; each MPI rank offsets it by 1000000 * rank.
    """
    env = gym.make(env_id)

    # Offset the seed per MPI rank so that ranks do not share a seed.
    worker_seed = seed + 1000000 * MPI.COMM_WORLD.Get_rank()
    env.seed(worker_seed)

    # Only the tick count is reported; the recorded traces are discarded.
    _, _, ticks = do(env, PIDPolicy())

    print('Final ticks', ticks)
    

In [16]:
# Run once with the default environment id and seed; do() prints one status
# block per tick, which produces the long output below.
main()

Starting gzserver with process ID= 6464

Tick #0
Need: [2.65125294 2.32801414 4.80871902]
Right now: [ 0. -0. -0.]
Is done: False
Reward: -0.5192675764115963


Tick #1
Need: [2.65125294 2.32801414 4.80871902]
Right now: [ 0. -0. -0.]
Is done: False
Reward: -0.5192675764115963


Tick #2
Need: [2.65125294 2.32801414 4.80871902]
Right now: [ 0. -0. -0.]
Is done: False
Reward: -0.5148286245170549


Tick #3
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.08870226e-18 2.22031817e-07 8.36722456e-02]
Is done: False
Reward: -0.5104903856165897


Tick #4
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.00712254e-04 4.27950463e-05 1.65303028e-01]
Is done: False
Reward: -0.5062358211519692


Tick #5
Need: [2.65125294 2.32801414 4.80871902]
Right now: [4.94123942e-04 2.08263809e-04 2.44940986e-01]
Is done: False
Reward: -0.5020490023472447


Tick #6
Need: [2.65125294 2.32801414 4.80871902]
Right now: [0.00135811 0.00057085 0.32263428]
Is done: False
Reward: -0.4979151014452757


Tick #


Tick #54
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.42067371 0.72408894 2.85404504]
Is done: False
Reward: -0.2479319670773715


Tick #55
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.45464953 0.75861184 2.90130634]
Is done: False
Reward: -0.24157690806200055


Tick #56
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.49723385 0.78923583 2.94788834]
Is done: False
Reward: -0.23532743442620677


Tick #57
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.5465519  0.81181542 2.99379078]
Is done: False
Reward: -0.22951607533419463


Tick #58
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.59740917 0.82529455 3.03899618]
Is done: False
Reward: -0.22429177865178498


Tick #59
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.64347064 0.83264905 3.08405611]
Is done: False
Reward: -0.21960699342248471


Tick #60
Need: [2.65125294 2.32801414 4.80871902]
Right now: [1.68011391 0.83996295 3.12840526]
Is done: False
Reward: -0.21511643878623782


Tick #


Tick #108
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65996914 1.60566712 4.73241371]
Is done: False
Reward: -0.041887818677695754


Tick #109
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65610998 1.59533625 4.75668533]
Is done: False
Reward: -0.04060138032484793


Tick #110
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6390317  1.60452059 4.77911403]
Is done: False
Reward: -0.0384411562782246


Tick #111
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62861489 1.63453883 4.80023196]
Is done: False
Reward: -0.035974800334228375


Tick #112
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63999464 1.67455346 4.82211064]
Is done: False
Reward: -0.035960087170271604


Tick #113
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67283533 1.70807086 4.84502662]
Is done: False
Reward: -0.038476871807403376


Tick #114
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71078789 1.72266701 4.86911059]
Is done: False
Reward: -0.041196236481468


Tick #161
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.87178945 2.01123509 5.71646908]
Is done: False
Reward: -0.07629183375188167


Tick #162
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.84722234 2.0025884  5.72539444]
Is done: False
Reward: -0.07456901151369702


Tick #163
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.82665253 2.01728711 5.72818845]
Is done: False
Reward: -0.07334260677262083


Tick #164
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.82986816 2.04801757 5.73258604]
Is done: False
Reward: -0.0739921629379189


Tick #165
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.86142178 2.07852133 5.74378006]
Is done: False
Reward: -0.07638669303423194


Tick #166
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.90659294 2.09426476 5.75948826]
Is done: False
Reward: -0.07929735840960679


Tick #167
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.93958533 2.09159349 5.77868948]
Is done: False
Reward: -0.0807579910708952





Tick #215
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.72469341 2.38208736 5.65519519]
Is done: False
Reward: -0.051705997418828156


Tick #216
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.74820562 2.36812465 5.6462932 ]
Is done: False
Reward: -0.0503508365154535


Tick #217
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.74932403 2.34880528 5.63894993]
Is done: False
Reward: -0.04813463332948582


Tick #218
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.72825395 2.33360481 5.63344593]
Is done: False
Reward: -0.045906106837017666


Tick #219
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69418421 2.32985163 5.62926201]
Is done: False
Reward: -0.044412628879335225


Tick #220
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66100979 2.33943555 5.62470106]
Is done: False
Reward: -0.045128013522628976


Tick #221
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64141142 2.35798536 5.61955128]
Is done: False
Reward: -0.045788157866630


Tick #269
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71899592 2.34062577 5.36436497]
Is done: False
Reward: -0.032154227158596435


Tick #270
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70166282 2.33206414 5.36035347]
Is done: False
Reward: -0.0311734037814469


Tick #271
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68492714 2.33407558 5.35658957]
Is done: False
Reward: -0.031096042719538686


Tick #272
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67566683 2.3452275  5.35323975]
Is done: False
Reward: -0.031911874383161044


Tick #273
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67851258 2.3616892  5.34931039]
Is done: False
Reward: -0.033428940788393334


Tick #274
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69435463 2.37848737 5.34526627]
Is done: False
Reward: -0.035218388078948


Tick #275
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71997158 2.39098979 5.34087727]
Is done: False
Reward: -0.03679318426211009


Tick #324
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.74909393 2.4053986  5.17467889]
Is done: False
Reward: -0.029534921776682557


Tick #325
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.76387654 2.4101626  5.17066844]
Is done: False
Reward: -0.029304286097439992


Tick #326
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.76376869 2.40977402 5.16681747]
Is done: False
Reward: -0.02877683974002146


Tick #327
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.75816495 2.40884126 5.16341181]
Is done: False
Reward: -0.027999669925828977


Tick #328
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.74780942 2.40828835 5.15967091]
Is done: False
Reward: -0.0270887202851895


Tick #329
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.7345389  2.40870827 5.15535048]
Is done: False
Reward: -0.026191012649783994


Tick #330
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.72044747 2.41018579 5.15104296]
Is done: False
Reward: -0.025440572415787


Tick #380
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62315281 2.41013204 5.04473205]
Is done: False
Reward: -0.017326620854308743


Tick #381
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63426775 2.40473238 5.04161546]
Is done: False
Reward: -0.015965779085784


Tick #382
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64551092 2.39340349 5.0385362 ]
Is done: False
Reward: -0.014840792629203153


Tick #383
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65393307 2.37762623 5.03616982]
Is done: False
Reward: -0.013986458540716958


Tick #384
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65730492 2.35984752 5.03447281]
Is done: False
Reward: -0.01289317172086913


Tick #385
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65460994 2.34304179 5.03336551]
Is done: False
Reward: -0.012268532790276343


Tick #386
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64640304 2.3301216  5.0330186 ]
Is done: False
Reward: -0.0130472319212239


Tick #434
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63146064 2.38174995 4.9719404 ]
Is done: False
Reward: -0.012211839184756233


Tick #435
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65479921 2.39295811 4.97041708]
Is done: False
Reward: -0.013688602461775628


Tick #436
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67868436 2.39863957 4.96868686]
Is done: False
Reward: -0.014617567731664696


Tick #437
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69877494 2.3982749  4.96647157]
Is done: False
Reward: -0.01486571950825203


Tick #438
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71131921 2.39258099 4.96429878]
Is done: False
Reward: -0.01442840691995666


Tick #439
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.71394857 2.38331528 4.96269195]
Is done: False
Reward: -0.01339088561826555


Tick #440
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.70609555 2.37288143 4.96142197]
Is done: False
Reward: -0.011981573848196


Tick #487
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69848302 2.40291716 4.91878643]
Is done: False
Reward: -0.011610583153167028


Tick #488
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68575209 2.40375421 4.91733465]
Is done: False
Reward: -0.01108213205274844


Tick #489
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67521089 2.40516238 4.9165066 ]
Is done: False
Reward: -0.010734506185175595


Tick #490
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66801677 2.40662051 4.91568998]
Is done: False
Reward: -0.010595614186990598


Tick #491
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66477622 2.40756927 4.91536371]
Is done: False
Reward: -0.01062733237155536


Tick #492
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66552201 2.4075521  4.91523297]
Is done: False
Reward: -0.010795153424191095


Tick #493
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66979451 2.40634645 4.91532948]
Is done: False
Reward: -0.01105642089815


Tick #542
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6181358  2.3867359  4.88679597]
Is done: False
Reward: -0.009827344673254577


Tick #543
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60638425 2.39028346 4.88682253]
Is done: False
Reward: -0.010440259213303315


Tick #544
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60164433 2.39687231 4.88704695]
Is done: False
Reward: -0.010686922309783204


Tick #545
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60493108 2.40496771 4.8868878 ]
Is done: False
Reward: -0.010476354156439997


Tick #546
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61608155 2.41284878 4.88618808]
Is done: False
Reward: -0.00979018933250268


Tick #547
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63371596 2.41888065 4.88485669]
Is done: False
Reward: -0.009166903936366905


Tick #548
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65541355 2.42178077 4.88358426]
Is done: False
Reward: -0.0102331436855


Tick #595
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61625786 2.36312136 4.86644408]
Is done: False
Reward: -0.006297542551476234


Tick #596
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62993988 2.36739916 4.8667271 ]
Is done: False
Reward: -0.005842052664297977


Tick #597
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64159006 2.37046458 4.86672605]
Is done: False
Reward: -0.005842052664297977


Tick #598
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64159006 2.37046458 4.86672605]
Is done: False
Reward: -0.00560473270180185


Tick #599
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65344378 2.37322277 4.86696652]
Is done: False
Reward: -0.0055318777076038906


Tick #600
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65245485 2.3734515  4.86635344]
Is done: False
Reward: -0.005637085544801687


Tick #601
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6472468  2.37340331 4.86558051]
Is done: False
Reward: -0.006026431878


Tick #650
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63458218 2.43352945 4.85207067]
Is done: False
Reward: -0.00832733508388624


Tick #651
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64340411 2.43525595 4.85059532]
Is done: False
Reward: -0.007918263519831912


Tick #652
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65537848 2.43259037 4.84927336]
Is done: False
Reward: -0.0082231849184732


Tick #653
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66882947 2.42562757 4.84853281]
Is done: False
Reward: -0.008331331044546215


Tick #654
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68188012 2.41503671 4.84811154]
Is done: False
Reward: -0.008178364617866972


Tick #655
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69268226 2.40201647 4.84744628]
Is done: False
Reward: -0.007806779053777427


Tick #656
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.69938081 2.38833531 4.84742465]
Is done: False
Reward: -0.007280210150274


Tick #703
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62745962 2.42050958 4.84083824]
Is done: False
Reward: -0.006962949620293802


Tick #704
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63601294 2.4113585  4.84138348]
Is done: False
Reward: -0.006111641635035588


Tick #705
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64310601 2.40177106 4.84201717]
Is done: False
Reward: -0.0054004692249209965


Tick #706
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64821934 2.39285577 4.84264047]
Is done: False
Reward: -0.0048420157265916305


Tick #707
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65124362 2.38554311 4.84245079]
Is done: False
Reward: -0.004598199756339991


Tick #708
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65242308 2.38046601 4.84177125]
Is done: False
Reward: -0.004403628262516413


Tick #709
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65216685 2.3779044  4.8409215 ]
Is done: False
Reward: -0.0043100106


Tick #757
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.57946192 2.36594348 4.8380553 ]
Is done: False
Reward: -0.0076271246551957396


Tick #758
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58946142 2.38178422 4.83692566]
Is done: False
Reward: -0.00755857939172249


Tick #759
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60241777 2.39458088 4.8357933 ]
Is done: False
Reward: -0.007184017882988242


Tick #760
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61653526 2.4027756  4.83465575]
Is done: False
Reward: -0.006564307157201308


Tick #761
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.62978116 2.40541509 4.83358086]
Is done: False
Reward: -0.00578332930320818


Tick #762
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64015914 2.40231392 4.83233889]
Is done: False
Reward: -0.0049891104394832525


Tick #763
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64602273 2.39408769 4.831458  ]
Is done: False
Reward: -0.004329304747


Tick #811
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63608643 2.41362256 4.82974452]
Is done: False
Reward: -0.006035952417859583


Tick #812
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64068115 2.40952933 4.83040733]
Is done: False
Reward: -0.005521510664163866


Tick #813
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64610088 2.40435517 4.8313042 ]
Is done: False
Reward: -0.00503214875282827


Tick #814
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65195277 2.39891619 4.83197113]
Is done: False
Reward: -0.005129964073834001


Tick #815
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.65803004 2.39399526 4.83265857]
Is done: False
Reward: -0.0052545009635519046


Tick #816
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.66426188 2.39023732 4.83253215]
Is done: False
Reward: -0.005444441714314833


Tick #817
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67067031 2.3880555  4.83188585]
Is done: False
Reward: -0.005742038973


Tick #866
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58424754 2.36461283 4.82627497]
Is done: False
Reward: -0.007027769854965805


Tick #867
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58296927 2.37431437 4.82660577]
Is done: False
Reward: -0.007405678120546494


Tick #868
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58716824 2.38571072 4.82653181]
Is done: False
Reward: -0.007508084865230142


Tick #869
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.59626249 2.39724135 4.82602576]
Is done: False
Reward: -0.007361604639876724


Tick #870
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60896567 2.40742393 4.82578526]
Is done: False
Reward: -0.006958587203807035


Tick #871
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6234368  2.41501392 4.82506968]
Is done: False
Reward: -0.006406326010170639


Tick #872
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.63754237 2.41915216 4.82462711]
Is done: False
Reward: -0.005814653061


Tick #921
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.64918974 2.36727962 4.82059571]
Is done: False
Reward: -0.0033120950426398007


Tick #922
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6354224  2.36211429 4.82122   ]
Is done: False
Reward: -0.0039949332621858884


Tick #923
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.61960632 2.3580583  4.82233114]
Is done: False
Reward: -0.004775978514669534


Tick #924
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.60351657 2.35583573 4.82318635]
Is done: False
Reward: -0.005572375540084606


Tick #925
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.58908699 2.3558552  4.82374907]
Is done: False
Reward: -0.006299841020443191


Tick #926
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.57821314 2.35815628 4.82428656]
Is done: False
Reward: -0.006850282821795274


Tick #927
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.57248731 2.36243499 4.82465762]
Is done: False
Reward: -0.0071442246


Tick #976
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67596047 2.39889022 4.82183762]
Is done: False
Reward: -0.005561289418932366


Tick #977
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67432128 2.39746383 4.82102908]
Is done: False
Reward: -0.005545988361765229


Tick #978
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6739918  2.39838504 4.82014893]
Is done: False
Reward: -0.005707624714133437


Tick #979
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.67510372 2.40124164 4.8192272 ]
Is done: False
Reward: -0.00606639519306344


Tick #980
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.6774574  2.40550992 4.81936791]
Is done: False
Reward: -0.006491034382144939


Tick #981
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68055371 2.41064023 4.81914557]
Is done: False
Reward: -0.006978250736661101


Tick #982
Need: [2.65125294 2.32801414 4.80871902]
Right now: [2.68366284 2.41614269 4.81971782]
Is done: False
Reward: -0.0074098639176