In [1]:
import pandas as pd
import numpy as np
import random
import copy
import gym
import time
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
print(tf. __version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


1.13.1


#### The data

In [2]:
num_classes = 3
input_shape = (3,)

In [3]:
df = pd.read_csv('data/dataset_10000.csv')
class_dict = {'A':0, 'B':1, 'C':2}
df['label'] = df['label'].replace(class_dict)
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
#X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_train, y_train = np.array(X_train), np.array(y_train)
#X_val, y_val = np.array(X_val), np.array(y_val)
X_test, y_test = np.array(X_test), np.array(y_test)

#### The gym env subclass

In [4]:
class SyntheticEnv2(gym.Env):
    def __init__(self, data=(X_train, y_train)):
        print('Instantiating environment')
        super().__init__()
        self.action_space = gym.spaces.Discrete(6)
        #self.observation_space = gym.spaces.Box(low=0, high=1, shape=(1, 3), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(3,), dtype=np.float32)
        self.step_count = 0
        self.X, self.Y = data
        self.random = random
        self.dataset_idx = 0
        #self.x, self.y = self.X[self.dataset_idx].reshape(-1, 3), self.Y[self.dataset_idx]
        self.x, self.y = self.X[self.dataset_idx], self.Y[self.dataset_idx]
        self.state = np.zeros((1, 3), dtype=np.float32)
        #self.available_actions = np.zeros((1, 6), dtype=np.float32)
        self.total_reward = 0
        
    
    def reset(self):
        print('Resetting environment')
        random.seed(42)
        self.step_count = 0
        self.total_reward = 0
        self.state = np.zeros((1, 3), dtype=np.float32)
        #self.available_actions = np.zeros((1,6), dtype=np.float32)
        self.dataset_idx = random.randint(0, len(self.X)-1)
        self.x, self.y = self.X[self.dataset_idx], self.Y[self.dataset_idx]
        
        return self.state
        
    
    def render(self):
        print(f': Current state of the environment: {self.state}')
        #print(f'Available actions: {self.available_actions}')
    
    def step(self, action):
        print('A step in the environment')
        self.step_count += 1
        if action < 3:
            done = True
            next_state = None
            reward = int(action == self.y)
            y_actual = self.y
            y_pred = action
        else:
            done = False
            next_state = self._get_next_state(action)
            reward = 0
            y_actual = np.nan
            y_pred = np.nan
        self.total_reward += reward
        info = {'episode_length':self.step_count, 'total_reward': self.total_reward, 'y_actual':y_actual, 'y_pred': y_pred}
        print(f'current_state:{self.state}')
        print(f'next_state: {next_state}')
        print(f'reward: {reward}')
        print(f'Done: {done}')
        print(f'Info: {info}')
        return next_state, reward, done, info
    
    def _get_next_state(self, action): #_next_obs
        print('Getting tenext state in the environment')
        #self.available_actions[0, action] = 1
        feature_idx = action - 3
        self.x = self.x.reshape(-1, 3)
        x_value = self.x[0, feature_idx]
        next_state = copy.deepcopy(self.state)
        next_state[0, feature_idx] = x_value
        #print(f'Next state: {next_state}')
        return next_state

In [5]:
from baselines.ppo2 import ppo2
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

from baselines import bench
from baselines import logger
from baselines import deepq
from baselines.common.tf_util import make_session

In [6]:
def synthetic_dqn2():
    logger.configure(dir='./logs/synthetic_dqn2', format_strs=['stdout', 'tensorboard'])
    env = SyntheticEnv2()
    env = bench.Monitor(env, logger.get_dir())

    model = deepq.learn(
        env,
        'mlp',
        num_layers=3, #change number of layers
        num_hidden=64,
        activation=tf.nn.relu,
        hiddens=[32],
        dueling=False, 
        lr=1e-4,
        total_timesteps=int(1.2e5),
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
    )

    model.save('models/dqn_synth2_real.pkl')
    env.close()

    return model

start_time = time.time()
dqn_model = synthetic_dqn2()
print("DQN Training Time:", time.time() - start_time)

Logging to ./logs/synthetic_dqn2
Instantiating environment
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.




Instructions for updating:
Use tf.cast instead.
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.       

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: Tru

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Get

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 5, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_sta

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred':

Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True


Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A ste

Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resettin

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 5, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_act

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
I

Resetting environment
--------------------------------------
| % time spent exploring  | 90       |
| episodes                | 600      |
| mean 100 episode reward | 0.4      |
| steps                   | 1.12e+03 |
--------------------------------------
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
n

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, '

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_act

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_rewar

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_rewar

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
--------------------------------------
| % time spent exploring  | 86       |
| episodes                | 900      |
| mean 100 episode reward | 0.4      |
| steps                   | 1.65e+03 |
--------------------------------------
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 4, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.

Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Reset

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.7

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
--------------------------------------
| % time spent exploring  | 82       |
| episodes                | 1.2e+03  |
| mean 100 episode reward | 0.4      |
| steps                   | 2.15e+03 |
--------------------------------------
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
ne

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred':

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step i

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environmen

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 4, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 4, 'total_reward': 1, 'y_actual': 0, 'y

Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
cur

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the enviro

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 4, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step i

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environmen

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: Fals

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: Tru

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
--------------------------------------
| % time spent exploring  | 66       |
| episodes                | 2.4e+03  |
| mean 100 episode reward | 0.5      |
| steps                   | 4.12e+03 |
--------------------------------------
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0


A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: Fals

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 5, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Get

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred':

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_act

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 5, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
cur

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 5, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 6, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
--------------------------------------
| % time spent exploring  | 57       |
| episodes                | 3.1e+03  |
| mean 100 episode reward | 0.6      |
| steps                   | 5.14e+03 |
--------------------------------------
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Get

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, '

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the enviro

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_stat

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environmen

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_act

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
cur

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 5, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the envi

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 4, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Don

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Reset

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
cur

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the enviro

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 5, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'tot

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred':

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_ac

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step i

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 3, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
cur

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_act

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Get

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
cur

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_stat

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred':

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step i

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 4, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.        0.        0.4387755]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step i

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the enviro

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Get

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step 

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Get

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_stat

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_st

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the enviro

A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 3, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 1}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0. 0. 0.]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 2, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
cur

A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A ste

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Reset

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
Getting tenext state in the environment
current_state:[[0. 0. 0.]]
next_state: [[0.78571427 0.         0.        ]]
reward: 0
Done: False
Info: {'episode_length': 1, 'total_reward': 0, 'y_actual': nan, 'y_pred': nan}
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 0
Done: True
Info: {'episode_length': 2, 'total_reward': 0, 'y_actual': 0, 'y_pred': 2}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Reset

Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_state:[[0. 0. 0.]]
next_state: None
reward: 1
Done: True
Info: {'episode_length': 1, 'total_reward': 1, 'y_actual': 0, 'y_pred': 0}
Resetting environment
A step in the environment
current_stat

  return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones)


ValueError: cannot reshape array of size 32 into shape (3)