# Stochastic Variational Method with RL algorithms

In [1]:
import gym
import numpy as np
import torch

import svm_env as svm

## Exploring the environment

In [2]:
# Build the SVM environment; `file_sigmas` is the path reported below as the
# file where the sigmas will be stored.
env = gym.make('svm_env:svmEnv-v0', file_sigmas="./svmCodeSVD/sigmas.dat")

# --- Observation side of the interface ---
obs_space = env.observation_space
state_size = obs_space.shape[-1]
print('###### Observation space ####### \n', obs_space)
print('###### Size of observation space ####### \n', state_size)

# --- Action side of the interface ---
act_space = env.action_space
act_size = act_space.shape[-1]
print('###### Action space ####### \n', act_space)
print('###### Number of actions ####### \n', act_size)

# Resetting returns the initial observation (the environment also logs to stdout).
state = env.reset()
print('##### State after reset ###### \n', state)

print('##### File where will be stored sigmas \n', env.file_sigmas)

###### Observation space ####### 
 Box(-inf, inf, (1,), float32)
###### Size of observation space ####### 
 1
###### Action space ####### 
 Box(-1.0, 1.0, (3,), float32)
###### Number of actions ####### 
 3
*****CALL RESET******
Action chosen at reset:  [0.]
Actions taken at reset:  []
Energies got at reset:  [0.0]
##### State after reset ###### 
 [0.]
##### File where will be stored sigmas 
 ./svmCodeSVD/sigmas.dat


## DDPG from `stable_baselines3`

In [None]:
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

# The noise objects for DDPG: zero-mean Gaussian exploration noise with one
# component per action dimension. The number of action dimensions is read from
# the environment so this cell does not depend on a name defined elsewhere.
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=1.0 * np.ones(n_actions))

# learning_starts=1 and batch_size=1 make the model start updating right after
# the first transition, using single-sample batches; gamma=1.0 disables discounting.
model = DDPG("MlpPolicy", env, action_noise=action_noise,
             learning_starts=1, batch_size=1, gamma=1.0, verbose=1)

# Reference: DDPG constructor signature
# (policy, env, learning_rate=0.001, buffer_size=1000000,learning_starts=100, batch_size=100, 
# tau=0.005, gamma=0.99, train_freq=(1, 'episode'),  gradient_steps=- 1, action_noise=None, 
# replay_buffer_class=None, replay_buffer_kwargs=None,  optimize_memory_usage=False, 
# tensorboard_log=None, create_eval_env=False, policy_kwargs=None,  verbose=0, seed=None, 
# device='auto', _init_setup_model=True)

model.learn(total_timesteps=10000, log_interval=10)

# Reference: learn() signature
# learn(total_timesteps, callback=None, log_interval=4, eval_env=None, eval_freq=- 1,
# n_eval_episodes=5, tb_log_name='DDPG', eval_log_path=None, reset_num_timesteps=True)

## From my `ddpg_agent.py` code

In [2]:
from ddpg_agent import Agent

In [3]:
# Fresh environment instance for the hand-written agent; no `file_sigmas`
# argument is passed here, so the environment's own default applies.
env = gym.make('svm_env:svmEnv-v0')
# Instance of the ddpg agent
# NOTE(review): the positional arguments 1 and 3 presumably are the state size
# and the action size (they match the sizes printed by the exploration cell) --
# confirm against the Agent constructor in ddpg_agent.py.
agent = Agent(1, 3, random_seed=2)

### Training loop
def run_ddpg(n_episodes=300):
    """Train the DDPG agent on the SVM environment and collect episode scores.

    Relies on the module-level `env` and `agent` objects. After every episode
    the current actor and critic weights are written to 'checkpoint_actor.pth'
    and 'checkpoint_critic.pth' in the working directory, overwriting the
    files saved by the previous episode.

    Params Input
    ==========
        n_episodes (int): number of training episodes to run

    Params Output
    ==========
        scores_all (list of floats): the cumulative reward of each episode,
            in the order the episodes were played (empty if n_episodes <= 0)

    """

    # One entry per finished episode; this is the list that is returned.
    scores_all = []
    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        agent.reset()
        score = 0.0

        # Play a single episode until the environment signals termination.
        done = False
        while not done:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            # Hand the transition to the agent (see ddpg_agent.Agent.step).
            agent.step(state, action, reward, next_state, done)
            score += reward
            state = next_state

        scores_all.append(score)

        print('Episode {} ... Reward: {:.3f}'.format(i_episode, score))

        # Checkpoint after each episode so an interrupted run keeps its latest weights.
        torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')

    return scores_all

In [None]:
scores = run_ddpg()

*****CALL RESET******
Action chosen at reset:  [0.]
Actions taken at reset:  []
Energies got at reset:  [0.0]
****CALL STEP****
Action chosen at step:  [51.218685 60.377647 63.21339 ]
With this action the energy is:  0.0270402
With this action the full dim is:  1  and princip dim is:  1
The new action:  [51.218685 60.377647 63.21339 ]  makes the energy positive:  True
The new action:  [51.218685 60.377647 63.21339 ]  makes the energy greater than:  0.0  the previous one:  True
The new action:  [51.218685 60.377647 63.21339 ]  makes the energy less than: -0.1026 False
The new action:  [51.218685 60.377647 63.21339 ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -28.0402
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [62.12168  90.141716 55.780006]
With this action the energy is:  0.0228858
Wi

With this action the energy is:  649218.0
With this action the full dim is:  1  and princip dim is:  1
The new action:  [1.6377857e+01 7.8245897e+00 9.9999998e-03]  makes the energy positive:  True
The new action:  [1.6377857e+01 7.8245897e+00 9.9999998e-03]  makes the energy greater than:  0.0  the previous one:  True
The new action:  [1.6377857e+01 7.8245897e+00 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [1.6377857e+01 7.8245897e+00 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -649218001.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [8.6420166e+01 6.6693520e+01 9.9999998e-03]
With this action the energy is:  649218.0
With this action the full dim is:  1  and princip dim is:  1
The new action:  [8.6420166e+01 6.6693520e+01 9.9999998e-03]  m

With this action the energy is:  0.0285649
With this action the full dim is:  1  and princip dim is:  1
The new action:  [65.09381  41.45869  55.753716]  makes the energy positive:  True
The new action:  [65.09381  41.45869  55.753716]  makes the energy greater than:  0.0  the previous one:  True
The new action:  [65.09381  41.45869  55.753716]  makes the energy less than: -0.1026 False
The new action:  [65.09381  41.45869  55.753716]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -29.5649
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [51.737743 65.43545  60.511307]
With this action the energy is:  0.0266071
With this action the full dim is:  1  and princip dim is:  1
The new action:  [51.737743 65.43545  60.511307]  makes the energy positive:  True
The new action:  [51.737743 65.43545  60.5

With this action the energy is:  0.017023
With this action the full dim is:  1  and princip dim is:  1
The new action:  [ 54.265594 110.        98.559395]  makes the energy positive:  True
The new action:  [ 54.265594 110.        98.559395]  makes the energy greater than:  0.0  the previous one:  True
The new action:  [ 54.265594 110.        98.559395]  makes the energy less than: -0.1026 False
The new action:  [ 54.265594 110.        98.559395]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -18.023
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [ 67.406624 110.        73.48503 ]
With this action the energy is:  0.0181362
With this action the full dim is:  1  and princip dim is:  1
The new action:  [ 67.406624 110.        73.48503 ]  makes the energy positive:  True
The new action:  [ 67.4066

With this action the energy is:  46.4475
With this action the full dim is:  1  and princip dim is:  1
The new action:  [ 1.1680079 73.384995  61.64528  ]  makes the energy positive:  True
The new action:  [ 1.1680079 73.384995  61.64528  ]  makes the energy greater than:  0.0  the previous one:  True
The new action:  [ 1.1680079 73.384995  61.64528  ]  makes the energy less than: -0.1026 False
The new action:  [ 1.1680079 73.384995  61.64528  ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -46448.5
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [36.183624 69.14681  26.937319]
With this action the energy is:  0.0271627
With this action the full dim is:  1  and princip dim is:  1
The new action:  [36.183624 69.14681  26.937319]  makes the energy positive:  True
The new action:  [36.183624 69.1

With this action the energy is:  -0.0499682
With this action the full dim is:  9  and princip dim is:  9
The new action:  [98.975235 84.11793  32.865242]  makes the energy positive:  False
The new action:  [98.975235 84.11793  32.865242]  makes the energy greater than:  -0.0498289  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.0001393
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [91.36118  96.85549  19.960949]
With this action the energy is:  -0.0499775
With this action the full dim is:  10  and princip dim is:  10
The new action:  [91.36118  96.85549  19.960949]  makes the energy positive:  False
The new action:  [91.36118  96.85549  19.960949]  makes the energy greater than:  -0.0499682  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.0000093


With this action the energy is:  -0.0555128
With this action the full dim is:  18  and princip dim is:  18
The new action:  [4.5615528e+01 4.8098686e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [4.5615528e+01 4.8098686e+01 9.9999998e-03]  makes the energy greater than:  -0.0555128  the previous one:  True
The new action:  [4.5615528e+01 4.8098686e+01 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [4.5615528e+01 4.8098686e+01 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [35.77431  30.723818 12.614043]
With this action the energy is:  -0.069994
With this action the full dim is:  18  and princip dim is:  18
The new action:  [35.77431  30.723818 12.614043]  makes the energy p

With this action the energy is:  -0.0945385
With this action the full dim is:  29  and princip dim is:  29
The new action:  [34.794456 54.145023 10.092642]  makes the energy positive:  False
The new action:  [34.794456 54.145023 10.092642]  makes the energy greater than:  -0.0941398  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.0003987
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [58.136875 70.27746  36.78732 ]
With this action the energy is:  -0.0945467
With this action the full dim is:  30  and princip dim is:  30
The new action:  [58.136875 70.27746  36.78732 ]  makes the energy positive:  False
The new action:  [58.136875 70.27746  36.78732 ]  makes the energy greater than:  -0.0945385  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.000008

With this action the energy is:  -0.0960248
With this action the full dim is:  41  and princip dim is:  41
The new action:  [110.        13.902007  20.409939]  makes the energy positive:  False
The new action:  [110.        13.902007  20.409939]  makes the energy greater than:  -0.095501  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.0005238
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [98.89375  23.70491  29.224136]
With this action the energy is:  -0.0960274
With this action the full dim is:  42  and princip dim is:  42
The new action:  [98.89375  23.70491  29.224136]  makes the energy positive:  False
The new action:  [98.89375  23.70491  29.224136]  makes the energy greater than:  -0.0960248  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.0

With this action the energy is:  -0.0966618
With this action the full dim is:  56  and princip dim is:  56
The new action:  [6.5367561e+01 5.2543243e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [6.5367561e+01 5.2543243e+01 9.9999998e-03]  makes the energy greater than:  -0.0966619  the previous one:  True
The new action:  [6.5367561e+01 5.2543243e+01 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [6.5367561e+01 5.2543243e+01 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.000099999999989
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [6.1306171e+01 5.5984398e+01 9.9999998e-03]
With this action the energy is:  -0.0966618
With this action the full dim is:  56  and princip dim is:  56
The new action:  [6.1306171e+01 5.5984398

With this action the energy is:  -0.0966619
With this action the full dim is:  56  and princip dim is:  56
The new action:  [6.8196922e+01 5.0677048e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [6.8196922e+01 5.0677048e+01 9.9999998e-03]  makes the energy greater than:  -0.0966619  the previous one:  True
The new action:  [6.8196922e+01 5.0677048e+01 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [6.8196922e+01 5.0677048e+01 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [6.468435e+01 3.788792e+01 1.000000e-02]
With this action the energy is:  -0.0966619
With this action the full dim is:  56  and princip dim is:  56
The new action:  [6.468435e+01 3.788792e+01 1.000000e-02] 

With this action the energy is:  -0.0969265
With this action the full dim is:  57  and princip dim is:  57
The new action:  [7.995666e+01 7.164385e+00 1.000000e-02]  makes the energy positive:  False
The new action:  [7.995666e+01 7.164385e+00 1.000000e-02]  makes the energy greater than:  -0.0969265  the previous one:  True
The new action:  [7.995666e+01 7.164385e+00 1.000000e-02]  makes the energy less than: -0.1026 False
The new action:  [7.995666e+01 7.164385e+00 1.000000e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [7.1815536e+01 1.2826001e+01 9.9999998e-03]
With this action the energy is:  -0.0969265
With this action the full dim is:  57  and princip dim is:  57
The new action:  [7.1815536e+01 1.2826001e+01 9.9999998e-03]  makes

With this action the energy is:  -0.0970762
With this action the full dim is:  65  and princip dim is:  65
The new action:  [59.238365 25.67171  17.644941]  makes the energy positive:  False
The new action:  [59.238365 25.67171  17.644941]  makes the energy greater than:  -0.097033  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.0000432
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [80.227875 19.435562 13.3946  ]
With this action the energy is:  -0.0974267
With this action the full dim is:  66  and princip dim is:  66
The new action:  [80.227875 19.435562 13.3946  ]  makes the energy positive:  False
The new action:  [80.227875 19.435562 13.3946  ]  makes the energy greater than:  -0.0970762  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.0003505

With this action the energy is:  -0.0995305
With this action the full dim is:  76  and princip dim is:  76
The new action:  [9.2835472e+01 9.9999998e-03 3.5635998e+01]  makes the energy positive:  False
The new action:  [9.2835472e+01 9.9999998e-03 3.5635998e+01]  makes the energy greater than:  -0.0995305  the previous one:  True
The new action:  [9.2835472e+01 9.9999998e-03 3.5635998e+01]  makes the energy less than: -0.1026 False
The new action:  [9.2835472e+01 9.9999998e-03 3.5635998e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [8.784932e+01 1.000000e-02 3.046915e+01]
With this action the energy is:  -0.0995305
With this action the full dim is:  76  and princip dim is:  76
The new action:  [8.784932e+01 1.000000e-02 3.046915e+01] 

With this action the energy is:  -0.0998441
With this action the full dim is:  77  and princip dim is:  77
The new action:  [1.0866605e+02 1.8193689e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.0866605e+02 1.8193689e+01 9.9999998e-03]  makes the energy greater than:  -0.0998441  the previous one:  True
The new action:  [1.0866605e+02 1.8193689e+01 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [1.0866605e+02 1.8193689e+01 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [8.415521e+01 3.048386e+01 1.000000e-02]
With this action the energy is:  -0.0998441
With this action the full dim is:  77  and princip dim is:  77
The new action:  [8.415521e+01 3.048386e+01 1.000000e-02] 

With this action the energy is:  -0.100424
With this action the full dim is:  82  and princip dim is:  82
The new action:  [110.          3.2819726   9.648369 ]  makes the energy positive:  False
The new action:  [110.          3.2819726   9.648369 ]  makes the energy greater than:  -0.100064  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.00036
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [1.1000000e+02 1.7332603e+01 9.9999998e-03]
With this action the energy is:  -0.100424
With this action the full dim is:  83  and princip dim is:  83
The new action:  [1.1000000e+02 1.7332603e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.1000000e+02 1.7332603e+01 9.9999998e-03]  makes the energy greater than:  -0.100424  the previous one:  True
The new action:  [1.1000000e+02 1.7332603e+01 9.9999998e-03]  makes the energy less than:

With this action the energy is:  -0.100424
With this action the full dim is:  83  and princip dim is:  83
The new action:  [1.1000000e+02 9.9999998e-03 1.6421066e+01]  makes the energy positive:  False
The new action:  [1.1000000e+02 9.9999998e-03 1.6421066e+01]  makes the energy greater than:  -0.100424  the previous one:  True
The new action:  [1.1000000e+02 9.9999998e-03 1.6421066e+01]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 9.9999998e-03 1.6421066e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [9.2541534e+01 9.9999998e-03 1.8817453e+01]
With this action the energy is:  -0.100424
With this action the full dim is:  83  and princip dim is:  83
The new action:  [9.2541534e+01 9.9999998e-03 1.8817453e+0

With this action the energy is:  -0.100453
With this action the full dim is:  83  and princip dim is:  83
The new action:  [94.067245 11.144995 17.37469 ]  makes the energy positive:  False
The new action:  [94.067245 11.144995 17.37469 ]  makes the energy greater than:  -0.100424  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.000029
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [8.0612724e+01 9.9999998e-03 1.5718398e+01]
With this action the energy is:  -0.100453
With this action the full dim is:  84  and princip dim is:  84
The new action:  [8.0612724e+01 9.9999998e-03 1.5718398e+01]  makes the energy positive:  False
The new action:  [8.0612724e+01 9.9999998e-03 1.5718398e+01]  makes the energy greater than:  -0.100453  the previous one:  True
The new action:  [8.0612724e+01 9.9999998e-03 1.5718398e+01]  makes the energy less than: -0.1026 Fa

With this action the energy is:  -0.101645
With this action the full dim is:  90  and princip dim is:  90
The new action:  [8.5107735e+01 9.9999998e-03 2.4336391e+01]  makes the energy positive:  False
The new action:  [8.5107735e+01 9.9999998e-03 2.4336391e+01]  makes the energy greater than:  -0.101645  the previous one:  True
The new action:  [8.5107735e+01 9.9999998e-03 2.4336391e+01]  makes the energy less than: -0.1026 False
The new action:  [8.5107735e+01 9.9999998e-03 2.4336391e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.0083377e+02 9.9999998e-03 1.7659290e+01]
With this action the energy is:  -0.101645
With this action the full dim is:  90  and princip dim is:  90
The new action:  [1.0083377e+02 9.9999998e-03 1.7659290e+0

With this action the energy is:  -0.101645
With this action the full dim is:  90  and princip dim is:  89
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy greater than:  -0.101645  the previous one:  True
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy less than: -0.1026 False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1e+02 1.0e-02 1.0e-02]
With this action the energy is:  -0.101645
With this action the full dim is:  90  and princip dim is:  89
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy greater t

With this action the energy is:  -0.101681
With this action the full dim is:  91  and princip dim is:  91
The new action:  [1.1000000e+02 9.9999998e-03 6.9321437e+00]  makes the energy positive:  False
The new action:  [1.1000000e+02 9.9999998e-03 6.9321437e+00]  makes the energy greater than:  -0.101681  the previous one:  True
The new action:  [1.1000000e+02 9.9999998e-03 6.9321437e+00]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 9.9999998e-03 6.9321437e+00]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.0745632e+02 9.9999998e-03 8.8904839e+00]
With this action the energy is:  -0.101681
With this action the full dim is:  91  and princip dim is:  91
The new action:  [1.0745632e+02 9.9999998e-03 8.8904839e+0

With this action the energy is:  -0.101695
With this action the full dim is:  92  and princip dim is:  92
The new action:  [110.        18.511375  33.243134]  makes the energy positive:  False
The new action:  [110.        18.511375  33.243134]  makes the energy greater than:  -0.101683  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.000012
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [110.          3.5893652  45.761093 ]
With this action the energy is:  -0.101702
With this action the full dim is:  93  and princip dim is:  93
The new action:  [110.          3.5893652  45.761093 ]  makes the energy positive:  False
The new action:  [110.          3.5893652  45.761093 ]  makes the energy greater than:  -0.101695  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is

With this action the energy is:  -0.101942
With this action the full dim is:  97  and princip dim is:  97
The new action:  [9.985268e+01 6.568345e+01 1.000000e-02]  makes the energy positive:  False
The new action:  [9.985268e+01 6.568345e+01 1.000000e-02]  makes the energy greater than:  -0.101942  the previous one:  True
The new action:  [9.985268e+01 6.568345e+01 1.000000e-02]  makes the energy less than: -0.1026 False
The new action:  [9.985268e+01 6.568345e+01 1.000000e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [9.0698685e+01 7.4601273e+01 9.9999998e-03]
With this action the energy is:  -0.101942
With this action the full dim is:  97  and princip dim is:  97
The new action:  [9.0698685e+01 7.4601273e+01 9.9999998e-03]  makes th

With this action the energy is:  -0.101959
With this action the full dim is:  98  and princip dim is:  98
The new action:  [1.0583108e+02 2.6682138e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.0583108e+02 2.6682138e+01 9.9999998e-03]  makes the energy greater than:  -0.101959  the previous one:  True
The new action:  [1.0583108e+02 2.6682138e+01 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [1.0583108e+02 2.6682138e+01 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.0124281e+02 2.0708376e+01 9.9999998e-03]
With this action the energy is:  -0.101959
With this action the full dim is:  98  and princip dim is:  98
The new action:  [1.0124281e+02 2.0708376e+01 9.9999998e-0

With this action the energy is:  -0.102087
With this action the full dim is:  100  and princip dim is:  99
The new action:  [6.247946e+01 3.820161e+00 1.000000e-02]  makes the energy positive:  False
The new action:  [6.247946e+01 3.820161e+00 1.000000e-02]  makes the energy greater than:  -0.102087  the previous one:  True
The new action:  [6.247946e+01 3.820161e+00 1.000000e-02]  makes the energy less than: -0.1026 False
The new action:  [6.247946e+01 3.820161e+00 1.000000e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [7.3773628e+01 2.1621311e+01 9.9999998e-03]
With this action the energy is:  -0.102087
With this action the full dim is:  100  and princip dim is:  100
The new action:  [7.3773628e+01 2.1621311e+01 9.9999998e-03]  makes

With this action the energy is:  -0.102088
With this action the full dim is:  101  and princip dim is:  100
The new action:  [8.192359e+01 1.000000e-02 1.000000e-02]  makes the energy positive:  False
The new action:  [8.192359e+01 1.000000e-02 1.000000e-02]  makes the energy greater than:  -0.102088  the previous one:  True
The new action:  [8.192359e+01 1.000000e-02 1.000000e-02]  makes the energy less than: -0.1026 False
The new action:  [8.192359e+01 1.000000e-02 1.000000e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [7.7693611e+01 9.9999998e-03 3.3658966e+01]
With this action the energy is:  -0.102088
With this action the full dim is:  101  and princip dim is:  101
The new action:  [7.7693611e+01 9.9999998e-03 3.3658966e+01]  make

With this action the energy is:  -0.102091
With this action the full dim is:  103  and princip dim is:  103
The new action:  [96.490036 52.638493 38.61106 ]  makes the energy positive:  False
The new action:  [96.490036 52.638493 38.61106 ]  makes the energy greater than:  -0.102091  the previous one:  True
The new action:  [96.490036 52.638493 38.61106 ]  makes the energy less than: -0.1026 False
The new action:  [96.490036 52.638493 38.61106 ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [93.6815   53.137386 37.14167 ]
With this action the energy is:  -0.102091
With this action the full dim is:  103  and princip dim is:  103
The new action:  [93.6815   53.137386 37.14167 ]  makes the energy positive:  False
The new action:  [93.6815   53

With this action the energy is:  -0.102094
With this action the full dim is:  104  and princip dim is:  104
The new action:  [1.08112366e+02 3.19325848e+01 9.99999978e-03]  makes the energy positive:  False
The new action:  [1.08112366e+02 3.19325848e+01 9.99999978e-03]  makes the energy greater than:  -0.102094  the previous one:  True
The new action:  [1.08112366e+02 3.19325848e+01 9.99999978e-03]  makes the energy less than: -0.1026 False
The new action:  [1.08112366e+02 3.19325848e+01 9.99999978e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.         18.729376    2.8160205]
With this action the energy is:  -0.102094
With this action the full dim is:  104  and princip dim is:  104
The new action:  [110.         18.729376    2.81

With this action the energy is:  -0.102103
With this action the full dim is:  106  and princip dim is:  106
The new action:  [71.34593    4.8018994 55.450165 ]  makes the energy positive:  False
The new action:  [71.34593    4.8018994 55.450165 ]  makes the energy greater than:  -0.102098  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.000005
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [79.40054   4.998999 34.825916]
With this action the energy is:  -0.102099
With this action the full dim is:  107  and princip dim is:  107
The new action:  [79.40054   4.998999 34.825916]  makes the energy positive:  False
The new action:  [79.40054   4.998999 34.825916]  makes the energy greater than:  -0.102103  the previous one:  True
The new action:  [79.40054   4.998999 34.825916]  makes the energy less than: -0.1026 False
The new action:  [79.40054   4.998

With this action the energy is:  -0.102106
With this action the full dim is:  109  and princip dim is:  109
The new action:  [1.1000000e+02 7.2708545e+00 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.1000000e+02 7.2708545e+00 9.9999998e-03]  makes the energy greater than:  -0.102106  the previous one:  True
The new action:  [1.1000000e+02 7.2708545e+00 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 7.2708545e+00 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1000000e+02 1.2805595e+01 9.9999998e-03]
With this action the energy is:  -0.102106
With this action the full dim is:  109  and princip dim is:  109
The new action:  [1.1000000e+02 1.2805595e+01 9.999999

With this action the energy is:  -0.102101
With this action the full dim is:  109  and princip dim is:  109
The new action:  [110.        17.224285  45.14667 ]  makes the energy positive:  False
The new action:  [110.        17.224285  45.14667 ]  makes the energy greater than:  -0.102106  the previous one:  True
The new action:  [110.        17.224285  45.14667 ]  makes the energy less than: -0.1026 False
The new action:  [110.        17.224285  45.14667 ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.005000000000005
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [107.60363   23.07481   58.956497]
With this action the energy is:  -0.102103
With this action the full dim is:  109  and princip dim is:  109
The new action:  [107.60363   23.07481   58.956497]  makes the energy positive:  Fals

With this action the energy is:  -0.102101
With this action the full dim is:  109  and princip dim is:  109
The new action:  [110.        77.49357   14.962398]  makes the energy positive:  False
The new action:  [110.        77.49357   14.962398]  makes the energy greater than:  -0.102106  the previous one:  True
The new action:  [110.        77.49357   14.962398]  makes the energy less than: -0.1026 False
The new action:  [110.        77.49357   14.962398]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.005000000000005
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.        49.273685   2.162881]
With this action the energy is:  -0.102106
With this action the full dim is:  109  and princip dim is:  109
The new action:  [110.        49.273685   2.162881]  makes the energy positive:  Fals

With this action the energy is:  -0.10211
With this action the full dim is:  111  and princip dim is:  111
The new action:  [110.        21.878094  43.126205]  makes the energy positive:  False
The new action:  [110.        21.878094  43.126205]  makes the energy greater than:  -0.102109  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.000001
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [110.        28.871948  23.893267]
With this action the energy is:  -0.10211
With this action the full dim is:  112  and princip dim is:  111
The new action:  [110.        28.871948  23.893267]  makes the energy positive:  False
The new action:  [110.        28.871948  23.893267]  makes the energy greater than:  -0.10211  the previous one:  True
The new action:  [110.        28.871948  23.893267]  makes the energy less than: -0.1026 False
The new action:  [110.   

With this action the energy is:  -0.102116
With this action the full dim is:  113  and princip dim is:  111
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy greater than:  -0.102116  the previous one:  True
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy less than: -0.1026 False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1e+02 1.0e-02 1.0e-02]
With this action the energy is:  -0.102116
With this action the full dim is:  113  and princip dim is:  111
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy great

With this action the energy is:  -0.102127
With this action the full dim is:  115  and princip dim is:  114
The new action:  [1.100000e+02 1.000000e-02 8.728276e+00]  makes the energy positive:  False
The new action:  [1.100000e+02 1.000000e-02 8.728276e+00]  makes the energy greater than:  -0.102127  the previous one:  True
The new action:  [1.100000e+02 1.000000e-02 8.728276e+00]  makes the energy less than: -0.1026 False
The new action:  [1.100000e+02 1.000000e-02 8.728276e+00]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [97.948395   4.1092577 28.830177 ]
With this action the energy is:  -0.102128
With this action the full dim is:  115  and princip dim is:  114
The new action:  [97.948395   4.1092577 28.830177 ]  makes the energy posit

With this action the energy is:  -0.102128
With this action the full dim is:  116  and princip dim is:  115
The new action:  [1.1000000e+02 9.9999998e-03 1.1247656e+01]  makes the energy positive:  False
The new action:  [1.1000000e+02 9.9999998e-03 1.1247656e+01]  makes the energy greater than:  -0.102128  the previous one:  True
The new action:  [1.1000000e+02 9.9999998e-03 1.1247656e+01]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 9.9999998e-03 1.1247656e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.100000e+02 1.000000e-02 3.744178e+01]
With this action the energy is:  -0.102128
With this action the full dim is:  116  and princip dim is:  115
The new action:  [1.100000e+02 1.000000e-02 3.744178e+01]

With this action the energy is:  -0.102129
With this action the full dim is:  117  and princip dim is:  116
The new action:  [1.1000000e+02 1.2068521e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.1000000e+02 1.2068521e+01 9.9999998e-03]  makes the energy greater than:  -0.102129  the previous one:  True
The new action:  [1.1000000e+02 1.2068521e+01 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 1.2068521e+01 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1e+02 1.0e-02 1.0e-02]
With this action the energy is:  -0.102129
With this action the full dim is:  117  and princip dim is:  115
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  

With this action the energy is:  -0.102135
With this action the full dim is:  118  and princip dim is:  117
The new action:  [110.        25.509031  12.330962]  makes the energy positive:  False
The new action:  [110.        25.509031  12.330962]  makes the energy greater than:  -0.102131  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.0000040000000001
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [101.15014   23.548029  14.662605]
With this action the energy is:  -0.102135
With this action the full dim is:  119  and princip dim is:  117
The new action:  [101.15014   23.548029  14.662605]  makes the energy positive:  False
The new action:  [101.15014   23.548029  14.662605]  makes the energy greater than:  -0.102135  the previous one:  True
The new action:  [101.15014   23.548029  14.662605]  makes the energy less than: -0.1026 False
The new acti

With this action the energy is:  -0.102158
With this action the full dim is:  121  and princip dim is:  119
The new action:  [80.022545 11.598393 19.665382]  makes the energy positive:  False
The new action:  [80.022545 11.598393 19.665382]  makes the energy greater than:  -0.102137  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.000021
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [100.27588   12.33791   23.890667]
With this action the energy is:  -0.102158
With this action the full dim is:  122  and princip dim is:  120
The new action:  [100.27588   12.33791   23.890667]  makes the energy positive:  False
The new action:  [100.27588   12.33791   23.890667]  makes the energy greater than:  -0.102158  the previous one:  True
The new action:  [100.27588   12.33791   23.890667]  makes the energy less than: -0.1026 False
The new action:  [100.27588 

With this action the energy is:  -0.102158
With this action the full dim is:  122  and princip dim is:  119
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy greater than:  -0.102158  the previous one:  True
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy less than: -0.1026 False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1000000e+02 1.4764741e+01 9.9999998e-03]
With this action the energy is:  -0.102158
With this action the full dim is:  122  and princip dim is:  120
The new action:  [1.1000000e+02 1.4764741e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.1000000e+0

With this action the energy is:  -0.102156
With this action the full dim is:  122  and princip dim is:  120
The new action:  [110.        44.9869    15.376035]  makes the energy positive:  False
The new action:  [110.        44.9869    15.376035]  makes the energy greater than:  -0.102158  the previous one:  True
The new action:  [110.        44.9869    15.376035]  makes the energy less than: -0.1026 False
The new action:  [110.        44.9869    15.376035]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.002000000000002
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.        42.874634  20.52514 ]
With this action the energy is:  -0.102158
With this action the full dim is:  122  and princip dim is:  120
The new action:  [110.        42.874634  20.52514 ]  makes the energy positive:  Fals

With this action the energy is:  -0.102158
With this action the full dim is:  122  and princip dim is:  120
The new action:  [1.100000e+02 8.829301e+01 1.000000e-02]  makes the energy positive:  False
The new action:  [1.100000e+02 8.829301e+01 1.000000e-02]  makes the energy greater than:  -0.102158  the previous one:  True
The new action:  [1.100000e+02 8.829301e+01 1.000000e-02]  makes the energy less than: -0.1026 False
The new action:  [1.100000e+02 8.829301e+01 1.000000e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.100000e+02 4.754707e+01 1.000000e-02]
With this action the energy is:  -0.102158
With this action the full dim is:  122  and princip dim is:  120
The new action:  [1.100000e+02 4.754707e+01 1.000000e-02]  makes the 

With this action the energy is:  -0.102159
With this action the full dim is:  123  and princip dim is:  121
The new action:  [1.0418001e+02 9.9999998e-03 2.5201380e+01]  makes the energy positive:  False
The new action:  [1.0418001e+02 9.9999998e-03 2.5201380e+01]  makes the energy greater than:  -0.102159  the previous one:  True
The new action:  [1.0418001e+02 9.9999998e-03 2.5201380e+01]  makes the energy less than: -0.1026 False
The new action:  [1.0418001e+02 9.9999998e-03 2.5201380e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1000000e+02 9.9999998e-03 2.2738327e+01]
With this action the energy is:  -0.102159
With this action the full dim is:  123  and princip dim is:  121
The new action:  [1.1000000e+02 9.9999998e-03 2.273832

With this action the energy is:  -0.102227
With this action the full dim is:  126  and princip dim is:  123
The new action:  [70.811615   3.3647168 45.47178  ]  makes the energy positive:  False
The new action:  [70.811615   3.3647168 45.47178  ]  makes the energy greater than:  -0.102226  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 1.000001
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [70.72122    9.5609665 44.205902 ]
With this action the energy is:  -0.102227
With this action the full dim is:  127  and princip dim is:  124
The new action:  [70.72122    9.5609665 44.205902 ]  makes the energy positive:  False
The new action:  [70.72122    9.5609665 44.205902 ]  makes the energy greater than:  -0.102227  the previous one:  True
The new action:  [70.72122    9.5609665 44.205902 ]  makes the energy less than: -0.1026 False
The new action:  [70.7

With this action the energy is:  -0.102247
With this action the full dim is:  130  and princip dim is:  126
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy greater than:  -0.102247  the previous one:  True
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy less than: -0.1026 False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1000000e+02 3.5264404e+00 9.9999998e-03]
With this action the energy is:  -0.102247
With this action the full dim is:  130  and princip dim is:  126
The new action:  [1.1000000e+02 3.5264404e+00 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.1000000e+0

With this action the energy is:  -0.102247
With this action the full dim is:  130  and princip dim is:  127
The new action:  [1.0277162e+02 9.9999998e-03 2.2245123e+01]  makes the energy positive:  False
The new action:  [1.0277162e+02 9.9999998e-03 2.2245123e+01]  makes the energy greater than:  -0.102247  the previous one:  True
The new action:  [1.0277162e+02 9.9999998e-03 2.2245123e+01]  makes the energy less than: -0.1026 False
The new action:  [1.0277162e+02 9.9999998e-03 2.2245123e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [9.440321e+01 1.000000e-02 1.000000e-02]
With this action the energy is:  -0.102247
With this action the full dim is:  130  and princip dim is:  126
The new action:  [9.440321e+01 1.000000e-02 1.000000e-02]

With this action the energy is:  -0.102247
With this action the full dim is:  130  and princip dim is:  126
The new action:  [7.8826927e+01 9.9999998e-03 2.1035497e+00]  makes the energy positive:  False
The new action:  [7.8826927e+01 9.9999998e-03 2.1035497e+00]  makes the energy greater than:  -0.102247  the previous one:  True
The new action:  [7.8826927e+01 9.9999998e-03 2.1035497e+00]  makes the energy less than: -0.1026 False
The new action:  [7.8826927e+01 9.9999998e-03 2.1035497e+00]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [8.3383698e+01 9.9999998e-03 1.8249971e+01]
With this action the energy is:  -0.102247
With this action the full dim is:  130  and princip dim is:  127
The new action:  [8.3383698e+01 9.9999998e-03 1.824997

With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  128
The new action:  [1.1000000e+02 4.7709765e+00 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.1000000e+02 4.7709765e+00 9.9999998e-03]  makes the energy greater than:  -0.102334  the previous one:  True
The new action:  [1.1000000e+02 4.7709765e+00 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 4.7709765e+00 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [8.7964905e+01 9.9999998e-03 9.9999998e-03]
With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  128
The new action:  [8.7964905e+01 9.9999998e-03 9.999999

With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  128
The new action:  [9.821646e+01 1.000000e-02 1.000000e-02]  makes the energy positive:  False
The new action:  [9.821646e+01 1.000000e-02 1.000000e-02]  makes the energy greater than:  -0.102334  the previous one:  True
The new action:  [9.821646e+01 1.000000e-02 1.000000e-02]  makes the energy less than: -0.1026 False
The new action:  [9.821646e+01 1.000000e-02 1.000000e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1e+02 1.0e-02 1.0e-02]
With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  128
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The ne

With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  129
The new action:  [9.9645218e+01 9.9999998e-03 3.4981277e+01]  makes the energy positive:  False
The new action:  [9.9645218e+01 9.9999998e-03 3.4981277e+01]  makes the energy greater than:  -0.102334  the previous one:  True
The new action:  [9.9645218e+01 9.9999998e-03 3.4981277e+01]  makes the energy less than: -0.1026 False
The new action:  [9.9645218e+01 9.9999998e-03 3.4981277e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [9.8672676e+01 9.9999998e-03 2.5497015e+01]
With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  129
The new action:  [9.8672676e+01 9.9999998e-03 2.549701

With this action the energy is:  -0.102331
With this action the full dim is:  132  and princip dim is:  128
The new action:  [110.        16.376083  16.356775]  makes the energy positive:  False
The new action:  [110.        16.376083  16.356775]  makes the energy greater than:  -0.102334  the previous one:  True
The new action:  [110.        16.376083  16.356775]  makes the energy less than: -0.1026 False
The new action:  [110.        16.376083  16.356775]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.002999999999989
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [9.4981659e+01 1.6637108e+01 9.9999998e-03]
With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  129
The new action:  [9.4981659e+01 1.6637108e+01 9.9999998e-03]  makes the ener

With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  128
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy greater than:  -0.102334  the previous one:  True
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy less than: -0.1026 False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1e+02 1.0e-02 1.0e-02]
With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  128
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.0e-02]  makes the energy great

With this action the energy is:  -0.102334
With this action the full dim is:  132  and princip dim is:  128
The new action:  [1.1000000e+02 9.9999998e-03 4.6605515e+00]  makes the energy positive:  False
The new action:  [1.1000000e+02 9.9999998e-03 4.6605515e+00]  makes the energy greater than:  -0.102334  the previous one:  True
The new action:  [1.1000000e+02 9.9999998e-03 4.6605515e+00]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 9.9999998e-03 4.6605515e+00]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.          8.354917    5.3291993]
With this action the energy is:  -0.102362
With this action the full dim is:  132  and princip dim is:  129
The new action:  [110.          8.354917    5.3291993]  make

With this action the energy is:  -0.102362
With this action the full dim is:  133  and princip dim is:  129
The new action:  [9.1616234e+01 9.9999998e-03 9.9999998e-03]  makes the energy positive:  False
The new action:  [9.1616234e+01 9.9999998e-03 9.9999998e-03]  makes the energy greater than:  -0.102362  the previous one:  True
The new action:  [9.1616234e+01 9.9999998e-03 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [9.1616234e+01 9.9999998e-03 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [5.3944767e+01 9.9999998e-03 9.9999998e-03]
With this action the energy is:  -0.102362
With this action the full dim is:  133  and princip dim is:  129
The new action:  [5.3944767e+01 9.9999998e-03 9.999999

With this action the energy is:  -0.102361
With this action the full dim is:  133  and princip dim is:  130
The new action:  [110.        26.836416  43.469128]  makes the energy positive:  False
The new action:  [110.        26.836416  43.469128]  makes the energy greater than:  -0.102362  the previous one:  True
The new action:  [110.        26.836416  43.469128]  makes the energy less than: -0.1026 False
The new action:  [110.        26.836416  43.469128]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.001000000000001
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.        15.119341  39.268066]
With this action the energy is:  -0.102361
With this action the full dim is:  133  and princip dim is:  129
The new action:  [110.        15.119341  39.268066]  makes the energy positive:  Fals

With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [1.1000000e+02 6.2356216e+01 9.9999998e-03]  makes the energy positive:  False
The new action:  [1.1000000e+02 6.2356216e+01 9.9999998e-03]  makes the energy greater than:  -0.102368  the previous one:  True
The new action:  [1.1000000e+02 6.2356216e+01 9.9999998e-03]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 6.2356216e+01 9.9999998e-03]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [9.433551e+01 7.827929e+01 1.000000e-02]
With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [9.433551e+01 7.827929e+01 1.000000e-02]

With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [7.8911804e+01 9.9999998e-03 4.1302933e+01]  makes the energy positive:  False
The new action:  [7.8911804e+01 9.9999998e-03 4.1302933e+01]  makes the energy greater than:  -0.102368  the previous one:  True
The new action:  [7.8911804e+01 9.9999998e-03 4.1302933e+01]  makes the energy less than: -0.1026 False
The new action:  [7.8911804e+01 9.9999998e-03 4.1302933e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [6.436800e+01 1.000000e-02 3.913082e+01]
With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [6.436800e+01 1.000000e-02 3.913082e+01]

With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [1.1000000e+02 9.9999998e-03 2.6224846e+01]  makes the energy positive:  False
The new action:  [1.1000000e+02 9.9999998e-03 2.6224846e+01]  makes the energy greater than:  -0.102368  the previous one:  True
The new action:  [1.1000000e+02 9.9999998e-03 2.6224846e+01]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 9.9999998e-03 2.6224846e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.0061886e+02 9.9999998e-03 1.0447090e+01]
With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [1.0061886e+02 9.9999998e-03 1.044709

With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [7.9916252e+01 9.9999998e-03 8.4494835e+01]  makes the energy positive:  False
The new action:  [7.9916252e+01 9.9999998e-03 8.4494835e+01]  makes the energy greater than:  -0.102368  the previous one:  True
The new action:  [7.9916252e+01 9.9999998e-03 8.4494835e+01]  makes the energy less than: -0.1026 False
The new action:  [7.9916252e+01 9.9999998e-03 8.4494835e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.100000e+02 1.000000e-02 9.729814e+01]
With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [1.100000e+02 1.000000e-02 9.729814e+01]

With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [1.100000e+02 1.000000e-02 7.192881e+01]  makes the energy positive:  False
The new action:  [1.100000e+02 1.000000e-02 7.192881e+01]  makes the energy greater than:  -0.102368  the previous one:  True
The new action:  [1.100000e+02 1.000000e-02 7.192881e+01]  makes the energy less than: -0.1026 False
The new action:  [1.100000e+02 1.000000e-02 7.192881e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.100000e+02 1.000000e-02 8.555326e+01]
With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [1.100000e+02 1.000000e-02 8.555326e+01]  makes the 

With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [1.100000e+02 1.000000e-02 6.911589e+01]  makes the energy positive:  False
The new action:  [1.100000e+02 1.000000e-02 6.911589e+01]  makes the energy greater than:  -0.102368  the previous one:  True
The new action:  [1.100000e+02 1.000000e-02 6.911589e+01]  makes the energy less than: -0.1026 False
The new action:  [1.100000e+02 1.000000e-02 6.911589e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1000000e+02 9.9999998e-03 4.5776505e+01]
With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [1.1000000e+02 9.9999998e-03 4.5776505e+01]  make

With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [ 85.789154  17.700537 110.      ]  makes the energy positive:  False
The new action:  [ 85.789154  17.700537 110.      ]  makes the energy greater than:  -0.102368  the previous one:  True
The new action:  [ 85.789154  17.700537 110.      ]  makes the energy less than: -0.1026 False
The new action:  [ 85.789154  17.700537 110.      ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.        11.314228 101.43028 ]
With this action the energy is:  -0.102368
With this action the full dim is:  135  and princip dim is:  131
The new action:  [110.        11.314228 101.43028 ]  makes the energy positive:  False
The new acti

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  131
The new action:  [101.59058   28.110197 110.      ]  makes the energy positive:  False
The new action:  [101.59058   28.110197 110.      ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [101.59058   28.110197 110.      ]  makes the energy less than: -0.1026 False
The new action:  [101.59058   28.110197 110.      ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [ 81.99759   32.218346 110.      ]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  131
The new action:  [ 81.99759   32.218346 110.      ]  makes the energy positive:  False
The new acti

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  131
The new action:  [110.        29.613808 107.996864]  makes the energy positive:  False
The new action:  [110.        29.613808 107.996864]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [110.        29.613808 107.996864]  makes the energy less than: -0.1026 False
The new action:  [110.        29.613808 107.996864]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [101.36945   19.468513 110.      ]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [101.36945   19.468513 110.      ]  makes the energy positive:  False
The new acti

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [ 70.94887     1.1075342 110.       ]  makes the energy positive:  False
The new action:  [ 70.94887     1.1075342 110.       ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [ 70.94887     1.1075342 110.       ]  makes the energy less than: -0.1026 False
The new action:  [ 70.94887     1.1075342 110.       ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [ 60.461075  21.78131  110.      ]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [ 60.461075  21.78131  110.      ]  makes the energy positive:  False


With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  131
The new action:  [5.3834908e+01 9.9999998e-03 1.0979414e+02]  makes the energy positive:  False
The new action:  [5.3834908e+01 9.9999998e-03 1.0979414e+02]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [5.3834908e+01 9.9999998e-03 1.0979414e+02]  makes the energy less than: -0.1026 False
The new action:  [5.3834908e+01 9.9999998e-03 1.0979414e+02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [6.257811e+01 1.000000e-02 9.744846e+01]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [6.257811e+01 1.000000e-02 9.744846e+01]

With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [1.004405e+02 1.000000e-02 8.452103e+01]  makes the energy positive:  False
The new action:  [1.004405e+02 1.000000e-02 8.452103e+01]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [1.004405e+02 1.000000e-02 8.452103e+01]  makes the energy less than: -0.1026 False
The new action:  [1.004405e+02 1.000000e-02 8.452103e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.001000000000001
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.10000000e+02 9.99999978e-03 1.04666405e+02]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [1.10000000e+02 9.99999978e-03 1

With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [1.0690287e+02 9.9999998e-03 1.1000000e+02]  makes the energy positive:  False
The new action:  [1.0690287e+02 9.9999998e-03 1.1000000e+02]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [1.0690287e+02 9.9999998e-03 1.1000000e+02]  makes the energy less than: -0.1026 False
The new action:  [1.0690287e+02 9.9999998e-03 1.1000000e+02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.001000000000001
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [7.266714e+01 1.000000e-02 1.100000e+02]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [7.266714e+01 1.000000e-02

With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [1.1e+02 1.0e-02 1.1e+02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.1e+02]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [1.1e+02 1.0e-02 1.1e+02]  makes the energy less than: -0.1026 False
The new action:  [1.1e+02 1.0e-02 1.1e+02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.001000000000001
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1e+02 1.0e-02 1.1e+02]
With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [1.1e+02 1.0e-02 1.1e+02]  makes the energy positive:  False
The new action:  [1.1e+02 1.0e-02 1.1e+02]  makes th

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.        10.602802 110.      ]  makes the energy positive:  False
The new action:  [110.        10.602802 110.      ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [110.        10.602802 110.      ]  makes the energy less than: -0.1026 False
The new action:  [110.        10.602802 110.      ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.        14.459722 110.      ]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.        14.459722 110.      ]  makes the energy positive:  False
The new acti

With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [9.4299652e+01 9.9999998e-03 1.0727484e+02]  makes the energy positive:  False
The new action:  [9.4299652e+01 9.9999998e-03 1.0727484e+02]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [9.4299652e+01 9.9999998e-03 1.0727484e+02]  makes the energy less than: -0.1026 False
The new action:  [9.4299652e+01 9.9999998e-03 1.0727484e+02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.001000000000001
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.1e+02 1.0e-02 1.1e+02]
With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [1.1e+02 1.0e-02 1.1e+02]  makes the ener

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.          1.5530468 107.4606   ]  makes the energy positive:  False
The new action:  [110.          1.5530468 107.4606   ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [110.          1.5530468 107.4606   ]  makes the energy less than: -0.1026 False
The new action:  [110.          1.5530468 107.4606   ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.       13.96565  76.4004 ]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.       13.96565  76.4004 ]  makes the energy positive:  False
The ne

With this action the energy is:  -0.102343
With this action the full dim is:  136  and princip dim is:  130
The new action:  [ 89.029366  64.205986 110.      ]  makes the energy positive:  False
The new action:  [ 89.029366  64.205986 110.      ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [ 89.029366  64.205986 110.      ]  makes the energy less than: -0.1026 False
The new action:  [ 89.029366  64.205986 110.      ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0259999999999982
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [ 99.0315    45.564518 103.4108  ]
With this action the energy is:  -0.102359
With this action the full dim is:  136  and princip dim is:  131
The new action:  [ 99.0315    45.564518 103.4108  ]  makes the energy positive:  Fal

With this action the energy is:  -0.102359
With this action the full dim is:  136  and princip dim is:  131
The new action:  [97.469925 46.703247 91.41375 ]  makes the energy positive:  False
The new action:  [97.469925 46.703247 91.41375 ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [97.469925 46.703247 91.41375 ]  makes the energy less than: -0.1026 False
The new action:  [97.469925 46.703247 91.41375 ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0099999999999962
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [72.84059  55.157467 77.89452 ]
With this action the energy is:  -0.102359
With this action the full dim is:  136  and princip dim is:  131
The new action:  [72.84059  55.157467 77.89452 ]  makes the energy positive:  False
The new action:

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.         5.566504  55.593475]  makes the energy positive:  False
The new action:  [110.         5.566504  55.593475]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [110.         5.566504  55.593475]  makes the energy less than: -0.1026 False
The new action:  [110.         5.566504  55.593475]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.       42.97055  50.86904]
With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  131
The new action:  [110.       42.97055  50.86904]  makes the energy positive:  False
The new action:  [

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [93.75066    1.4341407 95.39516  ]  makes the energy positive:  False
The new action:  [93.75066    1.4341407 95.39516  ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [93.75066    1.4341407 95.39516  ]  makes the energy less than: -0.1026 False
The new action:  [93.75066    1.4341407 95.39516  ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [ 78.784744   5.931831 109.72119 ]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [ 78.784744   5.931831 109.72119 ]  makes the energy positive:  False
The new acti

With this action the energy is:  -0.102359
With this action the full dim is:  136  and princip dim is:  131
The new action:  [ 78.19873   43.440937 110.      ]  makes the energy positive:  False
The new action:  [ 78.19873   43.440937 110.      ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [ 78.19873   43.440937 110.      ]  makes the energy less than: -0.1026 False
The new action:  [ 78.19873   43.440937 110.      ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0099999999999962
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [ 96.78198  52.69484 110.     ]
With this action the energy is:  -0.102359
With this action the full dim is:  136  and princip dim is:  131
The new action:  [ 96.78198  52.69484 110.     ]  makes the energy positive:  False
The

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.        11.397539  92.75119 ]  makes the energy positive:  False
The new action:  [110.        11.397539  92.75119 ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [110.        11.397539  92.75119 ]  makes the energy less than: -0.1026 False
The new action:  [110.        11.397539  92.75119 ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.        6.50645 110.     ]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.        6.50645 110.     ]  makes the energy positive:  False
The new action:  [

With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [1.1000000e+02 9.9999998e-03 7.2688126e+01]  makes the energy positive:  False
The new action:  [1.1000000e+02 9.9999998e-03 7.2688126e+01]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [1.1000000e+02 9.9999998e-03 7.2688126e+01]  makes the energy less than: -0.1026 False
The new action:  [1.1000000e+02 9.9999998e-03 7.2688126e+01]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.001000000000001
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [9.5493958e+01 9.9999998e-03 3.3576393e+01]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [9.5493958e+01 9.999999

With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [68.95561  11.744981 77.115875]  makes the energy positive:  False
The new action:  [68.95561  11.744981 77.115875]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [68.95561  11.744981 77.115875]  makes the energy less than: -0.1026 False
The new action:  [68.95561  11.744981 77.115875]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.001000000000001
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [7.639907e+01 1.000000e-02 8.153810e+01]
With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [7.639907e+01 1.000000e-02 8.153810e+01]  makes the energy positive:  Fals

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.         8.787773  78.04417 ]  makes the energy positive:  False
The new action:  [110.         8.787773  78.04417 ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [110.         8.787773  78.04417 ]  makes the energy less than: -0.1026 False
The new action:  [110.         8.787773  78.04417 ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [110.        23.573114  69.87895 ]
With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.        23.573114  69.87895 ]  makes the energy positive:  False
The new acti

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [8.775376e+01 1.000000e-02 1.100000e+02]  makes the energy positive:  False
The new action:  [8.775376e+01 1.000000e-02 1.100000e+02]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [8.775376e+01 1.000000e-02 1.100000e+02]  makes the energy less than: -0.1026 False
The new action:  [8.775376e+01 1.000000e-02 1.100000e+02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [6.4924324e+01 9.9999998e-03 1.1000000e+02]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [6.4924324e+01 9.9999998e-03 1.1000000e+02]  make

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [7.7093605e+01 9.9999998e-03 1.0611711e+02]  makes the energy positive:  False
The new action:  [7.7093605e+01 9.9999998e-03 1.0611711e+02]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [7.7093605e+01 9.9999998e-03 1.0611711e+02]  makes the energy less than: -0.1026 False
The new action:  [7.7093605e+01 9.9999998e-03 1.0611711e+02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [6.373206e+01 1.000000e-02 1.100000e+02]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [6.373206e+01 1.000000e-02 1.100000e+02]

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [7.856921e+01 1.000000e-02 1.100000e+02]  makes the energy positive:  False
The new action:  [7.856921e+01 1.000000e-02 1.100000e+02]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [7.856921e+01 1.000000e-02 1.100000e+02]  makes the energy less than: -0.1026 False
The new action:  [7.856921e+01 1.000000e-02 1.100000e+02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [6.050931e+01 1.000000e-02 1.100000e+02]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [6.050931e+01 1.000000e-02 1.100000e+02]  makes the 

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [6.99842e+01 1.00000e-02 1.10000e+02]  makes the energy positive:  False
The new action:  [6.99842e+01 1.00000e-02 1.10000e+02]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [6.99842e+01 1.00000e-02 1.10000e+02]  makes the energy less than: -0.1026 False
The new action:  [6.99842e+01 1.00000e-02 1.10000e+02]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [5.870056e+01 1.000000e-02 1.100000e+02]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [5.870056e+01 1.000000e-02 1.100000e+02]  makes the energy posit

With this action the energy is:  -0.102368
With this action the full dim is:  136  and princip dim is:  131
The new action:  [53.170395 27.191355 86.41386 ]  makes the energy positive:  False
The new action:  [53.170395 27.191355 86.41386 ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [53.170395 27.191355 86.41386 ]  makes the energy less than: -0.1026 False
The new action:  [53.170395 27.191355 86.41386 ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.001000000000001
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [57.38219  60.243298 79.94099 ]
With this action the energy is:  -0.102359
With this action the full dim is:  136  and princip dim is:  131
The new action:  [57.38219  60.243298 79.94099 ]  makes the energy positive:  False
The new action: 

With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [110.         6.455557  98.98727 ]  makes the energy positive:  False
The new action:  [110.         6.455557  98.98727 ]  makes the energy greater than:  -0.102369  the previous one:  True
The new action:  [110.         6.455557  98.98727 ]  makes the energy less than: -0.1026 False
The new action:  [110.         6.455557  98.98727 ]  makes the energy nan:  False
This action IS REMOVED from actions taken and sigmas, the enery is NOT STORED!
The energy is greater than previous energy --> Set reward:  -1.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
****CALL STEP****
Action chosen at step:  [1.100000e+02 1.000000e-02 8.746694e+01]
With this action the energy is:  -0.102369
With this action the full dim is:  136  and princip dim is:  132
The new action:  [1.100000e+02 1.000000e-02 8.746694e+01]  makes the energy positive:  False


## Random search as in the original SVM

In [None]:
# Random-search rollout: keep stepping the environment with randomly sampled
# actions until it reports that the episode is finished.
state = env.reset()
scores, score, step = [], 0.0, 0  # running totals after each step, total so far, step counter

done = False
while not done:
    print(".....STEP.....", step)
    # No policy involved: the action comes straight from the action space sampler.
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    step += 1
    # `scores` records the cumulative reward reached after every step.
    score += reward
    scores.append(score)
    state = next_state