# Stochastic Variational Method with RL algorithms

In [1]:
import numpy as np
import gym
import torch
import subprocess

## Exploring the environment

In [2]:
# Build the SVM environment; `file_sigmas` is the path reported back by
# `env.file_sigmas` at the end of this cell.
env = gym.make('svm_env:svmEnv-v1', file_sigmas="./svmCodeSVD/sigmas1.dat")

# Spaces exposed by the environment and the length of their last axis.
obs_space, act_space = env.observation_space, env.action_space
state_size = obs_space.shape[-1]
act_size = act_space.shape[-1]

# Report the spaces before touching the environment state.
for banner, value in (
    ('###### Observation space ####### \n', obs_space),
    ('###### Size of observation space ####### \n', state_size),
    ('###### Action space ####### \n', act_space),
    ('###### Number of actions ####### \n', act_size),
):
    print(banner, value)

# reset() prints its own log lines, so it has to run after the reports above.
state = env.reset()
print('##### State after reset ###### \n', state)

print('##### File where will be stored sigmas \n', env.file_sigmas)

###### Observation space ####### 
 Box(-inf, inf, (1,), float32)
###### Size of observation space ####### 
 1
###### Action space ####### 
 Box(-1.0, 1.0, (3,), float32)
###### Number of actions ####### 
 3
#### CALL RESET ####
Action chosen at reset:  [0.]
Actions taken at reset:  []
Energies got at reset:  [0.0]
##### State after reset ###### 
 [0.]
##### File where will be stored sigmas 
 ./svmCodeSVD/sigmas1.dat


## Deep Deterministic Policy Gradient (DDPG) from `stable_baselines3`

In [None]:
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

# Exploration noise for DDPG: zero-mean Gaussian with sigma 0.2 on every
# action component.
action_noise = NormalActionNoise(
    mean=np.zeros(act_size),
    sigma=np.full(act_size, 0.2),
)

# Constructor reference (library defaults):
# (policy, env, learning_rate=0.001, buffer_size=1000000, learning_starts=100, batch_size=100,
#  tau=0.005, gamma=0.99, train_freq=(1, 'episode'), gradient_steps=-1, action_noise=None,
#  replay_buffer_class=None, replay_buffer_kwargs=None, optimize_memory_usage=False,
#  tensorboard_log=None, create_eval_env=False, policy_kwargs=None, verbose=0, seed=None,
#  device='auto', _init_setup_model=True)
model = DDPG(
    "MlpPolicy",
    env,
    action_noise=action_noise,
    learning_starts=150,
    batch_size=150,
    gamma=1.0,
    verbose=1,
    seed=2,
)

# learn() reference (library defaults):
# learn(total_timesteps, callback=None, log_interval=4, eval_env=None, eval_freq=-1,
#       n_eval_episodes=5, tb_log_name='DDPG', eval_log_path=None, reset_num_timesteps=True)
model.learn(total_timesteps=500, log_interval=5, n_eval_episodes=1)

## PPO with GAE from `stable_baselines3`

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Constructor reference (library defaults):
# stable_baselines3.ppo.PPO(policy, env, learning_rate=0.0003, n_steps=2048,
#         batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2,
#         clip_range_vf=None, ent_coef=0.0, vf_coef=0.5, max_grad_norm=0.5,
#         use_sde=False, sde_sample_freq=-1, target_kl=None, tensorboard_log=None,
#         create_eval_env=False, policy_kwargs=None, verbose=0, seed=None, device='auto',
#         _init_setup_model=True)
#
# NOTE(review): n_steps=2 with a single environment looks like it yields a
# rollout of only 2 samples, fewer than batch_size=64 -- confirm this is intended.
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    n_steps=2,
    batch_size=64,
    gamma=1.0,
)

# learn() reference (library defaults):
# learn(total_timesteps, callback=None, log_interval=1, eval_env=None, eval_freq=-1,
#       n_eval_episodes=5, tb_log_name='PPO', eval_log_path=None, reset_num_timesteps=True)
model.learn(total_timesteps=350, n_eval_episodes=1)

# Persist the trained policy so the next cell can reload it.
model.save("ppo_svm")

In [None]:
# Reload the policy saved by the PPO training cell and roll it out once.
model = PPO.load("ppo_svm")

# Upper bound on the rollout length (same value as run_ddpg's default
# max_t_step) so the cell terminates even if the environment never sets `done`.
max_eval_steps = 300

obs = env.reset()
rewards = []   # reward received at every step of the rollout
score = 0.0    # cumulative reward of the rollout
for _ in range(max_eval_steps):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    rewards.append(reward)
    score += reward
    env.render()
    # Stop as soon as the environment reports the end of the episode; the
    # original `while True` ignored this flag and looped forever.
    if done:
        break

print('Score: {:.3f}'.format(score))
    

## From my `ddpg_agent.py` code

In [3]:
from ddpg_agent import Agent

# Size the agent from the environment spaces (state_size and act_size are
# computed in the environment-exploration cell, where they print as 1 and 3)
# instead of repeating those numbers by hand.
agent = Agent(state_size, act_size, random_seed=2)

In [4]:
## Save all rewards, energies and princip dims in files during training

def _save_rows(file_name, rows):
    """Write `rows` to `file_name`, one line of space-separated `%f` values per row."""
    with open(file_name, 'w') as out_file:
        for row in rows:
            # Rows are written one at a time so that episodes of different
            # length (an episode may stop early on `done`) do not have to fit
            # into a single rectangular array.
            np.savetxt(out_file, np.asarray(row, dtype=float).reshape(1, -1), fmt="%f")


def save_all(agent, rewards, energies, princip_dims):
    """Checkpoint the agent networks and dump the training histories to disk.

    Parameters
    ----------
    agent : object exposing `actor_local` and `critic_local`, each with a
        `state_dict()` method; the two state dicts are saved to
        'checkpoint_actor.pth' and 'checkpoint_critic.pth'.
    rewards, energies, princip_dims : sequences with one entry per episode,
        each entry being the sequence of values recorded during that episode.
        They are written to 'rewards_RL_0.out', 'energies_RL_0.out' and
        'princip_dims_RL_0.out' respectively, one line per episode.

    All files are written to the current working directory and overwritten on
    every call.
    """
    torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
    torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')

    _save_rows('rewards_RL_0.out', rewards)
    _save_rows('energies_RL_0.out', energies)
    _save_rows('princip_dims_RL_0.out', princip_dims)

def run_ddpg(max_t_step = 300, n_episodes=700):
    """Train the module-level `agent` on the module-level `env`.

    Every episode resets the environment and the agent, then interacts for at
    most `max_t_step` steps, stopping early when the environment returns a
    truthy `done`. After each episode the full histories and the agent
    networks are written to disk through `save_all`, and the episode score
    (sum of its rewards) is printed.

    Parameters
    ----------
    max_t_step : maximum number of environment steps per episode.
    n_episodes : number of episodes to run.

    Returns
    -------
    (rewards, energies, princip_dims) : three lists with one inner list per
        episode, holding respectively the reward, the first component of the
        state reached, and `env.princp_dim` after every step.
    """
    rewards, energies, princip_dims = [], [], []

    for i_episode in range(n_episodes):
        state = env.reset()
        agent.reset()
        episode_rewards, episode_energies, episode_dims = [], [], []

        # Interaction loop of a single episode
        for _ in range(max_t_step):
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            agent.step(state, action, reward, next_state, done)

            # Record what this step produced before moving on
            episode_rewards.append(reward)
            episode_energies.append(next_state[0])
            episode_dims.append(env.princp_dim)

            state = next_state
            if done:
                break

        # Persist everything after each episode so an interrupted run keeps its data
        rewards.append(episode_rewards)
        energies.append(episode_energies)
        princip_dims.append(episode_dims)
        save_all(agent, rewards, energies, princip_dims)

        episode_score = np.sum(episode_rewards)
        print('Episode {} ... Score: {:.3f}'.format(i_episode, episode_score))

    return rewards, energies, princip_dims


In [None]:
# Train the custom DDPG agent with run_ddpg's default settings and keep the
# per-episode reward, energy and princip_dim histories it returns.
all_rewards, all_energies, all_princip_dim = run_ddpg()

#### CALL RESET ####
Action chosen at reset:  [0.]
Actions taken at reset:  []
Energies got at reset:  [0.0]
#### CALL STEP #### 1
Action chosen at step:  [36.242744 48.282074 51.0255  ]
Basis size (it should be the same of full dim) =   1
With this action the energy is:  0.0274688
With this action the full dim is:  1  and princip dim is:  1
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -2.370702015662202
#### CALL STEP #### 2
Action chosen at step:  [28.180555 46.97483  61.97136 ]
Basis size (it should be the same of full dim) =   2
With this action the energy is:  0.0187526
With this action the full dim is:  2  and princip dim is:  2
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -1.764582143013298
#### CALL STEP #### 3
Action chosen at step:  [45.071487 62.3534   99.84169 ]
Basis size (it should be the same of full dim) =   3
With this action the energy is:  0.00629539
With this action the full dim is:  3  and princip dim is:  3
###

With this action the energy is:  -0.00778749
With this action the full dim is:  25  and princip dim is:  25
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08100147953901171
#### CALL STEP #### 26
Action chosen at step:  [ 57.30811   63.368137 100.421486]
Basis size (it should be the same of full dim) =   26
With this action the energy is:  -0.00790771
With this action the full dim is:  26  and princip dim is:  26
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08936151385326063
#### CALL STEP #### 27
Action chosen at step:  [ 52.38388   61.459126 110.      ]
Basis size (it should be the same of full dim) =   27
With this action the energy is:  -0.00832902
With this action the full dim is:  27  and princip dim is:  27
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.11865918526347308
#### CALL STEP #### 28
Action chosen at step:  [ 39.17717   41.188522 110.      ]
Basis size (it should be the same of full dim)

With this action the energy is:  -0.0844007
With this action the full dim is:  46  and princip dim is:  46
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  5.408643007776511
#### CALL STEP #### 55
Action chosen at step:  [57.232204 76.42425   3.586071]
Basis size (it should be the same of full dim) =   47
With this action the energy is:  -0.0845126
With this action the full dim is:  47  and princip dim is:  47
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  5.4164244737535885
#### CALL STEP #### 56
Action chosen at step:  [67.70469 77.47153  0.     ]
**** ILLEGAL ACTION **** --> Set reward: -10.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
#### CALL STEP #### 57
Action chosen at step:  [84.68733  68.408905  0.      ]
**** ILLEGAL ACTION **** --> Set reward: -10.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
#### CALL STEP #### 58
Action chosen at step:  [76.21104  50.67

With this action the energy is:  -0.088751
With this action the full dim is:  68  and princip dim is:  67
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  5.711160536284866
#### CALL STEP #### 83
Action chosen at step:  [69.45013 54.84677  0.     ]
**** ILLEGAL ACTION **** --> Set reward: -10.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
#### CALL STEP #### 84
Action chosen at step:  [58.68162  54.116302 11.875076]
Basis size (it should be the same of full dim) =   69
With this action the energy is:  -0.088816
With this action the full dim is:  69  and princip dim is:  68
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  5.715680601419005
#### CALL STEP #### 85
Action chosen at step:  [82.80615  45.434414  9.045956]
Basis size (it should be the same of full dim) =   70
With this action the energy is:  -0.0891793
With this action the full dim is:  70  and princip dim is:  69
#### THE ACTION IS A GOOD ONE ##

With this action the energy is:  -0.0941029
With this action the full dim is:  91  and princip dim is:  83
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.083328791537117
#### CALL STEP #### 109
Action chosen at step:  [110.        46.18246   20.461132]
Basis size (it should be the same of full dim) =   92
With this action the energy is:  -0.094149
With this action the full dim is:  92  and princip dim is:  84
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.086534560809174
#### CALL STEP #### 110
Action chosen at step:  [110.        85.228714  40.862106]
Basis size (it should be the same of full dim) =   93
With this action the energy is:  -0.0941587
With this action the full dim is:  93  and princip dim is:  84
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.0872090936061145
#### CALL STEP #### 111
Action chosen at step:  [110.        67.1076    42.311523]
Basis size (it should be the same of full dim) =   9

With this action the energy is:  -0.0960641
With this action the full dim is:  114  and princip dim is:  96
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.219709587553618
#### CALL STEP #### 135
Action chosen at step:  [110.        62.35652    6.856861]
Basis size (it should be the same of full dim) =   115
With this action the energy is:  -0.0964991
With this action the full dim is:  115  and princip dim is:  97
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.2499592542205455
#### CALL STEP #### 136
Action chosen at step:  [110.        64.45867   15.573143]
Basis size (it should be the same of full dim) =   116
With this action the energy is:  -0.0966057
With this action the full dim is:  116  and princip dim is:  98
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.257372161040532
#### CALL STEP #### 137
Action chosen at step:  [110.        61.7678    26.041002]
Basis size (it should be the same of full dim)

With this action the energy is:  -0.102311
With this action the full dim is:  137  and princip dim is:  109
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.654115662729776
#### CALL STEP #### 162
Action chosen at step:  [70.15787  13.264423 38.01553 ]
Basis size (it should be the same of full dim) =   138
With this action the energy is:  -0.102318
With this action the full dim is:  138  and princip dim is:  110
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.654602438974992
#### CALL STEP #### 163
Action chosen at step:  [85.90958  10.540775 50.998055]
Basis size (it should be the same of full dim) =   139
With this action the energy is:  -0.102339
With this action the full dim is:  139  and princip dim is:  111
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  6.656062767710637
#### CALL STEP #### 164
Action chosen at step:  [92.98512   0.       75.297516]
**** ILLEGAL ACTION **** --> Set reward: -10.0
This acti

With this action the energy is:  -0.137158
With this action the full dim is:  151  and princip dim is:  121
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.077357350873273
#### CALL STEP #### 194
Action chosen at step:  [93.708786 25.50611  34.7396  ]
Basis size (it should be the same of full dim) =   152
With this action the energy is:  -0.137156
With this action the full dim is:  152  and princip dim is:  122
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.077218271946068
#### CALL STEP #### 195
Action chosen at step:  [110.       38.28803  64.09686]
Basis size (it should be the same of full dim) =   153
With this action the energy is:  -0.137155
With this action the full dim is:  153  and princip dim is:  123
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.077148732482465
#### CALL STEP #### 196
Action chosen at step:  [110.        29.779861  67.68159 ]
Basis size (it should be the same of full dim) =   15

With this action the energy is:  -0.140837
With this action the full dim is:  174  and princip dim is:  132
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.333193037465515
#### CALL STEP #### 220
Action chosen at step:  [31.29763  15.874653 83.28778 ]
Basis size (it should be the same of full dim) =   175
With this action the energy is:  -0.140881
With this action the full dim is:  175  and princip dim is:  133
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.33625277386401
#### CALL STEP #### 221
Action chosen at step:  [51.904293 29.59704  94.50176 ]
Basis size (it should be the same of full dim) =   176
With this action the energy is:  -0.14088
With this action the full dim is:  176  and princip dim is:  133
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.336183234400409
#### CALL STEP #### 222
Action chosen at step:  [45.52243   8.829254 86.692696]
Basis size (it should be the same of full dim) =   177
Wit

With this action the energy is:  -0.141152
With this action the full dim is:  195  and princip dim is:  140
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.355097968500187
#### CALL STEP #### 247
Action chosen at step:  [69.09964  22.772099 58.02116 ]
Basis size (it should be the same of full dim) =   196
With this action the energy is:  -0.141151
With this action the full dim is:  196  and princip dim is:  140
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.355028429036585
#### CALL STEP #### 248
Action chosen at step:  [41.223892  0.       60.012966]
**** ILLEGAL ACTION **** --> Set reward: -10.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
#### CALL STEP #### 249
Action chosen at step:  [40.09336  0.      52.21229]
**** ILLEGAL ACTION **** --> Set reward: -10.0
This action IS REMOVED from actions taken and sigmas, the energy is NOT STORED!
#### CALL STEP #### 250
Action chosen at step:  [57.133926

With this action the energy is:  -0.141134
With this action the full dim is:  216  and princip dim is:  144
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.35384625815535
#### CALL STEP #### 274
Action chosen at step:  [64.71934 68.31092 97.95993]
Basis size (it should be the same of full dim) =   217
With this action the energy is:  -0.141134
With this action the full dim is:  217  and princip dim is:  144
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.35384625815535
#### CALL STEP #### 275
Action chosen at step:  [ 52.148415  65.078224 106.72301 ]
Basis size (it should be the same of full dim) =   218
With this action the energy is:  -0.141134
With this action the full dim is:  218  and princip dim is:  144
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  9.35384625815535
#### CALL STEP #### 276
Action chosen at step:  [ 55.888893  79.1186   110.      ]
Basis size (it should be the same of full dim) =   219
W

With this action the energy is:  0.0114808
With this action the full dim is:  1  and princip dim is:  1
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -1.2589050715913146
#### CALL STEP #### 2
Action chosen at step:  [ 19.320023  66.5029   106.90942 ]
Basis size (it should be the same of full dim) =   2
With this action the energy is:  0.00933428
With this action the full dim is:  2  and princip dim is:  2
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -1.1096372221800657
#### CALL STEP #### 3
Action chosen at step:  [ 10.106224  64.86205  110.      ]
Basis size (it should be the same of full dim) =   3
With this action the energy is:  -0.0160213
With this action the full dim is:  3  and princip dim is:  3
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.6535762103408818
#### CALL STEP #### 4
Action chosen at step:  [ 10.579578  60.549564 110.      ]
Basis size (it should be the same of full dim) =   4
With this

With this action the energy is:  -0.132647
With this action the full dim is:  21  and princip dim is:  21
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.763664830564053
#### CALL STEP #### 30
Action chosen at step:  [ 17.934208  79.81407  109.69159 ]
Basis size (it should be the same of full dim) =   22
With this action the energy is:  -0.132715
With this action the full dim is:  22  and princip dim is:  22
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.768393514088999
#### CALL STEP #### 31
Action chosen at step:  [ 12.924324 106.09877  110.      ]
Basis size (it should be the same of full dim) =   23
With this action the energy is:  -0.132751
With this action the full dim is:  23  and princip dim is:  23
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.770896934778676
#### CALL STEP #### 32
Action chosen at step:  [ 32.662235 110.       110.      ]
Basis size (it should be the same of full dim) =   24
With

With this action the energy is:  -0.133694
With this action the full dim is:  44  and princip dim is:  44
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.836472648955487
#### CALL STEP #### 58
Action chosen at step:  [ 34.12092 110.      110.     ]
Basis size (it should be the same of full dim) =   45
With this action the energy is:  -0.133696
With this action the full dim is:  45  and princip dim is:  45
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.83661172788269
#### CALL STEP #### 59
Action chosen at step:  [ 32.200485 107.38692  110.      ]
Basis size (it should be the same of full dim) =   46
With this action the energy is:  -0.13371
With this action the full dim is:  46  and princip dim is:  46
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.837585280373121
#### CALL STEP #### 60
Action chosen at step:  [ 29.35998 110.      110.     ]
Basis size (it should be the same of full dim) =   47
With this ac

With this action the energy is:  -0.133837
With this action the full dim is:  69  and princip dim is:  61
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.846416792250592
#### CALL STEP #### 83
Action chosen at step:  [ 60.581966 110.       110.      ]
Basis size (it should be the same of full dim) =   70
With this action the energy is:  -0.133836
With this action the full dim is:  70  and princip dim is:  61
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.846347252786991
#### CALL STEP #### 84
Action chosen at step:  [ 61.804474  85.71949  110.      ]
Basis size (it should be the same of full dim) =   71
With this action the energy is:  -0.133836
With this action the full dim is:  71  and princip dim is:  62
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.846347252786991
#### CALL STEP #### 85
Action chosen at step:  [ 56.057602  95.96617  110.      ]
Basis size (it should be the same of full dim) =   72
With

With this action the energy is:  -0.133848
With this action the full dim is:  95  and princip dim is:  64
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.847181726350215
#### CALL STEP #### 109
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   96
With this action the energy is:  -0.133848
With this action the full dim is:  96  and princip dim is:  64
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.847181726350215
#### CALL STEP #### 110
Action chosen at step:  [110.        87.759186 110.      ]
Basis size (it should be the same of full dim) =   97
With this action the energy is:  -0.133848
With this action the full dim is:  97  and princip dim is:  64
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.847181726350215
#### CALL STEP #### 111
Action chosen at step:  [110.       67.24088 110.     ]
Basis size (it should be the same of full dim) =   98
With this action the e

With this action the energy is:  -0.133844
With this action the full dim is:  120  and princip dim is:  65
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.846903568495806
#### CALL STEP #### 134
Action chosen at step:  [110.       104.136444 110.      ]
Basis size (it should be the same of full dim) =   121
With this action the energy is:  -0.133844
With this action the full dim is:  121  and princip dim is:  65
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.846903568495806
#### CALL STEP #### 135
Action chosen at step:  [102.326645 110.       110.      ]
Basis size (it should be the same of full dim) =   122
With this action the energy is:  -0.133844
With this action the full dim is:  122  and princip dim is:  65
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.846903568495806
#### CALL STEP #### 136
Action chosen at step:  [102.159424 110.       110.      ]
Basis size (it should be the same of full dim) =  

With this action the energy is:  -0.133969
With this action the full dim is:  145  and princip dim is:  68
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.855596001446074
#### CALL STEP #### 159
Action chosen at step:  [69.52298 61.35992 88.02478]
Basis size (it should be the same of full dim) =   146
With this action the energy is:  -0.133997
With this action the full dim is:  146  and princip dim is:  69
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.857543106426933
#### CALL STEP #### 160
Action chosen at step:  [54.817486 65.800064 98.40703 ]
Basis size (it should be the same of full dim) =   147
With this action the energy is:  -0.134001
With this action the full dim is:  147  and princip dim is:  70
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.857821264281341
#### CALL STEP #### 161
Action chosen at step:  [ 73.76558  97.48053 110.     ]
Basis size (it should be the same of full dim) =   148
With th

With this action the energy is:  -0.134031
With this action the full dim is:  170  and princip dim is:  72
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.859907448189405
#### CALL STEP #### 184
Action chosen at step:  [110.       71.65931  95.26349]
Basis size (it should be the same of full dim) =   171
With this action the energy is:  -0.134031
With this action the full dim is:  171  and princip dim is:  72
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.859907448189405
#### CALL STEP #### 185
Action chosen at step:  [110.       83.61612 110.     ]
Basis size (it should be the same of full dim) =   172
With this action the energy is:  -0.134031
With this action the full dim is:  172  and princip dim is:  72
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.859907448189405
#### CALL STEP #### 186
Action chosen at step:  [110.       92.17793 110.     ]
Basis size (it should be the same of full dim) =   173
With

With this action the energy is:  -0.134032
With this action the full dim is:  195  and princip dim is:  72
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.859976987653008
#### CALL STEP #### 209
Action chosen at step:  [110.        49.045105  64.5439  ]
Basis size (it should be the same of full dim) =   196
With this action the energy is:  -0.134036
With this action the full dim is:  196  and princip dim is:  73
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.860255145507415
#### CALL STEP #### 210
Action chosen at step:  [110.       64.36097  77.18455]
Basis size (it should be the same of full dim) =   197
With this action the energy is:  -0.134037
With this action the full dim is:  197  and princip dim is:  73
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.860324684971017
#### CALL STEP #### 211
Action chosen at step:  [110.       68.61258  86.87231]
Basis size (it should be the same of full dim) =   198
W

With this action the energy is:  -0.134036
With this action the full dim is:  220  and princip dim is:  73
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.860255145507415
#### CALL STEP #### 234
Action chosen at step:  [110.        87.665794 110.      ]
Basis size (it should be the same of full dim) =   221
With this action the energy is:  -0.134036
With this action the full dim is:  221  and princip dim is:  73
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.860255145507415
#### CALL STEP #### 235
Action chosen at step:  [110.        92.483315 110.      ]
Basis size (it should be the same of full dim) =   222
With this action the energy is:  -0.134036
With this action the full dim is:  222  and princip dim is:  73
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.860255145507415
#### CALL STEP #### 236
Action chosen at step:  [101.49131  82.74222 108.36426]
Basis size (it should be the same of full dim) =   22

With this action the energy is:  -0.134032
With this action the full dim is:  245  and princip dim is:  75
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.859976987653008
#### CALL STEP #### 259
Action chosen at step:  [ 95.52661 110.      110.     ]
Basis size (it should be the same of full dim) =   246
With this action the energy is:  -0.134032
With this action the full dim is:  246  and princip dim is:  75
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.859976987653008
#### CALL STEP #### 260
Action chosen at step:  [ 86.29565 110.      110.     ]
Basis size (it should be the same of full dim) =   247
With this action the energy is:  -0.134032
With this action the full dim is:  247  and princip dim is:  75
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.859976987653008
#### CALL STEP #### 261
Action chosen at step:  [ 79.09771 110.      110.     ]
Basis size (it should be the same of full dim) =   248
With

With this action the energy is:  -0.134046
With this action the full dim is:  270  and princip dim is:  75
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.860950540143437
#### CALL STEP #### 284
Action chosen at step:  [110.       92.57071 110.     ]
Basis size (it should be the same of full dim) =   271
With this action the energy is:  -0.134046
With this action the full dim is:  271  and princip dim is:  75
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.860950540143437
#### CALL STEP #### 285
Action chosen at step:  [110.      100.57918 110.     ]
Basis size (it should be the same of full dim) =   272
With this action the energy is:  -0.134046
With this action the full dim is:  272  and princip dim is:  75
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  8.860950540143437
#### CALL STEP #### 286
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   273
With this action th

With this action the energy is:  -7.58163e-35
With this action the full dim is:  8  and princip dim is:  8
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 9
Action chosen at step:  [103.14473 110.      110.     ]
Basis size (it should be the same of full dim) =   9
With this action the energy is:  -6.22832e-29
With this action the full dim is:  9  and princip dim is:  9
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 10
Action chosen at step:  [ 99.43628 110.      100.53493]
Basis size (it should be the same of full dim) =   10
With this action the energy is:  -2.41531e-28
With this action the full dim is:  10  and princip dim is:  10
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 11
Action chosen at step:  [ 97.49538 110.       82.87836]
Basis size (it should be the same of full dim) =   11
W

With this action the energy is:  0.00430208
With this action the full dim is:  33  and princip dim is:  17
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.7597007334414148
#### CALL STEP #### 34
Action chosen at step:  [110.        45.050713 108.16087 ]
Basis size (it should be the same of full dim) =   34
With this action the energy is:  9.50829e-17
With this action the full dim is:  34  and princip dim is:  18
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786796197
#### CALL STEP #### 35
Action chosen at step:  [110.       42.59322 103.15099]
Basis size (it should be the same of full dim) =   35
With this action the energy is:  7.1735e-17
With this action the full dim is:  35  and princip dim is:  19
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786796197
#### CALL STEP #### 36
Action chosen at step:  [110.        60.088963  77.87172 ]
Basis size (it should be the same of full dim) = 

With this action the energy is:  -1.83052e-15
With this action the full dim is:  58  and princip dim is:  22
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786782696
#### CALL STEP #### 59
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   59
With this action the energy is:  -1.27604e-15
With this action the full dim is:  59  and princip dim is:  22
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978678678
#### CALL STEP #### 60
Action chosen at step:  [110.      105.11164 106.90582]
Basis size (it should be the same of full dim) =   60
With this action the energy is:  -6.75666e-15
With this action the full dim is:  60  and princip dim is:  22
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978674859
#### CALL STEP #### 61
Action chosen at step:  [110.       96.00751 110.     ]
Basis size (it should be the same of full dim) =   61
With this ac

With this action the energy is:  -4.11549e-14
With this action the full dim is:  83  and princip dim is:  22
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786509315
#### CALL STEP #### 84
Action chosen at step:  [101.33798   88.160965 110.      ]
Basis size (it should be the same of full dim) =   84
With this action the energy is:  1.35754e-17
With this action the full dim is:  84  and princip dim is:  23
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 85
Action chosen at step:  [108.71606 110.      110.     ]
Basis size (it should be the same of full dim) =   85
With this action the energy is:  -2.53829e-16
With this action the full dim is:  85  and princip dim is:  23
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679389
#### CALL STEP #### 86
Action chosen at step:  [108.65726 108.84682 110.     ]
Basis size (it should be the same of full dim) =

With this action the energy is:  -2.06905e-15
With this action the full dim is:  108  and princip dim is:  28
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867811
#### CALL STEP #### 109
Action chosen at step:  [ 67.11635 110.      110.     ]
Basis size (it should be the same of full dim) =   109
With this action the energy is:  -1.51174e-15
With this action the full dim is:  109  and princip dim is:  28
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786785183
#### CALL STEP #### 110
Action chosen at step:  [ 36.55708 110.      106.67186]
Basis size (it should be the same of full dim) =   110
With this action the energy is:  -1.19253e-16
With this action the full dim is:  110  and princip dim is:  29
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794776
#### CALL STEP #### 111
Action chosen at step:  [ 38.660393 110.       103.87974 ]
Basis size (it should be the same of ful

With this action the energy is:  -3.41553e-15
With this action the full dim is:  133  and princip dim is:  31
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978677186
#### CALL STEP #### 134
Action chosen at step:  [110.     110.      78.1881]
Basis size (it should be the same of full dim) =   134
With this action the energy is:  -8.67098e-17
With this action the full dim is:  134  and princip dim is:  31
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794953
#### CALL STEP #### 135
Action chosen at step:  [110.      110.       51.17488]
Basis size (it should be the same of full dim) =   135
With this action the energy is:  -1.46324e-17
With this action the full dim is:  135  and princip dim is:  31
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795486
#### CALL STEP #### 136
Action chosen at step:  [110.       110.        94.038635]
Basis size (it should be the same of full 

With this action the energy is:  -8.4663e-16
With this action the full dim is:  158  and princip dim is:  31
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786789624
#### CALL STEP #### 159
Action chosen at step:  [110.       104.300964  94.83107 ]
Basis size (it should be the same of full dim) =   159
With this action the energy is:  -8.66214e-17
With this action the full dim is:  159  and princip dim is:  31
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794953
#### CALL STEP #### 160
Action chosen at step:  [104.76337 110.      101.96608]
Basis size (it should be the same of full dim) =   160
With this action the energy is:  -1.34471e-17
With this action the full dim is:  160  and princip dim is:  31
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 161
Action chosen at step:  [100.42988  110.        69.614174]
Basis size (it should be the same of

With this action the energy is:  -6.15813e-17
With this action the full dim is:  183  and princip dim is:  33
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679513
#### CALL STEP #### 184
Action chosen at step:  [ 99.61926  110.        92.081055]
Basis size (it should be the same of full dim) =   184
With this action the energy is:  -2.70273e-15
With this action the full dim is:  184  and princip dim is:  33
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786776834
#### CALL STEP #### 185
Action chosen at step:  [110.      110.       99.92735]
Basis size (it should be the same of full dim) =   185
With this action the energy is:  -5.35398e-16
With this action the full dim is:  185  and princip dim is:  33
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786791933
#### CALL STEP #### 186
Action chosen at step:  [110.      110.       95.74393]
Basis size (it should be the same of fu

With this action the energy is:  -6.13407e-16
With this action the full dim is:  207  and princip dim is:  33
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867914
#### CALL STEP #### 209
Action chosen at step:  [ 88.015366 110.        86.5477  ]
Basis size (it should be the same of full dim) =   209
With this action the energy is:  -7.04492e-18
With this action the full dim is:  208  and princip dim is:  33
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 210
Action chosen at step:  [96.55003  86.87271  72.205574]
Basis size (it should be the same of full dim) =   210
With this action the energy is:  -2.54907e-17
With this action the full dim is:  209  and princip dim is:  34
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795486
#### CALL STEP #### 211
Action chosen at step:  [110.      110.       89.16641]
Basis size (it should be the same of ful

With this action the energy is:  -7.83724e-16
With this action the full dim is:  232  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786790157
#### CALL STEP #### 234
Action chosen at step:  [100.257706 110.       110.      ]
Basis size (it should be the same of full dim) =   234
With this action the energy is:  -1.38764e-16
With this action the full dim is:  233  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867946
#### CALL STEP #### 235
Action chosen at step:  [110.      110.      108.89642]
Basis size (it should be the same of full dim) =   235
With this action the energy is:  -4.29199e-16
With this action the full dim is:  234  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786792644
#### CALL STEP #### 236
Action chosen at step:  [ 91.28029 110.      110.     ]
Basis size (it should be the same of ful

With this action the energy is:  -7.18223e-15
With this action the full dim is:  257  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978674557
#### CALL STEP #### 259
Action chosen at step:  [106.28397 110.      110.     ]
Basis size (it should be the same of full dim) =   259
With this action the energy is:  -7.46987e-15
With this action the full dim is:  258  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786743794
#### CALL STEP #### 260
Action chosen at step:  [101.70128 110.      110.     ]
Basis size (it should be the same of full dim) =   260
With this action the energy is:  -1.97038e-14
With this action the full dim is:  259  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786658707
#### CALL STEP #### 261
Action chosen at step:  [ 94.02319  97.88727 110.     ]
Basis size (it should be the same of full 

With this action the energy is:  -5.93754e-16
With this action the full dim is:  282  and princip dim is:  43
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679158
#### CALL STEP #### 284
Action chosen at step:  [ 71.796974 110.       110.      ]
Basis size (it should be the same of full dim) =   284
With this action the energy is:  2.93606e-18
With this action the full dim is:  283  and princip dim is:  43
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 285
Action chosen at step:  [ 83.4187   76.67985 110.     ]
Basis size (it should be the same of full dim) =   285
With this action the energy is:  -1.33746e-15
With this action the full dim is:  284  and princip dim is:  43
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978678625
#### CALL STEP #### 286
Action chosen at step:  [ 83.97492  75.21154 110.     ]
Basis size (it should be the same of full

With this action the energy is:  0.00658049
With this action the full dim is:  8  and princip dim is:  8
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.9181401427071503
#### CALL STEP #### 9
Action chosen at step:  [110.      110.       64.58859]
Basis size (it should be the same of full dim) =   9
With this action the energy is:  0.00581674
With this action the full dim is:  9  and princip dim is:  9
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.8650293773810205
#### CALL STEP #### 10
Action chosen at step:  [110.       110.        52.764164]
Basis size (it should be the same of full dim) =   10
With this action the energy is:  0.00581494
With this action the full dim is:  10  and princip dim is:  10
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.8649042063465355
#### CALL STEP #### 11
Action chosen at step:  [110.      110.       66.94863]
Basis size (it should be the same of full dim) =   11
With th

With this action the energy is:  0.00383484
With this action the full dim is:  33  and princip dim is:  22
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.7272091144679571
#### CALL STEP #### 34
Action chosen at step:  [ 83.267006 110.       110.      ]
Basis size (it should be the same of full dim) =   34
With this action the energy is:  0.00383646
With this action the full dim is:  34  and princip dim is:  22
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.7273217683989905
#### CALL STEP #### 35
Action chosen at step:  [ 74.916145 110.       110.      ]
Basis size (it should be the same of full dim) =   35
With this action the energy is:  0.00383796
With this action the full dim is:  35  and princip dim is:  22
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.7274260775943944
#### CALL STEP #### 36
Action chosen at step:  [ 58.53457 110.      110.     ]
Basis size (it should be the same of full dim) =   3

With this action the energy is:  -4.19936e-15
With this action the full dim is:  58  and princip dim is:  26
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786766354
#### CALL STEP #### 59
Action chosen at step:  [108.069214  83.82599  110.      ]
Basis size (it should be the same of full dim) =   59
With this action the energy is:  -2.51603e-15
With this action the full dim is:  59  and princip dim is:  26
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867779
#### CALL STEP #### 60
Action chosen at step:  [108.40034  91.21826 102.51279]
Basis size (it should be the same of full dim) =   60
With this action the energy is:  -7.4276e-17
With this action the full dim is:  60  and princip dim is:  26
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794953
#### CALL STEP #### 61
Action chosen at step:  [110.       106.47203   93.306984]
Basis size (it should be the same of full dim)

With this action the energy is:  -2.9079e-17
With this action the full dim is:  83  and princip dim is:  33
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795486
#### CALL STEP #### 84
Action chosen at step:  [ 83.89936 109.71535  99.51524]
Basis size (it should be the same of full dim) =   84
With this action the energy is:  7.12715e-18
With this action the full dim is:  84  and princip dim is:  33
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 85
Action chosen at step:  [ 65.44746 110.       92.0141 ]
Basis size (it should be the same of full dim) =   85
With this action the energy is:  -2.53467e-16
With this action the full dim is:  85  and princip dim is:  33
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679389
#### CALL STEP #### 86
Action chosen at step:  [ 81.02488 110.       75.18872]
Basis size (it should be the same of full dim) =   8

With this action the energy is:  -2.41858e-15
With this action the full dim is:  108  and princip dim is:  39
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978677879
#### CALL STEP #### 109
Action chosen at step:  [109.76367 110.       79.53806]
Basis size (it should be the same of full dim) =   109
With this action the energy is:  -1.2759e-15
With this action the full dim is:  109  and princip dim is:  39
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978678678
#### CALL STEP #### 110
Action chosen at step:  [108.45375  90.43367  82.80656]
Basis size (it should be the same of full dim) =   110
With this action the energy is:  -2.12197e-16
With this action the full dim is:  110  and princip dim is:  39
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794065
#### CALL STEP #### 111
Action chosen at step:  [110.       82.12663  86.78846]
Basis size (it should be the same of full di

With this action the energy is:  -4.48872e-16
With this action the full dim is:  133  and princip dim is:  39
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786792466
#### CALL STEP #### 134
Action chosen at step:  [ 93.72902  106.262314  96.22192 ]
Basis size (it should be the same of full dim) =   134
With this action the energy is:  -4.11371e-16
With this action the full dim is:  134  and princip dim is:  39
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786792644
#### CALL STEP #### 135
Action chosen at step:  [103.64765   86.66445   95.707016]
Basis size (it should be the same of full dim) =   135
With this action the energy is:  -3.68064e-17
With this action the full dim is:  135  and princip dim is:  39
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795486
#### CALL STEP #### 136
Action chosen at step:  [87.086334 97.30223  86.97978 ]
Basis size (it should be the same o

With this action the energy is:  -0.0204006
With this action the full dim is:  158  and princip dim is:  53
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9581103832936986
#### CALL STEP #### 159
Action chosen at step:  [49.01733  94.220116 74.29287 ]
Basis size (it should be the same of full dim) =   159
With this action the energy is:  -0.0204922
With this action the full dim is:  159  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.964480198159654
#### CALL STEP #### 160
Action chosen at step:  [72.001175 92.36627  82.74171 ]
Basis size (it should be the same of full dim) =   160
With this action the energy is:  -0.0204926
With this action the full dim is:  160  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9645080139450943
#### CALL STEP #### 161
Action chosen at step:  [103.541855 100.54959   56.867466]
Basis size (it should be the same of full dim) =   

With this action the energy is:  -0.0205449
With this action the full dim is:  183  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9681449278914869
#### CALL STEP #### 184
Action chosen at step:  [ 80.61484  100.05473   61.752136]
Basis size (it should be the same of full dim) =   184
With this action the energy is:  -0.0205433
With this action the full dim is:  184  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9680336647497239
#### CALL STEP #### 185
Action chosen at step:  [77.53572  87.94448  62.212395]
Basis size (it should be the same of full dim) =   185
With this action the energy is:  -0.0205413
With this action the full dim is:  185  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9678945858225187
#### CALL STEP #### 186
Action chosen at step:  [90.602425 96.49124  87.453384]
Basis size (it should be the same of full dim) =  

With this action the energy is:  -0.0205235
With this action the full dim is:  208  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9666567833704018
#### CALL STEP #### 209
Action chosen at step:  [100.132675 110.       110.      ]
Basis size (it should be the same of full dim) =   209
With this action the energy is:  -0.0205229
With this action the full dim is:  209  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9666150596922414
#### CALL STEP #### 210
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   210
With this action the energy is:  -0.020523
With this action the full dim is:  210  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9666220136386006
#### CALL STEP #### 211
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   211
With this action the energ

With this action the energy is:  -0.020516
With this action the full dim is:  233  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9661352373933845
#### CALL STEP #### 234
Action chosen at step:  [ 79.43536 107.40517 110.     ]
Basis size (it should be the same of full dim) =   234
With this action the energy is:  -0.0205151
With this action the full dim is:  234  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9660726518761447
#### CALL STEP #### 235
Action chosen at step:  [ 60.62504 110.      110.     ]
Basis size (it should be the same of full dim) =   235
With this action the energy is:  -0.020515
With this action the full dim is:  235  and princip dim is:  54
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.9660656979297819
#### CALL STEP #### 236
Action chosen at step:  [ 54.171494 110.       110.      ]
Basis size (it should be the same of full dim) =   2

With this action the energy is:  -0.022523
With this action the full dim is:  258  and princip dim is:  57
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.1057009408428637
#### CALL STEP #### 259
Action chosen at step:  [ 42.73653   39.555084 110.      ]
Basis size (it should be the same of full dim) =   259
With this action the energy is:  -0.0225161
With this action the full dim is:  259  and princip dim is:  58
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.1052211185440104
#### CALL STEP #### 260
Action chosen at step:  [ 68.14372  81.29737 110.     ]
Basis size (it should be the same of full dim) =   260
With this action the energy is:  -0.0225179
With this action the full dim is:  260  and princip dim is:  58
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.1053462895784936
#### CALL STEP #### 261
Action chosen at step:  [ 52.403515 110.       110.      ]
Basis size (it should be the same of full dim) =

With this action the energy is:  -0.0225277
With this action the full dim is:  283  and princip dim is:  58
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.1060277763217954
#### CALL STEP #### 284
Action chosen at step:  [110.       78.31606  99.06671]
Basis size (it should be the same of full dim) =   284
With this action the energy is:  -0.0225291
With this action the full dim is:  284  and princip dim is:  58
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.1061251315708365
#### CALL STEP #### 285
Action chosen at step:  [110.        80.31948   96.986435]
Basis size (it should be the same of full dim) =   285
With this action the energy is:  -0.0225301
With this action the full dim is:  285  and princip dim is:  58
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.106194671034439
#### CALL STEP #### 286
Action chosen at step:  [110.        56.783073 109.885025]
Basis size (it should be the same of full dim) =

With this action the energy is:  0.00523224
With this action the full dim is:  8  and princip dim is:  8
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.8243835609055754
#### CALL STEP #### 9
Action chosen at step:  [107.36218  110.       103.109085]
Basis size (it should be the same of full dim) =   9
With this action the energy is:  0.00479762
With this action the full dim is:  9  and princip dim is:  9
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.7941603192348179
#### CALL STEP #### 10
Action chosen at step:  [110.      110.       90.89523]
Basis size (it should be the same of full dim) =   10
With this action the energy is:  0.00479254
With this action the full dim is:  10  and princip dim is:  10
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.7938070587597199
#### CALL STEP #### 11
Action chosen at step:  [110.       110.        98.256004]
Basis size (it should be the same of full dim) =   11
With

With this action the energy is:  -5.36564e-17
With this action the full dim is:  33  and princip dim is:  17
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679513
#### CALL STEP #### 34
Action chosen at step:  [106.2739    99.57625   54.497738]
Basis size (it should be the same of full dim) =   34
With this action the energy is:  3.30521e-16
With this action the full dim is:  34  and princip dim is:  18
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786797973
#### CALL STEP #### 35
Action chosen at step:  [106.46574 107.58234  91.12456]
Basis size (it should be the same of full dim) =   35
With this action the energy is:  -1.11465e-18
With this action the full dim is:  35  and princip dim is:  18
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 36
Action chosen at step:  [103.276596  90.996704  99.03153 ]
Basis size (it should be the same of full dim

With this action the energy is:  -1.51863e-16
With this action the full dim is:  58  and princip dim is:  25
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867946
#### CALL STEP #### 59
Action chosen at step:  [110.        97.627686 103.76528 ]
Basis size (it should be the same of full dim) =   59
With this action the energy is:  7.643e-18
With this action the full dim is:  59  and princip dim is:  25
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 60
Action chosen at step:  [110.      101.22325 110.     ]
Basis size (it should be the same of full dim) =   60
With this action the energy is:  -2.7039e-16
With this action the full dim is:  60  and princip dim is:  25
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679353
#### CALL STEP #### 61
Action chosen at step:  [110.        69.828026  93.091125]
Basis size (it should be the same of full dim) =  

With this action the energy is:  -4.96496e-17
With this action the full dim is:  83  and princip dim is:  29
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679513
#### CALL STEP #### 84
Action chosen at step:  [110.       102.44893   89.265564]
Basis size (it should be the same of full dim) =   84
With this action the energy is:  -7.41857e-16
With this action the full dim is:  84  and princip dim is:  29
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786790335
#### CALL STEP #### 85
Action chosen at step:  [110.     110.     109.8745]
Basis size (it should be the same of full dim) =   85
With this action the energy is:  -2.00867e-16
With this action the full dim is:  85  and princip dim is:  29
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679424
#### CALL STEP #### 86
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   86
With this ac

With this action the energy is:  -8.28279e-16
With this action the full dim is:  108  and princip dim is:  32
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867898
#### CALL STEP #### 109
Action chosen at step:  [ 83.604164 110.       110.      ]
Basis size (it should be the same of full dim) =   109
With this action the energy is:  2.63838e-17
With this action the full dim is:  109  and princip dim is:  32
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 110
Action chosen at step:  [ 82.21266 110.      110.     ]
Basis size (it should be the same of full dim) =   110
With this action the energy is:  -1.44539e-14
With this action the full dim is:  110  and princip dim is:  32
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978669512
#### CALL STEP #### 111
Action chosen at step:  [107.89726  110.       107.425545]
Basis size (it should be the same of fu

With this action the energy is:  -1.55316e-16
With this action the full dim is:  133  and princip dim is:  36
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867946
#### CALL STEP #### 134
Action chosen at step:  [110.      110.      101.08382]
Basis size (it should be the same of full dim) =   134
With this action the energy is:  -4.93265e-15
With this action the full dim is:  134  and princip dim is:  36
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867612
#### CALL STEP #### 135
Action chosen at step:  [110.      110.       75.25998]
Basis size (it should be the same of full dim) =   135
With this action the energy is:  -5.03913e-16
With this action the full dim is:  135  and princip dim is:  36
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679211
#### CALL STEP #### 136
Action chosen at step:  [110.       110.        76.586075]
Basis size (it should be the same of full d

With this action the energy is:  -9.9481e-17
With this action the full dim is:  158  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794776
#### CALL STEP #### 159
Action chosen at step:  [110.        32.597206 104.18916 ]
Basis size (it should be the same of full dim) =   159
With this action the energy is:  -4.72167e-16
With this action the full dim is:  159  and princip dim is:  38
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679229
#### CALL STEP #### 160
Action chosen at step:  [110.       76.25534  94.53389]
Basis size (it should be the same of full dim) =   160
With this action the energy is:  -1.66421e-15
With this action the full dim is:  160  and princip dim is:  38
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978678394
#### CALL STEP #### 161
Action chosen at step:  [110.        49.340397  65.897316]
Basis size (it should be the same of f

With this action the energy is:  -9.04727e-17
With this action the full dim is:  183  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794953
#### CALL STEP #### 184
Action chosen at step:  [110.      110.       87.83418]
Basis size (it should be the same of full dim) =   184
With this action the energy is:  -3.77013e-17
With this action the full dim is:  184  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795486
#### CALL STEP #### 185
Action chosen at step:  [106.165215 100.37404  104.123726]
Basis size (it should be the same of full dim) =   185
With this action the energy is:  -9.3328e-17
With this action the full dim is:  185  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794953
#### CALL STEP #### 186
Action chosen at step:  [110.       110.       100.178604]
Basis size (it should be the same of

With this action the energy is:  -4.76059e-17
With this action the full dim is:  208  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679513
#### CALL STEP #### 209
Action chosen at step:  [ 70.476776  90.83443  110.      ]
Basis size (it should be the same of full dim) =   209
With this action the energy is:  -2.46796e-18
With this action the full dim is:  209  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 210
Action chosen at step:  [ 66.3965   91.30942 110.     ]
Basis size (it should be the same of full dim) =   210
With this action the energy is:  -3.819e-17
With this action the full dim is:  210  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795486
#### CALL STEP #### 211
Action chosen at step:  [ 47.380413  79.3394   110.      ]
Basis size (it should be the same of f

With this action the energy is:  -7.65858e-19
With this action the full dim is:  233  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 234
Action chosen at step:  [110.       87.45859 110.     ]
Basis size (it should be the same of full dim) =   234
With this action the energy is:  -2.84616e-17
With this action the full dim is:  234  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795486
#### CALL STEP #### 235
Action chosen at step:  [104.06241  109.511765 110.      ]
Basis size (it should be the same of full dim) =   235
With this action the energy is:  -6.64582e-16
With this action the full dim is:  235  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786791045
#### CALL STEP #### 236
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   23

With this action the energy is:  -1.3907e-16
With this action the full dim is:  258  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.460536397867946
#### CALL STEP #### 259
Action chosen at step:  [110.        98.27054   96.187454]
Basis size (it should be the same of full dim) =   259
With this action the energy is:  -1.04886e-16
With this action the full dim is:  259  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786794776
#### CALL STEP #### 260
Action chosen at step:  [110.        91.503494 101.73726 ]
Basis size (it should be the same of full dim) =   260
With this action the energy is:  -7.45005e-18
With this action the full dim is:  260  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 261
Action chosen at step:  [110.       94.40308 110.     ]
Basis size (it should be the same of f

With this action the energy is:  -1.79777e-15
With this action the full dim is:  283  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978678305
#### CALL STEP #### 284
Action chosen at step:  [110.       89.84306 110.     ]
Basis size (it should be the same of full dim) =   284
With this action the energy is:  -2.87232e-16
With this action the full dim is:  284  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679353
#### CALL STEP #### 285
Action chosen at step:  [110.        66.760086  96.78554 ]
Basis size (it should be the same of full dim) =   285
With this action the energy is:  -2.62945e-16
With this action the full dim is:  285  and princip dim is:  42
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679389
#### CALL STEP #### 286
Action chosen at step:  [110.       61.50699  99.61798]
Basis size (it should be the same of full

With this action the energy is:  1.34495e-31
With this action the full dim is:  8  and princip dim is:  8
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 9
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   9
With this action the energy is:  -2.5562e-31
With this action the full dim is:  9  and princip dim is:  9
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 10
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   10
With this action the energy is:  -8.14665e-31
With this action the full dim is:  10  and princip dim is:  10
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 11
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   11
With this action the energy is:  -2.22624e-29
Wi

With this action the energy is:  8.5057e-17
With this action the full dim is:  33  and princip dim is:  20
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786796197
#### CALL STEP #### 34
Action chosen at step:  [ 71.80201  87.45505 110.     ]
Basis size (it should be the same of full dim) =   34
With this action the energy is:  0.00414581
With this action the full dim is:  34  and princip dim is:  20
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.7488338014643112
#### CALL STEP #### 35
Action chosen at step:  [ 77.6255  99.9048 110.    ]
Basis size (it should be the same of full dim) =   35
With this action the energy is:  -2.39783e-18
With this action the full dim is:  35  and princip dim is:  21
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786795664
#### CALL STEP #### 36
Action chosen at step:  [110.        83.670235 110.      ]
Basis size (it should be the same of full dim) =   36


With this action the energy is:  -1.58203e-14
With this action the full dim is:  58  and princip dim is:  29
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978668553
#### CALL STEP #### 59
Action chosen at step:  [ 66.234245 107.902054 110.      ]
Basis size (it should be the same of full dim) =   59
With this action the energy is:  -4.09939e-16
With this action the full dim is:  59  and princip dim is:  29
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786792644
#### CALL STEP #### 60
Action chosen at step:  [ 91.930244  69.526596 110.      ]
Basis size (it should be the same of full dim) =   60
With this action the energy is:  -1.14278e-14
With this action the full dim is:  60  and princip dim is:  30
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786716083
#### CALL STEP #### 61
Action chosen at step:  [104.92421   59.799736 110.      ]
Basis size (it should be the same of full

With this action the energy is:  -2.4379e-16
With this action the full dim is:  83  and princip dim is:  35
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679389
#### CALL STEP #### 84
Action chosen at step:  [ 92.79631 110.      110.     ]
Basis size (it should be the same of full dim) =   84
With this action the energy is:  -2.82437e-15
With this action the full dim is:  84  and princip dim is:  34
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786775946
#### CALL STEP #### 85
Action chosen at step:  [ 99.13406 110.      100.70598]
Basis size (it should be the same of full dim) =   85
With this action the energy is:  -1.82958e-15
With this action the full dim is:  85  and princip dim is:  35
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786782696
#### CALL STEP #### 86
Action chosen at step:  [108.753296 110.       110.      ]
Basis size (it should be the same of full dim) =

With this action the energy is:  -0.00285055
With this action the full dim is:  108  and princip dim is:  39
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.26231067989689727
#### CALL STEP #### 109
Action chosen at step:  [ 88.48648   20.908173 106.58776 ]
Basis size (it should be the same of full dim) =   109
With this action the energy is:  -0.00432621
With this action the full dim is:  109  and princip dim is:  40
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.15969407503777688
#### CALL STEP #### 110
Action chosen at step:  [ 82.053345  29.296165 110.      ]
Basis size (it should be the same of full dim) =   110
With this action the energy is:  -0.00528394
With this action the full dim is:  110  and princip dim is:  41
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.09309404456210757
#### CALL STEP #### 111
Action chosen at step:  [ 61.570564  82.797676 110.      ]
Basis size (it should be the same o

With this action the energy is:  -0.00786773
With this action the full dim is:  133  and princip dim is:  41
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08658132609844493
#### CALL STEP #### 134
Action chosen at step:  [ 94.11748 110.       98.3056 ]
Basis size (it should be the same of full dim) =   134
With this action the energy is:  -0.00786065
With this action the full dim is:  134  and princip dim is:  41
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08608898669614362
#### CALL STEP #### 135
Action chosen at step:  [ 93.53035 110.       70.62735]
Basis size (it should be the same of full dim) =   135
With this action the energy is:  -0.00786078
With this action the full dim is:  135  and princip dim is:  41
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08609802682641288
#### CALL STEP #### 136
Action chosen at step:  [ 91.122955 110.        67.852554]
Basis size (it should be the same of full di

With this action the energy is:  -0.00789695
With this action the full dim is:  158  and princip dim is:  43
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08861326922490242
#### CALL STEP #### 159
Action chosen at step:  [ 75.2913  110.       97.50166]
Basis size (it should be the same of full dim) =   159
With this action the energy is:  -0.0078967
With this action the full dim is:  159  and princip dim is:  43
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08859588435900001
#### CALL STEP #### 160
Action chosen at step:  [ 80.516975 110.       110.      ]
Basis size (it should be the same of full dim) =   160
With this action the energy is:  -0.00789508
With this action the full dim is:  160  and princip dim is:  44
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.0884832304279648
#### CALL STEP #### 161
Action chosen at step:  [107.875565 110.        92.71565 ]
Basis size (it should be the same of full d

With this action the energy is:  -0.00784564
With this action the full dim is:  182  and princip dim is:  45
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08504519934747456
#### CALL STEP #### 184
Action chosen at step:  [ 68.71396  99.29904 110.     ]
Basis size (it should be the same of full dim) =   184
With this action the energy is:  -0.0078463
With this action the full dim is:  183  and princip dim is:  45
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08509109539345161
#### CALL STEP #### 185
Action chosen at step:  [ 72.53311   73.44926  106.692764]
Basis size (it should be the same of full dim) =   185
With this action the energy is:  -0.00780682
With this action the full dim is:  184  and princip dim is:  45
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.08234567737044074
#### CALL STEP #### 186
Action chosen at step:  [ 89.46424  86.22426 110.     ]
Basis size (it should be the same of full dim

With this action the energy is:  -0.0115048
With this action the full dim is:  207  and princip dim is:  50
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.3395012229818537
#### CALL STEP #### 209
Action chosen at step:  [110.       54.33926 110.     ]
Basis size (it should be the same of full dim) =   209
With this action the energy is:  -0.0115102
With this action the full dim is:  208  and princip dim is:  50
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.339876736085305
#### CALL STEP #### 210
Action chosen at step:  [110.        26.529245 110.      ]
Basis size (it should be the same of full dim) =   210
With this action the energy is:  -0.0116975
With this action the full dim is:  209  and princip dim is:  50
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.3529014776179835
#### CALL STEP #### 211
Action chosen at step:  [110.        52.788307 110.      ]
Basis size (it should be the same of full dim) =

With this action the energy is:  -0.0305084
With this action the full dim is:  228  and princip dim is:  58
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.661001373491331
#### CALL STEP #### 236
Action chosen at step:  [ 77.00191 110.      110.     ]
Basis size (it should be the same of full dim) =   230
With this action the energy is:  -0.0305083
With this action the full dim is:  229  and princip dim is:  58
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.66099441954497
#### CALL STEP #### 237
Action chosen at step:  [ 68.01427   93.321335 103.613655]
Basis size (it should be the same of full dim) =   231
With this action the energy is:  -0.0305084
With this action the full dim is:  230  and princip dim is:  58
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.661001373491331
#### CALL STEP #### 238
Action chosen at step:  [ 60.296585  94.059265 110.      ]
Basis size (it should be the same of full dim) =   

With this action the energy is:  -0.0305738
With this action the full dim is:  253  and princip dim is:  60
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.6655492544109087
#### CALL STEP #### 261
Action chosen at step:  [110.        95.20175   83.943275]
Basis size (it should be the same of full dim) =   255
With this action the energy is:  -0.0305738
With this action the full dim is:  254  and princip dim is:  60
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.6655492544109087
#### CALL STEP #### 262
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   256
With this action the energy is:  -0.0305738
With this action the full dim is:  255  and princip dim is:  60
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.6655492544109087
#### CALL STEP #### 263
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   257
With this action the ener

With this action the energy is:  -0.0305325
With this action the full dim is:  278  and princip dim is:  60
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.6626772745641407
#### CALL STEP #### 286
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   280
With this action the energy is:  -0.0305325
With this action the full dim is:  279  and princip dim is:  60
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.6626772745641407
#### CALL STEP #### 287
Action chosen at step:  [100.709526 110.       110.      ]
Basis size (it should be the same of full dim) =   281
With this action the energy is:  -0.0305325
With this action the full dim is:  280  and princip dim is:  60
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  1.6626772745641407
#### CALL STEP #### 288
Action chosen at step:  [110.       92.35545 110.     ]
Basis size (it should be the same of full dim) =   282
With this 

With this action the energy is:  0.00547463
With this action the full dim is:  10  and princip dim is:  10
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.8412392314880979
#### CALL STEP #### 11
Action chosen at step:  [ 84.86528  104.55374   92.467834]
Basis size (it should be the same of full dim) =   11
With this action the energy is:  0.00545241
With this action the full dim is:  11  and princip dim is:  11
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.8396940646068565
#### CALL STEP #### 12
Action chosen at step:  [83.48657 96.28146 94.33641]
Basis size (it should be the same of full dim) =   12
With this action the energy is:  0.00538544
With this action the full dim is:  12  and princip dim is:  12
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.8350370067294204
#### CALL STEP #### 13
Action chosen at step:  [ 59.77626   86.80885  102.094315]
Basis size (it should be the same of full dim) =   13
W

With this action the energy is:  6.50066e-16
With this action the full dim is:  35  and princip dim is:  26
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.46053639786800105
#### CALL STEP #### 36
Action chosen at step:  [102.53102 110.       93.52977]
Basis size (it should be the same of full dim) =   36
With this action the energy is:  -2.61871e-16
With this action the full dim is:  36  and princip dim is:  26
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679389
#### CALL STEP #### 37
Action chosen at step:  [110.      110.      107.30472]
Basis size (it should be the same of full dim) =   37
With this action the energy is:  -2.58309e-16
With this action the full dim is:  37  and princip dim is:  27
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.4605363978679389
#### CALL STEP #### 38
Action chosen at step:  [ 85.806366 110.        77.21524 ]
Basis size (it should be the same of full dim) = 

With this action the energy is:  -0.00147356
With this action the full dim is:  60  and princip dim is:  30
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.358065825882397
#### CALL STEP #### 61
Action chosen at step:  [100.01304 110.       25.01146]
Basis size (it should be the same of full dim) =   61
With this action the energy is:  -0.00277895
With this action the full dim is:  61  and princip dim is:  31
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.26728970549081055
#### CALL STEP #### 62
Action chosen at step:  [110.       110.        21.737816]
Basis size (it should be the same of full dim) =   62
With this action the energy is:  -0.00511905
With this action the full dim is:  62  and princip dim is:  32
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  -0.10456040671546063
#### CALL STEP #### 63
Action chosen at step:  [ 81.292755 110.        44.42757 ]
Basis size (it should be the same of full dim) =

With this action the energy is:  -0.00665864
With this action the full dim is:  85  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.002501856051745577
#### CALL STEP #### 86
Action chosen at step:  [110.      110.      103.55203]
Basis size (it should be the same of full dim) =   86
With this action the energy is:  -0.00665704
With this action the full dim is:  86  and princip dim is:  37
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.002390592909980782
#### CALL STEP #### 87
Action chosen at step:  [110.      110.      109.62343]
Basis size (it should be the same of full dim) =   87
With this action the energy is:  -0.00665704
With this action the full dim is:  87  and princip dim is:  36
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.002390592909980782
#### CALL STEP #### 88
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   88
With this act

With this action the energy is:  -0.00680421
With this action the full dim is:  110  and princip dim is:  40
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.012624715768307482
#### CALL STEP #### 111
Action chosen at step:  [ 44.906494 110.        47.636154]
Basis size (it should be the same of full dim) =   111
With this action the energy is:  -0.00754573
With this action the full dim is:  111  and princip dim is:  41
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.06418961881855978
#### CALL STEP #### 112
Action chosen at step:  [ 48.726418 110.        57.339844]
Basis size (it should be the same of full dim) =   112
With this action the energy is:  -0.00830827
With this action the full dim is:  112  and princip dim is:  41
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.11721624139373077
#### CALL STEP #### 113
Action chosen at step:  [ 55.606777 110.        78.1662  ]
Basis size (it should be the same of 

With this action the energy is:  -0.00912635
With this action the full dim is:  135  and princip dim is:  43
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.17410508577736117
#### CALL STEP #### 136
Action chosen at step:  [110.        58.267403 110.      ]
Basis size (it should be the same of full dim) =   136
With this action the energy is:  -0.00912338
With this action the full dim is:  136  and princip dim is:  43
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.17389855357046535
#### CALL STEP #### 137
Action chosen at step:  [110.        54.439632 105.83801 ]
Basis size (it should be the same of full dim) =   137
With this action the energy is:  -0.00913299
With this action the full dim is:  137  and princip dim is:  43
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.17456682781568134
#### CALL STEP #### 138
Action chosen at step:  [110.       50.9036   97.80652]
Basis size (it should be the same of full

With this action the energy is:  -0.00912603
With this action the full dim is:  160  and princip dim is:  45
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.17408283314900963
#### CALL STEP #### 161
Action chosen at step:  [100.10354 110.      110.     ]
Basis size (it should be the same of full dim) =   161
With this action the energy is:  -0.0091246
With this action the full dim is:  161  and princip dim is:  45
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.17398339171605848
#### CALL STEP #### 162
Action chosen at step:  [104.65741 110.       91.40523]
Basis size (it should be the same of full dim) =   162
With this action the energy is:  -0.00912566
With this action the full dim is:  162  and princip dim is:  45
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.1740571035474776
#### CALL STEP #### 163
Action chosen at step:  [110.        78.47687   81.799255]
Basis size (it should be the same of full dim)

With this action the energy is:  -0.00895202
With this action the full dim is:  185  and princip dim is:  46
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.161982271087604
#### CALL STEP #### 186
Action chosen at step:  [110.      110.       94.29346]
Basis size (it should be the same of full dim) =   186
With this action the energy is:  -0.00895202
With this action the full dim is:  186  and princip dim is:  46
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.161982271087604
#### CALL STEP #### 187
Action chosen at step:  [110.      110.       64.54889]
Basis size (it should be the same of full dim) =   187
With this action the energy is:  -0.00895839
With this action the full dim is:  187  and princip dim is:  46
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.16242523747074777
#### CALL STEP #### 188
Action chosen at step:  [110. 110. 110.]
Basis size (it should be the same of full dim) =   188
With this a

With this action the energy is:  -0.00901366
With this action the full dim is:  210  and princip dim is:  47
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.1662686836240379
#### CALL STEP #### 211
Action chosen at step:  [ 94.585236  84.10828  110.      ]
Basis size (it should be the same of full dim) =   211
With this action the energy is:  -0.00901422
With this action the full dim is:  211  and princip dim is:  47
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.16630762572365398
#### CALL STEP #### 212
Action chosen at step:  [ 94.8479    91.426216 103.667786]
Basis size (it should be the same of full dim) =   212
With this action the energy is:  -0.00898946
With this action the full dim is:  212  and princip dim is:  47
#### THE ACTION IS A GOOD ONE #### --> Store the energy got!
Reward is  0.16458582860486715
#### CALL STEP #### 213
Action chosen at step:  [ 82.21376  84.65474 100.34808]
Basis size (it should be the same of full 

In [None]:
import matplotlib.pyplot as plt

# Load the logged training curves from disk (presumably written by the
# training cell earlier in the notebook -- TODO confirm).
# ndmin=1 keeps every result at least one-dimensional: with a single logged
# value np.loadtxt would otherwise return a 0-d array and the len() calls in
# this and the following plotting cells would raise TypeError.
scores = np.loadtxt('scores_RL.out', ndmin=1)
energies = np.loadtxt('energies_RL.out', ndmin=1)
dim = np.loadtxt('princip_dim_RL.out', ndmin=1)

# Score curve: x axis is simply the index of each logged entry.
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')

In [None]:
# Energy curve: `energies` is the array loaded from 'energies_RL.out';
# the x axis is the index of each logged entry.
plt.plot(np.arange(len(energies)), energies)
plt.ylabel('Energies (mK)')  # label typo fixed (was 'Eenergies')
plt.xlabel('Episode #')

In [None]:
# Principal-dimension curve: `dim` is the array loaded from
# 'princip_dim_RL.out'. A dimension is a count of basis states, so the axis
# label carries no physical unit (the previous '(mK)' suffix was copied from
# the energy plot).
plt.plot(np.arange(len(dim)), dim)
plt.ylabel('Principal dimension')
plt.xlabel('Episode #')

In [None]:
# Build a dummy per-episode reward table (10 episodes x 20 steps, where the
# reward at step j is simply j) and dump it to a text file, one episode per row.
rewards = []

for i in range(10):
    rew_i_episode = list(range(20))
    rewards.append(rew_i_episode)
    print('Episode {} ... Score: {:.3f}'.format(i, np.sum(rewards[i])))

name_rewards = 'rewards_RL.out'
# The context manager guarantees the file handle is closed even if
# np.savetxt raises (e.g. on a ragged reward table).
with open(name_rewards, 'w') as file_rewards:
    np.savetxt(file_rewards, rewards, fmt="%f")

## Random search as in the original SVM

In [None]:
# Baseline: drive one episode with uniformly sampled actions, recording the
# running (cumulative) score after every step.
state = env.reset()
scores, step, score = [], 0, 0.0

while True:
    print(".....STEP.....", step)
    # Draw a random action from the action space instead of querying a policy.
    action = env.action_space.sample()
    transition = env.step(action)
    next_state, reward, done, info = transition
    step += 1
    score += reward
    scores.append(score)  # cumulative score so far, not the per-step reward
    state = next_state
    if done:
        break