# Stochastic Variational Method with RL algorithms

In [1]:
import numpy as np
import gym
import svm_env as svm
import torch

## Exploring the environment

In [2]:
# Build the SVM environment; the sigmas are written to the given file.
env = gym.make('svm_env:svmEnv-v0', file_sigmas ="./svmCodeSVD/sigmas.dat" )

# Grab both spaces and their (last-axis) sizes up front.
obs_space = env.observation_space
act_space = env.action_space
state_size = obs_space.shape[-1]
act_size = act_space.shape[-1]

# Report the spaces in the same order as they were collected.
for banner, value in (
    ('###### Observation space ####### \n', obs_space),
    ('###### Size of observation space ####### \n', state_size),
    ('###### Action space ####### \n', act_space),
    ('###### Number of actions ####### \n', act_size),
):
    print(banner, value)

# Resetting prints the environment's own diagnostics before returning the state.
state = env.reset()
print('##### State after reset ###### \n', state)
print('##### File where will be stored sigmas \n', env.file_sigmas)

# Last expression of the cell: show one randomly sampled action.
act_space.sample()

###### Observation space ####### 
 Box(-inf, inf, (1,), float32)
###### Size of observation space ####### 
 1
###### Action space ####### 
 Box(-1.0, 1.0, (3,), float32)
###### Number of actions ####### 
 3
*****CALL RESET******
Action chosen at reset:  [0.]
Actions taken at reset:  []
Energies got at reset:  [0.0]
##### State after reset ###### 
 [0.]
##### File where will be stored sigmas 
 ./svmCodeSVD/sigmas.dat


array([ 0.6571757 ,  0.23591614, -0.36692822], dtype=float32)

## DDPG from `stable_baselines3`

In [None]:
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

# Gaussian exploration noise for DDPG: zero mean, sigma 0.1 on every action component.
action_noise = NormalActionNoise(
    mean=np.zeros(act_size),
    sigma=np.full(act_size, 0.1),
)

# Undiscounted (gamma=1.0) DDPG with an MLP policy acting on `env`.
model = DDPG(
    "MlpPolicy",
    env,
    action_noise=action_noise,
    batch_size=128,
    gamma=1.0,
    verbose=1,
)

# Reference, DDPG constructor signature with its defaults:
#   (policy, env, learning_rate=0.001, buffer_size=1000000, learning_starts=100,
#    batch_size=100, tau=0.005, gamma=0.99, train_freq=(1, 'episode'),
#    gradient_steps=-1, action_noise=None, replay_buffer_class=None,
#    replay_buffer_kwargs=None, optimize_memory_usage=False, tensorboard_log=None,
#    create_eval_env=False, policy_kwargs=None, verbose=0, seed=None,
#    device='auto', _init_setup_model=True)

model.learn(total_timesteps=1000, log_interval=5)

# Reference, learn() signature with its defaults:
#   learn(total_timesteps, callback=None, log_interval=4, eval_env=None,
#         eval_freq=-1, n_eval_episodes=5, tb_log_name='DDPG',
#         eval_log_path=None, reset_num_timesteps=True)

## From my `ddpg_agent.py` code

In [3]:
from ddpg_agent import Agent

In [4]:
# Fresh environment instance for the hand-written DDPG agent.
env = gym.make('svm_env:svmEnv-v0')

# Size the agent from the environment's spaces rather than hard-coding (1, 3),
# so the agent stays in sync if the environment's spaces ever change.
state_size = env.observation_space.shape[-1]
act_size = env.action_space.shape[-1]

# Instance of the ddpg agent
agent = Agent(state_size, act_size, random_seed=2)

### Training loop
def run_ddpg(max_t_step = 200, n_episodes = 3):
    """Run DDPG training episodes with the module-level ``agent`` on ``env``.

    Every episode resets both the environment and the agent, then repeats
    ``agent.act`` -> ``env.step`` -> ``agent.step`` until the environment
    reports ``done`` or ``max_t_step`` steps have been taken.

    Params Input
    ==========
        max_t_step (int): maximum number of steps in a single episode
        n_episodes (int): number of episodes to run

    Params Output
    ==========
        scores_all (list of floats): the cumulative reward of each episode,
            in episode order (empty when ``n_episodes`` is 0)

    """

    ## Initialization
    # Created once, outside the episode loop, so every episode's score is
    # kept instead of only the last one.
    scores_all = []
    for i_episode in range(1, n_episodes+1):
        state = env.reset()
        agent.reset()
        score = 0.0

        ## Training loop of each episode
        for t_step in range(max_t_step):
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            # Hand the transition to the agent (its learning step).
            agent.step(state, action, reward, next_state, done)
            score += reward
            state = next_state
            if done:
                break

        scores_all.append(score)

        print('Episode {} ... Reward: {:.3f}'.format(i_episode, score))

    return scores_all

In [5]:
# Train with the default settings, then persist the weights of both local networks.
scores = run_ddpg()
for network, checkpoint_path in (
    (agent.actor_local, 'checkpoint_actor.pth'),
    (agent.critic_local, 'checkpoint_critic.pth'),
):
    torch.save(network.state_dict(), checkpoint_path)

*****CALL RESET******
Action chosen at reset:  [0.]
Actions taken at reset:  []
Energies got at reset:  [0.0]
****CALL STEP****
Action chosen at step:  [50.08913  39.656174 59.902096]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [ 26.44507 100.86172   0.     ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  0.0
With this action the full dim is:  0  and princip dim is:  0
The new action:  [ 26.44507 100.86172   0.     ]  makes the energy positive:  True
The new action:  [ 26.44507 100.86172   0.     ]  makes the energy greater than:  0.0  the previous one:  True
The new action:  [ 26.44507 100.86172   0.     ]  makes the energy less than: -0.151 False
The new action:  [ 26.44507 100.86172   0.     ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [77.7017   44.600365 46.2004  ]
Triangular inequality not satisfied!
***

With this action the energy is:  -0.107714
With this action the full dim is:  6  and princip dim is:  6
The new action:  [42.484425    0.          0.79273224]  makes the energy positive:  False
The new action:  [42.484425    0.          0.79273224]  makes the energy greater than:  -0.107714  the previous one:  True
The new action:  [42.484425    0.          0.79273224]  makes the energy less than: -0.151 False
The new action:  [42.484425    0.          0.79273224]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [46.249588 34.590763 22.928848]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [108.35698  80.90715   0.     ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.107714
With this action the full dim is:  6  and princip dim is:  6
The new action:  [108.35698  80.90715   0.     ]  makes the energy positive:  False

With this action the energy is:  -0.116555
With this action the full dim is:  9  and princip dim is:  9
The new action:  [ 87.287025 110.         0.      ]  makes the energy positive:  False
The new action:  [ 87.287025 110.         0.      ]  makes the energy greater than:  -0.116555  the previous one:  True
The new action:  [ 87.287025 110.         0.      ]  makes the energy less than: -0.151 False
The new action:  [ 87.287025 110.         0.      ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [50.092056 61.511192 58.355858]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.        105.456924    3.7639885]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.116632
With this action the full dim is:  10  and princip dim is:  10
The new action:  [110.        105.456924    3.7639885]  makes the energy positive:  Fal

With this action the energy is:  -0.140175
With this action the full dim is:  12  and princip dim is:  12
The new action:  [13.880978 15.472881  0.      ]  makes the energy positive:  False
The new action:  [13.880978 15.472881  0.      ]  makes the energy greater than:  -0.140175  the previous one:  True
The new action:  [13.880978 15.472881  0.      ]  makes the energy less than: -0.151 False
The new action:  [13.880978 15.472881  0.      ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [17.86102  29.136763  0.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.140175
With this action the full dim is:  12  and princip dim is:  12
The new action:  [17.86102  29.136763  0.      ]  makes the energy positive:  False
The new action:  [17.86102  29.136763  0.      ]  makes the energy greater than:  -0.140175  the previous one:  True
The new actio

With this action the energy is:  -0.142964
With this action the full dim is:  16  and princip dim is:  16
The new action:  [47.73337   1.483818 55.293877]  makes the energy positive:  False
The new action:  [47.73337   1.483818 55.293877]  makes the energy greater than:  -0.142931  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 16.00528
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [47.997963 20.055508 45.572163]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [97.11958  88.801056  0.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.142964
With this action the full dim is:  16  and princip dim is:  16
The new action:  [97.11958  88.801056  0.      ]  makes the energy positive:  False
The new action:  [97.11958  88.801056  0.      ]  makes the energy greater than:  -0.142964  the pre

With this action the energy is:  -0.144317
With this action the full dim is:  21  and princip dim is:  21
The new action:  [68.08411   23.213064   5.6032677]  makes the energy positive:  False
The new action:  [68.08411   23.213064   5.6032677]  makes the energy greater than:  -0.144228  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 21.01869
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [77.76814   14.186256  15.2438545]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.146017
With this action the full dim is:  22  and princip dim is:  22
The new action:  [77.76814   14.186256  15.2438545]  makes the energy positive:  False
The new action:  [77.76814   14.186256  15.2438545]  makes the energy greater than:  -0.144317  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED

With this action the energy is:  -0.146355
With this action the full dim is:  31  and princip dim is:  31
The new action:  [110.       43.54634  16.08329]  makes the energy positive:  False
The new action:  [110.       43.54634  16.08329]  makes the energy greater than:  -0.146355  the previous one:  True
The new action:  [110.       43.54634  16.08329]  makes the energy less than: -0.151 False
The new action:  [110.       43.54634  16.08329]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [105.69978   64.627754  28.116459]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.146366
With this action the full dim is:  32  and princip dim is:  32
The new action:  [105.69978   64.627754  28.116459]  makes the energy positive:  False
The new action:  [105.69978   64.627754  28.116459]  makes the energy greater than:  -0.146355  the previous one:  False
Thi

With this action the energy is:  -0.146545
With this action the full dim is:  43  and princip dim is:  43
The new action:  [110.        11.107876  41.944862]  makes the energy positive:  False
The new action:  [110.        11.107876  41.944862]  makes the energy greater than:  -0.146519  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 43.011179999999996
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [110.        15.094627  28.817146]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.146788
With this action the full dim is:  44  and princip dim is:  44
The new action:  [110.        15.094627  28.817146]  makes the energy positive:  False
The new action:  [110.        15.094627  28.817146]  makes the energy greater than:  -0.146545  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy

With this action the energy is:  -0.147582
With this action the full dim is:  56  and princip dim is:  56
The new action:  [110.         47.21365     7.2379227]  makes the energy positive:  False
The new action:  [110.         47.21365     7.2379227]  makes the energy greater than:  -0.147581  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 56.00056000000001
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [110.         68.84654     6.9484024]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147593
With this action the full dim is:  57  and princip dim is:  57
The new action:  [110.         68.84654     6.9484024]  makes the energy positive:  False
The new action:  [110.         68.84654     6.9484024]  makes the energy greater than:  -0.147582  the previous one:  False
This action is NOT REMOVED from actions taken and sigm

With this action the energy is:  -0.147667
With this action the full dim is:  58  and princip dim is:  58
The new action:  [18.26786 39.1289   0.     ]  makes the energy positive:  False
The new action:  [18.26786 39.1289   0.     ]  makes the energy greater than:  -0.147667  the previous one:  True
The new action:  [18.26786 39.1289   0.     ]  makes the energy less than: -0.151 False
The new action:  [18.26786 39.1289   0.     ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [10.787155 22.54686  18.710278]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [60.355415  0.        0.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147667
With this action the full dim is:  58  and princip dim is:  58
The new action:  [60.355415  0.        0.      ]  makes the energy positive:  False
The new action:  [60.355415  0.

With this action the energy is:  -0.149767
With this action the full dim is:  65  and princip dim is:  65
The new action:  [110.        60.86139   37.316647]  makes the energy positive:  False
The new action:  [110.        60.86139   37.316647]  makes the energy greater than:  -0.149766  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 65.00065000000001
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [110.        63.542446  46.81018 ]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.        33.072487   0.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.149767
With this action the full dim is:  65  and princip dim is:  65
The new action:  [110.        33.072487   0.      ]  makes the energy positive:  False
The new action:  [110.        33.072487   0.      ]  makes the energy greate

With this action the energy is:  -0.149893
With this action the full dim is:  76  and princip dim is:  76
The new action:  [110.        15.214252  83.95325 ]  makes the energy positive:  False
The new action:  [110.        15.214252  83.95325 ]  makes the energy greater than:  -0.149892  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 76.00076
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [110.       19.61464  88.89255]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.149894
With this action the full dim is:  77  and princip dim is:  77
The new action:  [110.       19.61464  88.89255]  makes the energy positive:  False
The new action:  [110.       19.61464  88.89255]  makes the energy greater than:  -0.149893  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store t

With this action the energy is:  -0.149894
With this action the full dim is:  78  and princip dim is:  78
The new action:  [85.150536  0.       64.3711  ]  makes the energy positive:  False
The new action:  [85.150536  0.       64.3711  ]  makes the energy greater than:  -0.149894  the previous one:  True
The new action:  [85.150536  0.       64.3711  ]  makes the energy less than: -0.151 False
The new action:  [85.150536  0.       64.3711  ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [89.48688  0.      89.00069]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.149894
With this action the full dim is:  78  and princip dim is:  78
The new action:  [89.48688  0.      89.00069]  makes the energy positive:  False
The new action:  [89.48688  0.      89.00069]  makes the energy greater than:  -0.149894  the previous one:  True
The new action:  [89.4

With this action the energy is:  -0.0311146
With this action the full dim is:  7  and princip dim is:  7
The new action:  [ 60.997967  22.751934 110.      ]  makes the energy positive:  False
The new action:  [ 60.997967  22.751934 110.      ]  makes the energy greater than:  -0.0311146  the previous one:  True
The new action:  [ 60.997967  22.751934 110.      ]  makes the energy less than: -0.151 False
The new action:  [ 60.997967  22.751934 110.      ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [42.07864  0.      92.70979]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.0311146
With this action the full dim is:  7  and princip dim is:  7
The new action:  [42.07864  0.      92.70979]  makes the energy positive:  False
The new action:  [42.07864  0.      92.70979]  makes the energy greater than:  -0.0311146  the previous one:  True
The new ac

With this action the energy is:  -0.0313824
With this action the full dim is:  8  and princip dim is:  8
The new action:  [110.        0.       98.60041]  makes the energy positive:  False
The new action:  [110.        0.       98.60041]  makes the energy greater than:  -0.0313824  the previous one:  True
The new action:  [110.        0.       98.60041]  makes the energy less than: -0.151 False
The new action:  [110.        0.       98.60041]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [110.   0. 110.]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.   0. 110.]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.   0. 110.]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.   0. 110.]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.   0.

With this action the energy is:  -0.0557923
With this action the full dim is:  13  and princip dim is:  13
The new action:  [110.       30.14754  76.6233 ]  makes the energy positive:  False
The new action:  [110.       30.14754  76.6233 ]  makes the energy greater than:  -0.0556924  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 13.012986999999999
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [87.326096 28.186419 85.27302 ]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.        33.850708 105.6941  ]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.        32.839977 110.      ]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.        30.574377 110.      ]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [110.     

With this action the energy is:  -0.0954447
With this action the full dim is:  23  and princip dim is:  23
The new action:  [ 48.371     35.083496 110.      ]  makes the energy positive:  False
The new action:  [ 48.371     35.083496 110.      ]  makes the energy greater than:  -0.0954149  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 23.006853999999997
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [ 59.62819   36.404587 110.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.0955805
With this action the full dim is:  24  and princip dim is:  24
The new action:  [ 59.62819   36.404587 110.      ]  makes the energy positive:  False
The new action:  [ 59.62819   36.404587 110.      ]  makes the energy greater than:  -0.0954447  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the en

With this action the energy is:  -0.0973087
With this action the full dim is:  34  and princip dim is:  34
The new action:  [46.081078  8.346741 67.99398 ]  makes the energy positive:  False
The new action:  [46.081078  8.346741 67.99398 ]  makes the energy greater than:  -0.0968169  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 34.167212
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [56.288364 23.403301 75.42358 ]
Triangular inequality not satisfied!
****CALL STEP****
Action chosen at step:  [28.005714 25.450596 71.11191 ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.098279
With this action the full dim is:  35  and princip dim is:  35
The new action:  [28.005714 25.450596 71.11191 ]  makes the energy positive:  False
The new action:  [28.005714 25.450596 71.11191 ]  makes the energy greater than:  -0.0973087  the

With this action the energy is:  -0.098279
With this action the full dim is:  35  and princip dim is:  35
The new action:  [ 31.747105   0.       110.      ]  makes the energy positive:  False
The new action:  [ 31.747105   0.       110.      ]  makes the energy greater than:  -0.098279  the previous one:  True
The new action:  [ 31.747105   0.       110.      ]  makes the energy less than: -0.151 False
The new action:  [ 31.747105   0.       110.      ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [  8.230896   0.       102.4402  ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.098279
With this action the full dim is:  35  and princip dim is:  35
The new action:  [  8.230896   0.       102.4402  ]  makes the energy positive:  False
The new action:  [  8.230896   0.       102.4402  ]  makes the energy greater than:  -0.098279  the previous one

With this action the energy is:  -0.099157
With this action the full dim is:  36  and princip dim is:  36
The new action:  [  7.979725   0.       110.      ]  makes the energy positive:  False
The new action:  [  7.979725   0.       110.      ]  makes the energy greater than:  -0.099157  the previous one:  True
The new action:  [  7.979725   0.       110.      ]  makes the energy less than: -0.151 False
The new action:  [  7.979725   0.       110.      ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 20.036507   0.       110.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.099157
With this action the full dim is:  36  and princip dim is:  36
The new action:  [ 20.036507   0.       110.      ]  makes the energy positive:  False
The new action:  [ 20.036507   0.       110.      ]  makes the energy greater than:  -0.099157  the previous one

With this action the energy is:  -0.099157
With this action the full dim is:  36  and princip dim is:  36
The new action:  [  0.       41.69272 110.     ]  makes the energy positive:  False
The new action:  [  0.       41.69272 110.     ]  makes the energy greater than:  -0.099157  the previous one:  True
The new action:  [  0.       41.69272 110.     ]  makes the energy less than: -0.151 False
The new action:  [  0.       41.69272 110.     ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [  0.        33.632828 110.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.099157
With this action the full dim is:  36  and princip dim is:  36
The new action:  [  0.        33.632828 110.      ]  makes the energy positive:  False
The new action:  [  0.        33.632828 110.      ]  makes the energy greater than:  -0.099157  the previous one:  True
The 

With this action the energy is:  -0.099157
With this action the full dim is:  36  and princip dim is:  36
The new action:  [  0.   0. 110.]  makes the energy positive:  False
The new action:  [  0.   0. 110.]  makes the energy greater than:  -0.099157  the previous one:  True
The new action:  [  0.   0. 110.]  makes the energy less than: -0.151 False
The new action:  [  0.   0. 110.]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 10.248638   0.       110.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.099157
With this action the full dim is:  36  and princip dim is:  36
The new action:  [ 10.248638   0.       110.      ]  makes the energy positive:  False
The new action:  [ 10.248638   0.       110.      ]  makes the energy greater than:  -0.099157  the previous one:  True
The new action:  [ 10.248638   0.       110.      ]  makes the e

With this action the energy is:  -0.145399
With this action the full dim is:  37  and princip dim is:  37
The new action:  [ 0.      0.     88.6579]  makes the energy positive:  False
The new action:  [ 0.      0.     88.6579]  makes the energy greater than:  -0.145399  the previous one:  True
The new action:  [ 0.      0.     88.6579]  makes the energy less than: -0.151 False
The new action:  [ 0.      0.     88.6579]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 0.        0.       90.526855]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.145399
With this action the full dim is:  37  and princip dim is:  37
The new action:  [ 0.        0.       90.526855]  makes the energy positive:  False
The new action:  [ 0.        0.       90.526855]  makes the energy greater than:  -0.145399  the previous one:  True
The new action:  [ 0.        0.      

With this action the energy is:  -0.145399
With this action the full dim is:  37  and princip dim is:  37
The new action:  [ 0.       48.767292 49.28086 ]  makes the energy positive:  False
The new action:  [ 0.       48.767292 49.28086 ]  makes the energy greater than:  -0.145399  the previous one:  True
The new action:  [ 0.       48.767292 49.28086 ]  makes the energy less than: -0.151 False
The new action:  [ 0.       48.767292 49.28086 ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 0.       48.890045 50.88019 ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.145399
With this action the full dim is:  37  and princip dim is:  37
The new action:  [ 0.       48.890045 50.88019 ]  makes the energy positive:  False
The new action:  [ 0.       48.890045 50.88019 ]  makes the energy greater than:  -0.145399  the previous one:  True
The new actio

With this action the energy is:  -0.145399
With this action the full dim is:  37  and princip dim is:  37
The new action:  [ 0.       0.      73.40751]  makes the energy positive:  False
The new action:  [ 0.       0.      73.40751]  makes the energy greater than:  -0.145399  the previous one:  True
The new action:  [ 0.       0.      73.40751]  makes the energy less than: -0.151 False
The new action:  [ 0.       0.      73.40751]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 0.       0.      84.60387]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.145399
With this action the full dim is:  37  and princip dim is:  37
The new action:  [ 0.       0.      84.60387]  makes the energy positive:  False
The new action:  [ 0.       0.      84.60387]  makes the energy greater than:  -0.145399  the previous one:  True
The new action:  [ 0.       0.    

With this action the energy is:  -0.140711
With this action the full dim is:  5  and princip dim is:  5
The new action:  [ 9.446281 15.634407 35.38981 ]  makes the energy positive:  False
The new action:  [ 9.446281 15.634407 35.38981 ]  makes the energy greater than:  -0.140581  the previous one:  False
This action is NOT REMOVED from actions taken and sigmas, the energy is STORED!
Store the energy got!
Reward is positive! 5.006499999999999
Calculate the diff between dim: 
Diff 2:  0
****CALL STEP****
Action chosen at step:  [23.544569  0.       18.878376]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.140711
With this action the full dim is:  5  and princip dim is:  5
The new action:  [23.544569  0.       18.878376]  makes the energy positive:  False
The new action:  [23.544569  0.       18.878376]  makes the energy greater than:  -0.140711  the previous one:  True
The new action:  [23.544569  0.       18.878376]  makes the energy less than: -0.151 Fals

With this action the energy is:  -0.140713
With this action the full dim is:  6  and princip dim is:  6
The new action:  [ 0.       13.562778 18.366444]  makes the energy positive:  False
The new action:  [ 0.       13.562778 18.366444]  makes the energy greater than:  -0.140713  the previous one:  True
The new action:  [ 0.       13.562778 18.366444]  makes the energy less than: -0.151 False
The new action:  [ 0.       13.562778 18.366444]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [10.363644 15.980728 30.706158]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.141976
With this action the full dim is:  7  and princip dim is:  7
The new action:  [10.363644 15.980728 30.706158]  makes the energy positive:  False
The new action:  [10.363644 15.980728 30.706158]  makes the energy greater than:  -0.140713  the previous one:  False
This action is N

With this action the energy is:  -0.142827
With this action the full dim is:  11  and princip dim is:  11
The new action:  [ 0.54823303  0.         33.012985  ]  makes the energy positive:  False
The new action:  [ 0.54823303  0.         33.012985  ]  makes the energy greater than:  -0.142827  the previous one:  True
The new action:  [ 0.54823303  0.         33.012985  ]  makes the energy less than: -0.151 False
The new action:  [ 0.54823303  0.         33.012985  ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 0.        0.       47.429234]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.142827
With this action the full dim is:  11  and princip dim is:  11
The new action:  [ 0.        0.       47.429234]  makes the energy positive:  False
The new action:  [ 0.        0.       47.429234]  makes the energy greater than:  -0.142827  the previous 

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [2.9263725 0.        0.       ]  makes the energy positive:  False
The new action:  [2.9263725 0.        0.       ]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [2.9263725 0.        0.       ]  makes the energy less than: -0.151 False
The new action:  [2.9263725 0.        0.       ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [23.607363  0.        0.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [23.607363  0.        0.      ]  makes the energy positive:  False
The new action:  [23.607363  0.        0.      ]  makes the energy greater than:  -0.147554  the previous one:  True
The new actio

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 9.625286  0.       18.390743]  makes the energy positive:  False
The new action:  [ 9.625286  0.       18.390743]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [ 9.625286  0.       18.390743]  makes the energy less than: -0.151 False
The new action:  [ 9.625286  0.       18.390743]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 0.        0.       23.082048]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.        0.       23.082048]  makes the energy positive:  False
The new action:  [ 0.        0.       23.082048]  makes the energy greater than:  -0.147554  the previous one:  True
The new actio

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [7.3278008 0.        0.       ]  makes the energy positive:  False
The new action:  [7.3278008 0.        0.       ]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [7.3278008 0.        0.       ]  makes the energy less than: -0.151 False
The new action:  [7.3278008 0.        0.       ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [14.773937  0.        0.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [14.773937  0.        0.      ]  makes the energy positive:  False
The new action:  [14.773937  0.        0.      ]  makes the energy greater than:  -0.147554  the previous one:  True
The new actio

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.       13.503407 28.577547]  makes the energy positive:  False
The new action:  [ 0.       13.503407 28.577547]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [ 0.       13.503407 28.577547]  makes the energy less than: -0.151 False
The new action:  [ 0.       13.503407 28.577547]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 0.        34.981155   7.3853035]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.        34.981155   7.3853035]  makes the energy positive:  False
The new action:  [ 0.        34.981155   7.3853035]  makes the energy greater than:  -0.147554  the previous one:  True
The 

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.        0.       22.263435]  makes the energy positive:  False
The new action:  [ 0.        0.       22.263435]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [ 0.        0.       22.263435]  makes the energy less than: -0.151 False
The new action:  [ 0.        0.       22.263435]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 0.        0.       55.272766]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.        0.       55.272766]  makes the energy positive:  False
The new action:  [ 0.        0.       55.272766]  makes the energy greater than:  -0.147554  the previous one:  True
The new actio

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.        0.       19.734009]  makes the energy positive:  False
The new action:  [ 0.        0.       19.734009]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [ 0.        0.       19.734009]  makes the energy less than: -0.151 False
The new action:  [ 0.        0.       19.734009]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [6.863476 0.       0.      ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [6.863476 0.       0.      ]  makes the energy positive:  False
The new action:  [6.863476 0.       0.      ]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [6.86

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.        0.       24.111668]  makes the energy positive:  False
The new action:  [ 0.        0.       24.111668]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [ 0.        0.       24.111668]  makes the energy less than: -0.151 False
The new action:  [ 0.        0.       24.111668]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [0.      0.      8.91011]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [0.      0.      8.91011]  makes the energy positive:  False
The new action:  [0.      0.      8.91011]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [0.      0.   

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.       0.      65.86098]  makes the energy positive:  False
The new action:  [ 0.       0.      65.86098]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [ 0.       0.      65.86098]  makes the energy less than: -0.151 False
The new action:  [ 0.       0.      65.86098]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [ 0.       0.      72.57361]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [ 0.       0.      72.57361]  makes the energy positive:  False
The new action:  [ 0.       0.      72.57361]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [ 0.       0.    

With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [23.411383  0.        0.      ]  makes the energy positive:  False
The new action:  [23.411383  0.        0.      ]  makes the energy greater than:  -0.147554  the previous one:  True
The new action:  [23.411383  0.        0.      ]  makes the energy less than: -0.151 False
The new action:  [23.411383  0.        0.      ]  makes the energy nan:  False
The energy is greater than previous energy --> Set reward:  -1.0
Store the energy got!
****CALL STEP****
Action chosen at step:  [1.1912766 0.        0.       ]
**** TRIANGULAR INEQUALITY SATISFIED ****
With this action the energy is:  -0.147554
With this action the full dim is:  12  and princip dim is:  12
The new action:  [1.1912766 0.        0.       ]  makes the energy positive:  False
The new action:  [1.1912766 0.        0.       ]  makes the energy greater than:  -0.147554  the previous one:  True
The new actio

## Random search as in the original SVM

In [None]:
# Random-search baseline: run one episode in which every action is sampled
# from the action space, and record the cumulative reward after each step.
state = env.reset()
step, score = 0, 0.0
scores = []  # running total of the reward, one entry per environment step
done = False

# Keep stepping until the environment reports the end of the episode.
while not done:
    print(".....STEP.....", step)
    # No policy is involved: the action does not depend on `state`.
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    state = next_state
    score += reward
    scores.append(score)
    step += 1