In [1]:
import os
import numpy as np
import gym
from gym import wrappers
import pybullet_envs

In [10]:
class Hp():
    def __init__(self):
        self.nb_steps=2
        self.episode_length=1000
        self.learning_rate=0.02
        self.nb_directions=16
        self.nb_best_directions=4
        assert self.nb_best_directions<=self.nb_directions
        self.noise=0.03
        self.seed=1
        self.env_name='HalfCheetahBulletEnv-v0'

In [3]:
class Normalizer():
    
    def __init__(self,nb_inputs):
        self.n=np.zeros(nb_inputs)
        self.mean=np.zeros(nb_inputs)
        self.mean_diff = np.zeros(nb_inputs)
        self.var=np.zeros(nb_inputs)
        
    def observe(self,x):
        self.n+=1.
        last_mean=self.mean.copy()
        self.mean+=(x-self.mean)/self.n
        self.mean_diff+=(x-last_mean)*(x-self.mean)
        self.var=(self.mean_diff/self.n).clip(min=1e-2)
            
    def normalize(self,inputs):
        obs_mean=self.mean
        obs_std=np.sqrt(self.var)
        return (inputs-obs_mean)/obs_std

In [4]:
class Policy():
    def __init__(self,input_size,output_size):
        self.theta=np.zeros((output_size,input_size))
    
    def evaluate(self,hp,input,delta=None,direction=None):
        if direction is None:
            return self.theta.dot(input)
        elif direction=="positive":
            return(self.theta+hp.noise*delta).dot(input)
        else:
            return(self.theta-hp.noise*delta).dot(input)
    
    def sample_deltas(self,hp):
        return[np.random.randn(*self.theta.shape) for _ in range(hp.nb_directions)]
    
    def update(self,hp,rollouts,sigma_r):
        step=np.zeros(self.theta.shape)
        for r_pos,r_neg,d in rollouts:
            step+=(r_pos-r_neg)*d
            self.theta+=hp.learning_rate/(hp.nb_best_directions*sigma_r)*step
        return self.theta
            

In [5]:
def explore(hp,env,normalizer,policy,direction=None,delta=None):
    state=env.reset()
    done=False
    num_plays=0.
    sum_rewards=0.
    while not done and num_plays<hp.episode_length:
        #env.render()
        normalizer.observe(state)
        state=normalizer.normalize(state)
        action=policy.evaluate(hp,state,delta,direction)
        state,reward,done,_=env.step(action)
        reward=max(min(reward,1),-1)
        sum_rewards+=reward
        num_plays+=1
    return sum_rewards

In [6]:
def train(env,policy,normalizer,hp):
    for step in range(1):
        deltas=policy.sample_deltas(hp)
        positive_rewards=[0]*hp.nb_directions
        negative_rewards=[0]*hp.nb_directions

        for k in range(hp.nb_directions):
            positive_rewards[k]=explore(hp,env,normalizer,policy,direction="positive",delta=deltas[k])

        for k in range(hp.nb_directions):
            negative_rewards[k]=explore(hp,env,normalizer,policy,direction="negative",delta=deltas[k])

        all_rewards=np.array(positive_rewards+negative_rewards)
        sigma_r=all_rewards.std()

        scores={k:max(r_pos,r_neg) for k,(r_pos,r_neg) in enumerate(zip(positive_rewards,negative_rewards))}
        order=sorted(scores.keys(),key=lambda x:scores[x], reverse=True)[:hp.nb_best_directions]
        rollouts=[(positive_rewards[k],negative_rewards[k],deltas[k]) for k in order]

        if step==0:
            weights=np.array(policy.update(hp,rollouts,sigma_r))
            reward_evaluation=np.array(explore(hp,env,normalizer,policy))
        else:
            weights=np.vstack((weights,policy.update(hp,rollouts,sigma_r)))
            reward_evaluation=np.vstack((reward_evaluation,explore(hp,env,normalizer,policy)))

        
        print('Reward:',reward_evaluation,'weights:',weights,sigma_r)
    return reward_evaluation,weights,sigma_r

In [7]:
class co_op():
    def __init__(self,hp_seed,env,weights=None,update=False):
        #self.test=hp_seed
        self.hp=Hp()
        self.hp.seed=hp_seed
        #self.hp.seed=int(time.time())
        np.random.seed(self.hp.seed)
        #self.env=gym.make(self.hp.env_name)
        #self.work_dir=mkdir('exp','brs{}'.format(hp_seed))
        #self.monitor_dir=mkdir(self.work_dir,'monitor')
        #self.env=wrappers.Monitor(self.env,self.monitor_dir,force=True)
        self.nb_inputs=env.observation_space.shape[0]
        self.nb_outputs=env.action_space.shape[0]
        self.policy=Policy(self.nb_inputs,self.nb_outputs)
        if update:
            self.policy.theta=weights
        self.normalizer=Normalizer(self.nb_inputs)
    #return env,policy,normalizer
    #print(hp.nb_directions)
        self.reward_evaluation,self.weights,self.sigma_r=train(env,self.policy,self.normalizer,self.hp)
    #return reward_evaluation,weights

In [8]:
import os
def mkdir(base,name):
    path=os.path.join(base,name)
    if not os.path.exists(path):
        os.makedirs(path)
    return path
#work_dir=mkdir('exp','brs')
#monitor_dir=mkdir(work_dir,'monitor')
#work_dir1=mkdir('exp','brs1')
#monitor_dir1=mkdir(work_dir1,'monitor')

In [11]:
import time

hp=Hp()
np.random.seed(hp.seed)
env1=gym.make(hp.env_name)
work_dir1=mkdir('exp','brs1')
monitor_dir1=mkdir(work_dir1,'monitor')
hp.seed=2
np.random.seed(hp.seed)
env2=gym.make(hp.env_name)
work_dir2=mkdir('exp','brs2')
monitor_dir2=mkdir(work_dir2,'monitor')
Weights_1=np.zeros([env1.action_space.shape[0],env1.observation_space.shape[0]])
Weights_2=np.zeros([env2.action_space.shape[0],env2.observation_space.shape[0]])


for it in range(10):
    Weighting_1=np.zeros([3])
    Weighting_2=np.zeros([3])
    Weighting_1d=np.zeros([3])
    Weighting_2d=np.zeros([3])
    local_1=np.zeros([env1.action_space.shape[0],env1.observation_space.shape[0]])
    local_2=np.zeros([env2.action_space.shape[0],env2.observation_space.shape[0]])
    local_1d=np.zeros([env1.action_space.shape[0],env1.observation_space.shape[0]])
    local_2d=np.zeros([env2.action_space.shape[0],env2.observation_space.shape[0]])
    #env1=wrappers.Monitor(env1,monitor_dir1,force=False)
    #env2=wrappers.Monitor(env2,monitor_dir2,force=False)
    for j in range(10):
        print(it,j)
        if j==0 and it==0:
            co_op_1=co_op(1,env1)
            co_op_2=co_op(2,env2)
            creward_1=0
            creward_2=0
        elif j!=0:
            co_op_1=co_op(1,env1,weights_1c,True)
            co_op_2=co_op(2,env2,weights_2c,True)
        else:
            co_op_1=co_op(1,env1,weights_1,True)
            co_op_2=co_op(2,env2,weights_2,True)
        weights_1c=(0.1*co_op_2.weights)+(0.9*co_op_1.weights)
        weights_2c=(0.1*co_op_1.weights)+(0.9*co_op_2.weights)
        if co_op_1.sigma_r<0.01:
            co_op_1.sigma_r=0.01
        if co_op_2.sigma_r<0.01:
            co_op_2.sigma_r=0.01
        creward_1+=(np.exp(co_op_1.reward_evaluation/250)*np.power(1.2,j))/(co_op_1.sigma_r)
        creward_2+=(np.exp(co_op_2.reward_evaluation/250)*np.power(1.2,j))/(co_op_2.sigma_r)
        Weighting_1=np.vstack((Weighting_1,[np.exp((co_op_1.reward_evaluation)/250),co_op_1.sigma_r,np.power(1.2,j)]))
        Weighting_2=np.vstack((Weighting_2,[np.exp((co_op_2.reward_evaluation)/250),co_op_2.sigma_r,np.power(1.2,j)]))
        local_1=np.vstack((local_1,weights_1c))
        local_2=np.vstack((local_2,weights_2c))
    #env1=wrappers.Monitor(env1,monitor_dir1,force=False)
    #env2=wrappers.Monitor(env2,monitor_dir2,force=False)
    for j in range(10):
        print(it,j)
        if j==0 and it==0:
            co_op_1d=co_op(1,env1)
            co_op_2d=co_op(2,env2)
            creward_1d=0
            creward_2d=0
        elif j!=0:
            co_op_1d=co_op(1,env1,weights_1d,True)
            co_op_2d=co_op(2,env2,weights_2d,True)
        else:
            co_op_1d=co_op(1,env1,weights_1,True)
            co_op_2d=co_op(2,env2,weights_2,True)
        weights_1d=(1.0*co_op_1d.weights)
        weights_2d=(1.0*co_op_2d.weights)
        if co_op_1d.sigma_r<0.01:
            co_op_1d.sigma_r=0.01
        if co_op_2d.sigma_r<0.01:
            co_op_2d.sigma_r=0.01
        creward_1d+=(np.exp(co_op_1d.reward_evaluation/250)*np.power(1.2,j))/(co_op_1d.sigma_r)
        creward_2d+=(np.exp(co_op_2d.reward_evaluation/250)*np.power(1.2,j))/(co_op_2d.sigma_r)
        Weighting_1d=np.vstack((Weighting_1d,[np.exp((co_op_1d.reward_evaluation)/250),co_op_1d.sigma_r,np.power(1.2,j)]))
        Weighting_2d=np.vstack((Weighting_2d,[np.exp((co_op_2d.reward_evaluation)/250),co_op_2d.sigma_r,np.power(1.2,j)]))
        local_1d=np.vstack((local_1d,weights_1d))
        local_2d=np.vstack((local_2d,weights_2d))
    std_1=Weighting_1[1:,0].std()
    std_2=Weighting_2[1:,0].std()
    std_1d=Weighting_1d[1:,0].std()
    std_2d=Weighting_2d[1:,0].std()
    if std_1<0.01:
        std_1=0.01
    if std_2<0.01:
        std_2=0.01
    if std_1d<0.01:
        std_1d=0.01
    if std_2d<0.01:
        std_2d=0.01
    creward_1=creward_1/std_1
    creward_2=creward_2/std_2
    creward_1d=creward_1d/std_1d
    creward_2d=creward_2d/std_2d
    if creward_1+creward_2>creward_1d+creward_2d:
        avg_weighting_1=(Weighting_1[1:,0]*Weighting_1[1:,2])/Weighting_1[1:,1]
        avg_weighting_2=(Weighting_2[1:,0]*Weighting_2[1:,2])/Weighting_2[1:,1]
        local_1=local_1[env1.action_space.shape[0]:,:]
        local_2=local_2[env2.action_space.shape[0]:,:]
        local_1l=local_1.reshape([-1,10])
        local_2l=local_2.reshape([-1,10])
        #local_1=local_1.reshape([env1.action_space.shape[0],env1.observation_space.shape[0]])
        #local_2=local_2.reshape([env2.action_space.shape[0],env2.observation_space.shape[0]])
        weights_1=np.average(local_1l,axis=1,weights=avg_weighting_1)
        weights_2=np.average(local_2l,axis=1,weights=avg_weighting_2)
        weights_1=weights_1.reshape([env1.action_space.shape[0],env1.observation_space.shape[0]])
        weights_2=weights_2.reshape([env1.action_space.shape[0],env1.observation_space.shape[0]])
        
        c=1

    else:
        avg_weighting_1d=(Weighting_1d[1:,0]*Weighting_1d[1:,2])/Weighting_1d[1:,1]
        avg_weighting_2d=(Weighting_2d[1:,0]*Weighting_2d[1:,2])/Weighting_2d[1:,1]
        local_1d=local_1d[env1.action_space.shape[0]:,:]
        local_2d=local_2d[env2.action_space.shape[0]:,:]
        local_1dl=local_1d.reshape([-1,10])
        local_2dl=local_2d.reshape([-1,10])
        #local_1d=local_1d.reshape([env1.action_space.shape[0],env1.observation_space.shape[0]])
        #local_2d=local_2d.reshape([env2.action_space.shape[0],env2.observation_space.shape[0]])
        weights_1=np.average(local_1dl,axis=1,weights=avg_weighting_1d)
        weights_2=np.average(local_2dl,axis=1,weights=avg_weighting_2d)
        weights_1=weights_1.reshape([env1.action_space.shape[0],env1.observation_space.shape[0]])
        weights_2=weights_2.reshape([env1.action_space.shape[0],env1.observation_space.shape[0]])
        c=0
    Weights_1=np.vstack((Weights_1,weights_1))
    Weights_2=np.vstack((Weights_2,weights_2))
    print('Co_operation: ',c,'creward_1',creward_1,'creward_2',creward_2,'creward_1d',creward_1d,'creward_2d',creward_2d)
    creward_1=0
    creward_2=0
    creward_1d=0
    creward_2d=0





[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
WalkerBase::__init__ start
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
WalkerBase::__init__ start
0 0
Reward: -569.4133560693544 weights: [[-0.08636579 -0.08009395 -0.06327591  0.04626613 -0.0847776   0.01136787
  -0.09953647 -0.1454941  -0.00669371  0.04380159  0.07235349 -0.00338654
  -0.10144298  0.08635581  0.077788   -0.00495757 -0.0115437  -0.09929605
   0.09012688 -0.005657   -0.08981253 -0.01437197 -0.03125253 -0.10081363
   0.07105217 -0.04769717]
 [ 0.03028551  0.03960193  0.11063752 -0.04623872  0.10270012  0.02057326
  -0.028576    0.06932047  0.00169121  0.01765271  0.05076964  0.1768241
  -0

Reward: -989.4575919360791 weights: [[-0.0644311  -0.25342442 -0.09266261  0.07868055 -0.04557029  0.2046381
  -0.21046255 -0.33970255 -0.07710507  0.11137695  0.09203856 -0.07757127
  -0.08558563  0.00668403  0.08051693  0.10907112  0.15202239 -0.06336154
   0.28917551  0.13065578  0.02980744 -0.10835416 -0.18280068 -0.08068818
  -0.03817839 -0.01813334]
 [-0.25399397 -0.13233312  0.31182197 -0.22469867  0.04771828  0.20842542
  -0.17607418  0.08329979 -0.04118263 -0.14977028 -0.05660753  0.32779123
  -0.15367714 -0.1115786   0.13902041  0.19911184  0.1665403   0.02059794
  -0.09970283 -0.04794829  0.0812675  -0.11341813  0.00769297  0.14920364
   0.02648685 -0.11669354]
 [ 0.29994034  0.12119151 -0.01192067  0.06045414  0.13427961 -0.02455154
   0.00317526  0.04399099  0.12257392  0.19667812  0.00873775  0.07292759
   0.00914791  0.17930531 -0.08983873  0.06671143  0.28797793  0.09373123
  -0.07331036 -0.02628803 -0.02085652 -0.36069725 -0.08656218  0.35130373
   0.07772698 -0.349346

Reward: 156.4209053272795 weights: [[-0.09382463 -0.29872923 -0.08837837  0.14420321 -0.04673054  0.10413282
  -0.37279512 -0.4372674  -0.0311125   0.30131811  0.05475312  0.00323071
  -0.2192929   0.17695668  0.03869009  0.1028068   0.21192797 -0.02839705
   0.25013143  0.15266813 -0.11295635 -0.09064134 -0.3588661  -0.28370647
  -0.09295295 -0.07027869]
 [-0.17619733 -0.02006319  0.53390418 -0.11123821  0.11819525  0.28934193
  -0.06626944  0.17169834 -0.00966138 -0.24222211  0.03339832  0.32152242
  -0.31993008 -0.25445442  0.34901434  0.29469311  0.11587428  0.05723384
  -0.25281075  0.06170378 -0.09504551 -0.11315748  0.07728959  0.25808278
  -0.08876119 -0.18598867]
 [ 0.37301271  0.24101215 -0.04659774 -0.06321891  0.09949175 -0.1828157
  -0.18913055  0.23989277  0.39801407  0.09751596 -0.03333396  0.2132515
   0.10029644  0.28515391 -0.22157353  0.10086357  0.28273299  0.2031161
   0.02144178 -0.07724531  0.08009885 -0.44395613 -0.13518483  0.43635432
   0.16772544 -0.28308085]

Reward: 11.941950581015652 weights: [[-0.11965387 -0.50177942 -0.25040513  0.12481575 -0.03965905 -0.04982602
  -0.41313494 -0.63076512  0.04086469  0.46551017  0.01020779 -0.01368129
  -0.29292874  0.27505881  0.14103871  0.07491022  0.1960452  -0.25573576
   0.32695076  0.07773385 -0.1852054  -0.00480957 -0.48971181 -0.28199237
  -0.01592979 -0.11115009]
 [-0.25151872  0.0348947   0.68723829  0.05887954  0.17632226  0.30627728
  -0.06904743  0.13249213  0.04153038 -0.2691509   0.0169581   0.58662707
  -0.35719669 -0.27512405  0.49898653  0.3799986   0.04228121  0.11262096
  -0.4082028   0.04065249 -0.02907283  0.01746484  0.1475194   0.23739901
  -0.05893507 -0.25602439]
 [ 0.35864862  0.33977389 -0.16990328  0.0208729   0.12414483 -0.26999256
  -0.25008689  0.3693246   0.39519277  0.2868377  -0.06528195  0.21510652
   0.16318593  0.47617532 -0.17216412  0.21441377  0.4956182   0.17780178
   0.13617167 -0.19018215  0.19543958 -0.516207    0.03200419  0.56873969
   0.38747082 -0.27809

Reward: -932.9714937607341 weights: [[-1.46216318e-02 -4.09664917e-01 -1.24264408e-01  2.82788780e-01
  -6.50485333e-02 -1.33131505e-01 -3.03413734e-01 -4.49220277e-01
  -8.49547572e-02  4.95989334e-01 -1.01113826e-02  7.33833301e-02
  -3.14625884e-01  1.43598220e-01 -4.08835230e-02 -2.24237358e-02
   1.60943460e-01 -4.64517920e-02  2.37549826e-01  1.22413769e-01
  -2.67992539e-01  1.20321784e-02 -2.94844897e-01 -3.55807568e-01
   1.16652554e-01 -9.46042857e-02]
 [-2.35962406e-01  8.91623229e-02  4.25971176e-01  2.58481074e-02
   2.09547290e-01  1.76139132e-01  1.17387202e-01  2.65249766e-02
   7.61322206e-02 -1.42368775e-01  5.67062248e-02  4.91302975e-01
  -3.81200980e-01 -2.08176472e-01  2.55636234e-01  2.32921343e-01
   6.72536152e-02  1.86873677e-01 -4.07825689e-01 -3.24336876e-02
  -1.85681781e-01 -4.49681079e-02  1.64893046e-01  1.44673928e-01
   1.00790126e-01 -6.00380518e-04]
 [ 3.02639329e-01  3.08513671e-01 -2.80972449e-01  2.35934974e-01
   1.87845333e-01 -1.43928235e-01 -1

Reward: -913.7983459006512 weights: [[ 0.15103361 -0.0764819   0.34758241  0.09647068  0.06802733 -0.00627514
  -0.09229085 -0.0998934  -0.01800832 -0.1172833   0.05031242 -0.07871097
   0.06538992  0.11042019 -0.05968508  0.13787466 -0.01971651  0.1190363
   0.05489422  0.11477748 -0.11932891 -0.00807986  0.16705895 -0.07606225
  -0.20105909 -0.22379447]
 [-0.2768382  -0.25729937 -0.04347518  0.1272171  -0.02046359  0.02173668
  -0.01569117  0.00341076 -0.01742398 -0.00998799  0.05092623 -0.13277651
   0.13486182  0.10978998 -0.0512796   0.0869869  -0.08409072 -0.05488535
   0.0185116  -0.02464222 -0.05011892 -0.08586101 -0.05292668  0.10745673
  -0.22598894  0.04794094]
 [-0.01383943  0.07117269  0.07230266 -0.30385834  0.07668231  0.04003699
   0.01009677 -0.05023492  0.09214265 -0.06919998  0.2160754   0.05450148
   0.24467194 -0.0911376  -0.21270084  0.07987498  0.10312218  0.01152362
  -0.16854287 -0.04887016  0.05764141 -0.00370662 -0.00254299 -0.01043161
  -0.08841707 -0.074864

Reward: -795.3495196564847 weights: [[ 0.07082183 -0.1978506   0.40943924  0.19719     0.03566096 -0.11434845
  -0.08446465 -0.14700093  0.02517571  0.0488784   0.06856829 -0.1255489
   0.03927319  0.07186612 -0.21155347  0.14393936 -0.06055015  0.12297257
   0.11702393  0.07775217 -0.25741993 -0.03838415  0.22589928 -0.12412517
  -0.24388897 -0.37272874]
 [-0.23213765 -0.19371435 -0.03154118  0.15729407  0.02437269  0.00595841
   0.11128318  0.03751976  0.1166178  -0.05193079 -0.02099092 -0.13458232
   0.18511377  0.04431298 -0.04519552  0.10082096 -0.04986331  0.04965497
  -0.09839928  0.06402235 -0.16055157 -0.0328935  -0.06524562 -0.01565262
  -0.16059315  0.21423075]
 [-0.08634259  0.2390257   0.03641121 -0.33422124  0.09042505 -0.04040932
   0.02620288  0.01447158  0.05032065 -0.19089861  0.23124772  0.10448774
   0.37300323 -0.10754483 -0.12871214  0.1168661   0.13494563  0.27804893
  -0.24446222 -0.0302833   0.00818662 -0.14052977  0.16647346  0.10513696
  -0.03248939 -0.022774

Reward: 403.6353188656134 weights: [[ 0.06566831  0.05146521  0.10604311 -0.25334201 -0.10161233 -0.08076545
   0.07742847 -0.05297316 -0.28319516  0.66667016  0.16513736 -0.24796341
   0.04668842  0.4314294  -0.19172073  0.20109476 -0.04360663  0.23964502
   0.09704334 -0.38789398  0.14142167  0.01334373  0.09676948 -0.2068138
   0.17624595  0.09531231]
 [-0.17553062  0.14867418 -0.05472057  0.13358224 -0.05429728 -0.15262953
   0.50453015 -0.24390308  0.21558802  0.1840539   0.05574467  0.00825884
  -0.10548007  0.05543005 -0.08092224  0.01790148  0.14917616 -0.45501103
  -0.38672379  0.20746753 -0.40855059  0.33598767  0.01936166 -0.09796034
   0.00528416 -0.16461833]
 [ 0.17178366  0.17104554 -0.112042   -0.04705842 -0.15300116 -0.10185814
   0.22247489 -0.04409712  0.04969948 -0.40817143 -0.2774042   0.09600137
   0.36157398 -0.22260353  0.35323111  0.3525998  -0.0678537  -0.37621423
  -0.25043936 -0.10922822  0.20155124 -0.016851    0.08271602 -0.20736083
  -0.39191315  0.3040612

Reward: -989.2255250961073 weights: [[-3.19236956e-02 -1.05912160e-01  7.83210823e-02 -2.98833548e-01
  -8.60274329e-02 -5.64030113e-02  1.08190866e-01 -6.54030304e-02
  -1.42902577e-01  7.46176728e-01  1.50570131e-01 -1.14656567e-01
   4.12033834e-02  5.23170169e-01 -7.01797931e-02  4.81710455e-02
   6.06201138e-02  1.94494691e-01  1.17438805e-01 -3.62722509e-01
   3.08165610e-01 -3.33720246e-02  2.57984002e-01 -2.76598680e-01
   6.71705537e-02  2.62420025e-02]
 [-3.16651792e-01  5.49870288e-02 -9.70416423e-02  4.13122982e-03
  -6.04600264e-02 -9.62990446e-02  5.15334396e-01 -1.22494838e-01
   1.88561173e-01  2.49494041e-01  1.72745272e-01  2.14769106e-01
  -1.38669388e-02 -1.23250730e-02 -5.64172300e-02  1.57133287e-01
   1.62573256e-01 -5.26439464e-01 -4.89565335e-01  1.97378554e-01
  -2.46643252e-01  4.36573437e-01  4.50120403e-02 -5.18989149e-02
  -3.93616790e-02  7.49144477e-03]
 [ 1.39228852e-01  9.54613500e-02  7.48990422e-02 -1.34662521e-02
   8.01783481e-03  1.38413017e-01 -3

Reward: -720.306711442995 weights: [[ 0.06203701 -0.07663598  0.13709387 -0.36598741 -0.09904362 -0.03751836
   0.08061003 -0.01571065 -0.31879936  0.67477398  0.26091921 -0.11345022
   0.02904303  0.39817919 -0.09774579 -0.01617857  0.01899303  0.3415882
   0.25016072 -0.42877428  0.30785368 -0.1739406   0.33961896 -0.2721703
  -0.06692008  0.08226284]
 [-0.33979194  0.26894089 -0.06248477 -0.09864114 -0.10178972 -0.00785938
   0.47231985 -0.19671711  0.21013682  0.10363147  0.16757279  0.30505366
   0.01034924  0.12358183 -0.07911245 -0.01871334  0.23900527 -0.60158654
  -0.63444885  0.3556443  -0.35508362  0.39655487  0.05782192 -0.04671064
  -0.03569309 -0.00980996]
 [ 0.21201699  0.09500419  0.08151858 -0.00502013  0.04304875  0.16775464
  -0.07447243  0.09598794 -0.07910057 -0.35337171 -0.57627058 -0.06151949
   0.22026802 -0.23770125  0.6475094   0.4465509  -0.28518703 -0.40187051
  -0.23993886  0.11248168  0.18923597  0.0343752  -0.06478157 -0.07735383
  -0.43325796  0.10780506

Reward: 505.4299212486485 weights: [[ 0.02813782 -0.02831377 -0.14942023 -0.08795354  0.16595495 -0.31883343
   0.19640476  0.01463345  0.02624229  0.128143    0.21700834 -0.01973077
  -0.16111556 -0.09954406  0.03974088 -0.08343407 -0.03182161 -0.1351949
  -0.00870319  0.06143593  0.03951919  0.18555577 -0.1799544   0.04653434
   0.11427559 -0.07167602]
 [ 0.1010624  -0.06332129  0.05092351  0.08714616 -0.02515518 -0.05560877
   0.0879026  -0.23466502 -0.06100965 -0.11746115 -0.2300149   0.10627729
   0.09753712  0.08684365  0.02833981 -0.26889046 -0.12594332  0.21169684
   0.00323073 -0.08946209 -0.01913312  0.16699679  0.05518083  0.03823021
   0.18258799 -0.14041534]
 [-0.2123793   0.09527109 -0.05471766  0.25015359  0.18428066 -0.0111202
   0.11548329  0.37226672 -0.19344513  0.19839995 -0.13331549  0.04508045
  -0.24510343  0.17849384  0.20833216 -0.11970841  0.15100976 -0.08462744
  -0.04823856 -0.12071227  0.00473763  0.10948021  0.0547919   0.0057244
   0.08501597  0.23174408]

Reward: -428.8119924922875 weights: [[ 1.01638705e-01  4.64991672e-02 -1.67978875e-01 -2.06191744e-04
   1.13498576e-01 -3.94227884e-01  2.99516341e-01 -3.54854789e-02
   1.37200342e-01  1.66715869e-01  3.42409653e-01 -1.63670592e-01
  -2.20129440e-01  6.66255748e-02  1.96842816e-01 -1.30414841e-01
  -1.14488494e-01 -1.52689726e-01 -8.18834953e-02  1.80154357e-02
  -1.53441928e-01  2.51493888e-01  1.16565357e-01  1.38413993e-01
   1.77117556e-01 -1.77203318e-01]
 [ 1.19798171e-01 -1.72550245e-01  2.53592439e-02  1.33595580e-01
  -3.08042539e-02  5.82518680e-03 -3.13021538e-02 -1.97741904e-01
  -5.54194862e-02  8.55349805e-04 -2.14490210e-01  9.09625117e-02
   2.65399869e-01  1.86799663e-01 -5.16971139e-02 -2.92729800e-01
  -1.69253519e-01  3.26326210e-01  1.36891777e-02 -3.39094712e-02
  -4.47206043e-02  2.96149601e-01  1.44684016e-01  1.09208238e-01
   2.23077491e-01 -2.11132289e-01]
 [-2.62252796e-01  8.07031106e-02 -1.02789797e-01  3.60834157e-02
   1.26258307e-01  1.52053607e-02  8

Reward: -991.6005492677007 weights: [[ 1.18627022e-01 -8.69247102e-02 -2.12358295e-01 -2.16359123e-01
   3.67560659e-02 -5.01811002e-01  3.23962652e-01 -1.47801906e-01
   7.38587754e-02 -7.71472137e-03  3.86630313e-01 -3.23809629e-01
  -2.44721678e-01  1.29432326e-01  1.04407764e-01 -1.07934461e-01
  -9.78361561e-02 -4.18809489e-01 -6.74813397e-02  3.99710811e-02
  -2.12802146e-01  2.57626386e-01  9.94144624e-02  3.03221560e-01
   2.01623427e-01 -2.33792047e-01]
 [ 1.38204382e-01 -2.02776205e-01  1.84639318e-01  2.25726029e-01
  -1.19978680e-01 -3.90628930e-02 -1.52666139e-01 -2.23785852e-01
  -2.62524883e-01 -5.66883803e-02 -3.41639082e-01  2.01826812e-01
   4.07144462e-01  2.42903352e-01  1.45543064e-04 -3.08648513e-01
  -2.79985247e-01  4.99705682e-01 -1.34510757e-01 -9.33971757e-02
   1.81825059e-02  5.58717702e-01  1.52193141e-01  9.83556969e-02
   1.23328837e-01 -1.13165653e-01]
 [-4.24712431e-01  3.57138846e-02 -2.84055978e-01  2.68922170e-01
   1.56618639e-01 -9.76239683e-02 -4

Reward: -670.3372018607389 weights: [[-0.00620167 -0.12816522 -0.20140283 -0.23523801 -0.0969726  -0.40528616
   0.38884695 -0.07536471 -0.04550305  0.17691953  0.40394398 -0.27745774
  -0.32126942 -0.06821259 -0.16928145 -0.20083924 -0.23449925 -0.28142335
  -0.12104112  0.14176493 -0.13273696  0.15922852 -0.03790472  0.24000517
   0.05291793 -0.19942554]
 [ 0.18971575 -0.07075269  0.14264983  0.18866385 -0.14472532  0.01881336
   0.06691587 -0.23525947 -0.26759493 -0.11267581 -0.4244174   0.24872604
   0.22244694  0.16088851  0.11961153 -0.43845309 -0.28490563  0.43149881
  -0.09680359 -0.08928211 -0.08491479  0.53612801  0.08179811  0.04736902
   0.27370561 -0.16836314]
 [-0.56262221  0.23119893 -0.4147556   0.38421695  0.18512147 -0.1421189
   0.07778824  0.45284584 -0.3457155   0.26272569 -0.23931423  0.06092578
  -0.22266234  0.22880308  0.53274931  0.19244758  0.5124193  -0.53025073
  -0.17703466 -0.11438951  0.01222553  0.05375542  0.00536484 -0.30627365
   0.06218457  0.311275

Reward: -998.8785124425285 weights: [[-0.0433767  -0.0008896  -0.166114    0.05865495 -0.13806255 -0.24031331
   0.06639628  0.04611942 -0.17284887  0.0767149   0.09203494  0.0861766
  -0.23833884  0.12124189 -0.03403107  0.10407905 -0.02241641 -0.19030045
  -0.20232697  0.22026203 -0.19325626  0.24392578 -0.11033225  0.14879337
   0.04418849 -0.09926375]
 [ 0.10929096 -0.11732057  0.18117926  0.11065611  0.03638331  0.07415251
   0.03254835 -0.19845518 -0.02605662  0.05553441 -0.0204885  -0.03033681
   0.22327948  0.12129127 -0.11338516 -0.30831255 -0.12049713  0.32093723
   0.06837242 -0.03620156 -0.05209869  0.21585776  0.02404696  0.04333602
   0.15654654 -0.26098616]
 [-0.30821888  0.11846265 -0.27366668  0.24695283  0.28597839 -0.13714685
   0.12250287  0.11093778 -0.19203629  0.09110765 -0.04212285  0.03216718
  -0.18032557 -0.02400911  0.26899626  0.07607011  0.34327916 -0.27868539
  -0.07061816 -0.09898448 -0.0402533   0.14234077 -0.00505556 -0.11162776
   0.17553842  0.181015

Reward: 237.38503877896056 weights: [[ 0.20080031 -0.13436615 -0.15600513 -0.08869786  0.39307771 -0.49017113
   0.24634068 -0.1708462   0.06553393  0.19596303  0.2463912  -0.20271215
  -0.09073933  0.03836156  0.20901021  0.0465901   0.00527967 -0.19276192
   0.09461742  0.18151707 -0.09183768  0.29633857 -0.08343376  0.08946264
   0.02929867 -0.19887865]
 [-0.19991479 -0.23446502  0.1451173   0.2074642  -0.1325841   0.05582888
  -0.16657741 -0.30033947 -0.05325728 -0.1315465  -0.22834115  0.11189783
   0.16010412  0.16780944  0.06918671 -0.09216903 -0.03005049  0.33015796
  -0.04680554 -0.15088419  0.06824152  0.28456537  0.0726118   0.14059853
   0.04731555 -0.15469028]
 [-0.2083035   0.08794181  0.00582041  0.06406014  0.27535988  0.02627418
   0.07012675  0.33626826 -0.22466019  0.42706701  0.07577394  0.07220152
  -0.09580355  0.0068424   0.15147737  0.03393181  0.39275258 -0.02235541
  -0.13473819 -0.24740843  0.1865439   0.12560903  0.15159828 -0.0416639
   0.00806624  0.234599

Reward: -959.9430974839786 weights: [[ 0.26284278 -0.063823   -0.20173749 -0.09582131  0.5278861  -0.50813818
   0.24159421 -0.15847505  0.11139344  0.13291602  0.26133439 -0.19238117
  -0.10812702  0.19088762  0.3694554   0.06993787 -0.04152215 -0.20012902
   0.05117679  0.20522073 -0.08706673  0.38047067 -0.10143285  0.1570267
   0.00422888 -0.20222286]
 [-0.30604125 -0.35372632  0.22660058  0.33149517 -0.15874084  0.12659312
  -0.35814319 -0.32853591 -0.08150347 -0.08182435 -0.20138831  0.0433413
   0.14975546  0.24706406  0.11640266 -0.03347395 -0.03726247  0.30308244
  -0.00771694 -0.19827227  0.1560604   0.27500682  0.15755607  0.26678318
  -0.0561929  -0.30582896]
 [-0.15384408  0.03186717  0.08451836 -0.09995911  0.25715274  0.03395436
  -0.02959646  0.30769324 -0.24767152  0.50223127  0.10473633  0.08345562
  -0.15620747 -0.0942078   0.00695106  0.0873837   0.41026185 -0.16786563
  -0.03904989 -0.29823901  0.29122914  0.21389846  0.07918569 -0.14048482
  -0.02242015  0.1972170

Reward: -809.7437708626408 weights: [[ 0.24430115 -0.13524115 -0.26907702  0.02689258  0.65046993 -0.57285779
   0.09328707 -0.14605055  0.21200489  0.19906686  0.2322051  -0.15653358
  -0.17554576  0.2172923   0.38612825  0.04580485 -0.04481439 -0.20343106
   0.10666705  0.15273943 -0.05650165  0.4254413  -0.26601464  0.20650797
  -0.06872699 -0.28292521]
 [-0.34592809 -0.35916281  0.42315262  0.48838625 -0.11727307  0.17733626
  -0.40558685 -0.36526237  0.06584433 -0.17514136 -0.30663647 -0.00147335
   0.14241734  0.17132375  0.22775267  0.01042342  0.01078407  0.36610344
  -0.15301351 -0.18463976  0.16534675  0.22238096  0.24087673  0.25382039
   0.00286795 -0.36675757]
 [-0.00776922  0.17602831  0.11499549 -0.16267139  0.2898563  -0.0686115
  -0.10833635  0.44953859 -0.31548865  0.52815927  0.09742996  0.13257368
  -0.17740336 -0.07930939 -0.07466866  0.12138639  0.4361192  -0.0929614
  -0.00743027 -0.31267933  0.3006205   0.14631498  0.10870313 -0.00307368
   0.05854675  0.1454383

Reward: -753.7201624299366 weights: [[ 2.12134855e-01 -1.58813583e-01 -2.80810263e-01  7.63103137e-02
   5.70084642e-01 -3.41918809e-01  7.97416011e-02 -1.30062249e-01
   8.56288559e-02  3.17309219e-01  7.70591775e-02 -2.01974632e-01
  -1.21008018e-01  1.08867702e-01  1.90471763e-01  7.36068103e-02
  -1.55168812e-02 -1.09826596e-01  1.85094847e-01  2.28112682e-01
   1.07727992e-01  2.85212996e-01 -2.92867829e-01  2.77684432e-01
  -2.17776208e-01 -1.95219525e-01]
 [-4.35932134e-01 -2.03402651e-01  3.83045838e-01  3.66398945e-01
  -2.93982170e-01  2.19576667e-01 -4.30611499e-01 -3.43676320e-01
   8.99155041e-02 -1.97840696e-01 -2.78302338e-01  8.99380840e-02
  -6.88478845e-02  1.25336849e-01  3.20485782e-01  7.52477332e-02
   1.77018186e-01  2.64054092e-01 -1.55906350e-01 -2.04051447e-01
   2.17305065e-01  1.42895281e-01  1.27172786e-01  1.77738707e-01
   1.47776313e-01 -2.60498538e-01]
 [ 5.62779556e-02  2.02892957e-01  3.29391327e-02 -2.92777238e-02
   2.31671229e-01 -5.85987968e-02 -1

Reward: -420.40833050585906 weights: [[-0.08852636  0.06234158 -0.05516194 -0.05294769  0.03436632 -0.04551407
   0.06129677  0.14761241  0.23955067  0.119019    0.06177748  0.02860966
   0.16968701  0.08010941 -0.0418563   0.19770552 -0.0146581  -0.05593916
  -0.27674178 -0.17127509  0.02913446  0.20026867  0.23934114  0.0743043
   0.03669325 -0.10635822]
 [ 0.06436997 -0.04716773  0.23656953  0.19745914  0.22930764  0.09335327
   0.13856187 -0.02060019 -0.08566153  0.06218108 -0.0091287  -0.02608425
  -0.00257076  0.04449799  0.06831611 -0.17750439 -0.27851384  0.00095496
   0.01574984  0.1574676   0.04464179  0.16581441  0.28212689 -0.05863505
  -0.010213   -0.17940466]
 [-0.03059978 -0.05499104  0.05929896  0.07150874  0.141191   -0.29226858
  -0.13935216 -0.21246557 -0.04857037 -0.27230158  0.03034352  0.1764383
   0.03446463  0.11301126  0.13483299  0.14231139  0.01056138  0.15644332
   0.24959201  0.29144659 -0.02247779  0.15473842  0.20764926 -0.06926024
   0.18003257  0.357495

Reward: -782.5858420896266 weights: [[-0.07600451  0.05637149  0.01306469 -0.12099712  0.01640669 -0.01288548
   0.00270614  0.10918017  0.25993058  0.06756843 -0.00897322  0.03256132
   0.10681597  0.13263785 -0.04089403  0.18069614  0.10545849 -0.06606059
  -0.28266652 -0.08038564 -0.02656598  0.12472979  0.08821003  0.0491223
  -0.00718329 -0.0538996 ]
 [ 0.21548637 -0.01248562  0.31787882  0.11932785  0.09233556  0.05321121
   0.18958066  0.04920581 -0.11304716 -0.14120294  0.00586002 -0.20640367
   0.0591387  -0.0462273   0.04449613 -0.12596551 -0.33897146  0.0090261
   0.03057483  0.26233143 -0.02901628  0.15459306  0.14807808  0.06695963
  -0.00760574 -0.16033966]
 [-0.00475479 -0.09937695  0.08684432  0.04325238  0.03056019 -0.32077372
  -0.07107319 -0.06166059  0.07271813 -0.30966867 -0.10312764  0.27602007
   0.10149333  0.10084464  0.07552773  0.11406419  0.09948949  0.01890013
   0.11157052  0.24981647  0.1265803   0.2251666   0.10793793 -0.10479017
   0.2050763   0.3503649

Reward: -944.5992203480348 weights: [[-0.05714073  0.25964676  0.13469598 -0.04232509  0.0639455  -0.04980425
   0.0547248   0.2338344   0.11535257  0.04698082 -0.10772399  0.16655792
   0.15764411  0.09743943 -0.29136276  0.13521964  0.01615011  0.07303149
  -0.30097055  0.03177794  0.01188629 -0.02963191  0.37822957  0.07408527
   0.03378404  0.01156559]
 [ 0.17019641  0.10733869  0.10478135  0.23219186  0.01608529 -0.03691173
   0.21889019  0.00565959 -0.16217183  0.08145936  0.24980996 -0.1295231
  -0.08983368  0.0702384   0.04705187 -0.10975957 -0.26346734 -0.06671976
   0.03560537  0.2204368  -0.0318189   0.16274224  0.11962588 -0.12672292
  -0.04731587 -0.02382431]
 [-0.10674994 -0.09401189 -0.08167848  0.1606069  -0.06010515 -0.28056941
  -0.1697793  -0.11424378  0.01828603 -0.33402885  0.01266464  0.15836948
   0.12473872  0.23431746  0.09439554 -0.0182629  -0.20864866  0.09640152
   0.28104112  0.18224969  0.10819337  0.25691116 -0.05652846 -0.17307968
  -0.08746905  0.354191

Reward: -116.19550255959062 weights: [[ 0.04528446  0.13410237  0.13413824  0.02011595  0.03467438 -0.01621147
   0.09850529  0.21829806  0.12310041  0.21616855 -0.06125982  0.1671026
  -0.13840017  0.28616568 -0.2588542   0.18379553  0.10847476  0.03978191
  -0.17068213  0.04646216 -0.08720749 -0.09173971  0.42106827  0.10039785
   0.14405816  0.02642354]
 [ 0.11866912  0.04559662  0.12290629  0.12020621 -0.08365002 -0.14929982
   0.17747397 -0.13318692  0.01931123  0.076341    0.44957828 -0.11091233
   0.05711301 -0.01788123 -0.14105893  0.03177416 -0.22064182 -0.15606637
   0.01915659  0.36157046 -0.03653651  0.20852105 -0.03226773  0.05836027
   0.16290418 -0.02223348]
 [ 0.14063581  0.06800458  0.00424709  0.07903012 -0.09585017 -0.28704176
   0.03326332 -0.02552421 -0.18595373 -0.2025749  -0.24546426  0.31570467
   0.11477029  0.05884969  0.21069227  0.17145375 -0.22383906 -0.07155637
  -0.07881131  0.10946761  0.43346831  0.1178616  -0.10284354 -0.05568781
  -0.07445175  0.22682

Reward: -857.017643097877 weights: [[ 0.03635461  0.20636179  0.10353688  0.01589303  0.02735839 -0.04306431
   0.16571982  0.29096181  0.068674    0.19841501 -0.10917859  0.15223308
  -0.08131584  0.37343542 -0.27030287  0.23531504  0.10867508  0.04918502
  -0.21703508 -0.00796301 -0.09026535 -0.03429867  0.46555365  0.07143911
   0.18010302  0.06907883]
 [ 0.11702363  0.04600286  0.06135795  0.18530134 -0.06671063 -0.11391675
   0.18516221 -0.17774473  0.01573376  0.15615123  0.51073232 -0.12552385
   0.03228789  0.0243719  -0.11952288  0.00958091 -0.24880842 -0.21662034
   0.02758307  0.38231876 -0.06417811  0.2357932   0.02321539  0.06957366
   0.05458327 -0.02032992]
 [ 0.10064216  0.01595609  0.00549682  0.05510256 -0.08907529 -0.29639808
  -0.02347929 -0.16119258 -0.16629611 -0.21278696 -0.18606331  0.31031177
   0.07078109  0.04204167  0.18381511  0.18823073 -0.31579011 -0.04627805
   0.01051495  0.08692565  0.45433603  0.2091765  -0.08563008 -0.12815988
  -0.10100388  0.287047

Reward: -772.0259842413682 weights: [[-0.1976207   0.20336291  0.09052725  0.09483291  0.10494597  0.07991239
   0.23482717  0.01761667  0.00562195 -0.03787232  0.08034045  0.25033826
  -0.13537514  0.19897014 -0.12635138 -0.04018802 -0.1128989  -0.1121906
   0.0086084   0.1965433   0.03615326 -0.05413845  0.29894765  0.27804555
   0.05032014 -0.01463705]
 [ 0.13307524  0.01117262  0.15279373  0.10043985 -0.22565067 -0.23995063
  -0.06007497  0.12870675  0.26640596  0.05453867  0.08178958 -0.19379605
  -0.05144162  0.04333264  0.02349223  0.17651329 -0.13478772 -0.06081612
   0.18640502  0.33551907  0.05318703 -0.00632759 -0.15287057  0.04320614
   0.28865659  0.0960915 ]
 [ 0.1947068  -0.01968367  0.04465004  0.11235868 -0.32060301 -0.07631361
   0.03015803  0.01388191 -0.2446195   0.0370049  -0.08656992  0.34348971
   0.04946212  0.09193043 -0.24087688  0.08047092  0.06259743 -0.18805577
  -0.0831496  -0.0517933   0.05794929 -0.17149121 -0.06348553 -0.15841816
  -0.1655854  -0.049182

Reward: -672.6308170996213 weights: [[-0.03574023  0.18350221 -0.09875685 -0.00306168  0.01676912  0.07445287
  -0.15651037  0.19018605  0.26741111  0.12956428 -0.36768553  0.26848426
   0.38806586  0.07164576  0.0620565   0.2581889   0.26723476 -0.03345198
  -0.26916922 -0.35845378  0.27086293  0.13617139  0.23205968  0.02869086
   0.135222    0.18096016]
 [ 0.03583302  0.11312139  0.07373679  0.17863872  0.24600621  0.07070273
   0.35381526 -0.01449588 -0.05550795  0.01500229  0.24470296  0.0644143
  -0.066901   -0.04671299 -0.09935452 -0.0668103  -0.2365984  -0.26212935
  -0.02194368  0.15833306  0.11416473  0.01021034  0.22463494 -0.17561111
   0.0289679  -0.15475939]
 [ 0.14239793 -0.22415935 -0.08656205  0.2145416   0.03687193 -0.19985486
  -0.16570904 -0.42032177  0.02009458 -0.17472133  0.03155193  0.12258038
   0.11292356  0.1121501   0.12809643 -0.04765941 -0.21391517  0.20647133
   0.46697066  0.31049246  0.07302326  0.30323771  0.30494407 -0.05540921
   0.2629668   0.356742

Reward: 465.36821050470695 weights: [[-2.77126211e-02 -4.05520447e-03  3.43983245e-01 -9.55813767e-02
   3.59479082e-02  1.04473579e-01 -1.26538042e-01 -2.63413894e-01
   2.67860835e-01  2.20493522e-01  8.08714836e-02  4.01355776e-01
  -2.45370816e-02  2.89631904e-01  1.51774004e-01 -3.43219610e-01
  -2.88070429e-02  1.18212356e-01  6.49982592e-02  2.04820078e-01
   3.25825490e-03 -3.01139840e-01  3.14069131e-01  3.39584587e-01
   1.13708092e-01 -1.56538353e-01]
 [ 1.04712234e-01  7.43983408e-02  2.63525857e-01 -1.35458886e-01
  -2.96545660e-01 -3.30166718e-01  4.74824066e-02  2.10054876e-01
   4.78220483e-02  2.67740751e-01  1.70602570e-01 -2.03044001e-01
  -6.08882145e-02  2.09933504e-01 -1.79594281e-01  2.58625560e-01
   8.77693486e-03  9.50557569e-02 -2.92589296e-02  2.27173028e-01
   9.13888180e-03  2.82530307e-01 -5.85344499e-02  1.03100282e-03
   3.45741399e-01  2.27054909e-01]
 [ 1.36385559e-01 -8.08539055e-02 -9.09771822e-02  5.55509345e-02
  -3.13434385e-01 -1.29077231e-01  1

Reward: -579.1061634790545 weights: [[ 0.17765144  0.07178134  0.4126152  -0.1225188  -0.04039769  0.10034959
  -0.05760769 -0.24397953  0.03687328  0.18949433  0.26629885  0.41476811
  -0.06008959  0.09436554  0.11776474 -0.37123053 -0.08858958  0.26379236
   0.02979002  0.1181436  -0.07108158 -0.38557336  0.36145503  0.27979434
   0.04588738 -0.10236496]
 [ 0.05574727  0.27831136  0.31726152 -0.22432532 -0.30545576 -0.24758108
  -0.04339001  0.16120086  0.12889193  0.14314745  0.19616224 -0.06618921
  -0.12749598  0.35357166 -0.28794216  0.15957771  0.03442545 -0.04189152
  -0.09588259  0.32355215 -0.14434825  0.26419673 -0.08865166  0.08324392
   0.39744141  0.17064867]
 [ 0.15686386 -0.02912274 -0.21480242  0.07307203 -0.35044465 -0.20937159
   0.1812382   0.22897416 -0.05084686  0.02969899 -0.29897631  0.11550331
  -0.0110741   0.08139104  0.05267889  0.07941893  0.09195852 -0.30703872
  -0.07668228  0.13289277  0.18181971 -0.11086271 -0.19364997  0.05832677
  -0.18414844  0.11782

Reward: -722.3405950837981 weights: [[ 2.12253582e-01 -9.16875458e-02  3.67890796e-01  1.40861956e-01
   1.38321198e-01 -1.24410020e-01 -1.79298659e-02  8.44670431e-02
   6.61755886e-02 -1.35978826e-01  1.64436350e-01 -1.03273455e-01
  -2.22004737e-02  6.00887042e-02  6.21431608e-02  1.09436233e-01
   3.25698624e-02  2.04164254e-01  1.49054142e-01 -4.41313557e-02
  -1.06124227e-01 -4.15414982e-02  3.07964237e-01  4.05465030e-02
  -9.36533565e-02 -2.08812500e-01]
 [-1.65414995e-01 -1.28004816e-01 -1.64776325e-02  2.13504574e-01
   9.24654307e-02  5.20176471e-02  5.89793998e-02  1.57217808e-02
   1.13531909e-01  4.85672765e-02 -8.40346388e-03 -1.09051670e-01
   3.01014126e-01  2.02875219e-01 -7.25998557e-02  9.26986156e-02
  -7.63460407e-02 -3.98360052e-02  3.12565952e-03  8.38367394e-02
   8.44570970e-02 -7.57429877e-02  1.79649392e-02  1.85757229e-01
  -1.40729283e-01  1.62916592e-02]
 [-1.30946310e-01  2.72987249e-01  1.23734152e-01 -1.89406625e-01
   1.38430583e-01  9.33423139e-03 -7

Reward: -899.2893037614253 weights: [[ 0.1849536  -0.00135055  0.27121392  0.23404918  0.28238278 -0.18736472
   0.08807313  0.03913465  0.1434848   0.32739521  0.08988387 -0.06224524
  -0.07704153  0.08222175  0.16618786 -0.06542588 -0.01152582  0.31747537
   0.08783947  0.09323727 -0.10019761 -0.08276381  0.27426163  0.00552256
  -0.14290253 -0.21042665]
 [-0.18562323 -0.04603699  0.04838216  0.17342059 -0.01082222  0.17073439
   0.02788018  0.12980569  0.25030173  0.02674438  0.04012573 -0.15546272
   0.1156321   0.09124821  0.02456965  0.2120014   0.04495289 -0.08278339
   0.07682893  0.17674796  0.08295163 -0.04478209  0.0418084   0.26261316
  -0.01136606 -0.11081404]
 [-0.21564414  0.41246498  0.09416019 -0.55719327 -0.04705023  0.03374133
  -0.0928465   0.01018399  0.04973898  0.0057097  -0.06808403  0.25865779
   0.10228514 -0.04184494 -0.17770392 -0.0106934  -0.11576322  0.32768854
  -0.00849637  0.1039247  -0.03964329 -0.14680537 -0.01222915  0.09478542
  -0.04251492  0.09650

Reward: -964.9265366700802 weights: [[ 0.18208501 -0.16701059  0.24295222  0.28104055  0.27611114 -0.35940681
  -0.10205361 -0.11000082  0.2030417   0.20852197  0.18188914  0.04919352
  -0.19650881  0.13339402  0.43070267 -0.13148031  0.18947546  0.08983236
   0.22961651 -0.10061947 -0.15896275 -0.02176741  0.21764242 -0.02174276
   0.3458804  -0.29608856]
 [-0.13605026 -0.24763271  0.18061768  0.04227034  0.30364512 -0.10351044
   0.08146269  0.11753349  0.29768816  0.03147489 -0.04525262 -0.05392932
   0.4367277   0.05436453 -0.31392685  0.20405499 -0.0473743   0.07843849
  -0.0898577   0.10852695  0.11220732 -0.05106691  0.19024138  0.24585697
   0.07201182  0.06414713]
 [-0.0133561   0.34850975  0.13982084 -0.29371468  0.06814424  0.06415785
   0.06922805  0.20671985 -0.04792419 -0.15043601  0.01654014  0.15774491
   0.23740271  0.01847266 -0.2098625   0.06369305  0.07779357  0.38899209
  -0.02324224  0.1058639  -0.02227012  0.03800791  0.29240696  0.25381326
   0.25200181 -0.03970

Reward: -685.4103619655452 weights: [[ 8.59565824e-02 -6.01432175e-02  2.84213250e-01  1.26009780e-01
   9.47908123e-02 -2.10105911e-01 -1.55333320e-02 -4.04628477e-01
   1.79979760e-01  8.18880262e-01  7.97894137e-02  1.04372014e-01
  -3.81984964e-01  6.40507688e-01 -1.25080522e-04 -1.11294052e-02
   2.54805823e-01  2.78676267e-01 -7.72169180e-03 -1.78289986e-02
   2.14896088e-03 -1.16614036e-01 -6.56417198e-02  1.03152017e-01
   5.34484386e-01 -2.84820771e-01]
 [-2.03108912e-01 -2.37877772e-01  3.36510213e-01 -2.05658702e-02
   4.27821652e-02 -4.80460670e-01  2.05453577e-01  3.30139904e-01
   1.90768230e-01  1.84681393e-01  4.53224555e-02 -1.33302716e-01
   6.68289797e-02 -1.79408675e-01 -1.49750180e-01  5.21204828e-01
  -1.34980118e-01  8.90847583e-02 -7.20292775e-02 -1.45196177e-02
  -2.61967493e-03 -7.37450121e-03  1.10711636e-02  3.43003581e-01
   2.13301458e-01  1.37281488e-01]
 [ 3.56823090e-02  5.17520362e-01 -3.49456853e-02 -5.54078365e-01
  -4.86868745e-01  7.98462468e-02  3

Reward: -903.4595493353501 weights: [[ 0.16694752 -0.20436828  0.41106918  0.09090219  0.04543602 -0.24282246
   0.09354945 -0.5546197   0.26050534  0.74601628  0.16909033  0.00829845
  -0.15465157  0.61031504  0.16155033 -0.20329417  0.34737675  0.3177388
  -0.00711605 -0.17908591 -0.05722741 -0.10643064 -0.18543186  0.12781263
   0.67365639 -0.28227412]
 [-0.21703126 -0.32566793  0.37656325 -0.00891053 -0.01422029 -0.43191563
   0.20875724  0.38420268  0.22376723  0.25591763 -0.11901376 -0.13479838
  -0.04191801 -0.01493543 -0.25404441  0.60918536 -0.15292093  0.0664974
  -0.13985241 -0.18239924 -0.09476278  0.2135386   0.1704324   0.43156128
   0.17090444  0.27987384]
 [-0.09703372  0.54567168 -0.21636756 -0.51707717 -0.48590726 -0.02584061
   0.35688282  0.33696525  0.27447942 -0.11567546  0.15934379  0.38900215
   0.42729814 -0.11981091 -0.34706358  0.1491426  -0.10007    -0.16860993
  -0.09423193  0.14338516  0.74811728 -0.45676492  0.45247952 -0.03571554
  -0.27966784  0.3058910

Reward: -508.2812478840413 weights: [[ 3.09480583e-01 -6.68502906e-02  3.65714844e-01  1.34389133e-01
   1.31363435e-01 -1.51254957e-01  4.54586383e-02  1.12232005e-01
   6.12619949e-02 -2.01684038e-01  1.68707260e-01 -2.03838631e-01
  -1.22546564e-01  1.06934596e-01  1.18525406e-01 -5.51378072e-03
  -7.95778738e-02  1.90872082e-01  1.10639801e-01 -5.04924488e-02
  -1.84318534e-01  7.22756087e-03  3.65909222e-01  7.95082016e-02
   3.08118007e-02 -2.29623813e-01]
 [-1.24892347e-01 -1.83961749e-01 -1.11817418e-01  2.35934806e-01
   3.29317416e-02 -3.79831987e-02  7.03258624e-02 -2.70400978e-02
   3.17756227e-03  6.15769587e-02 -1.09690323e-01 -1.45406764e-01
   3.76221100e-01  3.26110482e-01 -1.42559629e-01 -3.11346322e-02
  -2.04705836e-01 -2.59344678e-02  4.41062190e-02  2.79563854e-02
   9.32203514e-02 -8.12888294e-02  5.51441687e-02  2.59794745e-01
  -3.27615878e-02  7.24700258e-02]
 [-1.42469507e-01  1.11958908e-01  1.14390104e-01 -5.05087828e-02
   1.03892123e-01  1.02703815e-01  5

Reward: -838.3279196796345 weights: [[ 0.36876693  0.06644061  0.50719475  0.0405781   0.25483761 -0.28725939
  -0.1241297   0.23773208  0.10826314 -0.40098905  0.03188493 -0.11850276
   0.04337396  0.0700739  -0.10143856  0.01481417  0.19312286  0.18168281
   0.01966637  0.00583286 -0.09356638 -0.13520096  0.291377    0.14624852
  -0.14310441 -0.24385739]
 [ 0.04972472 -0.1417131   0.0495762   0.33703379 -0.0748478  -0.04659875
   0.068863    0.02802057 -0.05801187 -0.14893814 -0.09965833 -0.41394218
   0.5042397   0.17163693 -0.1026489   0.04177464 -0.22301153  0.06347899
  -0.10319821  0.13536863  0.04246805  0.02738616 -0.04352033  0.18366073
  -0.28402417  0.13013303]
 [-0.2615511   0.19716853  0.03182113 -0.26044542  0.01400739 -0.05548458
  -0.1065048   0.10872988  0.12908419 -0.09681951  0.226864    0.09622519
   0.29598838  0.12013953 -0.23769943 -0.01027989  0.10086635  0.04699043
  -0.08960355  0.03021676 -0.02593257  0.5364661  -0.13276738 -0.20970219
  -0.14005428  0.22182

Reward: -796.1356926943376 weights: [[ 0.29490884  0.12879178  0.28439073 -0.05631193  0.31796937 -0.38934529
  -0.00354886  0.33158997  0.17718994 -0.37364429  0.06033749 -0.03036904
   0.02750266 -0.01378958  0.02609296 -0.06078551  0.20494113  0.08127581
  -0.04672857 -0.10131231 -0.04211296 -0.05826559  0.19308469  0.22597693
   0.04388233 -0.11510936]
 [ 0.20591164 -0.11333263  0.08512453  0.30167437  0.00575386 -0.05991176
   0.0630005  -0.015508   -0.02717853 -0.14734038 -0.20608826 -0.41902189
   0.53956217  0.11596158 -0.19573405 -0.01527022 -0.16536373  0.13679697
  -0.10024738  0.11218888  0.0948543   0.12657369  0.03994557  0.13578995
  -0.17234708  0.07138586]
 [-0.29271608  0.16806557  0.02290729 -0.14002903  0.01718371 -0.06516931
  -0.09593732  0.20421964 -0.0820184  -0.08898556  0.08106119  0.0789617
   0.15882662  0.0503032  -0.09185661 -0.02232147  0.03703727  0.0286915
   0.017625    0.05204055 -0.02296745  0.62701161 -0.04224521 -0.26010476
  -0.06337521  0.3195429

Reward: -937.4911862431009 weights: [[ 2.71824777e-01  1.76165293e-01  3.07789771e-01  6.82464100e-02
   1.66691938e-01 -1.90880659e-01 -2.41516234e-01  2.81328261e-01
   2.07363966e-01 -4.41071258e-01 -6.27794116e-02  4.68719748e-02
   3.97221023e-02  9.66774616e-02  8.71770530e-02  9.61489196e-04
   2.99127127e-01  7.81738893e-02  6.96514453e-03 -2.16092378e-01
   1.21182544e-02 -1.47085032e-01  2.58464187e-01  1.67288917e-01
   8.79402025e-02 -6.93468443e-02]
 [ 2.21599347e-01 -5.48045155e-02  4.86773909e-02  2.13524227e-01
   7.43568381e-02 -7.77402354e-02  1.00892818e-01  6.05489553e-02
  -4.48555497e-02 -1.27752959e-01 -3.67103500e-02 -2.87891880e-01
   5.18570525e-01  1.06160269e-01 -2.07491409e-01  2.74624542e-02
  -2.12179635e-01  2.35874046e-02 -1.26917478e-01  1.81813591e-01
   1.00915244e-01 -5.78712029e-02  2.41905856e-02  1.12899256e-01
  -1.76331142e-01  8.00212570e-02]
 [-8.48930945e-02  5.74961634e-02  2.30755725e-02 -1.29989900e-01
  -5.79439508e-02 -5.18848938e-02 -1

Reward: -800.3681986264567 weights: [[ 2.78775789e-01  2.18144192e-01  3.34368716e-01  1.25647458e-01
   1.34169036e-01 -8.43731364e-02 -2.67379340e-01  3.32596512e-01
   2.16635043e-01 -3.86977353e-01 -2.28103783e-01  4.85279910e-02
   1.10225333e-01  1.74959000e-01  6.83817205e-02  6.57490549e-02
   3.40068732e-01  1.40100857e-01 -1.66176329e-02 -2.63211359e-01
   6.59590277e-02 -1.70631863e-01  3.03002849e-01  2.18443040e-01
  -1.19138162e-02 -3.77756094e-02]
 [ 1.88167940e-01  4.84383252e-02 -1.12897216e-02  2.03662707e-01
  -3.05748839e-02 -7.49812650e-02  7.44201366e-02  9.04185646e-02
   3.09078400e-02 -9.31577097e-02  7.84619081e-02 -3.61790188e-01
   4.62528961e-01  1.07292303e-01 -2.26769800e-01  1.13809251e-01
  -1.51621336e-01 -6.19232965e-02 -1.28512319e-01  2.21872249e-01
   1.11873611e-01 -1.04698899e-01 -2.09968459e-02  8.86069789e-02
  -1.59574364e-01  1.76906287e-01]
 [-4.69697468e-02 -2.78803750e-03  3.84897374e-02 -2.00305004e-01
  -1.67611866e-01 -5.29449690e-02 -1

Reward: -866.8669907764947 weights: [[-0.18525469 -0.0980489  -0.17003855  0.09846667  0.12353557 -0.09649213
   0.10571492 -0.07693937  0.23157016  0.38792678  0.12023678  0.05484829
  -0.05856758  0.1964239   0.13290839  0.16349365 -0.16677462 -0.10200571
  -0.00169041 -0.07609829 -0.0780476   0.13100911 -0.04650714 -0.07331012
  -0.00083931 -0.08295438]
 [ 0.10671582  0.01698428  0.17860178  0.2105598   0.19615628  0.29020544
   0.05167742  0.05092009  0.18078384 -0.02236231 -0.05939158  0.15547335
   0.04800575 -0.05437907  0.1953108   0.06126617  0.08280896  0.1302588
  -0.01009152  0.18806418  0.12020992  0.10326402  0.12562381  0.03226826
   0.02062711 -0.17112563]
 [-0.09047797  0.24258937  0.15102001 -0.3466607   0.05093197 -0.07448207
  -0.07609962  0.21365161 -0.11066534 -0.04030859 -0.18086873  0.18864802
   0.01735977 -0.10087086 -0.0270055   0.03186183  0.1428132   0.11253914
   0.11588445 -0.01217413  0.1277234  -0.13980325  0.33271673  0.17194064
   0.37192219  0.098180

Reward: -431.92448365343074 weights: [[ 0.21204767  0.03529033  0.05226929  0.11354185  0.21216562 -0.00393828
   0.0942473   0.18201581  0.04120021 -0.20169858  0.25042219  0.12946435
  -0.31145447 -0.16169894  0.0343466  -0.07204479 -0.00699371 -0.1994718
   0.02491841  0.12607856  0.06022059  0.08411984  0.14628937 -0.0673639
  -0.24222018  0.03653691]
 [-0.01850168  0.22798213  0.09411794 -0.04669441  0.03009953  0.06329852
  -0.23952996  0.07833212  0.04283549 -0.26081101 -0.07685203  0.07596045
   0.05803829  0.1697033  -0.00619501  0.00841619 -0.05851879  0.01255913
   0.09574363  0.19605494  0.17917954 -0.09709115 -0.04336108  0.15544067
   0.06627606  0.19630519]
 [ 0.17667635 -0.00368847  0.10388763 -0.22165652 -0.05570489  0.28307782
  -0.11400936  0.2126216   0.04012171 -0.07346032 -0.321839   -0.19774372
   0.05205464  0.01149884 -0.06691574 -0.15892259  0.11239736 -0.13798407
   0.11529446  0.04190461 -0.16752578 -0.06107946 -0.19258041  0.02832546
   0.04183152  0.026540

Reward: -398.46176636593793 weights: [[ 0.20758774 -0.11023399  0.11989784  0.10557039  0.02690053  0.06811037
   0.03928265  0.02719306  0.16087944 -0.05618804  0.24875946  0.09827427
   0.01312359 -0.05637734  0.24846308 -0.24685075 -0.0078823  -0.09931857
  -0.04309472  0.01287733  0.1022667   0.09105261  0.03241174 -0.05954649
  -0.13294731 -0.01947762]
 [-0.0914507   0.18808653  0.09674075 -0.01273542 -0.05132848  0.12492697
  -0.16939029  0.06384446  0.06176126 -0.10188831 -0.15541541  0.04418173
  -0.00927887  0.33872201 -0.07758633  0.05250473 -0.08968876  0.07776206
  -0.0172863   0.08210447  0.12043536  0.08603662  0.09212696  0.10600985
   0.130695    0.24766861]
 [-0.03386227 -0.06224693 -0.07650877 -0.13223205 -0.04905317  0.14844073
  -0.11217907  0.27516654 -0.07752981 -0.04490987 -0.2923754  -0.15005661
  -0.04126148  0.1148674  -0.06723647 -0.21289528  0.09209145 -0.24391577
   0.15940602  0.14399867  0.0336018  -0.07112531 -0.14541195  0.18134112
   0.17455432  0.0456

Reward: -254.5799886341913 weights: [[-3.72313000e-02 -7.86236379e-02 -7.91452859e-02  1.19964616e-02
   4.52242162e-02  4.03335234e-02 -1.07813157e-01  2.56119522e-02
   1.71199151e-01 -1.50569147e-01  2.09419602e-01  1.36359317e-01
  -1.63780001e-01 -5.66349012e-02  1.64224812e-01 -8.87746361e-02
   3.11267307e-02 -2.77674772e-01  5.29689752e-02 -1.04632500e-01
   1.31128469e-01 -1.29445828e-02 -1.76486146e-01 -1.28391595e-02
   1.43975568e-01 -6.14366978e-03]
 [ 2.06595438e-01 -4.26370210e-02  2.38682270e-01 -8.79129692e-02
   2.05573558e-01  6.47859551e-02  7.74285971e-04  5.75903959e-02
  -7.14654219e-02 -3.11524094e-01 -3.60446162e-01  1.38164160e-01
   2.77137875e-01  6.22499261e-02 -1.04241577e-01 -9.54025711e-02
  -1.59916501e-01  1.17102021e-01 -3.87931103e-02  1.34828380e-01
   1.88495109e-01 -4.21984649e-02  6.87710560e-02  8.83114633e-02
   1.53389265e-01  4.14267462e-02]
 [ 8.15801748e-02  6.86232272e-03  2.85082734e-02 -1.69642624e-02
   2.90454052e-03  7.77032725e-02  1

Reward: -983.2697952894554 weights: [[-2.23958335e-01  5.54257854e-02 -3.14025478e-01 -2.31125922e-02
  -1.78398905e-02 -8.77256854e-02  1.90109987e-01  1.28482261e-01
   1.62469826e-01  2.93941917e-01  9.07556812e-02  1.39848000e-01
  -9.70009834e-02  1.14827501e-01  9.85836306e-02  1.64522379e-02
  -1.36806210e-01 -1.13327776e-01 -1.69833462e-01 -1.59867197e-01
  -3.02790721e-03  1.47899270e-01 -1.29723039e-01 -2.19503513e-02
   2.02786314e-01  9.08292060e-02]
 [ 4.68610474e-01  2.74578222e-01  9.02986125e-02  7.27640905e-02
   1.77777911e-01  1.14005849e-01  1.23606941e-01  3.04627819e-02
   1.29186551e-01  6.45504950e-02 -3.11652153e-02  1.01361448e-01
  -1.58678751e-03 -9.52586325e-02  1.19208107e-01 -1.40822900e-01
   5.72101302e-02  2.23694850e-01  8.38254254e-03  1.86187265e-01
   6.48232076e-02  1.44495564e-01  1.31995712e-01 -4.17010783e-02
   1.55265202e-01 -8.68965143e-02]
 [-1.48079630e-01  3.80866084e-02  1.23649931e-01  1.78876697e-03
  -2.45687580e-02 -7.25560174e-02 -2

Reward: -895.2649660856644 weights: [[-3.65038623e-02  9.57661164e-02 -3.08635093e-01  1.34442886e-01
   2.03518807e-01 -3.26796570e-01  2.30631431e-01  1.38902122e-01
   1.36084347e-01  5.11306072e-01 -1.33892714e-02  1.00494044e-01
  -8.52415656e-02  8.33076328e-02  3.90594038e-01 -2.66387291e-01
  -4.96787663e-02 -5.25125565e-02 -1.38093366e-01 -4.58565205e-02
  -4.94417653e-04  1.87921068e-01 -8.63843431e-02 -9.42537709e-02
   2.14787903e-01  2.26352301e-02]
 [ 3.86928689e-01  3.15485739e-01  3.24081384e-03  1.16584764e-01
   7.54684414e-02  1.01436760e-01  8.90598848e-02  1.89413120e-02
   3.80313256e-01  7.63743088e-02  8.03626765e-02 -2.01246871e-02
   2.90060010e-02 -7.77630035e-02  6.82718325e-02 -8.19299026e-02
   4.54894335e-02  1.33881976e-01  6.23315154e-02  2.37117093e-01
   1.11653011e-01  1.63497647e-01  1.18083194e-01  5.06727737e-02
   2.55529238e-01 -2.07701174e-01]
 [-2.09009814e-01  8.31305107e-02  1.52068705e-01 -2.47005262e-01
  -1.45342249e-01  1.09307403e-01  5

Reward: -890.8217409527063 weights: [[-0.07297535 -0.01822647 -0.39181196  0.11344764  0.08547546 -0.20551263
   0.20138327  0.18459841  0.24265466  0.43750132  0.11864725 -0.0523419
  -0.24544468  0.14677979  0.53373648 -0.31387566 -0.07032674 -0.12665654
  -0.16614542 -0.13760819  0.01787435  0.28886905 -0.31130491  0.01408366
   0.12959884 -0.07841399]
 [ 0.5385475   0.34789374  0.16093091 -0.00163459  0.03420828  0.03553975
  -0.07110464  0.02198916  0.49676085  0.00672464 -0.06552664 -0.18203798
   0.14920496 -0.10958214  0.11759379 -0.16450278 -0.04484826  0.24720013
   0.06308446  0.31480306  0.0989393   0.13476337  0.11584558  0.18664445
   0.38549054 -0.23603985]
 [-0.08812333  0.03965304  0.39128417 -0.29890794 -0.17348858  0.03423626
   0.16929532  0.31532766  0.08411843 -0.10404121 -0.41642685  0.03925244
  -0.29873432 -0.053326   -0.09593248 -0.15163966  0.09980927 -0.07638108
  -0.1214563   0.17328597 -0.07346916  0.08035403  0.19790023  0.14443305
   0.5439966   0.245482

Reward: 79.53987642924426 weights: [[-0.11899807  0.06035829 -0.35489293  0.08374316  0.07625162 -0.24199425
   0.2618593   0.3181209   0.12817513  0.34529164  0.2262548   0.02290353
  -0.29529784 -0.04544453  0.28057519 -0.42305755 -0.19442018 -0.05441387
  -0.21614594 -0.06039714  0.0663179   0.21979473 -0.31629954  0.00819016
   0.18374183 -0.06254993]
 [ 0.63672671  0.36219492  0.10370616  0.02525838  0.08719117 -0.03046252
   0.03214019 -0.03666056  0.39412843  0.05702839 -0.16985787 -0.14733272
   0.07420743 -0.08666292  0.16315603 -0.34013903 -0.06628943  0.28665288
   0.08177086  0.23869884  0.04283476  0.13795812  0.13181571  0.07670445
   0.41505255 -0.21220194]
 [-0.19318606  0.12031525  0.29088102 -0.09434317 -0.09890437  0.01493189
   0.18228823  0.4678418  -0.01298976 -0.18879641 -0.44871907 -0.03012858
  -0.40954768  0.18119251  0.00376213 -0.22262001 -0.0265477  -0.09692721
  -0.10220782  0.17160099 -0.24297945  0.0754965   0.06158733  0.14850949
   0.40531792  0.274968

Reward: -651.1036484141192 weights: [[-1.55531425e-01  1.28097129e-01 -3.81083228e-01  1.30645135e-01
  -6.12226360e-02 -3.34845626e-01  3.34686308e-01  4.31242545e-01
   1.95742565e-01  3.78809685e-01  1.67762758e-01  5.40004817e-03
  -3.54486553e-01  4.43123915e-02  2.88990349e-01 -5.35680025e-01
  -3.08067875e-01 -8.21360676e-02 -3.27884133e-01 -2.44856306e-01
  -2.32597080e-02  2.78895072e-01 -1.74755400e-01  3.73280329e-03
   2.95159847e-01 -1.08497360e-01]
 [ 8.61568051e-01  5.81466684e-01 -8.94997066e-02  8.30948297e-02
   1.05147570e-01 -2.05625000e-01  1.57520821e-01 -4.08836167e-02
   4.80759631e-01  2.30772722e-01 -6.90494249e-02 -1.69043814e-01
   1.30761814e-01 -6.49705651e-03  1.26870924e-01 -4.69221818e-01
  -2.19791745e-01  3.06320489e-01  7.29837720e-02  3.33555850e-01
  -5.10714284e-02  2.02779401e-01  1.58956240e-01 -4.30117662e-02
   4.99025267e-01 -6.21105074e-02]
 [-3.01324641e-01  1.43206716e-03  2.90522711e-01 -1.90349509e-02
  -2.33339586e-01  1.61884176e-02  1

Reward: -638.2301931450247 weights: [[ 0.06147242 -0.17269013  0.01379028  0.08934486  0.08394408 -0.02018033
  -0.12614279 -0.08985686  0.10703372  0.14085194  0.09787061  0.0066715
  -0.08883854  0.11779143 -0.00580742 -0.10011729  0.0438285  -0.07588996
   0.14819889  0.14597716 -0.07328057 -0.07874189 -0.11390609 -0.0399465
  -0.04170156  0.12985615]
 [-0.03927925  0.11269927  0.29683508  0.10284062  0.13307977  0.11561884
   0.04393044  0.11459373  0.03800412 -0.06080655  0.02634638  0.09782317
   0.09225508 -0.04999857  0.26973573 -0.0365899  -0.01341757  0.0897249
  -0.08349603  0.15310329 -0.07649769  0.07045383  0.10151904  0.16889642
   0.01312313 -0.0900354 ]
 [ 0.07669715  0.13307     0.01353488 -0.08118758  0.05839483 -0.08504569
  -0.07837441  0.16304538  0.27099187 -0.08050867 -0.08205915  0.25031897
   0.05321948  0.26747893  0.00413702  0.12007344  0.08989889  0.21223757
   0.18299086 -0.09401943 -0.08941617 -0.13120263  0.04215828  0.14383937
   0.18519149 -0.17961043

Reward: -930.4803045116564 weights: [[ 0.22463659 -0.14017628  0.0868254   0.17989685  0.16253551  0.01058933
  -0.28225884 -0.08763278  0.10971001  0.13548411 -0.06835665 -0.09066064
  -0.02778155  0.27079173  0.0806759   0.00290749  0.17313348 -0.0081683
   0.16195893  0.19403184 -0.13732205 -0.00732231 -0.05906744 -0.0433267
  -0.06355615  0.11030999]
 [-0.09656331  0.16960099  0.33530677  0.11892463 -0.00872863  0.11137752
  -0.06010423  0.10964417  0.00948861 -0.12006484  0.13970029 -0.13815937
   0.05800564 -0.04072092  0.16802853  0.06553252  0.12170887  0.18124996
  -0.16511362  0.12826857 -0.08827685  0.00641653  0.1181452   0.21580377
  -0.01645427  0.07597659]
 [ 0.24915509 -0.09166385  0.0230108  -0.02063745  0.02976535 -0.07080171
  -0.13694863  0.06398869  0.38399693 -0.02397279  0.14995056  0.23270725
   0.18714981  0.02025444 -0.12782009  0.09174076  0.11672955  0.25392402
   0.12790677 -0.08317027  0.09618577 -0.0275372   0.01513887  0.07428267
   0.08339364 -0.1522406

Reward: -619.8186351496131 weights: [[-0.0254734  -0.01357783  0.17726975 -0.02365996  0.09302257  0.20008633
  -0.39310762 -0.03803775  0.20295804  0.29326102 -0.09997551 -0.33812746
   0.46426328  0.12173685  0.33504678 -0.11013131 -0.04730719  0.05631107
   0.39497792  0.03203795  0.27190816 -0.19853736  0.36141736  0.27855884
  -0.07235906  0.06273564]
 [ 0.04216062  0.3815144   0.053401    0.16859161 -0.1068517   0.05512626
   0.0232352   0.0321722  -0.08573401 -0.0049581   0.15717025 -0.21159487
   0.14222009  0.05906727  0.30686897 -0.14264047  0.39923296  0.21063085
  -0.27192468  0.17506032  0.1070313   0.20425621  0.2325042  -0.35911213
   0.05419798  0.07453537]
 [ 0.03538205 -0.17836269  0.02736308  0.1556223  -0.01069683 -0.15721576
  -0.28099607  0.01219938  0.02008238 -0.00058233  0.256063   -0.03879654
   0.05155584  0.21473275  0.24028026 -0.02994786 -0.0088853   0.38801545
   0.29109349  0.02317205 -0.17348058  0.09963739  0.24878285  0.20209945
   0.20599919 -0.17979

Reward: -906.2504008555641 weights: [[-0.06255552  0.060464    0.13720515 -0.06362641  0.05606995  0.23962466
  -0.31186287  0.06142488 -0.00954623  0.36595563  0.00125339 -0.18838714
   0.411428    0.22278902  0.35051667 -0.07947908 -0.03092672  0.01086363
   0.35377323  0.20364573  0.45398458 -0.11654473  0.48463316  0.18683174
  -0.13550169 -0.03052281]
 [-0.10995378  0.37720171  0.03832953  0.10512735 -0.0383582   0.07253271
  -0.00284948  0.10309467 -0.1443987  -0.02048775  0.19620327 -0.14265932
   0.14669023 -0.12498651  0.28652194 -0.11631785  0.44668172  0.16404577
  -0.18736547  0.24540845  0.15385725  0.04032932  0.24772543 -0.14582679
  -0.05176648  0.1804843 ]
 [-0.02140343 -0.22153661 -0.00674637  0.1362408   0.04558428 -0.03723383
  -0.50163872 -0.02481108 -0.08194797  0.09336632  0.24183358  0.12081285
   0.0676006   0.05739658  0.29575459 -0.03739067 -0.15182632  0.45285215
   0.19227934  0.06490684 -0.20984281  0.08146083  0.31916743  0.15199764
   0.30464029 -0.21246

Reward: -353.44953640643524 weights: [[ 0.10249905  0.20090173  0.09100737  0.09993535  0.13446188  0.34353777
  -0.49781659  0.23164841  0.02473981  0.42417532 -0.13716851 -0.29872672
   0.49253504  0.17303668  0.36745368 -0.07479259 -0.26155953 -0.00867766
   0.41672217  0.32907077  0.44684946 -0.03332468  0.67880806  0.15743703
  -0.17046013  0.01498962]
 [-0.17675969  0.56983931 -0.09276875  0.2342181  -0.10602685  0.05270748
  -0.10095771 -0.15069453 -0.1106429  -0.05124345  0.27762603 -0.2188716
   0.26107418  0.04260484  0.35247519 -0.26525466  0.47583726  0.26726804
  -0.22168686  0.46148519  0.25650524 -0.18183423  0.19933616 -0.37678623
   0.12766106 -0.00539172]
 [ 0.06694092 -0.43974673  0.0284042   0.10408559 -0.0248763  -0.02620542
  -0.43120706 -0.23102993 -0.11791884 -0.05742391  0.18548953  0.15961795
   0.11475092  0.05114286  0.35964998 -0.25151648 -0.15865476  0.60385081
   0.11822221  0.01910204 -0.41953742  0.0682815   0.09995278  0.12999662
   0.41186904 -0.09223

Reward: -913.0020459451583 weights: [[ 1.95892369e-01  6.46277928e-02  1.69153151e-01  6.10513661e-03
   1.43083603e-01  1.54577940e-01 -3.06534037e-01 -3.86837904e-02
   2.40717350e-01  2.20384737e-01  3.02777728e-02 -1.60182636e-01
   1.26294926e-01 -6.51154873e-02  1.70650397e-01  5.72857622e-02
  -1.95434008e-01 -7.89761621e-02  1.00359537e-02  1.74785890e-01
   1.64848633e-01 -1.60543897e-03  1.05145560e-01  8.97449285e-02
  -3.90176983e-02 -1.60227508e-01]
 [-1.83347197e-02  2.73084821e-01  6.50020453e-02  1.09458765e-02
   4.00715168e-02 -1.56406720e-01 -1.05173244e-01  8.69568220e-02
  -1.07931485e-01 -9.34134877e-03  1.69361917e-01 -1.62277060e-01
   7.70470851e-02  1.10776311e-02 -7.56018700e-02 -2.19195322e-03
   2.04382527e-01  2.17945183e-01  1.52166107e-01 -7.22891803e-02
   1.83798626e-01 -3.87984559e-02 -1.56700939e-01 -1.62870134e-01
   1.32851813e-01 -3.34238939e-02]
 [-2.65897316e-02 -2.00437118e-01 -1.65148528e-01 -7.58628620e-02
  -1.44588754e-01 -6.86739890e-03  1

Reward: -953.0414473492758 weights: [[ 0.25026149  0.11441087  0.15444051  0.03119711  0.16291882  0.18655177
  -0.31778086  0.0545521   0.16971261  0.12438809  0.00455767 -0.20062816
   0.15555125 -0.10446195  0.12194499  0.10939247 -0.27910076 -0.09589881
   0.00906295  0.17611456  0.11307765  0.06625416  0.05241007  0.05300411
  -0.06811733 -0.09246998]
 [-0.01221495  0.33174124  0.05811031  0.0416602   0.04915742 -0.11093984
  -0.10082727 -0.03023015 -0.07768544 -0.03596962  0.11990596 -0.17745285
   0.05026444  0.12220013 -0.07802064 -0.11804149  0.22421256  0.18834883
   0.16284328 -0.02426228  0.1236092  -0.10433086 -0.16595166 -0.19684698
   0.10872397 -0.10052729]
 [-0.00238175 -0.27560575 -0.20155802 -0.07040219 -0.06813033 -0.03198788
   0.25661823  0.06342148  0.09382108 -0.08311408  0.12909348  0.04341989
   0.3691389  -0.20823913  0.1286091  -0.14470506  0.22375001  0.04023933
  -0.10100021 -0.10914872 -0.18786641  0.01133934  0.08514958 -0.00495097
   0.40490133  0.37784

Reward: -978.9173938666671 weights: [[ 0.2974134   0.09086     0.21865933  0.00248792  0.25634692  0.21588955
  -0.25790945  0.11415641  0.13478544 -0.13317226  0.20475895 -0.04705904
  -0.07429234 -0.28917642  0.18941805 -0.08186901 -0.38690868 -0.37851156
   0.07881596  0.23266051  0.132275    0.10642787  0.30758894 -0.06658419
  -0.41485895 -0.01768657]
 [-0.04732146  0.43672398 -0.054064    0.02907983  0.03860056 -0.03637285
  -0.23875739  0.06993984 -0.13364755 -0.20649955  0.04723183 -0.15793118
  -0.0282755   0.07613848 -0.18072048 -0.05221744  0.17126408  0.14844968
   0.10675145  0.03483468  0.19520077 -0.19084592 -0.14226304 -0.10696776
   0.06240261 -0.0245337 ]
 [ 0.03582323 -0.34782452 -0.16256975 -0.17714306 -0.16683657  0.17166256
   0.03403634  0.16306961  0.07617812 -0.14361131 -0.12205008 -0.10971972
   0.32189379 -0.12873493  0.03085388 -0.28073368  0.28118631 -0.05199515
  -0.15089428 -0.1056127  -0.46463828 -0.07236537 -0.07071648 -0.03543714
   0.33952453  0.26063

Reward: -967.2517111583086 weights: [[ 3.35600245e-01  6.83623300e-02  3.17050872e-01 -1.22086530e-01
   1.94608010e-01  1.69673131e-01 -5.28395471e-01 -5.02400983e-02
   6.54849465e-02  9.67299213e-02  2.13078707e-01  3.41026917e-04
  -9.13487894e-02 -1.42262882e-01  1.88725509e-01 -9.74687915e-02
  -3.86180286e-01 -1.05597636e-01  2.10762639e-01  1.77572094e-01
   5.94007395e-02 -7.73656405e-02  2.39764502e-01 -5.53292613e-02
  -3.87118550e-01 -3.29686452e-02]
 [-2.93395840e-02  5.51577264e-01  1.57374266e-02 -9.71320684e-02
   5.43126879e-03 -9.15465721e-02 -5.10436113e-02 -2.00811928e-02
  -1.79426466e-01 -8.95767964e-02  6.74573773e-02 -2.70208474e-01
  -4.03341632e-02  2.19887852e-01 -2.69954751e-01 -1.56137588e-01
   2.82202742e-01  2.17540051e-01 -8.38527250e-02  9.57733668e-02
  -2.58870799e-02 -6.45184120e-02 -1.57651935e-01 -2.25422009e-01
   1.06748731e-01 -1.28663658e-01]
 [ 1.43791436e-01 -3.14621578e-01 -2.07410724e-01 -2.43008079e-01
  -1.99645095e-01  1.59474487e-01  2

Reward: -474.595615742382 weights: [[ 0.19508273  0.0723937   0.19777397 -0.10934155  0.25231457  0.08591194
  -0.41780742  0.0043911   0.06963571  0.23547881  0.23027118 -0.22040856
  -0.40593015 -0.23458903  0.11783424  0.01731035 -0.38384911 -0.01758552
   0.26958245  0.15887437  0.1173661  -0.18191891  0.40263196 -0.08487675
  -0.4493491  -0.11316201]
 [-0.02144412  0.57464633 -0.01976075 -0.19863417  0.10707075 -0.07192718
  -0.1031276  -0.03459519 -0.23223035 -0.16902003  0.25817156 -0.04995296
   0.10810958  0.06826627 -0.21294367 -0.18161087  0.21693019  0.20073689
  -0.13941034  0.15324331 -0.04265232 -0.08100242 -0.13759715 -0.13272983
   0.11639806 -0.12584906]
 [ 0.17767224 -0.23147969 -0.02110035 -0.17320511 -0.25184381  0.19149391
   0.27922058  0.24597044  0.31875499 -0.31551581 -0.23921189 -0.13597513
   0.42226656 -0.40046481  0.2659161  -0.06054826  0.27542206  0.03464408
  -0.20421021 -0.02809364 -0.47767944 -0.19699792 -0.0693117  -0.08776813
   0.04186981  0.430058

Reward: -948.4079084249443 weights: [[ 0.24155404  0.03838682  0.10272911  0.1302009   0.04180808  0.03671137
   0.13424482  0.19180892  0.10465473 -0.08578858 -0.03825919 -0.05527835
   0.01622413 -0.13883913  0.13579832 -0.04110744  0.05702731 -0.11671423
  -0.1218583   0.11392949 -0.27291944  0.11497565  0.22918215 -0.06331843
  -0.06741003  0.30644559]
 [ 0.1236713   0.0109206  -0.19968803  0.19341624  0.22496983  0.18818186
  -0.14519214  0.18189812 -0.15899923  0.05091998  0.12940891  0.0275189
  -0.07997477 -0.02341889 -0.03116666 -0.02565095  0.18708654 -0.05113775
   0.13731633 -0.08068356  0.10171998 -0.06344575  0.16151479  0.08713498
  -0.03130737 -0.00800916]
 [-0.03071705  0.03316097  0.28534844  0.05818511  0.064966   -0.14100873
  -0.14329711  0.11245399 -0.00913444  0.0138683   0.02230135 -0.04091451
  -0.09386256  0.20483171  0.01383921  0.03485429  0.10474798  0.13503971
   0.01558404 -0.09896963 -0.00592765  0.08299448 -0.21925462  0.15976799
  -0.07357731 -0.068421

Reward: -881.448690764938 weights: [[ 2.73782558e-01  7.47139010e-02  2.58641681e-01  1.55492216e-01
   1.76842078e-01  2.06773852e-01  2.91342877e-01  5.27595938e-01
   5.80538663e-02 -3.07090177e-01 -1.42145737e-01 -2.87303083e-01
   1.49449242e-02 -4.45510140e-02  1.39014733e-02 -7.35046805e-02
   1.30675434e-02 -3.30790624e-01 -2.46190876e-03  1.02803437e-01
  -2.47821901e-01  2.12357680e-01  3.20253766e-01  2.00203974e-02
  -5.70960433e-02  4.46997218e-01]
 [ 1.60634646e-01  7.80000404e-02 -1.94872764e-01  2.74537677e-01
   1.61495739e-01  1.43753151e-01 -1.08274567e-01 -2.08753259e-02
  -1.47022941e-01 -1.50130524e-01  4.96072547e-02  7.07952566e-02
  -9.77621425e-02  7.56731033e-02  5.08806733e-02 -1.44867868e-01
   1.57125823e-01 -2.14116221e-01 -6.60070055e-02  5.65907315e-02
   7.06308120e-02 -1.54477992e-01  3.75478064e-01  2.03050253e-01
  -1.66848003e-01  1.75830777e-01]
 [-1.48529804e-02 -5.02140733e-02  2.38504854e-01 -5.43491378e-02
   1.27416341e-01 -1.67344705e-01 -2.

Reward: -688.8750525562766 weights: [[ 3.49400200e-01  9.47728780e-03  3.74782315e-01  4.78716013e-02
   1.05877960e-01  1.66256570e-01 -6.42385115e-02  3.06187357e-01
   4.63085374e-03 -9.12765880e-02 -1.23615823e-01 -1.69233588e-01
   7.15651079e-03  7.41557108e-02 -5.34871353e-02 -4.59438948e-02
   2.51446446e-02  3.47617151e-02  9.88095194e-02  2.17152494e-01
  -2.56640679e-01 -2.49906115e-02  2.24412888e-01  6.89644232e-02
  -8.85678479e-02  2.45666616e-01]
 [ 3.67144668e-02  1.80618402e-01 -9.41936104e-02  4.02077832e-02
   4.23620298e-02 -5.02705657e-03  9.45369975e-03 -9.90714745e-02
  -2.10523991e-01 -2.87560514e-02  1.43886531e-01 -1.29962065e-02
   1.37985771e-02  2.54269853e-01 -9.17868431e-02 -2.33456471e-01
   2.13496297e-01 -2.46285921e-02 -1.54971533e-01  4.94184697e-02
  -3.68278670e-02 -1.26887093e-01  1.64508997e-01  5.63290937e-02
  -3.89617254e-02  7.79718319e-02]
 [ 4.80026909e-02 -6.20433026e-02  1.98259458e-01 -8.61785148e-02
   1.17547022e-01 -2.61702261e-02  6

Reward: -646.858535043482 weights: [[ 0.35383559 -0.00770321  0.42747624  0.02339077  0.05930147  0.13883929
   0.03092278  0.2589321  -0.0707226  -0.10855334 -0.05796076 -0.14307292
   0.00186213  0.06111073 -0.03142379 -0.13812469  0.13324139  0.04489102
   0.08124282  0.21901271 -0.22046139 -0.11801544  0.31711285  0.09706719
  -0.10566696  0.22777856]
 [ 0.00913088  0.15966718 -0.05749389 -0.00676304 -0.01672052  0.02711981
  -0.03127231 -0.00309238 -0.22143542 -0.08606029  0.18291411  0.08336182
  -0.00625179  0.21516023 -0.07641119 -0.15577102  0.16674817 -0.07911799
  -0.22934457  0.02525005 -0.03031771 -0.02630714  0.20055419  0.15874989
  -0.06025134  0.18563499]
 [-0.01716648  0.00607427  0.15058049 -0.02875171  0.09352484 -0.01911059
  -0.05704019  0.15034205  0.08735131  0.00485107 -0.02446092 -0.26428763
  -0.02159728  0.29990972  0.16910815  0.0094748  -0.00976524 -0.09353013
   0.02786289 -0.0254898   0.11304722  0.09144511 -0.31576659  0.03565258
  -0.2158619  -0.088571

Reward: -848.5659114787977 weights: [[ 0.48399741  0.25754834  0.41733377  0.06508185  0.15072023  0.12392803
  -0.15125813  0.20487058 -0.12310824 -0.31532158 -0.45328288 -0.00523958
   0.12384395 -0.0289387  -0.24640056 -0.18509352  0.21232011  0.10323858
   0.0596256   0.29305114 -0.07592777 -0.25913     0.35904234  0.01139359
   0.084792    0.35450175]
 [ 0.02403642 -0.0097466  -0.26871505  0.15404575 -0.15048012  0.0336013
   0.28161602  0.03025322 -0.6190695  -0.28052078  0.07484156  0.12359107
  -0.03372446  0.18346459 -0.08764958 -0.14310107 -0.08254296 -0.23686911
  -0.12151403 -0.11947767  0.0545928  -0.20193177  0.0739475   0.14530285
  -0.04684196  0.19604046]
 [ 0.02184999 -0.23328989 -0.27341662  0.28453477 -0.06660287  0.14955237
  -0.04203682 -0.04473237  0.15737681  0.1046896   0.05891636 -0.21634193
   0.12880428  0.57012513  0.11682687 -0.21723009  0.08493795 -0.30523725
   0.12514029 -0.10755904  0.1088157   0.38453163 -0.36739804 -0.28738136
  -0.32020417 -0.083400

Reward: -948.5540202704574 weights: [[ 0.07886885 -0.00196667 -0.09539912  0.16765573  0.12939947 -0.11698264
   0.11655294  0.14446269  0.21488763 -0.04710194 -0.05362004  0.04112473
  -0.27874898 -0.2305747   0.00667564  0.22256781  0.10678386 -0.14953072
  -0.26807804  0.26041814 -0.2336937   0.12541959  0.17726405 -0.22162619
  -0.28958752  0.15304665]
 [ 0.05799904 -0.11019805 -0.22179185  0.05463227  0.35787289  0.16331671
  -0.08460898  0.30677901 -0.10829319  0.09049257  0.33508809  0.21380021
   0.07793675 -0.2331771  -0.05581151  0.04992443  0.11059647 -0.05782636
   0.37314779 -0.1898944   0.20137497 -0.12458515 -0.19173602  0.02393247
  -0.02407008 -0.19598498]
 [-0.00404323  0.07004859  0.53553241  0.09181002  0.21985666  0.14209316
  -0.04678529  0.31835377  0.00290507 -0.14120716 -0.07879621 -0.00424173
   0.27427578 -0.14386149  0.16793725  0.21266096  0.24576141  0.02250926
  -0.22159742 -0.06031072 -0.02398026  0.05613232 -0.28028291  0.05739337
  -0.10498493 -0.04894

Reward: -836.2691228610054 weights: [[ 2.46090567e-01  3.38526836e-02  2.83766781e-02  3.97260670e-01
   1.81779591e-01 -8.76873412e-03 -8.51909323e-02  2.11969444e-01
   4.88365746e-01  1.13731252e-01 -2.76070848e-01 -3.44142714e-01
  -2.49148337e-01 -3.82632334e-01  7.47842523e-02  2.04864941e-01
  -1.69036922e-02 -1.10814824e-01 -3.15256991e-01  3.40991528e-01
  -3.96374565e-01  2.27393976e-02  3.16579789e-01 -7.88504257e-02
  -2.49007769e-01  2.06290559e-01]
 [ 1.06686212e-01  1.13432404e-01 -2.05695958e-01  1.30912347e-01
   2.71422779e-01  6.13972490e-02 -2.09493025e-01  9.71979580e-02
  -8.14309835e-02  1.15818761e-02  5.80970286e-01  2.62559164e-01
   1.50951876e-01 -2.46404046e-04 -2.58132329e-03 -3.30545185e-02
   8.43955958e-02  1.10163762e-01  1.69239723e-01 -1.52841264e-01
   2.33202277e-01 -1.33154579e-01 -1.51353580e-01 -2.77211401e-01
   2.77413862e-01 -3.63003958e-01]
 [-4.50670254e-02  1.53198258e-02  5.12929175e-01  1.27646088e-01
   5.16859911e-02 -1.03170569e-01  1

Reward: -590.3974990063742 weights: [[ 9.79164501e-02 -1.55408897e-01  6.07055399e-02  3.04287770e-01
   1.84043891e-01 -1.52147833e-02 -1.29116545e-01  2.15324756e-01
   6.74488952e-01  4.84006443e-02 -2.60735844e-01 -4.43290749e-01
  -1.41185646e-01 -4.33534622e-01  1.40348570e-01  1.41834979e-01
   7.41590086e-02 -1.08283903e-01 -1.79236701e-01  3.21396701e-01
  -2.02624029e-01 -8.19790789e-02  4.53199757e-01 -1.73405099e-02
  -4.43740528e-01  1.23766665e-01]
 [ 6.47485910e-02  1.40610441e-01 -3.01957189e-01  1.53202829e-01
   1.77479722e-01  1.19245274e-01 -1.76908167e-01  1.16728935e-01
  -2.12386664e-01  2.15436749e-03  6.35815626e-01  2.68428287e-01
   2.83086005e-01 -6.19858508e-02 -1.13793246e-02 -4.63174982e-02
   7.92557884e-02  1.86993005e-01  9.84263414e-02 -2.07773672e-01
   3.17193757e-01  6.22212437e-03 -7.90384860e-02 -3.66988142e-01
   1.98298718e-01 -2.19847855e-01]
 [-1.87573901e-01  9.08128954e-03  5.80642378e-01  2.71258469e-01
   1.39494245e-01 -5.53744013e-02 -1

Reward: -514.9146673942535 weights: [[ 0.17992675 -0.251284    0.159007    0.15250189  0.07689545 -0.03634354
  -0.35343272  0.1388597   0.62949624  0.19946825 -0.20367333 -0.27105196
  -0.22264102 -0.3756062   0.22442842  0.12321154  0.13000894  0.06529586
  -0.18112902  0.3831567  -0.23516152 -0.22256709  0.48159224 -0.09421695
  -0.52661782  0.03403721]
 [ 0.0122236   0.23142191 -0.26146627 -0.06119763  0.2155436   0.14514365
  -0.05545131  0.08298231 -0.38966977  0.12733509  0.79291023  0.28535434
   0.27896887 -0.02516559 -0.24214798 -0.10207666  0.17850737  0.23995355
   0.02068151 -0.22574344  0.18116814  0.14130452 -0.07929246 -0.31395618
   0.14584369 -0.18574557]
 [-0.22460327 -0.01132753  0.47243064  0.29349847  0.27969981 -0.07558605
  -0.04470259  0.70239981 -0.04353826 -0.32532567 -0.20177178 -0.20841595
   0.42129625 -0.11796593  0.55923986  0.11039658  0.14193775  0.13348224
  -0.13237841  0.17210289  0.24049373  0.08159379 -0.56080002  0.34564982
   0.07391831 -0.07370

Reward: -429.0721699379716 weights: [[ 0.14112145 -0.17460895  0.14578851  0.09646787  0.03584587 -0.02956152
  -0.28860208  0.16091276  0.4718099   0.32646198 -0.17595658 -0.46247366
  -0.21366878 -0.34355276  0.29011181  0.09211936  0.0428899   0.16428364
  -0.05374876  0.22784929 -0.24161315 -0.2396559   0.60445294 -0.09535859
  -0.43732295  0.13054097]
 [ 0.08675973  0.31252169 -0.30548021 -0.02341959  0.24574744  0.22967265
  -0.0166668  -0.02311026 -0.41103517  0.17648623  0.75071647  0.26539764
   0.19795043  0.04061103 -0.16246144 -0.18907347  0.25883334  0.17523265
  -0.15955994 -0.10682975  0.00578012  0.25964468  0.15418612 -0.28704455
   0.09973185 -0.1553612 ]
 [-0.1793692  -0.02092719  0.44234826  0.36879645  0.21280953 -0.26198922
  -0.02732156  0.55572488  0.04027575 -0.27561873 -0.15917542 -0.18535478
   0.21618609 -0.06429698  0.52703909  0.20011811  0.11479966  0.22217211
  -0.00889586  0.13635852  0.12841072  0.09882387 -0.48660271  0.33761259
  -0.01079217 -0.02839

Reward: -969.4079378441152 weights: [[-0.2894905  -0.04447892 -0.05996622  0.01778977  0.22016194 -0.20348727
   0.08770224 -0.03303961  0.07685342  0.40597914  0.06001218  0.04397251
   0.03572802  0.26346663  0.04230817  0.01007011 -0.29768567 -0.08116245
   0.05133781 -0.00745708 -0.25120495  0.1509841  -0.02952726 -0.08776989
  -0.04957896 -0.15317266]
 [ 0.11004383  0.00771584  0.31997785  0.29310326  0.14164571  0.24561143
  -0.08866626  0.11089273  0.0903574   0.02284927  0.13805369  0.05429901
  -0.20107043  0.01177508  0.33129035  0.16636402  0.08890115  0.03896914
  -0.0654732   0.21562331 -0.07304149  0.27302507  0.18125877 -0.0465047
  -0.29195294 -0.02483368]
 [-0.12179197  0.23962875 -0.00777919 -0.32685121 -0.03986434 -0.09639818
  -0.26527431  0.23480284  0.02889595 -0.13797844 -0.12404671  0.02141585
   0.16278723 -0.07828733 -0.08439795  0.21082707 -0.10128832  0.2602515
   0.40643831 -0.12185201  0.01905118 -0.04920329  0.20800493  0.20563825
   0.16880307  0.0449994

Reward: -457.9101778642205 weights: [[-0.26635353 -0.11734246 -0.03488069 -0.10500271  0.23777154 -0.16748067
   0.17532434  0.11335448  0.08628801  0.21269708 -0.10078928 -0.23775055
  -0.27532397  0.23152661 -0.14731038  0.14510924 -0.009321   -0.04107425
   0.07721792  0.02465534 -0.00700304  0.14171252 -0.17115304 -0.23124401
  -0.034227   -0.0418708 ]
 [ 0.16032586 -0.13545905  0.20797535 -0.27579053  0.27899904  0.15695394
   0.14760796 -0.13998797  0.02702735 -0.02915684  0.14813007  0.32780674
  -0.10210077 -0.11819897  0.08652181  0.06462432 -0.11553989  0.01046373
  -0.13771165  0.07131655 -0.15661794  0.20843877  0.09514994  0.14061438
  -0.26785537  0.01794128]
 [ 0.11812013  0.26371136 -0.13606813 -0.06393266  0.25142822  0.22395729
   0.1472255   0.34947021  0.03267427  0.02516257 -0.17439586  0.24155924
   0.06969516 -0.34332262  0.0745947   0.36168174 -0.17977877  0.05324679
   0.19245767 -0.09917675  0.02352767 -0.12674038  0.14472289  0.04225897
   0.05969448  0.12154

Reward: -765.5407977262363 weights: [[-0.16582948  0.24642035 -0.15688291 -0.07841773  0.35777912 -0.30568863
   0.08998919  0.05093723  0.20757642  0.37898823 -0.02717458  0.06045484
   0.06819284  0.25016292  0.30090972 -0.0898798  -0.07891279 -0.0357927
  -0.06028438  0.00289079  0.01254063  0.14260827  0.01358247 -0.21586676
  -0.01691968  0.05344386]
 [ 0.20193352  0.01288455  0.20104369  0.19664889  0.2367798   0.34911296
  -0.01250863  0.01822732  0.05349181  0.07948111  0.42905866  0.23836767
  -0.22857564 -0.0815724   0.37916044  0.13907429 -0.06327739 -0.12756015
   0.03631043  0.25194534 -0.05115009  0.31746666  0.18275439  0.06628673
  -0.44941747 -0.53032266]
 [-0.09380802  0.28708114 -0.13596963 -0.62226788 -0.05921627  0.063115
  -0.39050607  0.26339708  0.1418215  -0.1032626  -0.27407758  0.11588903
   0.08500289  0.01407248 -0.0903517   0.15080544 -0.30755575  0.14363652
   0.58702355 -0.20526293 -0.08133026  0.11047005  0.00445198 -0.05938722
   0.28282291  0.16356858

Reward: -290.3801115236019 weights: [[-0.15953987 -0.05098044 -0.06254987 -0.11787902  0.19996975 -0.2119711
   0.01611762  0.03937845  0.09723464  0.29029535 -0.02294986 -0.09282661
  -0.22235972  0.18831141 -0.03721909  0.03098901 -0.14057052  0.02462934
   0.02694424  0.01482249 -0.092555    0.13567331 -0.25355999 -0.26627873
  -0.00773222 -0.10018347]
 [ 0.18418836  0.02769564  0.26877963 -0.18139295  0.28876384  0.1943195
   0.12560697 -0.13795412 -0.05002545  0.01298424  0.13087143  0.28414904
  -0.18570631  0.00095167  0.04342296 -0.01006893 -0.10111373  0.08576698
  -0.09035113  0.11062076 -0.24747669  0.19827962  0.11409953  0.13174217
  -0.26605745 -0.11663406]
 [ 0.02037841  0.20651432 -0.31747797 -0.20132934  0.13236869  0.10364938
   0.12314739  0.3013973   0.1238481  -0.03018862 -0.22264989  0.18836885
   0.0487965  -0.2857103   0.06069469  0.25146156 -0.12804199  0.13319103
   0.26659032 -0.14140087 -0.0533969  -0.13276236  0.10283025  0.05022409
   0.23434262  0.3201157

Reward: -924.7122461492094 weights: [[ 0.21755389  0.10678659  0.30972235  0.00441315  0.25765996  0.04437139
  -0.03374717  0.22384031  0.04431778  0.28378493 -0.27984485 -0.3072018
  -0.13035798  0.28306934 -0.12350911  0.14726591  0.00633741 -0.19260723
   0.07703533  0.18051886 -0.16081708  0.15703669  0.08703423 -0.10428209
   0.17362823  0.24673798]
 [ 0.27310008  0.04861672  0.19799625 -0.24247506  0.15861831 -0.09648773
   0.14273857 -0.34560415  0.17862005 -0.13065254  0.26941166  0.11073728
  -0.2121118  -0.08612975  0.05692585  0.10411037  0.03893086  0.00052068
  -0.27446269  0.24766922 -0.03062994  0.10217713  0.0546048  -0.08473903
   0.04040079 -0.17434468]
 [ 0.41157175  0.34525145 -0.36673008 -0.1667976   0.08757616 -0.0016289
   0.41342379  0.22790934  0.11584809 -0.07230978 -0.21551795  0.20603508
   0.17251667 -0.07486337  0.13738961  0.07639999 -0.0189822  -0.06729723
   0.09303377 -0.287432   -0.08597115  0.01087907  0.04232774  0.06319549
  -0.01519552  0.0780570

Reward: -485.4224916072623 weights: [[-1.77133521e-01 -1.10841257e-01 -4.95831516e-02  1.44356025e-01
   1.71416148e-01 -3.63171609e-01 -1.47551630e-01  1.13662865e-01
   1.77770866e-01  3.05143979e-01 -9.87020397e-02  1.96024958e-01
  -9.19748000e-03 -5.62451595e-02 -3.96159473e-01 -1.64409235e-01
  -2.30647004e-01  4.04967358e-02 -9.61862486e-05 -2.10919220e-01
  -2.42040928e-01  1.15935563e-01 -1.89386582e-01 -9.29603375e-02
   9.35745973e-02 -1.19770542e-01]
 [ 3.24227645e-01  9.19730716e-02  2.47259012e-01  5.94228896e-01
   1.81621767e-01  7.80892586e-02  4.39486949e-01 -1.88909116e-02
  -5.79145560e-02 -2.87114704e-01 -2.04447454e-01 -1.14292097e-02
  -7.48976968e-02 -1.34681577e-01  2.34800288e-01 -1.49043624e-01
  -4.92908617e-02  2.23933336e-01 -3.33102491e-01  1.02754772e-01
  -3.16865816e-01  2.02623153e-01  1.77285806e-01 -2.90288373e-01
   2.46564951e-02  2.71691873e-01]
 [-3.99152488e-02  2.68072059e-01 -2.88447725e-01  2.87583113e-01
   9.80339768e-02 -1.62271978e-01 -2

Reward: -912.4377879428093 weights: [[-0.11053507 -0.07922194 -0.04587234  0.16326411  0.22602759 -0.40259696
  -0.13135871  0.16441661  0.20583999  0.21934713 -0.09442875  0.1372445
  -0.00203506 -0.05425848 -0.38936195 -0.19945243 -0.23345951  0.02426163
  -0.0014182  -0.23150441 -0.2231075   0.12086661 -0.11711539  0.02790075
   0.10317068 -0.16817429]
 [ 0.31461846  0.03831839  0.23714232  0.62722004  0.12669266  0.03439121
   0.33922438 -0.05701387 -0.05540156 -0.26646472 -0.26499904 -0.06326013
   0.02489585 -0.08141395  0.18798312 -0.16586708 -0.04950507  0.28072443
  -0.36064173  0.07488683 -0.27113706  0.23781277  0.18784596 -0.30927834
   0.05533469  0.33018935]
 [-0.03639604  0.23931282 -0.28443902  0.27543647  0.08173501 -0.14111891
  -0.26348742  0.50656234 -0.29137784 -0.08242992 -0.03699567 -0.05693666
   0.24665666  0.04924887  0.13614868  0.25122895 -0.113373    0.14299601
   0.24328611 -0.0408099   0.0418307   0.10751529  0.29982157 -0.01129376
   0.04096005  0.193539

Reward: -657.4571628027264 weights: [[-0.10046137 -0.07020126 -0.06055329  0.29374601  0.22053725 -0.41010366
  -0.08982862  0.12575793  0.18063639  0.52792837 -0.12332962  0.0324762
  -0.08724913 -0.12933617 -0.43040441 -0.39184116 -0.31033869  0.1349297
   0.02938558 -0.08272063 -0.25846892  0.02389273 -0.0929728  -0.07374571
  -0.00117386 -0.21886674]
 [ 0.34083608  0.16809038  0.18727109  0.59495489  0.01223334  0.12420322
   0.41681685 -0.00117326  0.01741533 -0.33263791 -0.25620718 -0.00677265
  -0.12058495 -0.14047987  0.37187475 -0.18980963 -0.05281157  0.22773279
  -0.31694342  0.18209347 -0.32412866  0.21292988  0.12455176 -0.27707993
   0.22317608  0.21394703]
 [-0.10183484  0.38092274 -0.34735487  0.13196087 -0.02804283 -0.09238334
  -0.25132281  0.517638   -0.18504537 -0.09571293 -0.2430279  -0.01449735
   0.19791618  0.25686877  0.18418969  0.16420342 -0.1190756   0.23727558
   0.14533546 -0.00836412 -0.12644965 -0.12865386  0.16899161  0.06570653
   0.03631533  0.1685855

Reward: -540.5404548966765 weights: [[ 0.20184255 -0.08373641  0.0275048   0.21596491 -0.00905812  0.06278866
  -0.05593326 -0.01133934  0.06352697 -0.00560905 -0.18749218 -0.10031843
  -0.08063559  0.2756634   0.22487073  0.15539928  0.22598734 -0.07946589
   0.20188772 -0.10382317  0.02723051 -0.0509879   0.24402908  0.28027905
  -0.02152953 -0.21057153]
 [-0.14122703  0.14965064 -0.15185692 -0.04000123  0.00225191 -0.07434646
  -0.24681408  0.07649515  0.26372434  0.0773911   0.18583918 -0.01384432
   0.14089627  0.15307271 -0.13795797  0.16884065  0.02085802 -0.00830456
  -0.03095047  0.08334549  0.18878784 -0.02553026 -0.0929547   0.12349161
   0.03262449  0.16692726]
 [ 0.22833778 -0.05135345  0.14447898 -0.0054722  -0.0707232  -0.08617104
  -0.0115291  -0.32703156  0.05666205  0.41286485  0.13500402 -0.09395648
  -0.03413102 -0.16598166 -0.16397614  0.06636056  0.20750772  0.21487019
   0.01977462  0.04373639  0.13843746  0.08491351  0.08317194 -0.03531764
   0.06890374  0.10984

Reward: -781.625760853238 weights: [[ 0.2761828   0.02489734  0.11716381  0.26874416  0.11506744  0.06469866
   0.01476569  0.10768551 -0.00284351  0.04725442 -0.19362577 -0.03919463
  -0.10474111  0.30485594  0.24656999  0.03268552  0.07438349  0.08232171
   0.07794587 -0.02849718 -0.04899229 -0.00483338  0.14316122  0.27524768
   0.05397228 -0.20263343]
 [-0.13737763  0.02112424 -0.10334673  0.01240361 -0.04263161 -0.05368122
  -0.22889901  0.10725924  0.26361679  0.06922462  0.00697023 -0.27839431
   0.10396214  0.25804806 -0.22698951  0.07685709  0.06399951 -0.08135282
   0.04177051  0.00109468  0.17115818 -0.11419853  0.01941113  0.24063745
   0.14694701  0.17898286]
 [ 0.19509616 -0.14035185  0.19076371 -0.05930232 -0.10807199 -0.04960218
   0.16265432 -0.25168537 -0.02122319  0.2371389   0.04176055 -0.0792615
  -0.11424701 -0.23541116 -0.24754344 -0.01332573  0.16847118  0.07910864
   0.03993451  0.07075819  0.14162472  0.14970344  0.03462375 -0.07030251
  -0.08803407  0.0141087

Reward: -261.44242678228875 weights: [[ 1.29346967e-01  1.08209796e-01 -2.13806820e-03  2.59570643e-01
  -6.91534283e-02  2.14067068e-01 -1.74511044e-01  8.88044997e-02
  -2.49796036e-02  1.18779777e-01 -2.92468929e-01  1.33998384e-01
  -6.52952350e-02  2.87363417e-01  1.93178324e-01  1.59351494e-02
   1.45313143e-01  1.01008200e-01  5.11850834e-02 -1.33671163e-01
   1.22018900e-01 -1.31506706e-01 -4.73017518e-02  2.54469652e-01
   1.17065583e-01 -7.67965506e-02]
 [ 3.00036222e-02  2.33332619e-01  3.78860022e-03 -6.54288183e-02
   3.03186658e-02 -4.92696043e-02 -9.81304503e-02  2.10360309e-01
   2.64951289e-01  4.05493561e-02  6.86952690e-02 -9.22884462e-02
  -2.69738147e-02  1.42670134e-01 -6.78990474e-02  7.16341232e-03
   7.00800842e-02 -1.59235596e-01  4.86263803e-03  6.33452408e-02
   1.59526471e-01 -2.15978899e-01 -7.18911016e-03  1.15881743e-01
   2.39209063e-01  5.77068191e-02]
 [ 2.80033075e-01 -1.35471336e-01  9.76008953e-02  6.28884900e-02
  -1.85260593e-01 -1.26951365e-01  

Reward: 189.76746686564206 weights: [[ 0.09455152  0.06804157 -0.00894543  0.36333193  0.02149938  0.17228617
  -0.06656958  0.06682099 -0.17679614  0.42260798 -0.400754    0.08848146
  -0.14478479  0.44462482  0.2290305  -0.09607136 -0.07457791  0.19486942
   0.05326467 -0.00398003  0.04596039 -0.13615529 -0.12102616  0.30898459
   0.06777305 -0.18777684]
 [-0.15022908  0.27064275  0.05201285  0.01890905 -0.13202885 -0.027782
  -0.1814884   0.24401474  0.41160212  0.10437898  0.03158821 -0.14324046
  -0.16365528  0.29840967  0.02132703  0.04578349  0.11705556 -0.28025802
   0.02874918  0.02834159  0.22993253 -0.24783625  0.02925442  0.17454015
   0.3906351   0.08859317]
 [ 0.16531085 -0.12879393  0.13168136  0.00482149 -0.31154329 -0.04707936
   0.28303573 -0.24862591  0.10359837  0.15204526 -0.17673148 -0.16558504
  -0.16761124 -0.01216721 -0.23345051 -0.1688079   0.17306218 -0.00831299
   0.28079214  0.07904332  0.08451035  0.03275861  0.02442108  0.13524659
  -0.12169991 -0.2022340

Reward: -442.27358851544426 weights: [[ 0.10001367  0.02487316 -0.0057198   0.39913716 -0.11932083  0.23021137
  -0.0359414   0.06488595 -0.1623503   0.42815459 -0.16535085  0.04598905
  -0.20291368  0.39729816  0.2324419  -0.05125529 -0.11763323  0.32737636
   0.00183275 -0.031451    0.01182355 -0.04457823 -0.12749284  0.29039653
   0.07864534 -0.20569107]
 [-0.14329311  0.28431305  0.05826116 -0.06737629 -0.0252071  -0.06265848
  -0.15151993  0.17355125  0.48963245  0.22054235  0.05358624 -0.08719469
  -0.16194496  0.32809511  0.03523432 -0.12538931  0.14330851 -0.14058638
   0.03701295 -0.02362675  0.09634069 -0.26724473  0.05068826  0.1461249
   0.49913866  0.0021656 ]
 [ 0.26265864 -0.08124986  0.23575974  0.10117123 -0.11457677 -0.0321347
   0.2966254  -0.17022225  0.06855298  0.13841077 -0.08310697 -0.1237564
  -0.17261812 -0.17648178 -0.19152816 -0.02521705  0.15166134 -0.07371515
   0.20377254  0.12972753  0.03569104 -0.16507339 -0.00821269  0.06164994
  -0.21539894 -0.2459724

Reward: -474.8357399322362 weights: [[ 2.53352304e-01 -7.01236135e-02 -2.62930678e-03  3.23923685e-01
   1.66690701e-02  5.75033864e-02 -7.87235944e-02  4.59061502e-02
   9.22729597e-02  1.95149749e-02 -2.59234948e-01 -1.82694181e-01
  -9.61128398e-02  3.32106638e-01  1.99231922e-01  1.09243998e-01
   1.87957427e-01 -5.27814081e-02  2.17490188e-01 -1.42244260e-01
   6.93489782e-02 -4.05304832e-02  3.09769495e-01  4.08929142e-01
  -9.93604160e-02 -2.80505391e-01]
 [-1.80697518e-01  2.40408619e-01 -1.59475755e-01  5.28149991e-02
  -1.09407600e-01 -1.06614076e-01 -3.69879354e-01  3.92495451e-02
   3.66706219e-01  1.56683497e-01  2.12475899e-01 -2.60726078e-02
   1.21551478e-01  2.04990485e-01 -5.41515929e-02  1.84207607e-01
   5.40710486e-02  2.25836383e-02 -1.00125806e-01  8.93108446e-02
   2.22852218e-01 -1.56882124e-02 -8.10788783e-02  6.58061483e-02
   1.10156140e-01  2.38939951e-01]
 [ 2.85146463e-01 -5.35943298e-02  1.48609344e-01 -2.18475525e-02
  -1.35729727e-01 -9.55889163e-02 -4

Reward: -274.40833683334864 weights: [[ 0.25424998 -0.08175984 -0.01070887  0.3210874   0.06625973  0.11344411
  -0.09246386  0.00062799  0.05871686  0.0672042  -0.37436997 -0.16260571
  -0.19589392  0.47561946  0.2753567   0.10461354  0.07032156 -0.06968825
   0.21219053 -0.10836377 -0.03541992 -0.01715003  0.21541176  0.36614441
  -0.00808521 -0.2354253 ]
 [-0.23706292  0.13797244 -0.13683042  0.09590775 -0.13863571 -0.07126725
  -0.32662014  0.06837537  0.23225889  0.06546118  0.11446894 -0.07985646
   0.05737797  0.2758765  -0.10354028  0.20921838 -0.0062682  -0.09730108
  -0.05581903  0.03590465  0.26820887 -0.12525029 -0.01020683  0.22106596
   0.15025857  0.248596  ]
 [ 0.32388107 -0.1998182   0.1391462   0.05563263 -0.20585809 -0.05576642
   0.02237336 -0.46119034  0.0468278   0.55675265  0.1010075  -0.11668776
  -0.1246314  -0.2313689  -0.28469966  0.07857288  0.29087897  0.04358853
   0.0901277   0.03708904  0.2644838   0.10456763  0.08249756 -0.07258377
  -0.00286945  0.0101

Reward: -240.19507366565227 weights: [[ 0.2564291  -0.02125854 -0.00966033  0.26239557  0.09181022  0.11257063
  -0.0426867   0.00767884  0.04243652  0.06686644 -0.27139406 -0.14535404
  -0.18504014  0.45563682  0.37015862  0.06711063  0.08185858 -0.0442585
   0.15916343 -0.0097554   0.00617023 -0.01207511  0.1751064   0.30689904
  -0.08012923 -0.22256564]
 [-0.23076992  0.11425345 -0.11951999  0.05816168 -0.13452581 -0.0075143
  -0.39231816  0.08872263  0.23870925  0.09044968  0.19675443 -0.11008367
   0.02201221  0.26001691 -0.02856215  0.21026475 -0.03841241 -0.14369876
   0.06157346  0.06991049  0.275438   -0.1066638  -0.03770755  0.31651338
   0.04631812  0.0338461 ]
 [ 0.25230412 -0.1461479   0.21236344 -0.15619149 -0.21393333 -0.03856252
  -0.01363443 -0.40577761  0.1358836   0.49456425  0.01246276 -0.09584183
  -0.17318874 -0.15291381 -0.33540513  0.03490242  0.24284608  0.00795782
   0.0769429   0.02492061  0.20314028  0.09971345 -0.05981949 -0.0646529
   0.02718068  0.0290959

Reward: -986.3335608937141 weights: [[ 3.30931714e-01 -5.07714448e-02 -6.77506777e-02  2.24511961e-01
   1.57016178e-01 -9.54215196e-03  3.71477282e-02  2.31831486e-02
   5.93980775e-02  5.45864907e-02 -2.48062343e-01 -2.00096850e-01
  -1.85151259e-01  4.04259021e-01  5.22811949e-01 -3.50275130e-02
   1.25077232e-01 -1.10197319e-01  1.62309264e-01 -3.52469789e-02
   1.70217972e-02  6.65648768e-02  1.60320729e-01  3.53594100e-01
   6.88795589e-03 -2.30146569e-01]
 [-2.13626272e-01  9.39604462e-02 -1.47953893e-01  4.23680162e-02
  -1.52206006e-01 -7.59515723e-02 -4.29381334e-01  3.59187485e-02
   3.06108912e-01  9.34576838e-02  1.44470070e-01 -1.53458688e-01
   1.41149427e-01  2.82237308e-01 -1.27882515e-01  1.74579970e-01
  -6.45852940e-02 -9.83915905e-02  7.36565071e-02  4.23077156e-02
   3.20434072e-01 -2.58108649e-02 -3.09075357e-02  3.38637861e-01
   1.20648653e-01  3.11915552e-02]
 [ 2.16124273e-01 -1.86713481e-01  2.42725418e-01 -1.17386173e-01
  -2.03272621e-01  3.50835755e-02  7

Reward: -217.48001433635997 weights: [[ 0.29618484 -0.0619027  -0.05893062  0.23358153 -0.03645737  0.05837667
  -0.0219784  -0.01472755 -0.03092325 -0.01092914 -0.22663546 -0.26510148
  -0.2752106   0.43840607  0.46561042 -0.1187763   0.10497143 -0.16935062
   0.19000579 -0.02555792 -0.02050088 -0.00818074  0.20014809  0.29903334
   0.07052052 -0.26083038]
 [-0.07522343  0.24917357 -0.18458689 -0.06203085 -0.17047887 -0.18768245
  -0.41942678  0.0949183   0.23831321  0.1652493   0.22760643 -0.02120089
   0.09604554  0.32787493 -0.02970421  0.09201351 -0.18018882 -0.08476472
   0.06746945  0.10144838  0.28199588 -0.03801093 -0.06856491  0.30926293
   0.13749949  0.11155514]
 [ 0.21862999 -0.28705601  0.22560982  0.05266979 -0.26986299  0.03359958
   0.08917936 -0.41638906  0.29761809  0.48175153  0.02040239 -0.25525544
  -0.20894982  0.08051624 -0.32107813  0.00794527  0.38447192 -0.06946448
   0.00729006  0.0376468   0.05104085  0.21747626 -0.10414935 -0.06368001
   0.12557751  0.0121

Reward: -778.1058645520853 weights: [[ 0.01353486 -0.16348537  0.11652394 -0.01394381 -0.04888382  0.04449942
   0.05703348 -0.06405118  0.14192091  0.09351976  0.08539792 -0.00418626
  -0.20134423  0.1640814   0.11738977 -0.06146146  0.0479787   0.14652677
  -0.01917356  0.09241796 -0.00152021  0.09162958 -0.08710179 -0.09376419
  -0.03062977 -0.22783134]
 [-0.0168667  -0.08432434 -0.01608446 -0.07353969  0.06745768  0.07681229
   0.06163242  0.02390086 -0.20541832  0.37119553  0.07217905  0.0817662
  -0.07779137  0.2529565  -0.35289837  0.0852462  -0.11805017  0.17735487
   0.03846008 -0.00130161 -0.14705333  0.17726301  0.12729993  0.24493919
   0.05239481  0.21546953]
 [-0.08475119  0.06911588 -0.17316702  0.08499576 -0.08634082  0.11174927
   0.12077598  0.1370472   0.08647781  0.1056265   0.0368381   0.12290137
  -0.06014596 -0.07462116  0.18042606  0.16555282  0.03533787 -0.01060509
   0.01787315  0.06140045  0.28097899 -0.20271312 -0.02056293  0.0742715
   0.18683455  0.2136979

Reward: 502.43013008810647 weights: [[ 4.11835491e-02 -8.76150963e-02  1.37356477e-02  1.25525928e-01
  -9.88149407e-02  3.83507538e-02  4.43701190e-02 -4.56253798e-02
  -3.94621780e-02  1.71324588e-01  2.01907532e-04  1.31142106e-01
  -2.43066541e-01  2.37966887e-01  7.55764460e-02 -4.43892728e-02
   3.44142147e-02  1.57616524e-01 -1.18739334e-01  1.48052416e-01
  -4.52878910e-02  1.06982448e-01 -9.74528203e-02 -1.59474764e-01
   2.54922776e-03 -1.54628214e-01]
 [-9.34623233e-02 -6.83271760e-02 -2.83535987e-03 -1.18715282e-01
   8.23447916e-02  8.74924893e-02  2.91275498e-02  1.04528656e-02
  -2.82503816e-02  3.77491536e-01  1.24114207e-01  1.37522439e-01
  -9.62657332e-02  2.72401185e-01 -2.39146610e-01  8.45585861e-02
  -1.17585164e-01  1.13785192e-01  5.47464908e-03  2.88011829e-02
  -1.02872579e-01  7.28685506e-02  3.65647510e-02  1.76820599e-01
   1.45396711e-01  1.70160887e-01]
 [ 1.90802930e-03 -3.85473260e-03 -8.91666808e-02  1.19439433e-01
  -1.42672531e-02  1.81286578e-01  2

Reward: 266.56397989077726 weights: [[ 0.08562529 -0.06686917  0.02264645  0.12623112 -0.18117056 -0.01308064
   0.06360787  0.02941922 -0.28811176  0.2844399   0.0480571   0.11308544
  -0.31558205  0.39655262 -0.16268039  0.06806661  0.08524207  0.30105946
  -0.0946216   0.06673224 -0.01437279  0.07428882 -0.17810127 -0.17939825
   0.10583582 -0.16796709]
 [-0.15952067 -0.00514593  0.03708484 -0.1053607   0.05972286  0.00175571
   0.13886868 -0.12026548  0.05490045  0.3154008   0.0940209   0.0907251
  -0.06706621  0.22482505 -0.28003896 -0.01353036 -0.13060581  0.03389324
  -0.09060709  0.08764504 -0.24530263  0.00261189  0.00893485  0.23135965
   0.17096337  0.22281501]
 [ 0.09180874  0.09755806 -0.1578035   0.14866668 -0.02010067  0.14867663
   0.29745953  0.01072978  0.08066947 -0.00729414 -0.07023601  0.11857883
  -0.00631857 -0.14780023  0.26148675  0.21697632 -0.01137866 -0.17307851
  -0.15984215  0.05077579  0.28027947 -0.18729781 -0.04690538  0.07360084
  -0.1450425   0.280888

Reward: -830.1241746517999 weights: [[ 0.06166838 -0.09008263  0.037046    0.28004855 -0.15265071 -0.03894204
  -0.03710478  0.00065213 -0.1966072   0.27420839 -0.12343591  0.10417676
  -0.29378334  0.41730077 -0.19023228  0.07318635  0.20674449  0.27621086
  -0.09536253  0.05313934 -0.1346768  -0.0164617  -0.1876394  -0.11116277
   0.10124858 -0.03187143]
 [-0.08125734 -0.01383139  0.0636673  -0.07523546  0.00078151 -0.01455794
   0.19206662 -0.1149353   0.21223294  0.2692602   0.17061595  0.05000025
  -0.04226946  0.23945717 -0.178251    0.00510679 -0.12564617  0.04410937
  -0.21086336  0.0739788  -0.21853743  0.06002364 -0.05151927  0.00499071
   0.28776869  0.14423033]
 [ 0.19172255  0.15219282 -0.02179904  0.17186801  0.02202957  0.15853535
   0.34413285  0.0882392   0.10189308 -0.12255829 -0.10542122 -0.0673086
   0.05613018  0.03315724  0.25601425  0.22568674  0.08084707 -0.25963246
  -0.10016092  0.04263975  0.26147237 -0.14566455 -0.12339003  0.10752313
  -0.32206717  0.113410

Reward: -392.2526977040401 weights: [[ 0.12846759 -0.05728319  0.11259107  0.32927016 -0.06642315 -0.04005091
  -0.08956113  0.23846121 -0.18837964 -0.09340009 -0.04069337  0.17083955
  -0.22687113  0.14547432 -0.19594807  0.06384098  0.15751574  0.28939174
  -0.06574164 -0.09875811  0.05554416  0.0763715  -0.07212017  0.04734847
   0.17624114 -0.12737702]
 [-0.05471687 -0.06687729 -0.07301859 -0.06776595  0.11484925 -0.21918686
   0.1448196  -0.14937472  0.24109476  0.25602934 -0.11246341 -0.2283961
   0.20272163  0.17919941 -0.49652608 -0.12116319 -0.00084977  0.15842791
  -0.16221579 -0.10032599 -0.12661258 -0.14733474  0.05392967 -0.10212264
   0.35495416  0.56212517]
 [ 0.28675803 -0.03072309  0.01623762  0.45265269  0.12860275  0.11663162
   0.390608    0.02313918 -0.18999113 -0.10577346  0.23694524 -0.08839694
  -0.02546471 -0.24567626  0.13772007  0.1164433  -0.06849621 -0.00874737
  -0.16770652  0.17351587  0.08150506  0.0189114   0.13637114  0.18344766
  -0.24051141  0.095235

Reward: -938.1947517523232 weights: [[-2.07369750e-02  2.24022243e-02  4.41241539e-02  5.07913253e-02
   9.00910387e-02 -2.45600793e-01  8.07458571e-02  2.03897021e-01
  -3.41859198e-01  6.58374949e-03  4.37344552e-02  1.49479532e-01
  -1.74460406e-01 -1.05931089e-01 -5.00108926e-01  7.20608320e-03
  -9.64736957e-02  1.78978585e-01 -9.83044220e-02  2.04337899e-01
   1.60980674e-01 -5.23263021e-02 -9.07264936e-02 -8.85366517e-02
  -1.81684327e-02  1.95197746e-02]
 [-1.51305776e-04  1.68528596e-01 -8.65534851e-02  2.70432655e-01
   6.75193286e-02 -6.83906716e-02  2.83406591e-01 -6.76028247e-02
   2.15340590e-02  1.07828655e-01 -1.12918599e-01 -3.04978468e-02
  -1.08365079e-01 -3.71758179e-02  1.81669363e-01 -2.05219453e-01
  -1.44205619e-02 -7.58663958e-03 -4.76517411e-03 -1.75434648e-01
   1.03210003e-02 -1.97627997e-02  4.76163421e-02 -2.70758232e-01
   1.25912503e-02  2.43426770e-01]
 [-1.53577464e-01  1.72411270e-01 -1.60275445e-01  4.15327857e-01
   1.67546474e-01 -1.18607715e-01  2

Reward: -797.0329704579509 weights: [[ 0.13242773  0.16378682  0.14511319 -0.07047716  0.10419829 -0.24300089
   0.35180464  0.35425542 -0.33479813 -0.23688989  0.16794806  0.03979767
  -0.06574233 -0.1994696  -0.4972666   0.03409037 -0.12474246  0.23578426
  -0.22266252  0.18395725  0.1684482  -0.00926621  0.19899985  0.10139864
   0.07258934  0.04053834]
 [-0.00206496 -0.04830297 -0.34474048  0.1611664  -0.0126699  -0.17788332
   0.20366243 -0.15820923 -0.11007041  0.22170529 -0.21536028 -0.20232461
   0.15003995  0.12003095 -0.1589721  -0.32952812 -0.01650615  0.06287108
   0.1450854  -0.2987268   0.04280657  0.08150667  0.00536578 -0.26545029
   0.04260949  0.39804699]
 [-0.29245311  0.00420395 -0.13089252  0.42824566  0.20798849  0.04140443
   0.19263195  0.15148847 -0.3260857  -0.09976077  0.25718574 -0.13062215
  -0.07744129  0.10730853  0.33403019  0.01456602 -0.23980231 -0.05294023
  -0.03689356  0.04621975 -0.09162171  0.14109805  0.02896057 -0.00981231
  -0.44291349  0.25729

Reward: -938.9694216491229 weights: [[ 0.27892635  0.06204497  0.11504216 -0.16364897  0.09699204 -0.29075304
   0.51887291  0.31316143 -0.46942959 -0.23117522  0.39670675  0.0151428
  -0.0565972  -0.28291913 -0.27408875  0.04217386 -0.09526515  0.35778684
  -0.24962977  0.27987853  0.20489824  0.26491049  0.12215507 -0.04833276
   0.09590541  0.12783328]
 [-0.19147793 -0.02185876 -0.44076557  0.12168596  0.04810744 -0.21085759
   0.18422959 -0.30491221 -0.00716139  0.44397096 -0.02413773 -0.15993829
   0.01680315  0.18514709 -0.16493144 -0.41401108  0.029517    0.07446781
   0.30255162 -0.46880423 -0.01440614  0.1135219   0.02817841 -0.12684186
   0.00616733  0.22020848]
 [-0.22614164 -0.04464272  0.01679984  0.61390424  0.48720168  0.19400215
   0.18941766  0.21633471 -0.20171997  0.10611866  0.42840939 -0.13353192
  -0.13067952  0.03701767  0.27603659  0.10836081 -0.35259229 -0.13194598
  -0.11477975  0.03590939 -0.08151944 -0.02942296 -0.0626928   0.02704526
  -0.56209759  0.265033

Reward: 349.83883947443223 weights: [[ 0.08746556 -0.10504216  0.06695427  0.00843033 -0.19559454  0.01176069
  -0.01439542 -0.08940263 -0.09029569  0.3468475   0.01389335  0.32135531
  -0.28504662  0.48700518  0.02642977  0.06691562  0.18854792  0.30122143
  -0.13910733  0.14039231 -0.12831821  0.07024333 -0.20071746 -0.20115946
   0.1720498  -0.2063451 ]
 [-0.08331546 -0.19810191  0.05985335 -0.2083863   0.13951084  0.06293012
   0.19551241 -0.03945154 -0.14507254  0.5261526   0.23641095  0.05875464
  -0.10819668  0.21153358 -0.44756888  0.11916422 -0.08413002  0.10003306
   0.05687715  0.06743825 -0.23966737  0.17720267  0.0078219   0.3589885
   0.07266336  0.13662805]
 [ 0.04559174  0.15289469 -0.19001424 -0.00837664  0.03044656  0.15600391
   0.30345588  0.00242058  0.07549615  0.11532269 -0.08119693  0.1558849
  -0.13872678 -0.17737991  0.41361797  0.3057062  -0.02801396 -0.10679144
  -0.16670347  0.04892389  0.4208323  -0.16699377  0.03753904  0.10448993
   0.02963226  0.3541450

Reward: -745.6465378071844 weights: [[-0.048135   -0.16297606  0.08255599 -0.06688099 -0.25636309 -0.07292879
   0.08303285 -0.18863022 -0.18767459  0.30936139  0.08283387  0.37000237
  -0.31332678  0.56697494 -0.0598408   0.07475012  0.34835308  0.42165323
  -0.12168014  0.04743682 -0.12517902 -0.02802957 -0.40131542 -0.15527002
   0.21550576 -0.22995452]
 [-0.04324993 -0.30352588  0.16287324 -0.29650384  0.15593531  0.08907463
   0.28201849  0.05926491 -0.1359548   0.51736693  0.13234929  0.06075603
  -0.15416647  0.15846028 -0.46853458  0.1258965  -0.12295417  0.06895862
   0.04405565 -0.02003854 -0.38450727  0.26155951  0.03430537  0.49309882
  -0.03187932  0.22024706]
 [ 0.01105623  0.32147921 -0.22353828 -0.02706764  0.05580008  0.18339603
   0.30303002  0.04750025  0.12037568  0.16055424 -0.03288861  0.12499627
  -0.19315467 -0.1746909   0.35307736  0.41981191 -0.04525257 -0.14426309
  -0.0987098   0.05536735  0.51428077 -0.19781422  0.16299202  0.06862663
  -0.14409739  0.29496

In [17]:
0.0001.clip(min=1e-2)

AttributeError: 'float' object has no attribute 'clip'

In [18]:
a=np.ones([3,3])

In [19]:
a=a*0.00001

In [20]:
a

array([[1.e-05, 1.e-05, 1.e-05],
       [1.e-05, 1.e-05, 1.e-05],
       [1.e-05, 1.e-05, 1.e-05]])

In [21]:
a.clip(min=1e-2)

array([[0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01]])

In [22]:
a=0.00005
a.clip(1e-2)

AttributeError: 'float' object has no attribute 'clip'

In [40]:
np.exp(2)

7.38905609893065

In [41]:
np.random.seed(1)

In [42]:
np.random.rand(3,2)

array([[4.17022005e-01, 7.20324493e-01],
       [1.14374817e-04, 3.02332573e-01],
       [1.46755891e-01, 9.23385948e-02]])

In [43]:
np.random.seed(1)

In [44]:
np.random.rand(3,2)

array([[4.17022005e-01, 7.20324493e-01],
       [1.14374817e-04, 3.02332573e-01],
       [1.46755891e-01, 9.23385948e-02]])

In [45]:
np.random.rand(3,2)

array([[0.18626021, 0.34556073],
       [0.39676747, 0.53881673],
       [0.41919451, 0.6852195 ]])

In [54]:
a=np.random.rand(3,2)

In [55]:
a

array([[0.14038694, 0.19810149],
       [0.80074457, 0.96826158],
       [0.31342418, 0.69232262]])

In [65]:
a[:,0]

array([0.14038694, 0.80074457, 0.31342418])

In [61]:
a[:,0]*a[:,1]

array([0.02781086, 0.7753302 , 0.21699065])

In [60]:
a[0]

array([0.14038694, 0.19810149])

In [62]:
0.14*0.19

0.026600000000000002

In [63]:
0.8*0.96

0.768

In [64]:
a[0,0]*a[0,1]

0.02781086158378325

In [63]:
data = range(1,5)

In [64]:
data = np.arange(6).reshape((3,2))

In [68]:
np.average(data, axis=1, weights=[1./4, 3./4])

array([0.75, 2.75, 4.75])

In [56]:
datat=np.zeros([26,5])

In [59]:
datat=np.vstack((datat,datat))

In [60]:
datat.shape

(52, 5)

In [82]:
xc=np.average(data,axis=0, weights=[1./4, 3./4,1]).reshape([1,2])

In [83]:
xc

array([[2.75, 3.75]])

In [84]:
yc=np.ones(2)

In [89]:
(xc*yc).shape

(1, 2)

In [28]:
weights_1=data.reshape([-1,1,2])
avg_weighting=[1./4, 3./4,1]
weights_1=np.average(local_1,axis=0,weights=avg_weighting_1)

NameError: name 'local_1' is not defined

In [34]:
local_1.shape

(66, 26)

In [50]:
Weighting_1.shape

(3,)

In [36]:
Weighting_1[1:,0].shape

(10,)

In [37]:
local_2.shape

(66, 26)

In [43]:
local_1.shape

(66, 26)

In [51]:
Weighting_1.shape


(3,)

In [48]:
print(data[1:,2:])

[]


In [65]:
data

array([[0, 1],
       [2, 3],
       [4, 5]])

In [66]:
data[1:,0]

array([2, 4])

In [67]:
data = range(1,9)

In [70]:
data = np.arange(9).reshape((3,3))

In [71]:
data

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [75]:
data[1:,0]

array([3, 6])

In [79]:
data[2:,0:]

array([[6, 7, 8]])

In [94]:
local_1d.shape

(12, 26)

In [82]:
avg_weighting_1d.shape

(2,)

In [83]:
print(avg_weighting_1d)

[0.02404309 0.00625549]


In [95]:
ll=local_1d.reshape([-1,2])

In [92]:
ll.shape

(156, 2)

In [96]:
llc=ll.reshape([12,26])

In [97]:
llc-local_1d

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 

In [99]:
lca=np.average(ll,axis=1,weights=avg_weighting_1d)

In [101]:
lcc=lca.reshape([6,26])

In [102]:
lcc

array([[ 2.55014886e-02,  5.27524665e-02, -1.22126338e-02,
        -4.20579900e-02,  7.47128086e-02, -9.08528634e-02,
        -3.56674858e-02,  1.54575845e-02,  4.04202678e-02,
         6.71619344e-02, -7.04969260e-02,  8.82855049e-02,
         3.19914166e-02,  1.74170172e-02, -7.87802927e-02,
        -1.99787394e-02,  8.24363647e-02,  1.14867249e-01,
        -1.30114549e-02,  2.41219415e-02, -7.10001055e-02,
         4.69713211e-02, -3.65647186e-02, -4.74943112e-02,
        -1.93085023e-02,  2.14384784e-01],
       [ 7.11451574e-02, -3.35031853e-02, -3.98009871e-02,
         4.26996119e-02, -2.30294394e-02, -3.05632668e-02,
         4.08858645e-02,  1.30932974e-02,  6.57687636e-02,
        -5.87753944e-02, -1.09535251e-01,  1.02703183e-01,
         2.08371592e-02,  6.78584279e-02,  2.47913219e-02,
         7.83008237e-02, -1.19347302e-01,  1.31706247e-02,
        -6.92202138e-02, -6.42787257e-02,  7.12870821e-02,
        -8.49309494e-02,  4.57110008e-02,  3.61527130e-02,
         7.27

In [104]:
local_1dl.shape

(312, 1)

SyntaxError: invalid syntax (<ipython-input-116-3b66d6d3ee38>, line 1)

In [117]:
a=[2 3]

SyntaxError: invalid syntax (<ipython-input-117-0066a6e896ad>, line 1)

In [119]:
a=np.array([[ 2.55014886e-02,  5.27524665e-02, -1.22126338e-02,
        -4.20579900e-02,  7.47128086e-02, -9.08528634e-02,
        -3.56674858e-02,  1.54575845e-02,  4.04202678e-02,
         6.71619344e-02, -7.04969260e-02,  8.82855049e-02,
         3.19914166e-02,  1.74170172e-02, -7.87802927e-02,
        -1.99787394e-02,  8.24363647e-02,  1.14867249e-01,
        -1.30114549e-02,  2.41219415e-02, -7.10001055e-02,
         4.69713211e-02, -3.65647186e-02, -4.74943112e-02,
        -1.93085023e-02,  2.14384784e-01],
       [ 7.11451574e-02, -3.35031853e-02, -3.98009871e-02,
         4.26996119e-02, -2.30294394e-02, -3.05632668e-02,
         4.08858645e-02,  1.30932974e-02,  6.57687636e-02,
        -5.87753944e-02, -1.09535251e-01,  1.02703183e-01,
         2.08371592e-02,  6.78584279e-02,  2.47913219e-02,
         7.83008237e-02, -1.19347302e-01,  1.31706247e-02,
        -6.92202138e-02, -6.42787257e-02,  7.12870821e-02,
        -8.49309494e-02,  4.57110008e-02,  3.61527130e-02,
         7.27094257e-02,  1.36126615e-02],
       [-1.67064908e-02,  4.60798322e-03, -1.08021788e-01,
         5.10525920e-02,  5.98024257e-02,  4.65812890e-02,
        -2.31957784e-02, -7.26181243e-02,  7.75987559e-02,
         2.49851261e-02, -5.51630016e-02,  3.69427327e-02,
        -3.47171013e-02, -1.75497931e-01,  6.13204050e-02,
        -1.06465674e-01,  9.03237416e-02, -1.56500426e-02,
        -2.19197783e-02,  1.19210617e-01, -8.03112912e-02,
        -1.80876952e-01,  1.71906381e-02,  5.74004455e-02,
         6.03544928e-02, -2.42024357e-04],
       [-1.15336708e-02,  7.22067280e-02, -1.27696250e-01,
        -4.86754186e-02,  1.92449040e-02, -1.11157898e-01,
        -9.42621534e-02, -8.35078062e-02,  1.13033741e-03,
         9.43133729e-02, -1.35477544e-01,  1.21744288e-01,
         3.48631772e-02,  1.30753623e-01, -1.29817135e-01,
        -8.16921332e-02,  1.15891475e-01,  1.21968307e-01,
         3.74755854e-02, -2.10518175e-02, -1.73153008e-02,
         2.06137914e-02, -4.83071111e-02, -9.32801281e-02,
        -5.98198829e-02,  2.99047720e-01],
       [ 4.43080354e-02, -2.31298609e-02, -8.78596236e-02,
         5.52608871e-02,  9.37111169e-02, -1.50608106e-02,
         6.43760490e-02,  2.68152141e-02,  1.10271987e-01,
        -8.31334471e-02, -1.90081889e-01,  7.93174935e-02,
         7.16765316e-03,  3.09634365e-02,  7.59668772e-03,
         1.59329355e-01, -2.23587184e-01,  8.32822804e-03,
        -1.69097883e-01, -1.23737978e-01,  1.15587915e-01,
        -1.60882146e-01,  5.46630027e-02,  6.24456092e-02,
         2.62631751e-02, -6.57363069e-02],
       [ 1.42215209e-01,  5.94310179e-02, -4.46812769e-02,
         6.99266457e-02,  1.88186442e-02,  1.27155600e-01,
         3.94511991e-02, -1.33996340e-01, -5.47956309e-04,
        -1.42325333e-02, -7.79504702e-02,  9.49384356e-04,
        -6.44518917e-02, -2.29280889e-01,  9.94059857e-02,
         1.44098823e-02,  7.57111014e-02, -6.40160422e-02,
        -5.20439192e-02,  2.64385034e-01, -1.02737277e-01,
        -3.24274562e-01, -7.10724008e-02,  7.95062441e-02,
         7.56820708e-02, -7.23131335e-02]])

In [120]:
print(lcc)

[[ 2.55014886e-02  5.27524665e-02 -1.22126338e-02 -4.20579900e-02
   7.47128086e-02 -9.08528634e-02 -3.56674858e-02  1.54575845e-02
   4.04202678e-02  6.71619344e-02 -7.04969260e-02  8.82855049e-02
   3.19914166e-02  1.74170172e-02 -7.87802927e-02 -1.99787394e-02
   8.24363647e-02  1.14867249e-01 -1.30114549e-02  2.41219415e-02
  -7.10001055e-02  4.69713211e-02 -3.65647186e-02 -4.74943112e-02
  -1.93085023e-02  2.14384784e-01]
 [ 7.11451574e-02 -3.35031853e-02 -3.98009871e-02  4.26996119e-02
  -2.30294394e-02 -3.05632668e-02  4.08858645e-02  1.30932974e-02
   6.57687636e-02 -5.87753944e-02 -1.09535251e-01  1.02703183e-01
   2.08371592e-02  6.78584279e-02  2.47913219e-02  7.83008237e-02
  -1.19347302e-01  1.31706247e-02 -6.92202138e-02 -6.42787257e-02
   7.12870821e-02 -8.49309494e-02  4.57110008e-02  3.61527130e-02
   7.27094257e-02  1.36126615e-02]
 [-1.67064908e-02  4.60798322e-03 -1.08021788e-01  5.10525920e-02
   5.98024257e-02  4.65812890e-02 -2.31957784e-02 -7.26181243e-02
   7.7

In [121]:
for a in range(2):
    lcc

In [136]:
print(local_1d[6:12])

[[ 0.01768857 -0.01386893 -0.05787283  0.07828453  0.20483754 -0.14427083
  -0.12151473 -0.07342291  0.10875512  0.15619998 -0.06772169  0.10989248
   0.0144843  -0.01735802 -0.00028552  0.02220344  0.04848413  0.03229894
   0.05158776  0.01750372 -0.02325308  0.00984654 -0.08888783 -0.06611041
  -0.00905672  0.02239462]
 [-0.09536852 -0.08657353  0.1550659   0.19547535  0.08783022  0.16807753
   0.10019957 -0.02169939  0.0125058  -0.17486179 -0.07878785  0.0398632
  -0.04675927 -0.12859745  0.07840628  0.08428011  0.08638876  0.02688441
  -0.12713748 -0.01174247 -0.01112341 -0.02266229  0.08517881 -0.00877523
  -0.01255788 -0.15785911]
 [ 0.08793698  0.17941409 -0.11079629 -0.08478138  0.08193868 -0.057032
  -0.17232981  0.17496922 -0.11473483  0.02887386 -0.05967157  0.12207091
   0.09262072  0.03032436  0.01064008  0.02517481  0.00513053  0.13756356
   0.11159786 -0.07491046  0.07062988 -0.07376934  0.05304116  0.04734782
   0.03982161 -0.01775067]
 [-0.0205675   0.12456083  0.03383

In [137]:
co_op_1d=co_op(1,env1,local_1d[6:12],True)

Reward: -983.216652366809 weights: [[ 0.12053824 -0.0542102   0.04375696  0.07326041  0.18777045 -0.01084289
  -0.18048491 -0.12218386 -0.01913746  0.02438141 -0.03708293  0.0495879
   0.08974725 -0.11254845 -0.06280241  0.08798216  0.19245732  0.09228992
   0.14546658  0.14181475  0.07132268 -0.06393885 -0.06465451 -0.03388466
  -0.07412814  0.04251796]
 [-0.23928396 -0.16465801  0.16757368  0.04708333  0.00642335  0.16599752
  -0.00333622 -0.02694    -0.04733629 -0.20601033 -0.06674966  0.0606826
  -0.05250235 -0.12665089  0.0314125   0.12623149  0.18705242  0.03237166
  -0.11680029 -0.0937602   0.02884715 -0.08134832 -0.00290495  0.02629138
  -0.03239689 -0.09389832]
 [ 0.19550746  0.15704235 -0.10842469  0.00294803  0.17303028  0.00884041
  -0.08839941  0.12583264 -0.04700209  0.13608547  0.09812138  0.07116155
   0.12573096  0.062986   -0.01868793  0.00631055  0.05789459  0.11171928
  -0.05103008 -0.05553903  0.03706661 -0.11821281 -0.06453803  0.10012862
  -0.11870932 -0.13460926

In [138]:
print(co_op_1d.reward_evaluation)

-983.216652366809


In [139]:
co_op_2d=co_op(2,env2,local_1d[6:12],True)

Reward: -930.7091690964162 weights: [[ 0.17991415 -0.04768459  0.01547834  0.13406077  0.20962484 -0.01960424
  -0.22389735 -0.09206547  0.13435573  0.01358195 -0.17474389 -0.16797908
   0.14008142 -0.29638686 -0.04048417  0.16468398  0.22164819  0.24494682
   0.10255255  0.15129299 -0.01941616 -0.14912123 -0.13255396  0.08458331
   0.05914014 -0.00946631]
 [-0.12964009 -0.14980822  0.23762866  0.00060484  0.04955396  0.18330745
  -0.08865225 -0.09337893 -0.12096981 -0.21848841  0.05422561  0.16100039
   0.0545098  -0.07461298  0.07388923  0.01999074  0.20354373  0.12840941
  -0.03639626 -0.15372336  0.0425371  -0.06439084  0.02751194  0.01018968
   0.00806973 -0.1210308 ]
 [ 0.13015916  0.16393904 -0.12509521  0.12146984  0.25294469 -0.21987653
   0.03735035  0.13650199  0.03547914  0.2685952   0.27619694  0.14037812
   0.11046867  0.02369298 -0.03263511  0.0227125   0.04891924  0.29848538
   0.08784287 -0.02120935  0.17704153 -0.00186039 -0.07817111  0.17930527
   0.10568872 -0.10947

In [140]:
print(co_op_2d.reward_evaluation)

-930.7091690964162


In [12]:
import pandas as pd 
df1 = pd.DataFrame(local_1)
df1.to_csv("local_1.csv")
df1 = pd.DataFrame(local_2)
df1.to_csv("local_2.csv")
df1 = pd.DataFrame(local_1d)
df1.to_csv("local_1d.csv")

In [13]:
df1 = pd.DataFrame(local_2d)
df1.to_csv("local_2d.csv")

In [14]:
df1 = pd.DataFrame(weights_1)
df1.to_csv("weights_1.csv")
df1 = pd.DataFrame(weights_2)
df1.to_csv("weights_2.csv")

In [33]:
class co_op_video():
    def __init__(self,hp_seed,env,weights=None,update=False):
        #self.test=hp_seed
        self.hp=Hp()
        self.hp.seed=hp_seed
        #self.hp.seed=int(time.time())
        np.random.seed(self.hp.seed)
        #self.env=gym.make(self.hp.env_name)
        #self.work_dir=mkdir('exp','brs_video{}'.format(hp_seed))
        #self.monitor_dir=mkdir(self.work_dir,'monitor')
        #self.env=wrappers.Monitor(self.env,self.monitor_dir,force=True)
        self.nb_inputs=env.observation_space.shape[0]
        self.nb_outputs=env.action_space.shape[0]
        self.policy=Policy(self.nb_inputs,self.nb_outputs)
        if update:
            self.policy.theta=weights
        self.normalizer=Normalizer(self.nb_inputs)
    #return env,policy,normalizer
    #print(hp.nb_directions)
        self.reward_evaluation,self.weights,self.sigma_r=train(env,self.policy,self.normalizer,self.hp)
    #return reward_evaluation,weights

In [53]:
hp=Hp()
hp.seed=1
np.random.seed(hp.seed)
env1=gym.make(hp.env_name)
work_dir=mkdir('exp','brs_video1')
monitor_dir=mkdir(work_dir,'monitor')
env1=wrappers.Monitor(env1,monitor_dir,force=True)
co_op_video_1=co_op_video(1,env1,local_1[4*6:5*6],True)

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
WalkerBase::__init__ start
Reward: -827.3789871987317 weights: [[ 0.24368639 -0.01850468  0.00979615  0.23369479  0.0249999  -0.19418553
  -0.23897502  0.10002709 -0.1439752  -0.18555511 -0.00359751  0.1582014
  -0.12697571  0.11300304  0.18920173 -0.0555842   0.39291617  0.09129656
   0.00535168 -0.04970305  0.22443138  0.12328946 -0.08653993 -0.0763396
   0.17706982 -0.08076734]
 [-0.03761038  0.08145479 -0.00936432 -0.01933868  0.17562054 -0.20388318
   0.01759076 -0.09244444  0.27004391  0.27095345  0.18341145 -0.02973915
   0.20427268  0.06769463 -0.27037765 -0.05125538 -0.08235912  0.0999997
  -0.1048566  -0.04692073  0.00102942 -0.07872045  0.05397767 -0.04177909
   0.08379773  0.20930829]
 [ 0.29026542 -0.06359639  0.02171806  0.2630452   0.11287797  0.12001089
   0

In [16]:
weights_1.shape

(6, 26)

In [52]:
hp=Hp()
hp.seed=2
np.random.seed(hp.seed)
env2=gym.make(hp.env_name)
work_dir=mkdir('exp','brs_video2')
monitor_dir=mkdir(work_dir,'monitor')
#env2=wrappers.Monitor(env2,monitor_dir,video_callable=lambda episode_id: episode_id%20==0,force=True)
env2=wrappers.Monitor(env2,monitor_dir,force=True)
co_op_video_2=co_op_video(2,env2,local_2[2*6:3*6],True)


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
WalkerBase::__init__ start
Reward: 443.9659915450178 weights: [[-2.82932345e-02  1.69716662e-02  6.88499297e-02  1.17723318e-01
  -2.46454653e-01  3.50798269e-02 -2.51633555e-02 -5.03678291e-02
  -4.47236646e-01  5.12845706e-01 -2.43701358e-02  2.31253237e-01
  -3.29466147e-01  6.97975476e-01 -1.45487374e-01  1.10519404e-01
   8.83872429e-02  3.28536541e-01 -1.49753820e-01  1.41796578e-01
  -7.60052164e-02  7.82811259e-02 -3.16098283e-01 -1.78193430e-01
   1.61571039e-01 -1.10669264e-01]
 [-1.55649785e-01 -2.50038766e-02  1.64955324e-01 -1.87675469e-01
   1.08083270e-01 -5.87141735e-02  3.18232150e-01 -9.24704697e-02
   1.49369323e-01  4.18876386e-01  8.29534689e-02  4.75429811e-02
  -2.13670348e-01  2.83677230e-01 -2.25839271e-01 -1.16826345e-02
  -4.36047044e-02  5.664331

In [34]:
local_2[1*6:2*6]

array([[ 2.32232678e-01, -4.46294611e-02,  7.33115307e-02,
        -2.55555435e-04, -6.49862924e-02, -2.00768774e-03,
         5.86184496e-02, -1.24463211e-01, -1.01761244e-01,
         9.28429803e-02,  1.19910844e-01,  7.09492252e-02,
        -1.77603974e-01,  1.62759596e-01, -1.50328326e-02,
        -2.59148196e-02, -9.63106968e-02,  2.72057354e-01,
        -1.29673081e-01,  8.79883911e-03, -1.32976706e-01,
         1.91774794e-01, -3.89397103e-01, -2.07033826e-01,
         1.38243647e-01, -1.73763771e-01],
       [-6.52636032e-02, -3.53026761e-02,  9.06823498e-02,
        -1.18949190e-01,  1.02420081e-01,  2.80554895e-02,
         9.30463395e-02, -5.00533145e-02, -1.91179104e-02,
         4.09154236e-01, -1.41339506e-01,  8.83827047e-02,
        -2.06168811e-01,  4.67498098e-01, -3.58700201e-01,
         1.08703315e-04, -8.98037023e-02,  4.94515436e-02,
         9.39033845e-02, -2.69435693e-02, -3.00612602e-01,
         5.99916121e-02,  1.04098182e-03,  2.58797522e-01,
         9.22

In [35]:
local_1.shape

(60, 26)

In [37]:
local_1[0:6]

array([[ 0.09857533, -0.04321631, -0.03579079,  0.06776905,  0.01338706,
        -0.13903269, -0.00242192,  0.05972336, -0.15422582,  0.01305957,
        -0.07190221, -0.01871827, -0.1421831 ,  0.16715691, -0.07673198,
         0.05802091,  0.07947249,  0.05865806, -0.05545493,  0.05348605,
         0.14300429,  0.06077916, -0.03779264, -0.01950044, -0.03466338,
        -0.00950199],
       [-0.0497492 ,  0.19209283, -0.07219576,  0.16867341, -0.04184084,
        -0.06447137,  0.07210458, -0.01714962,  0.0948479 ,  0.15197702,
         0.0789123 , -0.0202926 ,  0.02299222,  0.02421136,  0.08787295,
        -0.048247  , -0.07630673, -0.01609258, -0.00640992, -0.07216009,
         0.08545051,  0.00705288,  0.0250386 , -0.09183332,  0.02590828,
         0.21476614],
       [ 0.01999133, -0.05347642, -0.02389055,  0.22999822,  0.01882118,
        -0.02623535,  0.02059167,  0.03866346,  0.14577752,  0.0438011 ,
         0.22278041, -0.05491971,  0.01836825,  0.18949354,  0.06089882,
       

In [42]:
local_1[6:12]

array([[ 0.12907875, -0.125525  ,  0.01044309,  0.19039627, -0.04329899,
        -0.04476498, -0.05654388,  0.02142521, -0.22490023,  0.0619913 ,
        -0.09663566, -0.0830068 , -0.21288082,  0.06616003, -0.16549157,
         0.00748878,  0.08611463,  0.10818784,  0.05138643,  0.08946653,
         0.08456192, -0.00770237, -0.04667985, -0.02740411,  0.01382517,
        -0.0522625 ],
       [-0.06028353,  0.1765072 , -0.06543631,  0.01664835, -0.0662191 ,
        -0.08737367,  0.11751286, -0.00829731,  0.0934754 ,  0.07858246,
        -0.06221906,  0.01632981, -0.0013863 ,  0.03802935, -0.03041743,
        -0.07060298, -0.00204251,  0.03198242, -0.06348108, -0.08212587,
         0.02962125, -0.08194855, -0.00144078, -0.08748373,  0.23828416,
         0.38826749],
       [ 0.13427286, -0.05067705, -0.10046695,  0.42125365,  0.03619262,
         0.01841114,  0.20046283,  0.00246562,  0.12389836,  0.08870726,
         0.20946089, -0.07534692,  0.00430329,  0.20489728,  0.10712997,
       

In [49]:
print(env2.observation_space)

Box(26,)
