# Capture the Flag (RL - Policy Gradient)

- Seung Hyun Kim
- skim449@illinois.edu

## Notes
- This notebook includes:
    - Building the structure of policy driven network.
    - Training with/without render
    - Saver that save model and weights to ./model directory
    - Writer that will record some necessary datas to ./logs
- This notebook does not include running the CtF game with the RL policy. Using the network will be separately scripted in policy/policy_RL1.py.
    - cap_test.py is changed appropriately.
    
## References :
- https://github.com/awjuliani/DeepRL-Agents/blob/master/Vanilla-Policy.ipynb (source)
- https://www.youtube.com/watch?v=PDbXPBwOavc

In [1]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import matplotlib.pyplot as plt
%matplotlib inline

import time
import gym
import gym_cap
import numpy as np

# the modules that you can use to generate the policy.
import policy.patrol 
import policy.random
import policy.simple # custon written policy
import policy.policy_RL

  return f(*args, **kwds)


In [2]:
gamma = 0.99

def discount_rewards(r):
    """ take 1D float array of rewards and compute discounted reward """
    discounted_r = np.zeros_like(r)
    running_add = 0
    for t in reversed(range(0, r.size)):
        running_add = running_add * gamma + r[t]
        discounted_r[t] = running_add
    return discounted_r

## Environment Setting

In [3]:
env = gym.make("cap-v0") # initialize the environment

policy_red = policy.random.PolicyGen(env.get_map, env.get_team_red)

[33mWARN: Environment '<class 'gym_cap.envs.cap_env.CapEnvGenerate'>' has deprecated methods. Compatibility code invoked.[0m


## Network Setting

In [6]:
class agent():
    def __init__(self, lr, in_size,action_size):
        #These lines established the feed-forward part of the network. The agent takes a state and produces an action.
        self.state_input = tf.placeholder(shape=in_size,dtype=tf.float32, name='state')
        conv1 = slim.conv2d(self.state_input, 128, [3,3], scope='conv1')
        conv2 = slim.conv2d(conv1, 128, [2,2], scope='conv2')
        flat  = tf.reshape(conv2, [-1, 7*7*128])#slim.flatten(conv2)
        
        dense = slim.fully_connected(flat, action_size,
                                           biases_initializer=None)
        self.output = tf.nn.softmax(dense, name='action')
        #self.chosen_action = tf.argmax(self.output,1, name='action')

        #The next six lines establish the training proceedure. We feed the reward and chosen action into the network
        #to compute the loss, and use it to update the network.
        self.reward_holder = tf.placeholder(shape=[None],dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[None],dtype=tf.int32)
        
        self.indexes = tf.range(0, tf.shape(self.output)[0]) * tf.shape(self.output)[1] + self.action_holder
        self.responsible_outputs = tf.gather(tf.reshape(self.output, [-1]), self.indexes)

        self.loss = -tf.reduce_mean(tf.log(self.responsible_outputs)*self.reward_holder)
        
        tvars = tf.trainable_variables()
        self.gradient_holders = []
        for idx,var in enumerate(tvars):
            placeholder = tf.placeholder(tf.float32,name=str(idx)+'_holder')
            self.gradient_holders.append(placeholder)
        
        self.gradients = tf.gradients(self.loss,tvars)
    

        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.update_batch = optimizer.apply_gradients(zip(self.gradient_holders,tvars))

In [7]:
tf.reset_default_graph() # Clear the Tensorflow graph.
myAgent = agent(lr=1e-4,in_size=[None,7,7,8],action_size=5) #Load the agent.
global_step = tf.Variable(0, trainable=False, name='global_step') # global step
increment_global_step_op = tf.assign(global_step, global_step+1)
merged = tf.summary.merge_all()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


## Hyperparameters

In [9]:
total_episodes = 10000 #Set total number of episodes to train agent on.
max_ep = 999
update_frequency = 5

## Session

In [10]:
# Launch the session
sess = tf.Session()

# Setup Save and Restore Network
saver = tf.train.Saver(tf.global_variables())

writer = tf.summary.FileWriter('./logs', sess.graph)

ckpt = tf.train.get_checkpoint_state('./model')
if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Load Model : ", ckpt.model_checkpoint_path)
else:
    sess.run(tf.global_variables_initializer())
    print("Initialized Variables")

Initialized Variables


In [11]:
'''
Try to parametrize the size of vision
'''
def one_hot_encoder(state, agents):
    ret = np.zeros((len(agents),7,7,8))
    reorder = {0:0, 1:1, 2:2, 4:3, 6:4, 7:5, 8:6, 9:7} # CHANGE
    
    # Expand the observation with 3-thickness wall
    # - in order to avoid dealing with the boundary
    sx, sy = state.shape
    _state = np.ones((sx+8, sy+8)) * 8 # 8 for obstacle
    _state[4:4+sx, 4:4+sy] = state
    state = _state

    for idx,agent in enumerate(agents):
        # Initialize Variables
        x, y = agent.get_loc()
        x += 4
        y += 4
        vision = state[x-3:x+4, y-3:y+4] # limited view for the agent (5x5)
        for i in range(len(vision)):
            for j in range(len(vision[0])):
                if vision[i][j] != -1:
                    height = reorder[vision[i][j]]
                    ret[idx][i][j][height] = 1
    return ret

## Training

In [None]:
i = 0
total_reward = []
total_lenght = []

action_space = 5
n_agent = len(env.get_team_blue)

gradBuffer = sess.run(tf.trainable_variables())
for ix,grad in enumerate(gradBuffer):
    gradBuffer[ix] = grad * 0
    
while i < total_episodes:
    s = env.reset(map_size=20, render_mode='env', policy_red=policy_red)
    running_reward = 0
    ep_history = []
    for j in range(max_ep):
        observation = one_hot_encoder(s, env.get_team_blue).tolist()
        a = sess.run(myAgent.output, feed_dict={myAgent.state_input:observation})
        #Probabilistically pick an action given our network outputs.
        a = [np.random.choice(action_space, p=a[x]/sum(a[x])) for x in range(n_agent)] # divide by sum : normalize
        s1,r,d,_ = env.step(a) #Get our reward for taking an action given a bandit.
        
        # Rendering
        #env.render(mode="fast")
        #time.sleep(0.05)
        
        for obs, act in zip(observation, a):
            ep_history.append([obs,act,r,s1])
        s = s1
        running_reward += r
        if d == True:
            #Update the network.
            ep_history = np.array(ep_history)
            ep_history[:,2] = discount_rewards(ep_history[:,2])
            feed_dict={myAgent.reward_holder:ep_history[:,2],
                       myAgent.action_holder:ep_history[:,1],
                       myAgent.state_input:np.stack(ep_history[:,0])}
            grads = sess.run(myAgent.gradients, feed_dict=feed_dict)
            for idx,grad in enumerate(grads):
                gradBuffer[idx] += grad

            if i % update_frequency == 0 and i != 0:
                feed_dict= dictionary = dict(zip(myAgent.gradient_holders, gradBuffer))
                _ = sess.run(myAgent.update_batch, feed_dict=feed_dict)
                for ix,grad in enumerate(gradBuffer):
                    gradBuffer[ix] = grad * 0

            total_reward.append(running_reward)
            total_lenght.append(j)
            break


        #Update our running tally of scores.
    if i % 100 == 0:
        print(np.mean(total_reward[-100:]))
        saver.save(sess, './model/ctf_policy.ckpt', global_step=global_step)
        print("save: ", sess.run(global_step))
    i += 1
    sess.run(increment_global_step_op)

[[0.19623396 0.19623396 0.20018162 0.19623396 0.21111652]
 [0.19664705 0.19664705 0.19664705 0.19664705 0.21341184]
 [0.19744657 0.19744657 0.19744657 0.19744657 0.2102137 ]
 [0.19479829 0.19479828 0.20037274 0.19479828 0.21523239]]
[1, 4, 0, 0]
[[0.1953778  0.20181936 0.1953778  0.1953778  0.21204728]
 [0.19576396 0.1987656  0.1984522  0.19576396 0.2112543 ]
 [0.19717802 0.19717802 0.19717802 0.19717802 0.21128795]
 [0.19588462 0.19588462 0.19970529 0.19588462 0.21264088]]
[1, 1, 4, 2]
[[0.19560514 0.19827056 0.19560514 0.19560514 0.21491396]
 [0.1957317  0.1957317  0.19587292 0.1957317  0.21693206]
 [0.19579667 0.19757223 0.19837138 0.19579667 0.21246307]
 [0.19324872 0.19324873 0.20590878 0.19324873 0.21434505]]
[4, 1, 3, 1]
[[0.19698295 0.19698295 0.19698295 0.19698295 0.21206816]
 [0.19785598 0.19785598 0.19785598 0.19785598 0.20857604]
 [0.19624224 0.19624224 0.19691882 0.19624224 0.21435438]
 [0.19413044 0.19413044 0.20697042 0.19413044 0.21063833]]
[0, 2, 3, 4]
[[0.19742912 0.1

[[0.19589029 0.1964034  0.19775417 0.19589029 0.2140619 ]
 [0.20071642 0.20203847 0.1990817  0.1990817  0.1990817 ]
 [0.19616821 0.19106215 0.19573642 0.20816451 0.20886879]
 [0.19836764 0.19836764 0.19836764 0.19836764 0.20652953]]
[3, 2, 4, 4]
[[0.19703594 0.19703594 0.19812796 0.19703594 0.21076424]
 [0.19470035 0.19590898 0.19507143 0.19470035 0.21961893]
 [0.19616821 0.19106215 0.19573642 0.20816451 0.20886879]
 [0.19836764 0.19836764 0.19836764 0.19836764 0.20652953]]
[3, 4, 4, 1]
[[0.19539051 0.19793534 0.19843993 0.19539051 0.21284366]
 [0.20071642 0.20203847 0.1990817  0.1990817  0.1990817 ]
 [0.19709325 0.1912388  0.19521467 0.20913604 0.20731731]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]]
[2, 1, 1, 4]
[[0.19663769 0.19663769 0.19968672 0.19663769 0.2104002 ]
 [0.1995031  0.1995031  0.1995031  0.20198768 0.1995031 ]
 [0.20252459 0.19770254 0.19852929 0.20320898 0.19803467]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]]
[3, 0, 1, 1]
[[0.19667955 0.2

[[0.19512896 0.19847524 0.20552598 0.20064665 0.20022316]
 [0.19648671 0.19648671 0.19745032 0.19648671 0.21308947]
 [0.19836764 0.19836764 0.19836764 0.19836764 0.20652953]
 [0.1954366  0.1954366  0.20161632 0.1954366  0.21207385]]
[0, 2, 1, 2]
[[0.19512896 0.19847524 0.20552598 0.20064665 0.20022316]
 [0.19648671 0.19648671 0.19745032 0.19648671 0.21308947]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]
 [0.19413044 0.19413044 0.20697042 0.19413044 0.21063833]]
[0, 2, 3, 0]
[[0.19512896 0.19847524 0.20552598 0.20064665 0.20022316]
 [0.19605795 0.19824632 0.1984119  0.19605795 0.21122588]
 [0.19854873 0.19854873 0.20004643 0.19981512 0.203041  ]
 [0.19413044 0.19413044 0.20697042 0.19413044 0.21063833]]
[1, 1, 1, 3]
[[0.19474788 0.20481284 0.20123751 0.19620092 0.20300084]
 [0.19576396 0.1987656  0.1984522  0.19576396 0.2112543 ]
 [0.19652338 0.19652338 0.19652338 0.19652338 0.21390648]
 [0.19324872 0.19324873 0.20590878 0.19324873 0.21434505]]
[0, 2, 3, 3]
[[0.19474788 0.2

[[0.19163498 0.19975217 0.20364155 0.19163498 0.21333638]
 [0.19474788 0.20481284 0.20123751 0.19620092 0.20300084]
 [0.19350018 0.20323671 0.20599318 0.1916754  0.20559461]
 [0.19216985 0.19216985 0.20775948 0.19216985 0.21573101]]
[0, 2, 4, 1]
[[0.19182447 0.19578363 0.20498466 0.19182447 0.2155828 ]
 [0.1963693  0.20151405 0.19925202 0.19380556 0.20905909]
 [0.19528729 0.20537417 0.20194758 0.19047396 0.20691705]
 [0.19324872 0.19324873 0.20590878 0.19324873 0.21434505]]
[4, 1, 3, 3]
[[0.19380161 0.19710003 0.19968314 0.19380161 0.21561362]
 [0.19704773 0.19167037 0.2002899  0.1914959  0.2194961 ]
 [0.19249798 0.20021848 0.20160763 0.19249798 0.21317792]
 [0.19216985 0.19216985 0.20775948 0.19216985 0.21573101]]
[1, 0, 4, 4]
[[0.19374397 0.19374397 0.19857374 0.19374397 0.22019443]
 [0.19704773 0.19167037 0.2002899  0.1914959  0.2194961 ]
 [0.18944806 0.20923522 0.20142764 0.18944806 0.210441  ]
 [0.19537258 0.19537258 0.20133096 0.19537258 0.21255127]]
[1, 3, 4, 2]
[[0.19615075 0.1

[[0.19597758 0.19597758 0.19597758 0.19597758 0.21608967]
 [0.19512896 0.19847524 0.20552598 0.20064665 0.20022316]
 [0.19819841 0.19819841 0.19819841 0.19819841 0.20720641]
 [0.19036818 0.19430007 0.21071246 0.19417116 0.21044812]]
[3, 0, 4, 2]
[[0.19836764 0.19836764 0.19836764 0.19836764 0.20652953]
 [0.19512896 0.19847524 0.20552598 0.20064665 0.20022316]
 [0.19760075 0.19760075 0.19760075 0.19760075 0.20959704]
 [0.19036818 0.19430007 0.21071246 0.19417116 0.21044812]]
[0, 4, 3, 0]
[[0.19836764 0.19836764 0.19836764 0.19836764 0.20652953]
 [0.2000387  0.1958658  0.20868313 0.19954656 0.1958658 ]
 [0.19698493 0.19698493 0.1977882  0.19698493 0.21125704]
 [0.19036818 0.19430007 0.21071246 0.19417116 0.21044812]]
[1, 1, 4, 0]
[[0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]
 [0.2017717  0.19387369 0.20428284 0.19618192 0.20388989]
 [0.19656582 0.19851616 0.19656582 0.19656582 0.21178636]
 [0.19036818 0.19430007 0.21071246 0.19417116 0.21044812]]
[1, 2, 2, 0]
[[0.19576502 0.1

[[0.19578102 0.19779496 0.19620644 0.19578102 0.21443662]
 [0.19667442 0.19667442 0.2091064  0.19667442 0.20087026]
 [0.19834197 0.19834197 0.19834197 0.19834197 0.20663208]
 [0.19361906 0.20127699 0.20376793 0.19361906 0.20771696]]
[1, 1, 3, 4]
[[0.1981154  0.1981154  0.1981154  0.1981154  0.20753841]
 [0.1940238  0.19849245 0.19911757 0.1940238  0.21434236]
 [0.19825709 0.19825709 0.19825709 0.19825709 0.20697165]
 [0.19616145 0.19662565 0.20187064 0.19616145 0.20918085]]
[2, 2, 1, 3]
[[0.19666994 0.19666994 0.20197101 0.19666994 0.20801918]
 [0.19344519 0.2007202  0.20959562 0.19344519 0.20279373]
 [0.19736397 0.19736397 0.19736397 0.20053092 0.20737712]
 [0.19616856 0.1974851  0.20174192 0.19616856 0.20843582]]
[4, 1, 0, 2]
[[0.19841558 0.19841558 0.19841558 0.19841558 0.20633768]
 [0.19066581 0.20582357 0.20481683 0.19066581 0.20802799]
 [0.19915035 0.19915035 0.19915035 0.19915035 0.20339862]
 [0.19610108 0.19610108 0.20861548 0.19610108 0.2030812 ]]
[3, 1, 3, 3]
[[0.19673494 0.1

[[0.19745043 0.19745043 0.19745043 0.19745043 0.21019831]
 [0.19223042 0.198381   0.2043523  0.19223042 0.21280582]
 [0.19825709 0.19825709 0.19825709 0.19825709 0.20697165]
 [0.19508332 0.19508332 0.19508332 0.19508332 0.21966667]]
[3, 3, 1, 4]
[[0.19648616 0.19745792 0.19675113 0.19648616 0.21281865]
 [0.19223042 0.198381   0.2043523  0.19223042 0.21280582]
 [0.19834197 0.19834197 0.19834197 0.19834197 0.20663208]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]]
[3, 4, 0, 4]
[[0.19536152 0.1984205  0.19867776 0.19536152 0.21217863]
 [0.19412488 0.20211583 0.20124221 0.19412488 0.20839225]
 [0.19854873 0.19854873 0.20004643 0.19981512 0.203041  ]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]]
[0, 4, 0, 4]
[[0.19536152 0.1984205  0.19867776 0.19536152 0.21217863]
 [0.19248565 0.19519056 0.2091766  0.19248565 0.21066152]
 [0.19854873 0.19854873 0.20004643 0.19981512 0.203041  ]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]]
[1, 2, 2, 2]
[[0.19713467 0.1

[[0.19916534 0.19916534 0.19916534 0.19916534 0.20333862]
 [0.19386242 0.20100784 0.20018515 0.19386242 0.21108218]
 [0.189497   0.20463559 0.19934572 0.189497   0.21702468]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]]
[0, 1, 2, 3]
[[0.19916534 0.19916534 0.19916534 0.19916534 0.20333862]
 [0.19404529 0.20440976 0.19781087 0.19404529 0.20968886]
 [0.19470742 0.20385323 0.20210919 0.19089442 0.20843565]
 [0.19836764 0.19836764 0.19836764 0.19836764 0.20652953]]
[3, 2, 3, 3]
[[0.19748025 0.20068899 0.19748025 0.19748025 0.20687024]
 [0.19143662 0.20793816 0.20504259 0.19143662 0.20414604]
 [0.19212426 0.19737335 0.20225011 0.19212426 0.21612799]
 [0.19826432 0.19826432 0.19826432 0.19826432 0.20694275]]
[0, 2, 2, 0]
[[0.19748025 0.20068899 0.19748025 0.19748025 0.20687024]
 [0.18906379 0.20602824 0.20366552 0.18906379 0.21217866]
 [0.19307555 0.20201908 0.20291086 0.19340661 0.20858788]
 [0.19826432 0.19826432 0.19826432 0.19826432 0.20694275]]
[1, 4, 4, 3]
[[0.19916534 0.1

[[0.19501854 0.19570671 0.19607715 0.19501854 0.21817906]
 [0.19826432 0.19826432 0.19826432 0.19826432 0.20694275]
 [0.19296353 0.19682571 0.20601125 0.19296353 0.211236  ]
 [0.19703595 0.19703594 0.19812797 0.19703594 0.21076426]]
[0, 3, 2, 0]
[[0.19501854 0.19570671 0.19607715 0.19501854 0.21817906]
 [0.19539616 0.19539616 0.19853479 0.19539616 0.2152767 ]
 [0.19592685 0.199434   0.2112317  0.19566575 0.19774175]
 [0.19703595 0.19703594 0.19812797 0.19703594 0.21076426]]
[4, 2, 2, 4]
[[0.19483314 0.19671953 0.19531752 0.19483314 0.21829666]
 [0.19303398 0.19697338 0.20090552 0.19303398 0.21605311]
 [0.19694112 0.19694112 0.21037343 0.19694112 0.19880319]
 [0.19703595 0.19703594 0.19812797 0.19703594 0.21076426]]
[4, 4, 2, 0]
[[0.19625677 0.20049854 0.19648045 0.19625677 0.21050756]
 [0.19539616 0.19539616 0.19853479 0.19539616 0.2152767 ]
 [0.1970064  0.19820124 0.20905063 0.19684774 0.19889402]
 [0.19648671 0.19648671 0.19745032 0.19648671 0.21308947]]
[4, 1, 0, 2]
[[0.19809867 0.2

[[0.19378959 0.19378959 0.2044564  0.19378959 0.2141748 ]
 [0.19648671 0.19648671 0.19745032 0.19648671 0.21308947]
 [0.19616821 0.19106215 0.19573642 0.20816451 0.20886879]
 [0.19760075 0.19760075 0.19760075 0.19760075 0.20959704]]
[0, 4, 0, 3]
[[0.19378959 0.19378959 0.2044564  0.19378959 0.2141748 ]
 [0.19655794 0.19888288 0.19655794 0.19655794 0.2114433 ]
 [0.19568935 0.19103043 0.19771707 0.20779167 0.20777144]
 [0.19698493 0.19698493 0.1977882  0.19698493 0.21125704]]
[4, 0, 1, 1]
[[0.19860579 0.19889997 0.19860579 0.19860579 0.20528266]
 [0.19667943 0.19667943 0.19816196 0.19667943 0.21179973]
 [0.20312068 0.19959418 0.19361216 0.20522235 0.19845068]
 [0.19760075 0.19760075 0.19760075 0.19760075 0.20959704]]
[3, 3, 2, 4]
[[0.1983616  0.2029405  0.1983616  0.1983616  0.20197468]
 [0.20054111 0.20135145 0.19666858 0.19666858 0.20477033]
 [0.20637271 0.19751881 0.19175793 0.204977   0.19937359]
 [0.19842122 0.19842122 0.19842122 0.19842122 0.20631509]]
[3, 1, 2, 4]
[[0.1970368  0.2

[[0.19270861 0.19270861 0.19949214 0.19270861 0.22238208]
 [0.19947714 0.19947714 0.20209138 0.19947714 0.19947714]
 [0.19784264 0.19784264 0.19784264 0.19803117 0.20844086]
 [0.19833118 0.197505   0.197505   0.197505   0.20915379]]
[2, 4, 2, 1]
[[0.19522949 0.19522949 0.20113328 0.19522949 0.21317828]
 [0.19947714 0.19947714 0.20209138 0.19947714 0.19947714]
 [0.19646105 0.19646105 0.20247117 0.19646105 0.20814572]
 [0.20077349 0.19671129 0.19671129 0.19671129 0.20909274]]
[2, 3, 2, 0]
[[0.19522949 0.19522949 0.20113328 0.19522949 0.21317828]
 [0.19943157 0.19943157 0.20227373 0.19943157 0.19943157]
 [0.19381648 0.19358905 0.20721936 0.19545133 0.20992371]
 [0.20077349 0.19671129 0.19671129 0.19671129 0.20909274]]
[3, 0, 2, 3]
[[0.19605388 0.19605388 0.19605388 0.19605388 0.21578453]
 [0.19943157 0.19943157 0.20227373 0.19943157 0.19943157]
 [0.19381648 0.19358905 0.20721936 0.19545133 0.20992371]
 [0.19833118 0.197505   0.197505   0.197505   0.20915379]]
[0, 3, 1, 4]
[[0.19605388 0.1

[[0.2        0.2        0.2        0.2        0.2       ]
 [0.20510831 0.19537258 0.19457659 0.19457659 0.21036597]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]
 [0.19890095 0.19569501 0.19569501 0.20913045 0.20057853]]
[4, 3, 2, 1]
[[0.2        0.2        0.2        0.2        0.2       ]
 [0.20321181 0.19432372 0.19432372 0.19432372 0.21381703]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]
 [0.2020628  0.19869392 0.20040743 0.19666672 0.20216915]]
[0, 2, 1, 3]
[[0.2        0.2        0.2        0.2        0.2       ]
 [0.20982166 0.19252174 0.19252174 0.1997875  0.20534743]
 [0.19576502 0.19576502 0.19576502 0.19576502 0.21693993]
 [0.19890095 0.19569501 0.19569501 0.20913045 0.20057853]]
[0, 3, 2, 2]
[[0.2        0.2        0.2        0.2        0.2       ]
 [0.19640785 0.19500805 0.19439067 0.20161901 0.21257436]
 [0.19508332 0.19508332 0.19508332 0.19508332 0.21966667]
 [0.19890095 0.19569501 0.19569501 0.20913045 0.20057853]]
[4, 2, 3, 1]
