# Complex Imitation Learning in MineRL
This tutorial contains a simple example of how to build an imitation-learning-based agent that can solve the MineRLNavigateDense-v0 environment. For more information about that environment, see the [MineRL Docs](http://minerl.io/docs/environments/index.html#minerlnavigatedense-v0).

For more imitation learning algorithms, such as DAgger implemented in TensorFlow, see this GitHub repo: [Dagger](https://github.com/zsdonghao/Imitation-Learning-Dagger-Torcs).

Parts of this tutorial are based on code by [Arthur Juliani](https://medium.com/@awjuliani/super-simple-reinforcement-learning-tutorial-part-2-ded33892c724).

In [1]:
from __future__ import division

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
%matplotlib inline
import matplotlib.pyplot as plt
import math

# Python 2/3 compatibility: on Python 3 the name `xrange` does not exist, so
# alias it to `range`. Only NameError is expected here; anything else should
# surface instead of being silently swallowed.
try:
    xrange = xrange
except NameError:
    xrange = range
    
#env_name = 'MineRLTreechop-v0'
env_name = 'MineRLObtainIronPickaxe-v0'
data_path = '/media/kimbring2/6224AA7924AA5039/minerl_data'

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
#Used to initialize weights for policy and value output layers
def normalized_columns_initializer(std=1.0):
    """Build a weight initializer whose columns each have L2 norm `std`.

    The returned callable takes a shape (plus the `dtype` and
    `partition_info` arguments, which it ignores), draws standard-normal
    float32 values, rescales every column (axis 0) to norm `std`, and wraps
    the result in a `tf.constant`.
    """
    def _initializer(shape, dtype=None, partition_info=None):
        sample = np.random.randn(*shape).astype(np.float32)
        # Per-column L2 norm, kept 2-D so it broadcasts against `sample`.
        column_norms = np.sqrt(np.square(sample).sum(axis=0, keepdims=True))
        sample *= std / column_norms
        return tf.constant(sample)

    return _initializer

In [6]:
# Row counts of the two output weight matrices defined later in this cell:
# H is used for W_wp and H_tree for W_tree. They have to equal the flattened
# conv feature sizes (presumably 4*4*64 = 1024 and 3*3*128 = 1152 for a
# 64x64 input with the VALID convolutions below -- TODO confirm).
H = 1024
H_tree = 1152

# Start from an empty default graph so re-running the cell does not pile up
# duplicate ops and variables.
tf.reset_default_graph()

# Batch of 64x64 RGB point-of-view frames (input of the tree-chop network).
state_pov = tf.placeholder(shape=[None,64,64,3], dtype=tf.float32)

# Batch of 64x64 images with 18 channels, one per inventory item count
# (input of the wooden-pickaxe network).
state_item = tf.placeholder(shape=[None,64,64,18], dtype=tf.float32)

# Tree-chop policy network: four bias-free ReLU convolutions over the POV
# frame. These layers are created outside any variable scope, so slim assigns
# the default names (Conv, Conv_1, ...); the checkpoint filter in the test
# cell relies on them NOT living under the 'wooodenpickaxe' scope.
conv1 = slim.conv2d(inputs=state_pov, num_outputs=32, kernel_size=[8,8], stride=[4,4], padding='VALID', 
                    biases_initializer=None, activation_fn=tf.nn.relu, weights_initializer=slim.variance_scaling_initializer())
conv2 = slim.conv2d(inputs=conv1, num_outputs=64, kernel_size=[4,4], stride=[2,2], padding='VALID', 
                    biases_initializer=None, activation_fn=tf.nn.relu, weights_initializer=slim.variance_scaling_initializer())
conv3 = slim.conv2d(inputs=conv2, num_outputs=64, kernel_size=[3,3], stride=[1,1], padding='VALID', 
                    biases_initializer=None, activation_fn=tf.nn.relu, weights_initializer=slim.variance_scaling_initializer())
# Unlike the first three layers, conv4 uses slim's default weight initializer.
conv4 = slim.conv2d(inputs=conv3, num_outputs=128, kernel_size=[2,2], stride=[1,1], padding='VALID', 
                    biases_initializer=None, activation_fn=tf.nn.relu)

convFlat_tree = slim.flatten(conv4)

# Linear read-out (no bias) to 7 discrete tree-chop actions.
W_tree = tf.get_variable("W", shape=[H_tree,7], initializer=tf.contrib.layers.xavier_initializer())
score_tree = tf.matmul(convFlat_tree, W_tree)
probability_tree = tf.nn.softmax(score_tree)
# One-hot expert action labels for the tree-chop policy.
real_action_tree = tf.placeholder(shape=[None,7], dtype=tf.int32)
# Mean cross-entropy between expert labels and logits. Note: no optimizer in
# this file minimizes loss_tree; the tree weights are only restored from a
# checkpoint.
loss_tree = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=real_action_tree, logits=score_tree))

# Wooden-pickaxe (crafting) policy network. The scope name 'wooodenpickaxe'
# (three o's) is matched as a literal string when variables are split into
# savers elsewhere in this file, so it must not be renamed on its own.
with tf.variable_scope('wooodenpickaxe'):
    # conv1..conv3 rebind the Python names used by the tree network above;
    # the tree tensors stay reachable through convFlat_tree / score_tree.
    conv1 = slim.conv2d(inputs=state_item, num_outputs=32, kernel_size=[8,8], stride=[4,4], padding='VALID', 
                        biases_initializer=None, activation_fn=tf.nn.relu, weights_initializer=slim.variance_scaling_initializer())
    conv2 = slim.conv2d(inputs=conv1, num_outputs=64, kernel_size=[4,4], stride=[2,2], padding='VALID', 
                        biases_initializer=None, activation_fn=tf.nn.relu, weights_initializer=slim.variance_scaling_initializer())
    conv3 = slim.conv2d(inputs=conv2, num_outputs=64, kernel_size=[3,3], stride=[1,1], padding='VALID', 
                        biases_initializer=None, activation_fn=tf.nn.relu, weights_initializer=slim.variance_scaling_initializer())
    convFlat_wp = slim.flatten(conv3)
    
    # Linear read-out (no bias) to 20 discrete place/craft/smelt actions.
    W_wp = tf.get_variable("W_wp", shape=[H,20], initializer=tf.contrib.layers.xavier_initializer())
    score_wp = tf.matmul(convFlat_wp, W_wp)
    probability_wp = tf.nn.softmax(score_wp)
    # One-hot expert action labels for the wooden-pickaxe policy.
    real_action_wp = tf.placeholder(shape=[None,20], dtype=tf.int32)
    # Mean cross-entropy between expert labels and logits (behaviour cloning loss).
    loss_wp = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=real_action_wp, logits=score_wp))

# Adam (learning rate 1e-4) on the wooden-pickaxe loss only. loss_wp is built
# from state_item and the layers under the 'wooodenpickaxe' scope, so the
# tree-chop weights receive no gradient from this op.
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_wp)
# Register loss_wp as a scalar summary; `merged` bundles all summaries
# registered in the graph so far.
tf.summary.scalar('loss_wp', loss_wp)
merged = tf.summary.merge_all()
#tree_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='treechop')
#print("tree_vars: " + str(tree_vars))
#optimizer = tf.train.AdamOptimizer(0.0005)

'''
gradients = optimizer.compute_gradients(mean_loss, var_list=q_func_vars)
for i, (grad, var) in enumerate(gradients):
    if grad is not None:
        common_net_coeff = 1.0 / 4
        grad *= common_net_coeff
        gradients[i] = (tf.clip_by_norm(grad, 10), var)
        gradients[i] = (grad, var)
            
            
apply_grads = optimizer.apply_gradients(gradients)

merged = tf.summary.merge_all()
'''

'\ngradients = optimizer.compute_gradients(mean_loss, var_list=q_func_vars)\nfor i, (grad, var) in enumerate(gradients):\n    if grad is not None:\n        common_net_coeff = 1.0 / 4\n        grad *= common_net_coeff\n        gradients[i] = (tf.clip_by_norm(grad, 10), var)\n        gradients[i] = (grad, var)\n            \n            \napply_grads = optimizer.apply_gradients(gradients)\n\nmerged = tf.summary.merge_all()\n'

### Running the Agent and Environment

# Test

In [4]:
import minerl
import gym

# Create the MineRL environment selected by env_name and reset it to obtain
# the first observation.
env = gym.make(env_name)
obs = env.reset()

In [7]:
import random

# Exploration rate: with probability `e` the greedy action is replaced by a
# uniformly random one.
e = 0.05

# Inventory entries, in the channel order fed to the `state_item` placeholder.
# The order must match the one used when the wooden-pickaxe network was trained.
_ITEM_KEYS = ['coal', 'cobblestone', 'crafting_table', 'dirt', 'furnace',
              'iron_axe', 'iron_ingot', 'iron_ore', 'iron_pickaxe', 'log',
              'planks', 'stick', 'stone', 'stone_axe', 'stone_pickaxe',
              'torch', 'wooden_axe', 'wooden_pickaxe']

# Tree-chop policy: output index -> (camera, jump, forward).
# 'attack' is held for every tree action.
_TREE_ACTIONS = [
    ([0, -10], 0, 1),
    ([0, 10], 0, 1),
    ([-10, 0], 0, 1),
    ([10, 0], 0, 1),
    ([0, 0], 0, 0),
    ([0, 0], 1, 1),
    ([0, 0], 0, 0),
]

# Wooden-pickaxe policy: output index ->
# (place, craft, nearbyCraft, nearbySmelt, camera[1]).
# NOTE(review): indices 16 and 17 also move camera[1] by -5 / +5. This is
# preserved from the original code but looks like a leftover -- confirm.
_WP_ACTIONS = (
    [(place, 0, 0, 0, 0) for place in range(1, 7)]      # 0-5:   place 1..6
    + [(0, craft, 0, 0, 0) for craft in range(1, 5)]    # 6-9:   craft 1..4
    + [(0, 0, nearby, 0, 0) for nearby in range(1, 7)]  # 10-15: nearbyCraft 1..6
    + [(0, 0, 7, 0, -5)]                                # 16:    nearbyCraft 7
    + [(0, 0, 0, 1, 5)]                                 # 17:    nearbySmelt 1
    + [(0, 0, 0, 2, 0)]                                 # 18:    nearbySmelt 2
    + [(0, 0, 0, 0, 0)]                                 # 19:    no-op
)


def _inventory_channels(pov, counts):
    """Stack one constant plane per inventory item, each filled with its count.

    `pov` only provides the spatial shape; `counts` maps every name in
    _ITEM_KEYS to the amount held.
    """
    plane_shape = list(pov.shape[:-1]) + [1]
    planes = [np.ones(shape=plane_shape, dtype=np.float32) * counts[key]
              for key in _ITEM_KEYS]
    return np.concatenate(planes, axis=-1)


def _restore_latest(sess, var_list, checkpoint_dir):
    """Restore `var_list` from the latest checkpoint found in `checkpoint_dir`.

    Raises IOError when the directory holds no checkpoint, instead of failing
    later with an AttributeError on None.
    """
    saver = tf.train.Saver(var_list)
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise IOError('No checkpoint found in ' + checkpoint_dir)
    saver.restore(sess, ckpt.model_checkpoint_path)


def _apply_tree_action(action, index):
    """Fill `action` in place with the tree-chop move for policy output `index`."""
    camera, jump, forward = _TREE_ACTIONS[index]
    action['camera'] = list(camera)  # copy so the lookup table is never mutated
    action['jump'] = jump
    action['forward'] = forward
    action['attack'] = 1
    action['sprint'] = 0


def _apply_wp_action(action, index):
    """Fill `action` in place with the crafting move for policy output `index`."""
    place, craft, nearby_craft, nearby_smelt, camera_1 = _WP_ACTIONS[index]
    # 'place' is always an int here; the original used the string 'none' for
    # index 9 only, which was inconsistent with every other branch.
    action['place'] = place
    action['craft'] = craft
    action['nearbyCraft'] = nearby_craft
    action['nearbySmelt'] = nearby_smelt
    action['attack'] = 0
    action['camera'][0] = 0
    action['camera'][1] = camera_1
    action['forward'] = 0
    action['jump'] = 0


# Split the graph variables by top-level scope so that each sub-network can
# be restored from its own checkpoint directory.
variables = slim.get_variables_to_restore()
variables_treechop = [v for v in variables if v.name.split('/')[0] not in ['wooodenpickaxe']]
variables_wp = [v for v in variables if v.name.split('/')[0] in ['wooodenpickaxe']]
tree_path = '/home/kimbring2/MineRL_Git/model/MineRLTreechop-v0/'
wp_path = '/home/kimbring2/MineRL_Git/model/MineRLObtainIronPickaxe-v0/'

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    print('Loading Tree Model...')
    _restore_latest(sess, variables_treechop, tree_path)

    print('Loading Wooden Model...')
    _restore_latest(sess, variables_wp, wp_path)

    env.init()
    obs = env.reset()
    net_reward = 0
    done = False
    while not done:
        # Scale the frame from [0, 255] to [-0.5, 0.5].
        pov = obs['pov'].astype(np.float32) / 255.0 - 0.5
        inventory = obs['inventory']
        item = _inventory_channels(pov, inventory)

        action_tree, action_wp = sess.run([probability_tree, probability_wp],
                                          feed_dict={state_pov: [pov], state_item: [item]})

        action = env.action_space.noop()
        if inventory['log'] < 15:
            # Not enough logs yet: keep chopping trees.
            if np.random.rand() >= e:
                tree_index = np.argmax(action_tree)
            else:
                tree_index = random.randint(0, len(_TREE_ACTIONS) - 1)
            _apply_tree_action(action, tree_index)
        else:
            # Enough logs: hand control to the crafting policy. The random
            # action is drawn over all 20 outputs (the original drew 0..6).
            if np.random.rand() >= e:
                wp_index = np.argmax(action_wp)
            else:
                wp_index = random.randint(0, len(_WP_ACTIONS) - 1)
            _apply_wp_action(action, wp_index)

        # Movement keys that neither policy ever uses.
        action['back'] = 0
        action['left'] = 0
        action['right'] = 0
        action['sprint'] = 0

        obs, reward, done, info = env.step(action)
        # Accumulate before leaving the loop so the terminal step's reward
        # is counted too.
        net_reward += reward

    print("Total reward: ", net_reward)

variables_treechop: [<tf.Variable 'Conv/weights:0' shape=(8, 8, 3, 32) dtype=float32_ref>, <tf.Variable 'Conv_1/weights:0' shape=(4, 4, 32, 64) dtype=float32_ref>, <tf.Variable 'Conv_2/weights:0' shape=(3, 3, 64, 64) dtype=float32_ref>, <tf.Variable 'Conv_3/weights:0' shape=(2, 2, 64, 128) dtype=float32_ref>, <tf.Variable 'W:0' shape=(1152, 7) dtype=float32_ref>]
Loading Tree Model...
INFO:tensorflow:Restoring parameters from /home/kimbring2/MineRL_Git/model/MineRLTreechop-v0/model-1044710.cptk
Total reward:  24.0


# Train
The MineRL package provides a dataset of human gameplay that can be used to make training more efficient. First, we train our network on this dataset, and then we use the pretrained network for reinforcement learning. This should reduce training time tremendously.

For more information about that dataset, see this [MineRL Dataset Docs](http://minerl.io/docs/tutorials/data_sampling.html).

As you can see, the network not only does much better than random actions, but achieves the goal of 200 points per episode, thus solving the task!

In [19]:
import minerl
import glob
import pickle

# Collect the pickled wooden-pickaxe demonstration files: one list of action
# files and one list of state files (presumably one pair per episode).
action_files = glob.glob("/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_*.pkl")
state_files = glob.glob("/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_state_*.pkl")

# Number of action files found.
print(len(action_files))

from random import randrange

# Draw 10 random indices into the file list; the result is discarded here.
np.random.choice(len(action_files), 10)

# Pick 10 random action file paths; as the last expression of the cell, the
# notebook displays the resulting array.
np.array(action_files)[np.random.choice(len(action_files), 10).astype(int)]

1294


array(['/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_438.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_1174.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_991.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_316.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_670.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_203.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_406.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_678.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_601.pkl',
       '/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_695.pkl'],
      dtype='<U78')

In [23]:
import minerl
import glob
import pickle

# Demonstration data: one pickled action dict and one pickled state dict per
# episode. glob returns files in arbitrary order, so both lists are sorted to
# make index i of each list refer to the same episode id.
action_files = sorted(glob.glob("/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_action_*.pkl"))
state_files = sorted(glob.glob("/media/kimbring2/6224AA7924AA5039/minerl_data/WoodenPickaxe/wp_state_*.pkl"))
if not action_files or len(action_files) != len(state_files):
    raise ValueError('Expected matching, non-empty action/state file lists, got %d and %d'
                     % (len(action_files), len(state_files)))

# Inventory entries, in the channel order fed to the `state_item` placeholder.
_ITEM_KEYS = ['coal', 'cobblestone', 'crafting_table', 'dirt', 'furnace',
              'iron_axe', 'iron_ingot', 'iron_ore', 'iron_pickaxe', 'log',
              'planks', 'stick', 'stone', 'stone_axe', 'stone_pickaxe',
              'torch', 'wooden_axe', 'wooden_pickaxe']

# Expert action groups in priority order: (action key, number of non-zero
# options, index of the first option in the 20-way one-hot label).
_WP_ACTION_GROUPS = [
    ('place', 6, 0),         # place 1..6       -> 0..5
    ('craft', 4, 6),         # craft 1..4       -> 6..9
    ('nearbyCraft', 7, 10),  # nearbyCraft 1..7 -> 10..16
    ('nearbySmelt', 2, 17),  # nearbySmelt 1..2 -> 17..18
]
_NUM_WP_ACTIONS = 20
_NOOP_INDEX = 19  # label used when none of the groups above is active


def _inventory_channels(pov, counts):
    """Stack one constant plane per inventory item, each filled with its count.

    `pov` only provides the spatial shape; `counts` maps every name in
    _ITEM_KEYS to the amount held.
    """
    plane_shape = list(pov.shape[:-1]) + [1]
    planes = [np.ones(shape=plane_shape, dtype=np.float32) * counts[key]
              for key in _ITEM_KEYS]
    return np.concatenate(planes, axis=-1)


def _wp_action_label(act, i):
    """Return the 20-way one-hot label (list of ints) for expert step `i`.

    The first matching group in _WP_ACTION_GROUPS wins; if no group has a
    recognised non-zero value the no-op label (index 19) is returned.
    """
    index = _NOOP_INDEX
    for key, n_options, offset in _WP_ACTION_GROUPS:
        value = act[key][i]
        matches = [option for option in range(1, n_options + 1) if value == option]
        if matches:
            index = offset + matches[0] - 1
            break
    label = [0] * _NUM_WP_ACTIONS
    label[index] = 1
    return label


def _load_pickle(path):
    """Load one pickle file, closing the handle afterwards.

    NOTE: pickle.load can execute arbitrary code embedded in the file; only
    use this on trusted, locally generated demonstration data.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


#data = minerl.data.make(env_name, data_path)

# Only the wooden-pickaxe sub-network is saved and restored by this cell.
variables = slim.get_variables_to_restore()
variables_wooodenpickaxe = [v for v in variables if v.name.split('/')[0] in ['wooodenpickaxe']]

init = tf.global_variables_initializer()
restore = False
with tf.Session() as sess:
    sess.run(init)
    saver = tf.train.Saver(variables_wooodenpickaxe)
    train_writer = tf.summary.FileWriter('/home/kimbring2/MineRL_Git/train_summary/' + env_name, sess.graph)
    model_path = '/home/kimbring2/MineRL_Git/model/' + env_name

    if restore:
        ckpt = tf.train.get_checkpoint_state(model_path)
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise IOError('No checkpoint found in ' + model_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

    episode_count = 0
    for k in range(0, 100):
        # Sample 10 episode indices from the FULL pool on every iteration.
        # The original overwrote action_files/state_files with the sample,
        # shrinking the pool to 10 files after the first iteration.
        batch_indices = np.random.choice(len(action_files), 10)

        # Train on every sampled episode. The original looped over the tuple
        # (0, len - 1), i.e. only the first and last sampled file.
        for j in batch_indices:
            act = _load_pickle(action_files[j])
            state = _load_pickle(state_files[j])

            length = len(act['place'])
            if length == 0:
                # np.stack cannot handle an empty episode; skip it.
                continue

            action_list = []
            state_pov_list = []
            state_item_list = []
            for i in range(length):
                # Scale the frame from [0, 255] to [-0.5, 0.5].
                pov = state['pov'][i].astype(np.float32) / 255.0 - 0.5
                counts = {key: state[key][i] for key in _ITEM_KEYS}

                state_pov_list.append(pov)
                state_item_list.append(_inventory_channels(pov, counts))
                action_list.append(_wp_action_label(act, i))

            feed_dict = {state_pov: np.stack(state_pov_list, 0),
                         state_item: np.stack(state_item_list, 0),
                         real_action_wp: np.stack(action_list, 0)}

            # One pass both records the loss summary and applies the update.
            summary, _ = sess.run([merged, train_step], feed_dict=feed_dict)
            train_writer.add_summary(summary, k)

            # Count trained episodes so the checkpoint below is written every
            # 10 episodes; the counter was never incremented before, so a
            # checkpoint was written after every single episode.
            episode_count += 1
            if episode_count % 10 == 0:
                saver.save(sess, model_path + '/model-' + str(k) + '.cptk')
                print("Saved Model")

    # Flush pending summaries to disk and release the event file.
    train_writer.close()

Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Instructions for updating:
Use standard file APIs to delete files with this prefix.
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Saved Model
Save