In [None]:
import malmo
import malmo.MalmoPython as MalmoPython
import os
import sys
import time
import json
import numpy as np

import gym
from gym import error, spaces, utils
from gym.utils import seeding

In [None]:
import functools
# Rebind the module-level name `print` to a partial that always passes
# flush=True, so output written by later cells is flushed on every call.
print = functools.partial(print, flush=True)

In [None]:
def Menger(xorg, yorg, zorg, size, blocktype, variant, holetype):
    """Return the DrawCuboid XML lines for a Menger-sponge style solid.

    A solid cube of ``blocktype``/``variant`` with edge length ``size`` is
    emitted first, anchored at (xorg, yorg, zorg). Square tunnels of
    ``holetype`` are then punched through it along all three axes, starting
    with a cell size equal to ``size`` and dividing the cell by three each pass
    until it drops below 3.

    Returns:
        One string with one ``<DrawCuboid .../>`` element per line, each line
        terminated by a newline.
    """
    far_x = xorg + size - 1
    far_y = yorg + size - 1
    far_z = zorg + size - 1

    # Solid chunk first; every later cuboid carves a hole out of it.
    pieces = [GenCuboidWithVariant(xorg, yorg, zorg, far_x, far_y, far_z, blocktype, variant)]

    cell = size
    while cell >= 3:
        third = cell // 3
        starts = range(0, size, cell)
        for a in starts:
            for b in starts:
                # Tunnel running the full depth along z.
                x = xorg + a
                y = yorg + b
                pieces.append(GenCuboid(x + third, y + third, zorg,
                                        (x + 2 * third) - 1, (y + 2 * third) - 1, far_z,
                                        holetype))
                # Tunnel running the full width along x.
                y = yorg + a
                z = zorg + b
                pieces.append(GenCuboid(xorg, y + third, z + third,
                                        far_x, (y + 2 * third) - 1, (z + 2 * third) - 1,
                                        holetype))
                # Tunnel running the full height along y.
                pieces.append(GenCuboid(x + third, yorg, z + third,
                                        (x + 2 * third) - 1, far_y, (z + 2 * third) - 1,
                                        holetype))
        cell = third

    return "\n".join(pieces) + "\n"

def GenCuboid(x1, y1, z1, x2, y2, z2, blocktype):
    """Return a ``<DrawCuboid/>`` XML element spanning two corner points.

    The six coordinates are converted with ``str``; ``blocktype`` must already
    be a string and is written to the ``type`` attribute unchanged.
    """
    corners = (("x1", x1), ("y1", y1), ("z1", z1), ("x2", x2), ("y2", y2), ("z2", z2))
    attributes = ""
    for label, value in corners:
        attributes += label + '="' + str(value) + '" '
    return "<DrawCuboid " + attributes + 'type="' + blocktype + '"/>'

def GenCuboidWithVariant(x1, y1, z1, x2, y2, z2, blocktype, variant):
    """Return a ``<DrawCuboid/>`` XML element that also carries a ``variant``.

    Coordinates are converted with ``str``; ``blocktype`` and ``variant`` must
    already be strings and are written to their attributes unchanged.
    """
    labelled = zip(("x1", "y1", "z1", "x2", "y2", "z2"), (x1, y1, z1, x2, y2, z2))
    coordinates = " ".join(label + '="' + str(value) + '"' for label, value in labelled)
    block_attrs = ' type="' + blocktype + '" variant="' + variant + '"'
    return "<DrawCuboid " + coordinates + block_attrs + "/>"

In [None]:
import random as rd

# Standalone <DrawingDecorator> XML fragment: clears a sphere of air, draws a
# size-27 Menger sponge of smooth-granite stone, then places a lava / obsidian /
# diamond_block pad at y=39. Menger(...) is evaluated when this cell runs.
# NOTE(review): nothing in the visible notebook reads this variable
# (missionXML builds its own decorator from createBlocks) — confirm whether it
# is still needed.
drawingDecorator = '''<DrawingDecorator>
                    <DrawSphere x="-27" y="70" z="0" radius="30" type="air"/>''' + Menger(-40, 40, -13, 27, "stone", "smooth_granite", "air") + '''
                    <DrawCuboid x1="-25" y1="39" z1="-2" x2="-29" y2="39" z2="2" type="lava"/>
                    <DrawCuboid x1="-26" y1="39" z1="-1" x2="-28" y2="39" z2="1" type="obsidian"/>
                    <DrawBlock x="-27" y="39" z="0" type="diamond_block"/>
                  </DrawingDecorator>'''

def createBlocks(n, randomize=False):
    """Return ``n`` concatenated ``<DrawBlock/>`` elements forming an obstacle row.

    Every block is a ``diamond_block`` at x=0, y=56. The first one sits at
    z=-1; each following block is placed further along negative z, either a
    fixed 4 blocks away or, when ``randomize`` is truthy, a random 3..10 blocks
    away (drawn with ``rd.randint``, once per block).
    """
    fragments = []
    z_pos = -1
    for _ in range(n):
        fragments.append('''<DrawBlock x="0" y="56" z="''' + str(z_pos) + '''" type="diamond_block" />''')
        # The gap is drawn after every block, including the last one, so the
        # random generator is advanced exactly n times.
        gap = rd.randint(3, 10) if randomize else 4
        z_pos -= gap
    return "".join(fragments)

def missionXML(mspertick, test, randomize=False):
    """Build the Malmo mission definition as an XML string.

    Args:
        mspertick: Value written into ``<MsPerTick>`` (converted with ``str``).
        test: When truthy the ``ServerQuitFromTimeUp`` limit is 30000 ms,
            otherwise 10000 ms.
        randomize: Forwarded to ``createBlocks(1000, randomize)`` to choose
            between fixed and random spacing of the obstacle blocks.

    Returns:
        The mission XML: a flat world with the generated blocks drawn in, and
        one Survival-mode agent placed at (0.5, 56.0, 0.5) with yaw 180 that
        receives full-stats, ``floor3x3`` grid and distance-from-``Origin``
        observations and accepts continuous movement commands.
    """
    return '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
            <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            
              <About>
                <Summary>Hello world!</Summary>
              </About>
              
              <ModSettings>
                  <MsPerTick>''' + str(mspertick) + '''</MsPerTick>
              </ModSettings>
              
            <ServerSection>
              <ServerInitialConditions>
                <Time>
                    <StartTime>1000</StartTime>
                    <AllowPassageOfTime>false</AllowPassageOfTime>
                </Time>
                <Weather>clear</Weather>
              </ServerInitialConditions>
              <ServerHandlers>
                  <FlatWorldGenerator forceReset='1' generatorString="3;7,44*49,73,35:1,159:4,95:13,35:13,159:11,95:10,159:14,159:6,35:6,95:6;12;"/>
                  <DrawingDecorator>
                     '''+ createBlocks(1000, randomize) + '''
                  </DrawingDecorator>
                  <ServerQuitFromTimeUp timeLimitMs="''' + (str(30000) if test else str(10000)) + '''"/>
                  <ServerQuitWhenAnyAgentFinishes/>
                </ServerHandlers>
              </ServerSection>
              
              <AgentSection mode="Survival">
                <Name>MalmoTutorialBot</Name>
                <AgentStart>
                    <Placement x="0.5" y="56.0" z="0.5" yaw="180"/>
                </AgentStart>
                <AgentHandlers>
                  <ObservationFromFullStats/>
                  <ObservationFromGrid>
                      <Grid name="floor3x3">
                        <min x="-1" y="-1" z="-1"/>
                        <max x="1" y="0" z="1"/>
                      </Grid>
                  </ObservationFromGrid>
                  <ObservationFromDistance>
                      <Marker name="Origin" x="0.5" y="56.0" z="0.5" />
                  </ObservationFromDistance>
                  <ContinuousMovementCommands turnSpeedDegs="180"/>
                </AgentHandlers>
              </AgentSection>
            </Mission>'''




In [None]:
def setup_malmo(port, mspertick, test, randomize=False) -> [MalmoPython.AgentHost, MalmoPython.MissionSpec, MalmoPython.MissionRecordSpec]:
    """Start a Malmo mission on a local client and wait until it has begun.

    Args:
        port: Port of the Minecraft client on 127.0.0.1 to connect to.
        mspertick: Tick length passed through to ``missionXML``.
        test: Passed through to ``missionXML`` (selects the mission time limit).
        randomize: Passed through to ``missionXML`` (random obstacle spacing).

    Returns:
        ``(agent_host, my_mission, my_mission_record)`` for the running mission.

    Raises:
        SystemExit: with code 1 if the command-line arguments cannot be parsed
            or the mission cannot be started after three attempts; with code 0
            if the "help" argument was received.
    """
    # Create default Malmo objects:
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        # sys.exit rather than the interactive exit() helper: exit() is not
        # guaranteed to exist and, inside an IPython kernel, does not stop the
        # running code, which would let execution continue past the failure.
        sys.exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        sys.exit(0)

    my_mission = MalmoPython.MissionSpec(missionXML(mspertick, test, randomize), True)
    my_mission_record = MalmoPython.MissionRecordSpec()

    # Create client pool
    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

    # Attempt to start a mission, pausing 2 s between failed attempts:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, client_pool, my_mission_record, 0, 'experimentID')
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                # Must really abort here: otherwise the wait loop below would
                # spin forever on a mission that never started.
                sys.exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print("Waiting for the mission to start ", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for state_error in world_state.errors:
            print("Error:", state_error.text)

    print()
    print("Mission running ", end=' ')

    return agent_host, my_mission, my_mission_record

In [None]:
def try_observation(agent_host):
    """Poll the agent host once and return ``(finished, state)``.

    ``finished`` is True when the world state reports that the mission is no
    longer running. ``state`` is ``None`` when no observation arrived since the
    previous poll; otherwise it is ``(grid, dist)`` taken from the most recent
    observation, where ``grid`` is the ``floor3x3`` entry (0 if absent) and
    ``dist`` is ``0.5 - ZPos`` (``ZPos`` defaulting to 0).

    Any errors attached to the world state are printed.
    """
    world_state = agent_host.getWorldState()
    for problem in world_state.errors:
        print("Error:", problem.text)

    mission_over = not world_state.is_mission_running

    if not world_state.number_of_observations_since_last_state > 0:
        return mission_over, None

    # Only the newest observation matters; it is a JSON document.
    latest = json.loads(world_state.observations[-1].text)
    grid = latest.get(u'floor3x3', 0)
    dist = 0.5 - latest.get(u'ZPos', 0)
    return mission_over, (grid, dist)

In [None]:
def get_observation(agent_host):
    """Poll until an observation arrives or the mission is finished.

    Repeatedly calls ``try_observation`` without sleeping between polls.

    Returns:
        ``(done, grid, dist)``; ``grid`` and ``dist`` are both ``None`` when
        polling stopped without any observation.
    """
    while True:
        done, state = try_observation(agent_host)
        if done or state is not None:
            break

    if state is None:
        return done, None, None
    grid, dist = state
    return done, grid, dist

In [None]:
def convert_observation(malmo_obs, output_shape):
    '''
    Turn a grid of block names into a 0/1 occupancy array.

    The input is converted to a numpy array and reshaped to ``output_shape``;
    cells equal to 'air' become 0 and every other cell becomes 1.
    '''
    blocks = np.reshape(np.array(malmo_obs), output_shape)
    is_air = blocks == 'air'
    return np.where(is_air, 0, 1)

In [None]:
def handle_action(agent_host, actions, mspertick, use_multidiscrete):
    """Send one move/jump action to the agent and return its penalty.

    With ``use_multidiscrete`` the action is a ``(move, jump)`` pair (the A2C
    layout); otherwise it is one integer decoded as ``move = actions % 3`` and
    ``jump = actions // 3`` (the DQN layout). The move index is shifted down by
    one so it ranges over -1 / 0 / 1.

    The commands are held for two ticks (``2 * mspertick`` milliseconds) and
    then reset to zero. The returned penalty is currently always 0.
    """
    if use_multidiscrete:
        move_index, jump = actions
    else:
        jump = actions // 3
        move_index = actions % 3

    move = move_index - 1  # between -1 and 1

    for command in ("move " + str(move), "jump " + str(jump)):
        agent_host.sendCommand(command)

    time.sleep(2 * mspertick / 1000)

    for command in ("jump 0", "move 0"):
        agent_host.sendCommand(command)

    return 0

In [None]:
class MinecraftEnv(gym.Env):
    """Gym environment that runs an obstacle course in Minecraft through Malmo.

    Observations are a 0/1 occupancy encoding of the 2x3x3 block grid around
    the agent, exposed either flattened (``MultiBinary(18)``) or as a
    ``Box`` of shape (2, 3, 3). Actions are either ``MultiDiscrete((3, 2))``
    (move, jump) or ``Discrete(6)`` encoding the same combinations. The reward
    of a step is the change in travelled distance minus the action penalty.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, port=10000, mspertick=50, test=False, use_multidiscrete=True, use_multibinary=True, randomize=False):
        """Store the configuration and define the observation/action spaces.

        Args:
            port: Port of the Minecraft client used by ``setup_malmo``.
            mspertick: Tick length in milliseconds for the mission.
            test: Passed to ``setup_malmo`` (selects the mission time limit).
            use_multidiscrete: ``MultiDiscrete((3, 2))`` actions when True,
                ``Discrete(6)`` otherwise.
            use_multibinary: Flat ``MultiBinary`` observations when True,
                ``Box`` of shape (2, 3, 3) otherwise.
            randomize: Passed to ``setup_malmo`` (random obstacle spacing).
        """
        self.port = port
        self.mspertick = mspertick
        self.test = test
        self.use_multidiscrete = use_multidiscrete
        self.randomize = randomize
        # set up environment variables (state space, action space, rewards...)
        if use_multibinary:
            self.obs_shape = (2 * 3 * 3,)  # [y, z, x]
            self.observation_space = spaces.MultiBinary(self.obs_shape[0])
        else:
            self.obs_shape = (2, 3, 3)
            self.observation_space = spaces.Box(low=0, high=1, shape=self.obs_shape)
        if use_multidiscrete:
            self.action_space = spaces.MultiDiscrete((3, 2))  # (for A2C) [0] => move -1 / 0 / 1 ; [1] => jump 0 / 1
        else:
            self.action_space = spaces.Discrete(6)  # (for DQN) (move -1 / 0 / 1) * (jump 0 / 1)
        # Distance reported by the most recent observation of the current episode.
        self.distance = 0

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``.

        When the mission has finished, an all-zero observation with the same
        shape as regular observations is returned together with reward 0 and
        ``done=True``.
        """
        # handle action
        penalty = handle_action(self.agent_host, action, self.mspertick, self.use_multidiscrete)
        # get new observation
        finished, grid, dist = get_observation(self.agent_host)
        if finished:
            # Use the full obs_shape: indexing [0] yielded a length-2 vector
            # for the (2, 3, 3) Box layout, which does not match the space.
            return np.zeros(self.obs_shape), 0, True, {}

        obs = convert_observation(grid, self.obs_shape)
        # calculate reward: progress since the previous step, minus penalty
        reward = dist - self.distance
        reward -= penalty
        self.distance = dist

        return obs, reward, finished, {}

    def reset(self):
        """Log the previous episode's distance, start a new mission, return the first observation.

        The distance is appended as one line to ``'<use_multidiscrete>results.txt'``
        (i.e. ``Trueresults.txt`` or ``Falseresults.txt``) in the working directory.
        """
        # save results
        with open(str(self.use_multidiscrete) + 'results.txt', 'a') as f:
            f.write(str(self.distance) + '\n')
        # set up malmo
        agent_host, my_mission, my_mission_record = setup_malmo(self.port, self.mspertick, self.test, self.randomize)
        self.agent_host = agent_host
        self.my_mission = my_mission
        self.my_mission_record = my_mission_record
        self.distance = 0

        finished, grid, dist = get_observation(self.agent_host)
        return convert_observation(grid, self.obs_shape)

    def render(self, mode='human'):
        """No-op."""
        pass

    def close(self):
        """No-op; nothing is released here."""
        pass

In [None]:
################ Test random agent
# Smoke test: run 20 episodes of at most 100 steps each, sampling every action
# uniformly from the action space and printing the reward of each step.

env = MinecraftEnv()
for i_episode in range(20):
    # reset() starts a fresh Malmo mission and returns the first observation.
    observation = env.reset()
    for t in range(100):
        # render() is a no-op in MinecraftEnv; kept for the usual Gym loop shape.
        env.render()
        #print(observation)        
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        print(reward)
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break
env.close()


In [None]:
######################x With real baseline algorithms

from stable_baselines.common.policies import MlpPolicy, CnnPolicy
from stable_baselines.deepq.policies import MlpPolicy as DqnMlpPolicy, CnnPolicy as DqnCnnPolicy
from stable_baselines.common import make_vec_env, env_checker
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import A2C, DQN
from stable_baselines.common.vec_env.util import copy_obs_dict, dict_to_obs, obs_space_info


In [None]:
#####################x Train the algo

# Parallel environments
#env_checker.check_env(MinecraftEnv())
#env = make_vec_env(MinecraftEnv, n_envs=1)
# env = DummyVecEnv([lambda: MinecraftEnv(10000), lambda: MinecraftEnv(10002)])

#env = MinecraftEnv(randomize=True)
#model = A2C(MlpPolicy, env, verbose=1)
#try:
#    model.learn(total_timesteps=25000)
#    model.save("a2c_rand")
#except:
#    print("error")


# Train a DQN agent on the randomized course using the Discrete(6) action space.
env = MinecraftEnv(randomize=True, use_multidiscrete=False)
model = DQN(DqnMlpPolicy, env, verbose=1)
try:
    model.learn(total_timesteps=25000)
    model.save("dqn_rand")
except Exception:
    # Keep the cell from aborting the notebook, but show what actually went
    # wrong. Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit through, so training can still be interrupted by hand.
    import traceback
    print("error")
    traceback.print_exc()

#del model # remove to demonstrate saving and loading




In [None]:
#####################x Test the algo
# Evaluate a saved DQN model for 20 episodes on the randomized course with
# test=True; each episode runs until step() reports done.

#model = A2C.load("a2c_rand_pen")
# NOTE(review): the training cell in this notebook saves "dqn_rand", whereas
# this loads "dqn_rand_pen" — that file must already exist from elsewhere;
# confirm which model is meant to be evaluated.
model = DQN.load("dqn_rand_pen")

env = MinecraftEnv(test=True, randomize=True, use_multidiscrete=False)
for i_episode in range(20):
    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
        if dones:
            break
env.close()

    