# Importing and Configs

In [12]:
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import numpy as np
from tabulate import tabulate
from enum import Enum
from copy import copy, deepcopy

# Defining Environments

## objects in environment

In [None]:
class WorldObj:
    """Base class for every kind of object that can sit in a map cell.

    The ``repr`` of an object is its one-character map symbol; this base
    class supplies a single blank as the default symbol.
    """

    def __init__(self, name):
        # Short label identifying the kind of object.
        self.name = name

    def __repr__(self):
        blank_symbol = ' '
        return blank_symbol
    
class Empty(WorldObj):
    """A vacant cell; it keeps the symbol inherited from ``WorldObj``."""

    def __init__(self):
        super().__init__(name='empty')
    
class Log(WorldObj):
    """A log placed on the map, shown as ``_``."""

    def __init__(self):
        super().__init__(name='log')

    def __repr__(self):
        return "_"
    
class Gold(WorldObj):
    """The gold cell, shown as ``G``."""

    def __init__(self):
        super().__init__(name='gold')

    def __repr__(self):
        return "G"
    
class Food(WorldObj):
    """A food cell, shown as ``8``."""

    def __init__(self):
        super().__init__(name='food')

    def __repr__(self):
        return "8"
    
    
class Trap(WorldObj):
    """A trap cell, shown as ``X``."""

    def __init__(self):
        super().__init__(name='trap')
        # Placeholder attribute; nothing in the visible code reads it yet.
        self.state = None

    def __repr__(self):
        return "X"
    
    
class Stream(WorldObj):
    """A flowing-water cell, shown as ``~``.

    Streams are only meant to flow down or to the right.

    Args:
        _dir: integer value of the ``Action`` member giving the flow direction.
    """

    def __init__(self, _dir: int):
        super().__init__(name='stream')
        flow_direction = _dir
        self.dir = flow_direction

    def __repr__(self):
        return "~"
    
class Miner(WorldObj):
    """Marker for the agent, shown as ``*``."""

    def __init__(self):
        super().__init__(name='miner')

    def __repr__(self):
        return "*"
        

## Defining Environment

In [None]:
class Action(Enum):
    """Discrete actions understood by ``GoldHuntEnv``.

    The integer values are the action ids passed to ``GoldHuntEnv.step``.
    Each ``LOG_*`` value is exactly 4 greater than the movement action for
    the same direction; ``GoldHuntEnv.craft_log`` relies on that offset.
    """

    # No movement.
    STOP = 0
    # Movement actions (one cell per step).
    RIGHT = 1
    DOWN = 2
    LEFT = 3
    UP = 4
    # Log-crafting actions: place a log in the neighbouring cell.
    LOG_RIGHT = 5
    LOG_DOWN = 6
    LOG_LEFT = 7
    LOG_UP = 8
    
    

class GoldHuntEnv(gym.Env):
    """Square grid world in which a miner searches for gold.

    The map is a ``map_size`` x ``map_size`` list of lists of ``WorldObj``
    instances indexed ``[row][col]``. The miner's position is kept separately
    in ``agent_pos`` and is only drawn on top of the map when rendering.

    Rewards returned by ``step`` (later rules override earlier ones):
        -1   default for any step
        -5   any log-crafting action, whether or not a log was placed
        -50  a stream carried the miner into the bottom-right corner (ends episode)
        +15  the step ends on ``Food`` (the food is not removed from the map)
        +50  the step ends on ``Gold`` (ends episode)
    """

    metadata = {'render.modes': ['human']}
    static_map_size = 7
    movement_acts = [Action.RIGHT, Action.LEFT, Action.DOWN, Action.UP,]
    craft_acts = [Action.LOG_RIGHT, Action.LOG_LEFT, Action.LOG_DOWN, Action.LOG_UP,]

    def __init__(self, random=False, ascii_rep=True):
        """Build the environment on the fixed 7x7 map.

        Args:
            random: request a randomly generated map. Not implemented.
            ascii_rep: when True, ``render`` prints an ASCII table of the map.

        Raises:
            NotImplementedError: if ``random`` is true. Previously this
                silently produced an object without a map that failed on
                first use.
        """
        if random:
            raise NotImplementedError('random map generation is not implemented yet')

        self.ascii_rep = ascii_rep
        self.map_size = self.static_map_size
        self.action_space = spaces.Discrete(len(Action))
        # One discrete observation per cell (see pos_to_state).
        self.observation_space = spaces.Discrete(self.map_size ** 2)
        self._reset_state()

    def _reset_state(self):
        """Put the miner, the map and the counters back to their initial values."""
        self.time_step = 0
        self.agent_pos = [0, 0]
        self.env_map = self.generate_static_map(self.agent_pos)
        self.prev_action = None

    def generate_static_map(self, agent_pos):
        """Return a fresh copy of the hard-coded map.

        ``agent_pos`` is accepted for interface compatibility but is not used:
        the miner is deliberately not stored in the map, because doing so
        would overwrite whatever object occupies its cell.
        The hard-coded coordinates assume ``map_size`` is at least 7.
        """
        env_map = [[Empty() for _ in range(self.map_size)] for _ in range(self.map_size)]

        for row, col in ((2, 0), (2, 1), (2, 2), (2, 3), (2, 5), (0, 3)):
            env_map[row][col] = Trap()

        env_map[0][6] = Food()
        env_map[6][0] = Food()

        env_map[6][6] = Gold()

        # Row 4 is a stream flowing to the right across the whole map.
        for col in range(self.map_size):
            env_map[4][col] = Stream(_dir=Action.RIGHT.value)

        return env_map

    def get_pos_obj(self, pos):
        """Return the map object stored at ``pos`` (``[row, col]``)."""
        return self.env_map[pos[0]][pos[1]]

    def _get_next_pos(self, current_loc, action):
        """Return the position reached from ``current_loc`` by ``action``.

        Movement is clamped to the map, so moving into a wall returns a
        position equal to ``current_loc``. Non-movement actions also return
        an equal position. The result is always a new list; ``current_loc``
        is never mutated or returned directly.

        Raises:
            ValueError: if ``action`` is not a valid ``Action`` value.
        """
        action_dir = Action(action)
        next_pos = copy(current_loc)

        if action_dir == Action.RIGHT:
            next_pos[1] = self._restricted_move(next_pos[1], 1)
        elif action_dir == Action.LEFT:
            next_pos[1] = self._restricted_move(next_pos[1], -1)
        elif action_dir == Action.UP:
            next_pos[0] = self._restricted_move(next_pos[0], -1)
        elif action_dir == Action.DOWN:
            next_pos[0] = self._restricted_move(next_pos[0], 1)

        return next_pos

    def _restricted_move(self, init_val, res):
        """Add ``res`` to a coordinate and clamp it to ``[0, map_size - 1]``."""
        return max(0, min(init_val + res, self.map_size - 1))

    def pos_to_state(self, pos):
        """Flatten ``[row, col]`` into a single row-major state index."""
        return pos[0]*self.map_size + pos[1]

    def on_border(self, pos):
        """Return True when ``pos`` lies on the outermost row or column."""
        last = self.map_size - 1
        return pos[0] in (0, last) or pos[1] in (0, last)

    def craft_log(self, current_pos, action: int):
        """Place a ``Log`` in the cell next to ``current_pos``.

        The target cell is the neighbour in the direction encoded by the
        ``LOG_*`` action. Whatever object was in that cell is replaced.

        Returns:
            True if a log was placed; False if ``action`` is not a crafting
            action or the target would fall outside the map.
        """
        craft_action = Action(action)
        if craft_action not in self.craft_acts:
            return False

        # Each LOG_* value is the matching movement value plus 4.
        log_loc = self._get_next_pos(current_pos, craft_action.value - 4)
        if log_loc == current_pos:
            # Clamped by the wall: there is no neighbouring cell that way.
            return False

        self.env_map[log_loc[0]][log_loc[1]] = Log()
        return True

    def step(self, action: int):
        """Advance the environment by one action.

        Args:
            action: integer value of an ``Action`` member.

        Returns:
            ``(state, reward, done, info)`` where ``state`` is the row-major
            index of the miner's new cell and ``info`` is an empty dict.

        Raises:
            ValueError: if ``action`` is not a valid ``Action`` value.
        """
        requested = Action(action)
        done = False
        reward = -1
        info = {}

        if requested in self.craft_acts:
            # Crafting costs the same whether or not the log could be placed.
            self.craft_log(self.agent_pos, action)
            reward = -5

        next_pos = self._get_next_pos(self.agent_pos, action)
        next_obj = self.get_pos_obj(next_pos)

        if isinstance(next_obj, Trap):
            # TODO: the trap rule was left unfinished (an `if` with no body,
            # which made the whole cell a syntax error). Traps are a no-op
            # until the intended penalty/termination is decided.
            pass

        last = self.map_size - 1
        # A stream keeps pushing the miner along until it reaches a
        # non-stream cell or is swept into the bottom-right corner.
        while isinstance(next_obj, Stream):
            next_pos = self._get_next_pos(next_pos, next_obj.dir)
            next_obj = self.get_pos_obj(next_pos)

            if self.on_border(next_pos):
                if next_pos[0] == last and next_pos[1] == last:
                    done = True
                    reward = -50
                    # Stop here; otherwise a stream cell in the corner would
                    # push against the wall forever.
                    break
                # Only a stream cell has a direction to redirect; the border
                # cell may hold something else (for example a crafted log).
                if isinstance(next_obj, Stream):
                    flow = Action(next_obj.dir)
                    if next_pos[0] == last and flow == Action.DOWN:
                        next_pos[1] = self._restricted_move(next_pos[1], 1)
                    elif next_pos[1] == last and flow == Action.RIGHT:
                        next_pos[0] = self._restricted_move(next_pos[0], 1)

            next_obj = self.get_pos_obj(next_pos)

        if isinstance(next_obj, Food):
            reward = 15

        if isinstance(next_obj, Gold):
            done = True
            reward = 50

        self.time_step += 1
        self.agent_pos = next_pos
        self.prev_action = action

        return self.pos_to_state(self.agent_pos), reward, done, info

    def reset(self):
        """Restore the initial map and miner position; return the start state.

        The start state is 0 because the miner starts at ``[0, 0]``.
        """
        self._reset_state()
        return self.pos_to_state(self.agent_pos)

    def get_env_rep(self):
        """Return the map as a grid-formatted table with the miner drawn on top."""
        # Copying the rows is enough: only one cell reference is swapped and
        # the map objects themselves are not modified.
        top_layer = [list(row) for row in self.env_map]
        top_layer[self.agent_pos[0]][self.agent_pos[1]] = Miner()

        return tabulate(top_layer, headers=[], tablefmt='grid')

    def render(self, mode='human'):
        """Print the ASCII map when ``ascii_rep`` is enabled; ``mode`` is unused."""
        if self.ascii_rep:
            print(self.get_env_rep())

    def close(self):
        """Nothing to release."""
        pass
    
# Smoke check: build the environment with its default settings and draw the
# starting layout.
env = GoldHuntEnv(random=False, ascii_rep=True)
env.render()

In [33]:
# Manual play: type an action id at the prompt to move the miner, or 'q' to stop.
done = False
env = GoldHuntEnv()
env.render()

while not done:
    raw = input('action [0-{}], q to quit: '.format(len(Action) - 1)).strip().lower()
    if raw in ('q', 'quit'):
        # Leave cleanly instead of requiring a kernel interrupt.
        break
    try:
        # int() rejects non-numeric text; Action() rejects ids outside the enum.
        action = Action(int(raw)).value
    except ValueError:
        print('invalid action {!r}; enter an integer from 0 to {}'.format(raw, len(Action) - 1))
        continue
    state, reward, done, _ = env.step(action)
    env.render()

+---+---+---+---+---+---+---+
| * |   |   | X |   |   | 8 |
+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |
+---+---+---+---+---+---+---+
| X | X | X | X |   | X |   |
+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |
+---+---+---+---+---+---+---+
| ~ | ~ | ~ | ~ | ~ | ~ | ~ |
+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |
+---+---+---+---+---+---+---+
| 8 |   |   |   |   |   | G |
+---+---+---+---+---+---+---+
2
+---+---+---+---+---+---+---+
|   |   |   | X |   |   | 8 |
+---+---+---+---+---+---+---+
| * |   |   |   |   |   |   |
+---+---+---+---+---+---+---+
| X | X | X | X |   | X |   |
+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |
+---+---+---+---+---+---+---+
| ~ | ~ | ~ | ~ | ~ | ~ | ~ |
+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |
+---+---+---+---+---+---+---+
| 8 |   |   |   |   |   | G |
+---+---+---+---+---+---+---+
2
+---+---+---+---+---+---+---+
|   |   |   | X |   |   | 8 |
+---+---+---+---+---+---+---+
|   | 

KeyboardInterrupt: Interrupted by user