In [5]:
!pip install gymnasium



In [9]:
import gymnasium
from gymnasium import spaces
import numpy as np

class RecyclingRobotEnv(gymnasium.Env):
    """Grid world in which a robot picks up items and sorts them into bins.

    The robot moves on a ``grid_size`` x ``grid_size`` grid that contains
    ``num_items`` items, each with a type code (0: plastic, 1: paper,
    2: metal). Bins have no location: a drop action can be taken anywhere.

    Actions (``Discrete(8)``):
        0: move to y - 1 (towards the first printed row in ``render``)
        1: move to y + 1
        2: move to x - 1
        3: move to x + 1
        4: pick up an item lying on the robot's cell (if not already holding)
        5, 6, 7: drop the held item into the bin for type 0, 1, 2

    Rewards: +1 for a pick-up, +10 for a drop into the matching bin, +100
    extra when the last item is sorted, -5 for a drop into the wrong bin
    (the item is then put back on a random cell).

    Observation (``int32`` vector of length ``2 + 4 * num_items``):
        ``[robot_x, robot_y]`` followed by ``[x, y, item_type, held]`` per
        item. A held item reports the robot's position with ``held == 1``;
        an item that has already been sorted is reported as four ``-1``.

    NOTE: ``step`` returns the 4-tuple ``(obs, reward, done, info)`` and
    ``reset`` returns only the observation, because existing callers unpack
    them that way. This differs from the current Gymnasium API, which uses
    a 5-tuple from ``step`` and ``(obs, info)`` from ``reset``.
    """

    metadata = {'render_modes': ['human']}

    # Character drawn by render() for each item type code
    # (plastic, paper, metal); the robot is drawn as 'R'.
    ITEM_SYMBOLS = {0: 'P', 1: 'p', 2: 'M'}

    def __init__(self, grid_size=10, num_items=5):
        """Create the environment and place the robot and items randomly.

        Args:
            grid_size: Side length of the square grid.
            num_items: Number of items scattered on the grid at reset.
        """
        super().__init__()

        self.grid_size = grid_size
        self.num_items = num_items
        self.robot_pos = None
        # Items currently lying on the grid: item_id -> (x, y, item_type).
        self.item_positions = {}
        # Type of every item of the episode, kept for its whole lifetime so
        # the type is still known while the item is being carried.
        self.item_types = {}
        self.held_item = None

        # low is -1 because sorted items are encoded as -1 in the observation;
        # high also covers the largest type code when the grid is tiny.
        self.observation_space = spaces.Box(
            low=-1,
            high=max(self.grid_size, 2),
            shape=(2 + self.num_items * 4,),
            dtype=np.int32,
        )

        self.action_space = spaces.Discrete(8)

        self.reset()

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``.

        Actions outside 0-7 are ignored and yield a reward of 0.
        """
        reward = 0
        done = False
        info = {}
        last_cell = self.grid_size - 1

        if action == 0:
            self.robot_pos[1] = max(0, self.robot_pos[1] - 1)
        elif action == 1:
            self.robot_pos[1] = min(last_cell, self.robot_pos[1] + 1)
        elif action == 2:
            self.robot_pos[0] = max(0, self.robot_pos[0] - 1)
        elif action == 3:
            self.robot_pos[0] = min(last_cell, self.robot_pos[0] + 1)
        elif action == 4:
            if self.held_item is None:
                rx, ry = self.robot_pos
                # First item (in insertion order) lying on the robot's cell.
                picked = next(
                    (item_id
                     for item_id, (x, y, _) in self.item_positions.items()
                     if x == rx and y == ry),
                    None,
                )
                if picked is not None:
                    # A carried item leaves the grid; its type stays
                    # available through self.item_types.
                    del self.item_positions[picked]
                    self.held_item = picked
                    reward += 1
        elif action in (5, 6, 7):
            if self.held_item is not None:
                item_id = self.held_item
                item_type = self.item_types[item_id]
                self.held_item = None
                if item_type == action - 5:
                    reward += 10
                    # The episode ends once nothing is left on the grid
                    # (the robot's hands are empty at this point).
                    if not self.item_positions:
                        done = True
                        reward += 100
                else:
                    reward -= 5
                    # Wrong bin: the item goes back onto a random cell with
                    # its real type so that it can be retried.
                    x, y = (int(v) for v in
                            self.np_random.integers(0, self.grid_size, size=2))
                    self.item_positions[item_id] = (x, y, item_type)

        obs = self._get_observation()

        return obs, reward, done, info

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return the initial observation.

        Args:
            seed: Optional seed for the environment's random generator,
                forwarded to ``gymnasium.Env.reset``.
            options: Accepted for Gymnasium signature compatibility; unused.
        """
        # Seeds self.np_random when a seed is given.
        super().reset(seed=seed)
        rng = self.np_random

        self.robot_pos = rng.integers(0, self.grid_size, size=2)
        self.item_positions = {}
        self.item_types = {}
        self.held_item = None

        for item_id in range(self.num_items):
            x, y = (int(v) for v in rng.integers(0, self.grid_size, size=2))
            item_type = int(rng.integers(0, 3))  # 0: plastic, 1: paper, 2: metal
            self.item_positions[item_id] = (x, y, item_type)
            self.item_types[item_id] = item_type

        return self._get_observation()

    def _get_observation(self):
        """Build the flat observation vector described in the class docstring."""
        # Start from -1 everywhere so sorted items need no extra handling.
        obs = np.full(2 + self.num_items * 4, -1, dtype=np.int32)
        obs[:2] = self.robot_pos

        for item_id in range(self.num_items):
            base = 2 + item_id * 4
            if item_id in self.item_positions:
                x, y, item_type = self.item_positions[item_id]
                obs[base:base + 4] = (x, y, item_type, 0)
            elif item_id == self.held_item:
                # A carried item travels with the robot.
                obs[base:base + 4] = (
                    self.robot_pos[0],
                    self.robot_pos[1],
                    self.item_types[item_id],
                    1,
                )

        return obs

    def render(self, mode='human'):
        """Print the grid: items by type symbol, the robot as 'R'.

        The robot is drawn last, so it hides any item on its own cell.
        Nothing is printed for modes other than ``'human'``.
        """
        if mode == 'human':
            grid = [[' ' for _ in range(self.grid_size)] for _ in range(self.grid_size)]

            for x, y, item_type in self.item_positions.values():
                symbol = self.ITEM_SYMBOLS.get(item_type)
                if symbol is not None:
                    grid[y][x] = symbol

            rx, ry = self.robot_pos
            grid[ry][rx] = 'R'

            for row in grid:
                print(' '.join(row))
            print("-" * (self.grid_size * 2 - 1))

    def close(self):
        """Release resources; this environment holds none."""
        pass


In [10]:
# Smoke-test rollout: show the initial grid, then take up to 10 random
# actions, rendering after each one and stopping early if the episode ends.
env = RecyclingRobotEnv()
obs = env.reset()
env.render()

remaining_steps = 10
while remaining_steps > 0:
    remaining_steps -= 1
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        break

env.close()

              M    
                   
                   
            P   P  
                   
                   
                   
            P      
              P    
        R          
-------------------
              M    
                   
                   
            P   P  
                   
                   
                   
            P      
              P    
        R          
-------------------
              M    
                   
                   
            P   P  
                   
                   
                   
            P      
        R     P    
                   
-------------------
              M    
                   
                   
            P   P  
                   
                   
                   
            P      
        R     P    
                   
-------------------
              M    
                   
                   
            P   P  
                   
                   
