In [4]:
import gym
from gym import spaces
import pygame
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
import json
import random

In [6]:
# Build `info`: each command string maps to the set of its known solutions,
# and every solution is stored as a tuple of step strings (hashable, so
# duplicates across dataset entries collapse automatically).
info = {}
with open("699_recipe_5.json", "r") as f:
    recipe_data = json.load(f)

for record in recipe_data["dataset"]:
    command = record["key"]
    solutions = {tuple(steps) for steps in record["value"]}
    # Merge with whatever was already collected for this command.
    info[command] = info.get(command, set()) | solutions

print(info)






{'Serve Apple Salad to Person in Red': {('Go to Green Apple', 'Take Green Apple', 'Go to Red Apple', 'Take Red Apple', 'Go to Knife', 'Prepare Green Apple', 'Prepare Red Apple', 'Go to Mixing Bowl', 'Prepare Apple Salad', 'Place Apple Salad on Plate', 'Serve plate to Person in Red'), ('Go to Red Apple', 'Take Red Apple', 'Go to Green Apple', 'Take Green Apple', 'Go to Knife', 'Prepare Red Apple', 'Prepare Green Apple', 'Go to Mixing Bowl', 'Prepare Apple Salad', 'Place Apple Salad on Plate', 'Serve plate to Person in Red')}, 'Serve Cheese Platter to Person in White': {('Go to White Cheese', 'Take White Cheese', 'Go to Yellow Cheese', 'Take Yellow Cheese', 'Go to Knife', 'Prepare White Cheese', 'Prepare Yellow Cheese', 'Go to Plate', 'Place White and Yellow Cheese on Plate', 'Serve plate to Person in White'), ('Go to Yellow Cheese', 'Take Yellow Cheese', 'Go to White Cheese', 'Take White Cheese', 'Go to Knife', 'Prepare Yellow Cheese', 'Prepare White Cheese', 'Go to Plate', 'Place White

In [20]:
from abc import update_abstractmethods
# Human-readable names of everything in the kitchen world, grouped into
# "ingredients" and "kitchenTools".
# NOTE(review): not referenced by any visible cell; GridWorldEnv keeps its own
# item lists using space-free names (e.g. "RedApple") — confirm whether this
# dict is still needed.
env_items={
  "ingredients": [
    "Red Onions",
    "Yellow Onions",
    "Bread",
    "Yellow Cheese",
    "White Cheese",
    "Red Bell Pepper",
    "Green Bell Pepper",
    "Red Apple",
    "Green Apple",
    "Yellow Egg",
    "White Egg",
    "Butter",
    "Ham",
    "Red Meat (Beef)",
    "White Meat (Turkey)",
    "Salt",
    "Herbs"
  ],
  "kitchenTools": [
    "Plate",
    "Pot",
    "Oven",
    "Knife",
    "Mixing Bowl"
  ]
}

# Verb prefixes of the high-level actions (compare the recipe steps printed
# above, e.g. "Go to Knife", "Take Red Apple", "Prepare Apple Salad").
# NOTE(review): not referenced by any visible cell — confirm it is still needed.
action_list={
  "actions": [
    "Go to",
    "Use",
    "Prepare",
    "Take",
    "Drop",
    "Place on/in"
  ]
}


# Adapted from the Gym environment-creation tutorial: https://www.gymlibrary.dev/content/environment_creation/
class GridWorldEnv(gym.Env):
    """Square grid-world kitchen in which an agent walks, picks up and drops items.

    Every food item, every tool and the plate occupies its own randomly chosen
    cell; the layout is drawn once in ``__init__`` (using the ``random`` module)
    and stays fixed for the lifetime of the instance. ``map_info[x][y]`` holds
    the internal name of the item on cell ``(x, y)`` or ``None``.

    Actions understood by ``step``:
        0-3  move right / left / down / up
        4    take whatever is on the agent's cell
        5    drop ``target`` onto the tool or plate on the agent's cell
        6-8  declared in the action space but not implemented (small penalty)
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    # Internal item name -> human-readable name used as the key in
    # observations ("Red Apple") and in the distance info ("distance to Red Apple").
    _DISPLAY_NAMES = {
        "RedApple": "Red Apple",
        "RedOnions": "Red Onions",
        "YellowOnions": "Yellow Onions",
        "Bread": "Bread",
        "YellowCheese": "Yellow Cheese",
        "WhiteCheese": "White Cheese",
        "RedBellPepper": "Red Bell Pepper",
        "GreenBellPepper": "Green Bell Pepper",
        "GreenApple": "Green Apple",
        "YellowEgg": "Yellow Egg",
        "WhiteEgg": "White Egg",
        "Butter": "Butter",
        "Ham": "Ham",
        "RedMeat": "Red Meat (Beef)",
        "WhiteMeat": "White Meat (Turkey)",
        "Salt": "Salt",
        "Herbs": "Herbs",
        "Pot": "Pot",
        "Oven": "Oven",
        "Knife": "Knife",
        "MixingBowl": "Mixing Bowl",
        "Plate": "Plate",
    }

    def __init__(self, render_mode=None, the_size=30):
        """Create the environment and lay out all items on the grid.

        Args:
            render_mode: ``None``, ``"human"`` or ``"rgb_array"``.
            the_size: side length of the square grid, in cells.

        Raises:
            ValueError: if the grid has too few cells to give every item and
                the agent a cell of its own.
        """
        self.size = the_size
        self.window_size = 1000

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        self.window = None
        self.clock = None

        self.counter = 0
        self.food_list = ["RedApple", "RedOnions", "YellowOnions", "Bread", "YellowCheese", "WhiteCheese",
                          "RedBellPepper", "GreenBellPepper", "GreenApple", "YellowEgg", "WhiteEgg", "Butter",
                          "Ham", "RedMeat", "WhiteMeat", "Salt", "Herbs"]
        self.tools_list = ["Pot", "Oven", "Knife", "MixingBowl"]
        self.guest_list = ["Red", "White", "Green", "Yellow"]

        self.taking_item = []

        placed_items = self._placed_items()
        if self.size * self.size < len(placed_items) + 1:
            raise ValueError(
                f"a {self.size}x{self.size} grid cannot hold "
                f"{len(placed_items)} items plus the agent"
            )

        # The space keys are the same display names that _get_obs() returns,
        # so every observation is actually contained in observation_space.
        all_obs = {"Agent": spaces.Box(0, the_size - 1, shape=(2,), dtype=int)}
        for item in placed_items:
            all_obs[self._DISPLAY_NAMES[item]] = spaces.Box(0, the_size - 1, shape=(2,), dtype=int)
        self.observation_space = spaces.Dict(all_obs)

        # gym looks for `action_space`; `action` is kept as an alias for
        # code written against the earlier attribute name.
        self.action_space = spaces.Discrete(9)
        self.action = self.action_space

        right = np.array([1, 0])
        left = np.array([-1, 0])
        down = np.array([0, 1])
        up = np.array([0, -1])
        self._action_to_direction = {0: right, 1: left, 2: down, 3: up}

        # Shuffle every cell once and pop from the list so that no two
        # objects (items or agent) can share a cell.
        all_locations = [np.array([x, y]) for x in range(self.size) for y in range(self.size)]
        random.shuffle(all_locations)

        self.map_info = [[None for _ in range(self.size)] for _ in range(self.size)]
        for item in placed_items:
            location = all_locations.pop()
            setattr(self, f'_{item}_location', location)
            self.map_info[location[0]][location[1]] = item

        # What currently lies on each tool / the plate: None or a list of items.
        self.food_on = dict()
        for item in self.tools_list + ["Plate"]:
            self.food_on[item] = None

        self._Agent_location = all_locations.pop()

    def _placed_items(self):
        """Return the internal names of all objects on the map, in placement order."""
        return self.food_list + self.tools_list + ["Plate"]

    def _get_obs(self):
        """Return a dict mapping "Agent" and every item's display name to its (x, y) array."""
        obs = {"Agent": self._Agent_location}
        for item in self._placed_items():
            obs[self._DISPLAY_NAMES[item]] = getattr(self, f'_{item}_location')
        return obs

    def _get_info(self):
        """Return ``(distances, taking_item)``.

        ``distances`` maps ``"distance to <display name>"`` to the Manhattan
        (L1) distance between the agent and that item; ``taking_item`` is the
        list of things the agent is currently carrying.
        """
        distances = {}
        for item in self._placed_items():
            offset = self._Agent_location - getattr(self, f'_{item}_location')
            distances["distance to " + self._DISPLAY_NAMES[item]] = np.linalg.norm(offset, ord=1)
        return distances, self.taking_item

    def taking(self):
        """Pick up whatever is on the agent's cell.

        Returns:
            ``(success, message, item)``. Standing on a food cell always
            succeeds and yields that food's name. Standing on a tool or the
            plate succeeds only if something was dropped there before. On an
            empty cell the result is ``(False, "Nothing", None)``.
        """
        x, y = self._Agent_location
        cell = self.map_info[x][y]
        if cell is None:
            # The original evaluated this tuple without returning it, so the
            # caller received None and failed while unpacking.
            return False, "Nothing", None
        if cell in self.food_list:
            self.taking_item.append(cell)
            return True, "U took the " + str(self.taking_item), cell

        has_item = self.food_on[cell]
        if has_item is not None:
            # NOTE(review): this appends the tool's whole contents list as one
            # entry and leaves the tool filled — confirm that is intended.
            self.taking_item.append(has_item)
            return True, "U got " + str(has_item) + " from " + str(cell), has_item
        return False, "U got nothing from " + str(cell), None

    def drop(self, item):
        """Drop a carried ``item`` onto the tool or plate on the agent's cell.

        Returns:
            ``(success, message, item_or_None)``. Fails, without changing any
            state, when nothing is carried, when the cell is empty or holds
            food, or when ``item`` is not among the carried items.
        """
        if len(self.taking_item) == 0:
            return False, "U did not take anythings", None

        x, y = self._Agent_location
        cell = self.map_info[x][y]
        if cell is None:
            return False, "U need to drop food on tools", None
        if cell in self.food_list:
            return False, "U can not drop on food loc", None
        if not (cell in self.tools_list or cell == "Plate"):
            return False, "U can not drop on " + str(cell), None
        if item not in self.taking_item:
            # Checked before touching the tool so a failed drop cannot leave
            # the item both on the tool and missing from the hand.
            return False, "U are not holding " + str(item), None

        if self.food_on[cell] is None:
            self.food_on[cell] = []
        self.food_on[cell].append(item)
        self.taking_item.remove(item)
        return True, "U dropped " + str(item) + " on " + str(cell), item

    def step(self, action, target):
        """Apply one action and return ``(observation, reward, terminated, info)``.

        Args:
            action: integer action id (see the class docstring).
            target: internal name of the item the agent is currently supposed
                to reach, take or drop.
        """
        # Default outcome for failed take/drop and for unimplemented actions.
        points, terminate = -0.02, False

        if 0 <= action <= 3:
            # Build a new array instead of `+=` so arrays already handed out
            # in earlier observations are not modified behind the caller.
            moved = self._Agent_location + self._action_to_direction[action]
            hit_wall = bool((moved < 0).any() or (moved >= self.size).any())
            # Clip first: the reward looks up map_info at the agent position,
            # which must be a valid cell.
            self._Agent_location = np.clip(moved, 0, self.size - 1)
            points, terminate = self.rewardMoving(target, out_of_bounds=hit_wall)
        elif action == 4:  # take
            getting, _, item = self.taking()
            if getting:
                points, terminate = self.rewardTaking(target, item)
        elif action == 5:  # drop the target item
            getting, _, _ = self.drop(target)
            if getting:
                points = 0.02

        self.counter += 1
        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, points, terminate, info

    def rewardMoving(self, target, out_of_bounds=False):
        """Reward for a movement step, as ``(reward, terminated)``.

        Args:
            target: internal name of the object the agent should reach.
            out_of_bounds: True when the move tried to leave the grid (the
                caller has already clipped the position).

        Returns:
            ``(-0.05, False)`` for walking into the border, ``(1, True)`` for
            standing on a target guest, ``(0.03, False)`` for standing on any
            other target, otherwise ``(-0.01, False)``.
        """
        pos = self._Agent_location
        if out_of_bounds or (pos < 0).any() or (pos >= self.size).any():
            return -0.05, False

        cell = self.map_info[pos[0]][pos[1]]
        if cell is not None and cell == target:
            # Guest names are never written to map_info in this class, so the
            # terminal branch only fires once guests are placed on the map.
            if target in self.guest_list:
                return 1, True
            return 0.03, False
        return -0.01, False

    def rewardTaking(self, target, item_get):
        """Return ``(0.02, False)`` if the item picked up is the target, else ``(-0.02, False)``."""
        if item_get == target:
            return 0.02, False
        return -0.02, False

    def prepareReward(self, tool):
        """Return ``(0.02, True)`` when the agent stands on a tool, else ``(-0.02, False)``.

        ``tool`` is accepted but not used; the plate is not in ``tools_list``
        and therefore does not count.
        """
        x, y = self._Agent_location
        if self.map_info[x][y] in self.tools_list:
            return 0.02, True
        return -0.02, False

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        The item layout created in ``__init__`` is kept so ``map_info`` and
        the ``_<item>_location`` attributes stay in agreement. Carried items
        and tool contents are cleared and the agent is moved to a random
        empty cell chosen with ``self.np_random``.
        """
        super().reset(seed=seed)
        self.counter = 0
        self.has_apple = False  # legacy flag, kept for older callers
        self.taking_item = []
        for holder in self.food_on:
            self.food_on[holder] = None

        free_cells = [(x, y)
                      for x in range(self.size)
                      for y in range(self.size)
                      if self.map_info[x][y] is None]
        pick = int(self.np_random.integers(0, len(free_cells)))
        self._Agent_location = np.array(free_cells[pick], dtype=int)

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info

    def render(self):
        """Return the current frame as an RGB array when ``render_mode`` is ``"rgb_array"``."""
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def _render_frame(self):
        """Draw the grid, the agent and every item.

        In ``"human"`` mode the frame is shown in a pygame window and nothing
        is returned; otherwise the frame is returned as a
        ``(height, width, 3)`` array.
        """
        surface_size = (self.window_size, self.window_size)

        if self.render_mode == "human":
            if self.window is None:
                pygame.init()
                pygame.display.init()
                self.window = pygame.display.set_mode(surface_size)
            if self.clock is None:
                self.clock = pygame.time.Clock()

        canvas = pygame.Surface(surface_size)
        canvas.fill((255, 255, 255))

        # Edge length of one grid cell in pixels.
        pix_square_size = self.window_size / self.size
        agent_rgb = (0, 0, 0)
        food_rgb = (255, 0, 0)
        tool_rgb = (0, 0, 255)
        plate_rgb = (255, 0, 255)

        # Agent: a square covering 70% of its cell, centred by a 15% margin.
        agent_side = pix_square_size * 0.7
        agent_margin = pix_square_size * 0.15
        pygame.draw.rect(
            canvas,
            agent_rgb,
            pygame.Rect(pix_square_size * self._Agent_location + agent_margin,
                        (agent_side, agent_side)),
        )

        # Items: one circle per object, coloured by category.
        for item in self._placed_items():
            if item == "Plate":
                colour = plate_rgb
            elif item in self.food_list:
                colour = food_rgb
            else:
                colour = tool_rgb
            centre = (getattr(self, f'_{item}_location') + 0.5) * pix_square_size
            pygame.draw.circle(canvas, colour, centre, pix_square_size / 3)

        # Grid lines, including the closing line on the far edge.
        for i in range(self.size + 1):
            the_pix = pix_square_size * i
            pygame.draw.line(canvas, 0, (0, the_pix), (self.window_size, the_pix), width=3)
            pygame.draw.line(canvas, 0, (the_pix, 0), (the_pix, self.window_size), width=3)

        if self.render_mode != "human":
            return np.transpose(np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2))

        self.window.blit(canvas, canvas.get_rect())
        pygame.event.pump()
        pygame.display.update()
        self.clock.tick(self.metadata["render_fps"])

    def close(self):
        """Shut down the pygame window if one was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            self.window = None

In [21]:
# Build an environment that renders to arrays and show its layout,
# one grid row (fixed x, all y) per printed line.
env = GridWorldEnv(render_mode='rgb_array')

theM = env.map_info
for grid_row in theM:
    print(grid_row)

[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Pot', None, None, None, None, None, None, None, None, None]
[None, None, None, None, None, None, None, None, None, 'Oven', None, None, None, None, None, None, None, None, None, None, None, None, None, 'YellowCheese', None, None, None, None, None, None]
[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Knife']
[None, None, None, None, None, None, None, 'RedBellPepper', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
[None, None, None, None, None, None, None, None, None, None, None, N

In [22]:
# _get_info() returns a pair: the agent-to-item distance dict and the list of
# items the agent is carrying.
a,b = env._get_info()

In [23]:
# a: distances from the agent to every item; b: items currently carried.
print(a)
print(b)

{'distance to Red Apple': 16.0, 'distance to Red Onions': 6.0, 'distance to Yellow Onions': 21.0, 'distance to Bread': 10.0, 'distance to Yellow Cheese': 33.0, 'distance to White Cheese': 14.0, 'distance to Red Bell Pepper': 15.0, 'distance to Green Bell Pepper': 4.0, 'distance to Green Apple': 19.0, 'distance to Yellow Egg': 8.0, 'distance to White Egg': 15.0, 'distance to Butter': 8.0, 'distance to Ham': 30.0, 'distance to Red Meat (Beef)': 16.0, 'distance to White Meat (Turkey)': 10.0, 'distance to Salt': 16.0, 'distance to Herbs': 34.0, 'distance to Pot': 31.0, 'distance to Oven': 19.0, 'distance to Knife': 38.0, 'distance to Mixing Bowl': 29.0, 'distance to Plate': 10.0}
[]


In [24]:
# Current agent cell as an (x, y) array.
print(env._Agent_location)

[15  4]
