In [6]:
import openai
import httpx
import os

deployment_name = "rl_agent"
openai.api_type = "azure"
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = "https://smerrill-openai-test.openai.azure.com/"
openai.api_version = "2023-09-15-preview"

In [10]:
# Defining a function to send the prompt to the model
def send_message(messages, model_name, max_response_tokens=500):
    response = openai.ChatCompletion.create(
        engine=model_name,
        messages=messages,
        temperature=0.5,
        max_tokens=max_response_tokens,
        top_p=0.9,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response['choices'][0]['message']['content']

# Defining a function to print out the conversation in a readable format
def print_conversation(messages):
    for message in messages:
        print(f"[{message['role'].upper()}]")
        print(message['content'])
        print()

In [157]:
base_system_message = """
You run in a loop of Thought, Action, PAUSE, Observation.
You are helping an agent navigate in a 2 dimensional grid world.  The agent starts at position (0, 0) and you're job is to help him get to the goal cell. The goal cell can be anywhere in the gridworld.
You will neeed to direct the agent to take actions to move him to the goal cell.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions.

Your available actions are:

find(agent or goal):
e.g. returns the grid position of the agent or goal cell

left:
e.g. moves the agent left.  If the agent is at cell (0, 1) it moves him to cell (0, 0)

right:
e.g. moves the agent right.  If the agent is at cell (0, 0) it moves him to cell (0, 1)

up:
e.g. moves the agent up.  If the agent is at cell (0, 0) it moves him to cell (1, 0)

down:
e.g. moves the agent down.  If the agent is at cell (1, 0) it moves him to cell (0, 0)

Example session:

Task: Agent is in a 5 X 5 grid world
Thought: I should identify where the goal is
Action: find
PAUSE

Observation: The goal cell is at grid position (0, 1)
Thought: Since the agent is at position (0, 0) and the goal cell is at position (0, 1), I should move right
Action right
PAUSE

You will be called again with this
Observation: Congrats you have helped the agent reach the goal
"""

system_message = f"{base_system_message.strip()}"
print(system_message)

You run in a loop of Thought, Action, PAUSE, Observation.
You are helping an agent navigate in a 2 dimensional grid world.  The agent starts at position (0, 0) and you're job is to help him get to the goal cell. The goal cell can be anywhere in the gridworld.
You will neeed to direct the agent to take actions to move him to the goal cell.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions.

Your available actions are:

find(agent or goal):
e.g. returns the grid position of the agent or goal cell

left:
e.g. moves the agent left.  If the agent is at cell (0, 1) it moves him to cell (0, 0)

right:
e.g. moves the agent right.  If the agent is at cell (0, 0) it moves him to cell (0, 1)

up:
e.g. moves the agent up.  If the agent is at cell (0, 0) it moves him to cell (1, 0)

down:
e.g. moves the agent down.  If the agent is at cell (1, 0) it moves him to cell (0, 0)

Example session:

Task: Agent is in a 5 X 5 

In [158]:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np

class GridWorldEnv(gym.Env):
    metadata = {'render.modes': ['human', 'ansi']}
    
    def __init__(self, grid_size=(5, 5), start_pos=(0, 0), goal_pos=(2, 2)):
        self.grid_size = grid_size
        self.start_pos = start_pos
        self.goal_pos = goal_pos
        self.agent_pos = start_pos
        
        self.action_space = spaces.Discrete(4)  # up, down, left, right
        self.observation_space = spaces.Box(low=0, high=1, shape=(self.grid_size[0], self.grid_size[1]), dtype=np.float32)
        
        self._seed()
        self.reset()
        
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    
    def reset(self):
        self.agent_pos = self.start_pos
        self.grid = np.zeros(self.grid_size)
        self.grid[self.goal_pos[0], self.goal_pos[1]] = 1  # Set goal position
        return self._get_observation()
    
    def step(self, action):
        if action == 0:  # up
            self.agent_pos = (max(0, self.agent_pos[0] - 1), self.agent_pos[1])
        elif action == 1:  # down
            self.agent_pos = (min(self.grid_size[0] - 1, self.agent_pos[0] + 1), self.agent_pos[1])
        elif action == 2:  # left
            self.agent_pos = (self.agent_pos[0], max(0, self.agent_pos[1] - 1))
        elif action == 3:  # right
            self.agent_pos = (self.agent_pos[0], min(self.grid_size[1] - 1, self.agent_pos[1] + 1))
        
        done = self.agent_pos == self.goal_pos
        reward = 1 if done else 0
        return self._get_observation(), reward, done, {}
    
    def _get_observation(self):
        obs = np.zeros(self.grid_size)
        obs[self.agent_pos[0], self.agent_pos[1]] = 1
        return obs
    
    def render(self, mode='human'):
        if mode == 'human':
            for i in range(self.grid_size[0]):
                for j in range(self.grid_size[1]):
                    if (i, j) == self.agent_pos:
                        print('A', end=' ')
                    elif (i, j) == self.goal_pos:
                        print('G', end=' ')
                    else:
                        print('.', end=' ')
                print()
            print()
        elif mode == 'ansi':
            return None
        else:
            super(GridWorldEnv, self).render(mode=mode)

In [236]:
env = GridWorldEnv()
obs = env.reset()
env.render()

A . . . . 
. . . . . 
. . G . . 
. . . . . 
. . . . . 



In [214]:
# This is the first user message that will be sent to the model. Feel free to update this.
user_message = "the agent is in a 5 X 5 gridwolrd"

# Create the list of messages. role can be either "user" or "assistant" 
messages=[
    {"role": "system", "content": system_message},
    {"role": "user", "name":"example_user", "content": user_message}
]

### First Action

In [215]:
max_response_tokens = 1000

response = send_message(messages, deployment_name, max_response_tokens)
messages.append({"role": "assistant", "content": response})

In [216]:
print(response)

Thought: I should identify where the goal is
Action: find(goal)
PAUSE


### First Observation

In [237]:
env.render()

A . . . . 
. . . . . 
. . G . . 
. . . . . 
. . . . . 



In [217]:
#message from environment: 
obs = f'Observation: (2, 2)'
messages.append({"role": "user", "content": obs})

### Second Action

In [219]:
response = send_message(messages, deployment_name, max_response_tokens)
print(response)

Thought: The goal is at position (2, 2). I should direct the agent to move towards that position.
Action: right
PAUSE


### Second Observation

In [238]:
env.step(3)
env.render()

. A . . . 
. . . . . 
. . G . . 
. . . . . 
. . . . . 



In [220]:
obs = f'Observation: Agent has moved right, now at (0, 1)'
messages.append({"role": "user", "content": obs})

### Third Action

In [221]:
response = send_message(messages, deployment_name, max_response_tokens)
print(response)

Thought: The agent is now at (0, 1) and the goal is at (2, 2). I should move the agent down and then right to reach the goal.
Action: down
PAUSE


### Third Observation

In [239]:
env.step(1)
env.render()

. . . . . 
. A . . . 
. . G . . 
. . . . . 
. . . . . 



In [222]:
obs = f'Observation: Agent moved down, now at (1, 1)'
messages.append({"role": "user", "content": obs})

### Fourth Action

In [223]:
response = send_message(messages, deployment_name, max_response_tokens)
print(response)

Thought: The goal is at (2, 2) and the agent is at (1, 1). I should move the agent down and then right.
Action: down
PAUSE


### Fourth Observation

In [240]:
env.step(1)
env.render()

. . . . . 
. . . . . 
. A G . . 
. . . . . 
. . . . . 



In [224]:
obs = f'Observation: Agent moved down, now at (1, 2)'
messages.append({"role": "user", "content": obs})

### Fifth Action

In [225]:
response = send_message(messages, deployment_name, max_response_tokens)
print(response)

Thought: The goal cell is at position (2, 2) and the agent is currently at position (1, 2). I should move the agent right to get closer to the goal cell.
Action: right
PAUSE


### Fifth Observation

In [241]:
env.step(3)
env.render()

. . . . . 
. . . . . 
. . A . . 
. . . . . 
. . . . . 



In [226]:
obs = f'Observation: Agent moved right, now at (2, 2)'
messages.append({"role": "user", "content": obs})

### Sixth Action

In [227]:
response = send_message(messages, deployment_name, max_response_tokens)
print(response)

Thought: The agent has reached the goal cell at (2, 2). Congrats!
