Skip to content

Commit 41b3a0f

Browse files
feat(env): add print_history helper
- The paths returned from fragile weren't producing the right states (possibly my error), but the agents keep their full history internally, so this helper replays the agent history from the given state and prints it out. - Regenerate the API docs.
1 parent d04366e commit 41b3a0f

File tree

4 files changed

+109
-7
lines changed

4 files changed

+109
-7
lines changed

libraries/mathy_python/mathy/env.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,14 +311,52 @@ def print_state(
311311
)
312312
)
313313

314+
def is_terminal_state(self, env_state: MathyEnvState) -> bool:
    """Report whether the given state ends an episode.

    # Arguments
    env_state (MathyEnvState): The state to inspect

    # Returns
    (bool): True when the transition computed for the state is terminal.
    """
    # Compute the transition for this state, then classify it.
    state_transition = self.get_state_transition(env_state)
    return is_terminal_transition(state_transition)
324+
325+
def print_history(self, env_state: MathyEnvState) -> None:
    """Render the history of an episode from a given state.

    Rebuilds a fresh state from the first recorded step and re-applies each
    later step through `get_next_state`, printing every intermediate state.
    The replay stops as soon as a terminal state is reached or the recorded
    steps run out, whichever comes first.

    # Arguments
    env_state (MathyEnvState): The state to render the history of.
    """
    # Work on a copy so the replay never mutates the agent's own history.
    history: List[MathyEnvStateStep] = list(env_state.agent.history)
    if not history:
        # Nothing was recorded, so there is nothing to replay or print.
        return
    initial_step: MathyEnvStateStep = history[0]
    curr_state: MathyEnvState = MathyEnvState(
        problem=initial_step.raw, max_moves=env_state.max_moves,
    )
    self.print_state(curr_state, "initial-state")
    num_rules = len(self.rules)
    # Iterate the remaining steps instead of popping from the front: this
    # avoids an IndexError when the history ends before a terminal state.
    for step in history[1:]:
        if self.is_terminal_state(curr_state):
            break
        # Combine the step's action and focus into the single action index
        # that get_next_state is called with.
        curr_state, transition, change = self.get_next_state(
            curr_state, step.action + (step.focus * num_rules)
        )
        rule_idx, _ = self.get_action_indices(step.action)
        rule: BaseRule = self.rules[rule_idx]
        self.print_state(
            env_state=curr_state,
            action_name=rule.name[:25].lower(),
            token_index=int(step.focus),
            change=change,
            change_reward=transition.reward,
        )
351+
314352
def render_state(
315353
self,
316354
env_state: MathyEnvState,
317355
action_name: str,
318356
token_index: int = -1,
319357
change: ExpressionChangeRule = None,
320358
change_reward: float = 0.0,
321-
):
359+
) -> str:
322360
"""Render the given state to a string suitable for printing to a log"""
323361
changed_problem = env_state.agent.problem
324362
if change is not None and change.result is not None:

libraries/mathy_python/tests/test_envs.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,20 @@ def test_env_terminal_conditions():
4747
for text, is_win in expectations + out_of_scope_valid:
4848
env_state = MathyEnvState(problem=text)
4949
reward = env.get_state_transition(env_state)
50+
assert text == text and env.is_terminal_state(env_state) == bool(is_win)
5051
assert text == text and is_terminal_transition(reward) == bool(is_win)
5152

5253

54+
def test_print_history():
    """print_history should replay a multi-step history and return None."""
    env = PolySimplify()
    state = MathyEnvState(problem="4x+2")
    # Record ten steps so the replay has a history to walk through.
    for step in range(10):
        state = state.get_out_state(
            problem="2+4x", focus=step, moves_remaining=10 - step, action=step
        )
    result = env.print_history(state)
    assert result is None
62+
63+
5364
def test_env_finalize_state():
5465
env = PolySimplify()
5566

libraries/website/docs/api/env.md

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ MathyEnv(
77
rules: List[mathy.core.rule.BaseRule] = None,
88
max_moves: int = 20,
99
verbose: bool = False,
10+
error_invalid: bool = False,
1011
reward_discount: float = 0.99,
1112
)
1213
```
@@ -144,10 +145,10 @@ __Returns__
144145
MathyEnv.get_token_at_index(
145146
self,
146147
expression: mathy.core.expressions.MathExpression,
147-
focus_index: int,
148+
index: int,
148149
) -> Optional[mathy.core.expressions.MathExpression]
149150
```
150-
Get the token that is `focus_index` from the left of the expression
151+
Get the token that is `index` from the left of the expression
151152
### get_valid_moves
152153
```python
153154
MathyEnv.get_valid_moves(self, env_state:mathy.state.MathyEnvState) -> List[int]
@@ -176,6 +177,20 @@ MathyEnv.get_win_signal(self, env_state:mathy.state.MathyEnvState) -> float
176177
Calculate the reward value for completing the episode. This is done
177178
so that the reward signal can be scaled based on the time it took to
178179
complete the episode.
180+
### is_terminal_state
181+
```python
182+
MathyEnv.is_terminal_state(self, env_state:mathy.state.MathyEnvState) -> bool
183+
```
184+
Determine if a given state is terminal or not.
185+
186+
__Arguments__
187+
188+
- __env_state (MathyEnvState)__: The state to inspect
189+
190+
__Returns__
191+
192+
`(bool)`: A boolean indicating if the state is terminal or not.
193+
179194
### max_moves_fn
180195
```python
181196
MathyEnv.max_moves_fn(
@@ -185,6 +200,16 @@ MathyEnv.max_moves_fn(
185200
) -> int
186201
```
187202
Return the environment specific maximum move count for a given problem.
203+
### print_history
204+
```python
205+
MathyEnv.print_history(self, env_state:mathy.state.MathyEnvState) -> None
206+
```
207+
Render the history of an episode from a given state.
208+
209+
__Arguments__
210+
211+
- __env_state (MathyEnvState)__: The state to render the history of.
212+
188213
### print_state
189214
```python
190215
MathyEnv.print_state(
@@ -227,7 +252,7 @@ MathyEnv.render_state(
227252
token_index: int = -1,
228253
change: mathy.core.rule.ExpressionChangeRule = None,
229254
change_reward: float = 0.0,
230-
)
255+
) -> str
231256
```
232257
Render the given state to a string suitable for printing to a log
233258
### state_to_observation

libraries/website/docs/api/state.md

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ MathyAgentState(
99
problem_type,
1010
reward = 0.0,
1111
history = None,
12-
focus_index = 0,
13-
last_action = None,
1412
)
1513
```
1614
The state related to an agent for a given environment state
@@ -35,13 +33,23 @@ This allocation strategy requires more memory but removes a class
3533
of potential issues around unintentional sharing of data and mutation
3634
by two different sources.
3735

36+
### from_np
37+
```python
38+
MathyEnvState.from_np(input_bytes:numpy.ndarray) -> 'MathyEnvState'
39+
```
40+
Convert a numpy object into a state object
41+
### from_string
42+
```python
43+
MathyEnvState.from_string(input_string:str) -> 'MathyEnvState'
44+
```
45+
Convert a string representation of state into a state object
3846
### get_out_state
3947
```python
4048
MathyEnvState.get_out_state(
4149
self,
4250
problem: str,
51+
focus: int,
4352
action: int,
44-
focus_index: int,
4553
moves_remaining: int,
4654
) -> 'MathyEnvState'
4755
```
@@ -60,11 +68,31 @@ __Example__
6068
- `mycorp.envs.solve_impossible_problems` -> `[12375561, -2838517]`
6169

6270

71+
### to_np
72+
```python
73+
MathyEnvState.to_np(self) -> numpy.ndarray
74+
```
75+
Convert a state object into a numpy representation
76+
### to_observation
77+
```python
78+
MathyEnvState.to_observation(
79+
self,
80+
move_mask: Optional[List[int]] = None,
81+
hash_type: Optional[List[int]] = None,
82+
parser: Optional[mathy.core.parser.ExpressionParser] = None,
83+
) -> mathy.state.MathyObservation
84+
```
85+
Convert a state into an observation
6386
### to_start_observation
6487
```python
6588
MathyEnvState.to_start_observation(self) -> mathy.state.MathyObservation
6689
```
6790
Generate an episode start MathyObservation
91+
### to_string
92+
```python
93+
MathyEnvState.to_string(self) -> str
94+
```
95+
Convert a state object into a string representation
6896
## MathyEnvStateStep
6997
```python
7098
MathyEnvStateStep(self, /, *args, **kwargs)

0 commit comments

Comments
 (0)