# Testing grounds

## Setup

### Imports

In [1]:
import os
import numpy as np
from main import load_agent_model, train
from gymnasium.wrappers.flatten_observation import FlattenObservation
from footsies_gym.envs.footsies import FootsiesEnv
from footsies_gym.wrappers.normalization import FootsiesNormalized
from footsies_gym.wrappers.action_comb_disc import FootsiesActionCombinationsDiscretized
from footsies_gym.wrappers.statistics import FootsiesStatistics
from footsies_gym.wrappers.frame_skip import FootsiesFrameSkipped
from importlib import reload
import pprint

# Shared pretty-printer (4-space indent) used by the inspection cells below.
pp = pprint.PrettyPrinter(indent=4)

### Environment

In [2]:
# Keyword-argument presets for the `FootsiesEnv` constructor; unpack the ones
# that apply with `**` when building the environment.

# Preset for manual play (presumably a human opponent at normal game speed --
# confirm the flag semantics in footsies_gym).
human_testing_kwargs = dict(fast_forward=False, vs_player=True)

# Preset for regular automated testing.
normal_testing_kwargs = dict(frame_delay=0, dense_reward=True)

# Preset that selects an explicit game port (presumably so this instance does
# not clash with another one that is already running -- TODO confirm).
different_addresses_kwargs = dict(game_port=14000)

In [65]:
# Base game environment. Swap which preset line is commented out to switch
# between automated testing and manual play.
footsies_env = FootsiesEnv(
    game_path="../Footsies-Gym/Build/FOOTSIES.x86_64",
    **normal_testing_kwargs,
    # **human_testing_kwargs,
    **different_addresses_kwargs,
    render_mode="human",
    log_file=os.path.join(os.getcwd(), "out.log"),
    log_file_overwrite=True,
)

# Kept under its own name so its metrics can be read after training.
statistics = FootsiesStatistics(footsies_env)

# Whether to insert the frame-skipping wrapper into the stack.
FRAME_SKIP = True

# Wrapper stack, innermost first:
# statistics -> normalization -> (optional) frame skip -> flatten -> discretized actions.
inner_env = FootsiesNormalized(statistics)
if FRAME_SKIP:
    inner_env = FootsiesFrameSkipped(inner_env)

env = FootsiesActionCombinationsDiscretized(FlattenObservation(inner_env))

## Environment testing

In [177]:
# Shut the environment down through the outermost wrapper.
# NOTE(review): in file order this cell sits before the reset/step cells below,
# so a top-to-bottom run closes `env` before it is used. Presumably it is meant
# to be run manually at the end of a session -- confirm and consider moving it.
env.close()

In [66]:
# Reset the environment and show the initial observation and info dict.
obs, info = env.reset()
print("Observation:")
# pp.pprint() writes to stdout itself and returns None, so it must not be
# wrapped in print() -- doing so emits a stray "None" line after the value.
pp.pprint(obs)
print()
print("Info:")
pp.pprint(info)

{'guard': [1.0, 1.0], 'move': [0, 0], 'move_frame': [0.0, 0.0], 'position': [-0.45454545454545453, 0.45454545454545453]}
Observation:
array([ 1.        ,  1.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.45454547,  0.45454547])
None

Info:
{   'frame': 0,
    'p1_action': (False, False, False),
    'p2_action': (False, False, False)}
None


In [176]:
# Take one step with an action sampled from the action space and show
# everything the environment returns.
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
print("Observation:")
# pp.pprint() writes to stdout itself and returns None, so it must not be
# wrapped in print() -- doing so emits a stray "None" line after the value.
pp.pprint(obs)
print()
print("Reward:", reward)
print("Terminated:", terminated)
print("Truncated:", truncated)
print()
print("Info:")
pp.pprint(info)

{'guard': [0.0, 0.0], 'move': [0, 1], 'move_frame': [0.0, 0.0], 'position': [-0.8381816473874178, 0.24909024888818912]}
Observation:
array([ 0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.83818167,  0.24909025])
None

Reward: 0.0
Terminated: False
Truncated: False

Info:
{   'frame': 1002,
    'p1_action': (False, False, False),
    'p2_action': (True, False, False)}
None


In [9]:
# Play 5 episodes with actions sampled from the action space, printing every
# non-zero reward that comes back.
for e in range(5):
    print("Env reset")
    obs, info = env.reset()

    # An episode that was just reset always takes at least one step, so step
    # first and check for the end of the episode afterwards.
    while True:
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        if reward != 0.0:
            print(reward)
        if terminated or truncated:
            break

Env reset
0.3
0.7
Env reset
0.3
0.7
Env reset
0.3
0.7
Env reset
0.3
0.7
Env reset
0.3
0.7


In [10]:
# Call hard_reset() on the base FootsiesEnv directly (not on the wrapped `env`).
# Presumably this restarts the game itself rather than just the episode --
# confirm against footsies_gym.
footsies_env.hard_reset()

## Brisket testing

In [6]:
import torch
from agents.brisket.agent import FootsiesAgent as BrisketAgent
from agents.brisket.loggables import get_loggables as get_brisket_loggables
from agents.logger import TrainingLoggerWrapper

Reload the agent modules to pick up any changes made to their source since they were first imported.

In [62]:
# Re-import the agent modules so that edits to their source files take effect
# without restarting the kernel.
import agents.brisket.agent
import agents.logger
reload(agents.brisket.agent)
reload(agents.logger)

# reload() re-executes the module but does not rebind names that were brought
# in with `from ... import ...`; without the two lines below the notebook would
# keep instantiating the old classes.
BrisketAgent = agents.brisket.agent.FootsiesAgent
TrainingLoggerWrapper = agents.logger.TrainingLoggerWrapper

<module 'agents.logger' from '/home/martinho/projects/footsies-agents/agents/logger.py'>

In [7]:
# Instantiate the Brisket agent using the wrapped environment's observation and
# action spaces.
brisket = BrisketAgent(
    observation_space=env.observation_space,
    action_space=env.action_space,
    
    # For testing: uncomment to zero out the epsilon settings
    # (presumably disables exploration -- confirm against FootsiesAgent).
    # epsilon=0,
    # epsilon_decay_rate=0,
    # min_epsilon=0,
)

In [29]:
# Wrap the agent in the training logger, with periodic logging effectively
# disabled and 5000 test states requested.
#
# This cell rebinds `brisket` to the wrapper, so running it a second time would
# wrap the wrapper (and pass the wrapper to get_brisket_loggables). The guard
# makes a re-run a no-op; to rebuild the wrapper with different options, re-run
# the agent-creation cell first.
if not isinstance(brisket, TrainingLoggerWrapper):
    brisket = TrainingLoggerWrapper(
        brisket,
        float("+inf"),  # never log
        cummulative_reward=False,
        win_rate=False,
        test_states_number=5000,
        **get_brisket_loggables(brisket),
    )

In [31]:
# Run the agent's preprocess() step against the environment.
# NOTE(review): the execution counts show this cell ran after the model-loading
# cell (In [30] -> In [31]) even though it comes first in file order -- confirm
# which order is intended.
brisket.preprocess(env)

In [30]:
# Load the saved model named "brisket_sparse" into the agent using
# `load_agent_model` from `main`.
load_agent_model(brisket, "brisket_sparse")

Agent loaded


In [20]:
# Start a fresh episode; `obs` is the state fed to the Q-value query below.
obs, info = env.reset()

In [21]:
# Convert the observation to a float32 tensor of shape (1, N), i.e. a batch
# containing a single flattened observation.
obs_t = torch.tensor(obs, dtype=torch.float32).reshape(1, -1)

In [22]:
# Display the batched observation tensor.
obs_t

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000, -0.4545,  0.4545]])

In [25]:
# Q-value of every action id 0..7 in the current state, queried on the agent
# held by the logger wrapper.
# Assumes the action space has exactly 8 actions and that action_oh() builds
# the action encoding q_value() expects -- TODO confirm, and consider deriving
# the count from env.action_space instead of hard-coding it.
[
    brisket.agent.q_value(obs_t, brisket.agent.action_oh(action_id))
    for action_id in range(8)
]

[0.13985449075698853,
 0.07847494632005692,
 0.11667368561029434,
 0.04185425862669945,
 0.21139173209667206,
 0.13351216912269592,
 0.10901963710784912,
 0.15006758272647858]

### Test output of a single state

In [39]:
# Stack the first element of every stored test-state entry into one array
# (presumably the observation of each entry -- confirm against
# TrainingLoggerWrapper).
experience = np.array([entry[0] for entry in brisket.test_states])

In [44]:
# Count the rows whose first feature differs from 1.0.
(experience[:, 0] != 1.0).sum()

1018

In [74]:
from footsies_gym.utils import get_dict_obs_from_vector_obs
from footsies_gym.wrappers.normalization import FootsiesNormalized

In [80]:
import footsies_gym.wrappers.normalization
import footsies_gym.utils

In [79]:
# Reload the footsies_gym modules to pick up source edits without restarting
# the kernel. Names imported earlier with `from ... import ...` keep pointing at
# the pre-reload objects until they are rebound explicitly.
reload(footsies_gym.utils)
reload(footsies_gym.wrappers.normalization)

<module 'footsies_gym.wrappers.normalization' from '/home/martinho/projects/Footsies-Gym/footsies-gym/footsies_gym/wrappers/normalization.py'>

In [77]:
# Rebind the notebook-level name to the class looked up on the module object,
# so that it picks up the reloaded definition.
FootsiesNormalized = footsies_gym.wrappers.normalization.FootsiesNormalized

In [81]:
# Decode the first stored test state back into a dict observation.
# Indexes `experience` directly instead of going through the scratch variable
# `e`: in file order `e` is only assigned in a later cell and is also used as a
# loop variable earlier, so relying on it breaks a top-to-bottom run.
footsies_gym.utils.get_dict_obs_from_vector_obs(experience[0, :], normalized=True, flattened=True)

{'guard': [3.0, 3.0],
 'move': [0, 0],
 'move_frame': [0.0, 0.0],
 'position': [-2.0000000596046448, 2.0000000596046448]}

In [47]:
# Take the first stored state as a scratch vector for the inspection cells below.
e = experience[0, :]

In [60]:
# Position of the single 1.0 inside the 15-wide slice e[17:32]; .item() raises
# unless exactly one entry matches.
# Presumably this slice is one player's one-hot move encoding -- TODO confirm
# the vector layout.
np.argwhere(e[17:17+15] == 1.0).item()

0

In [64]:
# Inspect feature 34 of the state vector (presumably a player position --
# TODO confirm the vector layout).
e[34]

-0.4545454680919647

In [50]:
# Show everything from index 2 onward as a plain Python list.
list(e[2:])

[1.0, 1.0]

### Brisket training

In [10]:
# Call hard_reset() on the base FootsiesEnv before training (presumably to
# start from a freshly restarted game -- confirm against footsies_gym).
footsies_env.hard_reset()

In [9]:
# Train the agent on the wrapped environment using `train` from `main`.
# The third argument (10) matches the 10 iterations shown by the progress bar
# below -- presumably the number of episodes.
train(brisket, env, 10)

100%|██████████| 10/10 [00:07<00:00,  1.29it/s]


In [15]:
# Sum the `metric_special_moves_per_episode` series recorded by the
# FootsiesStatistics wrapper.
sum(statistics.metric_special_moves_per_episode)

264

In [16]:
# Call hard_reset() on the innermost environment reached through the wrapper
# stack (presumably the same object as `footsies_env` -- confirm).
env.unwrapped.hard_reset()

In [17]:
# Start a new episode after the hard reset.
obs, info = env.reset()

In [18]:
# Display the observation returned by the reset.
obs

array([ 0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
        0.        ,  0.        ,  1.        ,  1.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.04166667,  0.04166667,
       -2.        ,  2.        ])

In [19]:
# Step once with action 1 and keep only the observation.
# Indexing the returned tuple avoids assigning to `_`, which would shadow
# IPython's "last output" variable for the rest of the session.
obs = env.step(1)[0]

## ...