# Testing grounds

## Setup

### Imports

In [1]:
import os
import numpy as np
from main import load_agent_model, train
import gymnasium as gym
from gymnasium.wrappers.flatten_observation import FlattenObservation
from footsies_gym.envs.footsies import FootsiesEnv
from footsies_gym.wrappers.normalization import FootsiesNormalized
from footsies_gym.wrappers.action_comb_disc import FootsiesActionCombinationsDiscretized
from footsies_gym.wrappers.statistics import FootsiesStatistics
from footsies_gym.wrappers.frame_skip import FootsiesFrameSkipped
from importlib import reload
import pprint

# Shared pretty-printer (4-space indent) used by `report_env` to show observations and info.
pp = pprint.PrettyPrinter(indent=4)

### Environment

#### Footsies

In [2]:
# Keyword-argument presets meant to be splatted into the `FootsiesEnv` constructor.

# Options for human play-testing.
human_testing_kwargs = dict(
    fast_forward=False,
    vs_player=True,
)

# Options for regular (automated) testing.
normal_testing_kwargs = dict(
    frame_delay=0,
    dense_reward=True,
)

# Explicit ports for the game and opponent connections.
different_addresses_kwargs = dict(
    game_port=14000,
    opponent_port=14001,
)

In [3]:
# Toggle for the frame-skipping wrapper in the stack built below.
FRAME_SKIP = True

# The game log is written next to the notebook's working directory.
footsies_log_path = os.path.join(os.getcwd(), "out.log")

# Base environment; swap the commented preset in to play by hand.
footsies_env = FootsiesEnv(
    game_path="../Footsies-Gym/Build/FOOTSIES.x86_64",
    **normal_testing_kwargs,
    # **human_testing_kwargs,
    **different_addresses_kwargs,
    render_mode="human",
    log_file=footsies_log_path,
    log_file_overwrite=True,
)

# Statistics wrapper sits directly on the base environment and is kept in its
# own variable so its metrics can be read later.
statistics = FootsiesStatistics(footsies_env)

# Build the wrapper stack inside-out: normalization, optional frame skipping,
# observation flattening, then the discretized action combinations.
wrapped_env = FootsiesNormalized(statistics)
if FRAME_SKIP:
    wrapped_env = FootsiesFrameSkipped(wrapped_env)

env = FootsiesActionCombinationsDiscretized(FlattenObservation(wrapped_env))

#### CartPole

In [3]:
# NOTE(review): this rebinds `env`, replacing the Footsies wrapper stack assigned in the
# previous cell — run only one of the two environment cells.
env = gym.make("CartPole-v1")

## Environment testing

In [4]:
def report_env(obs, info, reward = None, terminated = None, truncated = None):
    """Print a readable summary of an environment transition.

    The observation is always shown first and the info last, both through the
    shared pretty-printer `pp`. Between them, `reward`, `terminated` and
    `truncated` are listed (in that order) for every one that is not `None`;
    if all three are `None` the middle section is omitted entirely.

    Args:
        obs: observation to pretty-print.
        info: info object to pretty-print.
        reward: optional reward to report.
        terminated: optional termination flag to report.
        truncated: optional truncation flag to report.
    """
    print("Observation:")
    pp.pprint(obs)

    # Collect only the step fields that were actually supplied.
    step_fields = (
        ("Reward:", reward),
        ("Terminated:", terminated),
        ("Truncated:", truncated),
    )
    supplied = [(label, value) for label, value in step_fields if value is not None]

    if supplied:
        # Blank line separating the observation from the step fields.
        print()
        for label, value in supplied:
            print(label, value)

    print()
    print("Info:")
    pp.pprint(info)

In [66]:
# Start a fresh episode and show the initial observation and info.
obs, info = env.reset()
report_env(obs, info)

{'guard': [1.0, 1.0], 'move': [0, 0], 'move_frame': [0.0, 0.0], 'position': [-0.45454545454545453, 0.45454545454545453]}
Observation:
array([ 1.        ,  1.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.45454547,  0.45454547])
None

Info:
{   'frame': 0,
    'p1_action': (False, False, False),
    'p2_action': (False, False, False)}
None


In [176]:
# Advance one step with a randomly sampled action and show the full transition.
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
report_env(obs, info, reward, terminated, truncated)

{'guard': [0.0, 0.0], 'move': [0, 1], 'move_frame': [0.0, 0.0], 'position': [-0.8381816473874178, 0.24909024888818912]}
Observation:
array([ 0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.83818167,  0.24909025])
None

Reward: 0.0
Terminated: False
Truncated: False

Info:
{   'frame': 1002,
    'p1_action': (False, False, False),
    'p2_action': (True, False, False)}
None


In [9]:
# Random-policy smoke test: play 5 episodes to completion and print every non-zero reward.
# The loop index is called `episode` rather than `e` so that no stray integer `e` is left
# in the notebook namespace; other cells use `e` for an experience vector.
for episode in range(5):
    print("Env reset")
    obs, info = env.reset()
    terminated, truncated = False, False

    # An episode is over once the environment reports termination or truncation.
    while not (terminated or truncated):
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        if reward != 0.0:
            print(reward)

Env reset
0.3
0.7
Env reset
0.3
0.7
Env reset
0.3
0.7
Env reset
0.3
0.7
Env reset
0.3
0.7


## Brisket testing

### Create agent

In [5]:
import torch
from agents.brisket.agent import FootsiesAgent as BrisketAgent
from agents.brisket.loggables import get_loggables as get_brisket_loggables
from agents.logger import TrainingLoggerWrapper

Run the next cell to reload the agent modules after their source files have been edited.

In [21]:
import agents.brisket.agent
import agents.logger
import agents.base

# Reload the base module first, so that modules reloaded afterwards pick up its fresh
# definitions (assumes agents.logger / agents.brisket.agent may import from agents.base
# — TODO confirm; if they do not, the order is irrelevant).
reload(agents.base)
reload(agents.logger)
reload(agents.brisket.agent)

# `reload` does not update names that were bound with `from ... import ...`,
# so point the notebook-level names at the reloaded objects explicitly.
BrisketAgent = agents.brisket.agent.FootsiesAgent
TrainingLoggerWrapper = agents.logger.TrainingLoggerWrapper

<module 'agents.base' from '/home/martinho/projects/footsies-agents/agents/base.py'>

In [6]:
# Which argument preset to build the agent with: "Footsies" or "CartPole".
BRISKET_PRESET = "Footsies"

# Preset-specific constructor arguments.
brisket_preset_kwargs = {
    "Footsies": dict(
        # For testing
        epsilon=0,
        epsilon_decay_rate=0,
        min_epsilon=0,
    ),
    "CartPole": dict(
        shallow=True,
        shallow_size=4,
        epsilon_decay_rate=0.001,
    ),
}

# Both presets take their spaces from whichever `env` is currently bound.
brisket = BrisketAgent(
    observation_space=env.observation_space,
    action_space=env.action_space,
    **brisket_preset_kwargs[BRISKET_PRESET],
)

In [29]:
# Wrap the agent for logging exactly once. The cell rebinds `brisket` to the wrapper,
# so without this guard re-running it would wrap the already-wrapped agent again.
if not isinstance(brisket, TrainingLoggerWrapper):
    brisket = TrainingLoggerWrapper(
        brisket,
        float("+inf"),  # never log
        cummulative_reward=False,
        win_rate=False,
        test_states_number=5000,
        # Loggables are derived from the still-unwrapped agent.
        **get_brisket_loggables(brisket),
    )

In [7]:
# Load the "brisket_frameskipped" model into the agent via `main.load_agent_model`.
load_agent_model(brisket, "brisket_frameskipped")

Agent loaded


### Test output of a single state

In [39]:
# Stack the first element of every entry in `brisket.test_states` into one array
# (presumably the observation of each stored transition — TODO confirm).
experience = np.array([t[0] for t in brisket.test_states])

In [44]:
# Count the rows whose first feature differs from 1.0.
(experience[:, 0] != 1.0).sum()

1018

In [74]:
from footsies_gym.utils import get_dict_obs_from_vector_obs
from footsies_gym.wrappers.normalization import FootsiesNormalized

In [80]:
import footsies_gym.wrappers.normalization
import footsies_gym.utils

In [79]:
# Re-import both footsies_gym modules so that edits to their source take effect in this kernel.
reload(footsies_gym.utils)
reload(footsies_gym.wrappers.normalization)

<module 'footsies_gym.wrappers.normalization' from '/home/martinho/projects/Footsies-Gym/footsies-gym/footsies_gym/wrappers/normalization.py'>

In [77]:
# Rebind the notebook-level name to the class in the module object; reloading the module
# does not update a name that was bound with `from ... import ...`.
FootsiesNormalized = footsies_gym.wrappers.normalization.FootsiesNormalized

In [81]:
# Convert the first stored experience vector into its dictionary form.
# Indexes `experience` directly instead of relying on `e`, which is only assigned in a
# later cell and would otherwise be whatever an earlier cell left in that name.
footsies_gym.utils.get_dict_obs_from_vector_obs(experience[0, :], normalized=True, flattened=True)

{'guard': [3.0, 3.0],
 'move': [0, 0],
 'move_frame': [0.0, 0.0],
 'position': [-2.0000000596046448, 2.0000000596046448]}

In [47]:
# First row of the experience array, kept under a short name for the inspection cells.
e = experience[0, :]

In [60]:
# Position of the 1.0 entry inside the 15-wide slice starting at offset 17
# (presumably a one-hot encoded move — TODO confirm offsets against the observation layout).
# `.item()` only succeeds when exactly one entry matches.
np.argwhere(e[17:17+15] == 1.0).item()

0

In [64]:
# Raw value at index 34 of the experience vector
# (presumably a position component — TODO confirm).
e[34]

-0.4545454680919647

In [50]:
# Everything from index 2 onwards, as a plain Python list.
list(e[2:])

[1.0, 1.0]

### Training

In [8]:
# Train the agent on `env` via `main.train`; the third argument is 1000
# (presumably the number of episodes — TODO confirm).
train(brisket, env, 1000)

Preprocessing... done!


100%|██████████| 1000/1000 [05:17<00:00,  3.15it/s]


In [9]:
# Close the environment.
env.close()

In [10]:
# Totals summed over the per-episode metrics of the statistics wrapper:
# first all special moves, then special moves from neutral.
print(sum(statistics.metric_special_moves_per_episode))
print(sum(statistics.metric_special_moves_from_neutral_per_episode))

882
0


In [44]:
# Begin a fresh episode. `prev_obs` starts empty; the stepping cell assigns it.
prev_obs = None
obs, info = env.reset()
report_env(obs, info)

Observation:
array([ 0.03174489, -0.02254564, -0.01391198,  0.02921619], dtype=float32)

Info:
{}


In [45]:
# Ask the agent for an action on the current observation and label it:
# 0 is displayed as "Left", any other value as "Right".
action = brisket.act(obs)
"Left" if action == 0 else "Right"

'Right'

In [47]:
# Remember the observation the action was chosen from, then advance one step.
prev_obs = obs
# Gymnasium's `step` returns (obs, reward, terminated, truncated, info) in that order;
# unpack with matching names so `terminated` / `truncated` hold the right flags.
obs, reward, terminated, truncated, info = env.step(action)
report_env(obs, info, reward, terminated, truncated)

Observation:
array([ 0.03474943,  0.3680826 , -0.01868412, -0.56468004], dtype=float32)

Reward: 1.0
Terminated: False
Truncated: False

Info:
{}


In [50]:
# Q-values the agent reports for the current observation, after converting it with the
# agent's own `_obs_to_torch` helper.
brisket.q_values(brisket._obs_to_torch(obs))

[-0.47500109672546387, -0.2977886199951172]

### Statistics

In [15]:
# Total special moves summed over the per-episode metric of the statistics wrapper.
print("Number of special moves:", sum(statistics.metric_special_moves_per_episode))

264

### Self-play test

In [8]:
# Extract a policy from the agent for `env`; the next cell passes it to `set_opponent`.
pi = brisket.extract_policy(env)

In [11]:
# Hand the extracted policy to the base Footsies environment as its opponent.
# NOTE(review): the recorded output of this call is `False` — confirm what the return value means.
footsies_env.set_opponent(pi)

False

In [12]:
# Short training run via `main.train` with third argument 10
# (presumably the number of episodes — TODO confirm), after the opponent was set above.
train(brisket, env, 10)

Preprocessing... done!


  0%|          | 0/10 [00:16<?, ?it/s]

Game closed manually, quitting training





## ...