In [None]:
# Install everything the notebook needs, in two parts: the Kaggle
# environments package and the Google Research Football (GFootball) package.
# NOTE(review): both `git clone` commands fail if the target directory already
# exists, so re-running this cell in the same session will print clone errors.

# Kaggle environments: clone the repository and pip-install it from the checkout.
!git clone https://github.com/Kaggle/kaggle-environments.git
!cd kaggle-environments && pip install .

# GFootball environment: SDL2 gfx/ttf development packages from apt
# (presumably required by the football engine -- TODO confirm).
!apt-get update -y
!apt-get install -y libsdl2-gfx-dev libsdl2-ttf-dev

# The branch passed to `git clone -b` (v2.3) must match the version in the
# prebuilt library file name fetched by `wget` below (..._v2.3.so).
!git clone -b v2.3 https://github.com/google-research/football.git
!mkdir -p football/third_party/gfootball_engine/lib

# Download the prebuilt engine shared object into the directory created above,
# then pip-install gfootball with GFOOTBALL_USE_PREBUILT_SO=1 set
# (presumably so the install uses that file instead of compiling -- TODO confirm).
!wget https://storage.googleapis.com/gfootball/prebuilt_gameplayfootball_v2.3.so -O football/third_party/gfootball_engine/lib/prebuilt_gameplayfootball.so
!cd football && GFOOTBALL_USE_PREBUILT_SO=1 pip3 install .

In [None]:
# Install the rl-replicas package (presumably the provider of the
# `rl_replicas` modules this notebook imports -- TODO confirm).
# NOTE(review): the version is not pinned, so a newer release with different
# module paths could break the `rl_replicas.common.*` imports; pin the version
# this notebook was written against once it is known.
!pip install rl-replicas

In [None]:
import os

import gfootball
import gym
import torch
import torch.nn as nn

from rl_replicas.algorithms import TRPO
from rl_replicas.common.policy import Policy
from rl_replicas.common.value_function import ValueFunction
from rl_replicas.common.optimizers import ConjugateGradientOptimizer
from rl_replicas.common.torch_net import mlp

# ---------------------------------------------------------------------------
# Experiment settings. `epochs`, `steps_per_epoch` and `output_dir` are read
# again by the training cell.
# ---------------------------------------------------------------------------
algorithm = "trpo"
environment = "GFootball-11_vs_11_kaggle-simple115v2-v0"
output_dir = './trpo'
epochs = 5
# steps_per_epoch = 4000
steps_per_epoch = 250000
policy_architecture = [64, 64]
value_function_architecture = [64, 64]
value_function_learning_rate = 1e-3

# Create the environment and read the sizes both networks are built from.
env: gym.Env = gym.make(environment)
observation_size = env.observation_space.shape[0]
action_count = env.action_space.n

# Policy: an MLP with one output per discrete action, paired with a
# conjugate-gradient optimizer.
policy_layer_sizes = [observation_size, *policy_architecture, action_count]
policy_network = mlp(sizes = policy_layer_sizes)
policy_optimizer = ConjugateGradientOptimizer(params = policy_network.parameters())
policy: Policy = Policy(network = policy_network, optimizer = policy_optimizer)

# Value function: an MLP with a single output, trained with Adam.
value_function_layer_sizes = [observation_size, *value_function_architecture, 1]
value_function_network = mlp(sizes = value_function_layer_sizes)
value_function_optimizer = torch.optim.Adam(
    value_function_network.parameters(),
    lr = value_function_learning_rate,
)
value_function: ValueFunction = ValueFunction(
    network = value_function_network,
    optimizer = value_function_optimizer,
)

model: TRPO = TRPO(policy, value_function, env, seed = 0)

# Print a summary of the run configuration, one "label: value" line each.
run_summary = [
    ("an experiment to: {}", output_dir),
    ("algorithm:           {}", algorithm),
    ("epochs:              {}", epochs),
    ("steps_per_epoch:     {}", steps_per_epoch),
    ("environment:         {}", environment),
    ("value_function_learning_rate: {}", value_function_learning_rate),
]
for line_template, setting in run_summary:
    print(line_template.format(setting))

print("policy network:")
print(policy.network)

In [None]:
# Train the TRPO model with the settings chosen in the setup cell.
# `model_saving = True` presumably makes the trainer write the model under
# `output_dir` (agent.py later loads "./trpo/model.pt") -- TODO confirm.
training_options = {
    "epochs": epochs,
    "steps_per_epoch": steps_per_epoch,
    "output_dir": output_dir,
    "model_saving": True,
}
model.learn(**training_options)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# NOTE(review): `reward` is not defined in any cell visible in this notebook,
# so this cell raises NameError on a fresh kernel. It is expected to be a
# sequence with one average-return value per episode (per the axis labels
# below); define or load it in an earlier cell before running this one.

# 1-based episode index for the x-axis. Derived from the data instead of a
# hard-coded count, so x and y always have the same length.
episodes = range(1, len(reward) + 1)

fig = plt.figure(figsize=(30, 5))
ax1 = fig.add_subplot(121)

ax1.set_title("Total Episodes vs. Average Episode Return")
ax1.set_xlabel("Episodes")
ax1.set_ylabel("Average Return")

sns.lineplot(x=episodes, y=reward, ax=ax1)
plt.show()

In [None]:
%%writefile ./agent.py
import time

import torch
import gfootball
import gym
from gfootball.env.wrappers import Simple115StateWrapper

from rl_replicas.common.policy import Policy
from rl_replicas.common.torch_net import mlp

# Timestamp taken before the one-off setup so its duration can be reported.
start_setup_time: float = time.time()

# Checkpoint location and the network dimensions used to rebuild the policy.
# The layer sizes must match the network whose weights are in the checkpoint.
model_location = "./trpo/model.pt"
policy_network_architecture = [64, 64]
num_observation = 115
num_action = 19

# Load the checkpoint, rebuild the policy MLP and restore its weights.
model = torch.load(model_location)
layer_sizes = [num_observation, *policy_network_architecture, num_action]
policy_network = mlp(sizes = layer_sizes)
policy_network.load_state_dict(model["policy_state_dict"])

# The policy is only used for prediction here, so no optimizer is supplied.
policy: Policy = Policy(network = policy_network, optimizer = None)

# Number of times `agent` has been called so far.
current_step: int = 0

setup_seconds = time.time() - start_setup_time
print("Set up Time: {:<8.3g}".format(setup_seconds))

def agent(observation):
    """Choose one action for the current step using the loaded policy.

    Args:
        observation: Mapping whose ``"players_raw"`` entry holds the raw
            observations passed to ``Simple115StateWrapper.convert_observation``.

    Returns:
        A one-element list holding the predicted action as a Python scalar.
    """
    global current_step

    step_started_at: float = time.time()
    current_step += 1

    # Convert the raw observation into the simple115 representation and feed
    # it to the policy as a float tensor.
    features = Simple115StateWrapper.convert_observation(
        observation["players_raw"], fixed_positions = False
    )
    chosen_action = policy.predict(torch.from_numpy(features).float())

    # Progress message every 100 calls.
    if current_step % 100 == 0:
        print("Current Step: {}".format(current_step))

    # Report any step that takes 0.2 seconds or longer.
    elapsed = time.time() - step_started_at
    if elapsed >= 0.2:
        print("One Step Time exceeded 0.2 seconds: {:<8.3g}".format(elapsed))

    return [chosen_action.item()]

In [None]:
%%writefile submission.py
from kaggle_environments.envs.football.helpers import *

# @human_readable_agent wrapper modifies raw observations 
# provided by the environment:
# https://github.com/google-research/football/blob/master/gfootball/doc/observation.md#raw-observations
# into a form easier to work with by humans.
# Following modifications are applied:
# - Action, PlayerRole and GameMode enums are introduced.
# - 'sticky_actions' are turned into a set of active actions (Action enum)
#    see usage example below.
# - 'game_mode' is turned into GameMode enum.
# - 'designated' field is removed, as it always equals to 'active'
#    when a single player is controlled on the team.
# - 'left_team_roles'/'right_team_roles' are turned into PlayerRole enums.
# - Action enum is to be returned by the agent function.
@human_readable_agent
def agent(obs):
    """Rule-based agent: keep sprinting, shoot near the goal, else chase the ball.

    Args:
        obs: Observation dict as delivered through ``@human_readable_agent``.
            This function reads ``sticky_actions``, ``left_team``, ``active``,
            ``ball_owned_player``, ``ball_owned_team`` and ``ball``.

    Returns:
        The ``Action`` enum member to perform on this step.
    """
    # Make sure player is running.
    if Action.Sprint not in obs["sticky_actions"]:
        return Action.Sprint
    # We always control left team (observations and actions
    # are mirrored appropriately by the environment).
    controlled_player_pos = obs["left_team"][obs["active"]]
    # Does the player we control have the ball?
    if obs["ball_owned_player"] == obs["active"] and obs["ball_owned_team"] == 0:
        # Shot if we are 'close' to the goal (based on 'x' coordinate).
        if controlled_player_pos[0] > 0.5:
            return Action.Shot
        # Run towards the goal otherwise. This return must sit at the same
        # level as the `if` above; nested inside it, it was unreachable and
        # the function returned None whenever we had the ball far from goal.
        return Action.Right
    else:
        # Run towards the ball.
        if obs["ball"][0] > controlled_player_pos[0] + 0.05:
            return Action.Right
        if obs["ball"][0] < controlled_player_pos[0] - 0.05:
            return Action.Left
        if obs["ball"][1] > controlled_player_pos[1] + 0.05:
            return Action.Bottom
        if obs["ball"][1] < controlled_player_pos[1] - 0.05:
            return Action.Top
        # Try to take over the ball if close to the ball.
        return Action.Slide

In [None]:
from kaggle_environments import make

# Match settings: the "11_vs_11_kaggle" scenario, with video saving enabled
# and the notebook flag set.
match_configuration = {
    "save_video": True,
    "scenario_name": "11_vs_11_kaggle",
    "running_in_notebook": True,
}
env = make("football", configuration = match_configuration)

# Run ./agent.py (first entry) against "do_nothing" (second entry) and keep
# only the last element of what `run` returns.
steps = env.run(["./agent.py", "do_nothing"])
output = steps[-1]

# Report reward, status and info for each of the two entries.
left_result = output[0]
right_result = output[1]
print("Left player: reward = {}, status = {}, info = {}".format(left_result["reward"], left_result["status"], left_result["info"]))
print("Right player: reward = {}, status = {}, info = {}".format(right_result["reward"], right_result["status"], right_result["info"]))

env.render(mode = "human", width = 800, height = 600)