In [None]:
from IPython.display import clear_output
import sys

In [None]:
# GFootball environment.
# Install the Kaggle environments package and the SDL2 development libraries
# before building gfootball from source below.
!pip install kaggle_environments
!apt-get update -y
!apt-get install -y libsdl2-gfx-dev libsdl2-ttf-dev
# Check out the v2.3 branch of google-research/football and place the prebuilt
# v2.3 engine shared object inside the checkout's engine lib directory.
!git clone -b v2.3 https://github.com/google-research/football.git
!mkdir -p football/third_party/gfootball_engine/lib
!wget https://storage.googleapis.com/gfootball/prebuilt_gameplayfootball_v2.3.so -O football/third_party/gfootball_engine/lib/prebuilt_gameplayfootball.so
# GFOOTBALL_USE_PREBUILT_SO=1 is set for the install; presumably this makes the
# build use the downloaded .so instead of compiling the engine (confirm in gfootball's setup).
!cd football && GFOOTBALL_USE_PREBUILT_SO=1 pip3 install .

# Some helper code
!git clone https://github.com/garethjns/kaggle-football.git
# NOTE(review): only pfrl is version-pinned; dgl and gfootball are installed unpinned.
!pip install pfrl==0.1.0
!pip install dgl 
!pip install gfootball 
# Hide the installer output produced by the commands above.
clear_output()

In [None]:
# Gym id used for the base GFootball environment.
env_name = "GFootballBase-v0"
# Make the kaggle-football helper repo importable (presumably the checkout made
# by the setup cell). Guarded so that re-running this cell does not keep
# appending duplicate entries to sys.path.
if "/kaggle/working/kaggle-football/" not in sys.path:
    sys.path.append("/kaggle/working/kaggle-football/")

In [None]:
import numpy as np 
import pandas as pd 

import os
import sys

import matplotlib.pyplot as plt
import pprint

import glob 

from tqdm import tqdm

import networkx as nx

import dgl
# PyTorch
import pfrl
from pfrl.agents import CategoricalDoubleDQN,A2C
from pfrl import experiments,explorers,replay_buffers,utils
from pfrl import nn as pnn
from pfrl.wrappers import atari_wrappers
from pfrl.q_functions import DistributionalDuelingDQN

import torch
from torch import nn

# TF
import tensorflow as tf

# Env
import gym
import gfootball
import gfootball.env as football_env
from gfootball.env import observation_preprocessing

# dgl.backend.load_backend('tensorflow')
# Select PyTorch as DGL's tensor backend; the TensorFlow alternative is kept
# commented out above.
dgl.backend.load_backend('pytorch')

In [None]:
from gfootball.env.config import Config
import gym

# Register gfootball's FootballEnv under a custom Gym id with episodes capped
# at 10000 steps.
# NOTE(review): env_name is also assigned, with the same value, in an earlier cell.
env_name = "GFootballBase-v0"
gym.envs.register(id=env_name,
                  entry_point="gfootball.env.football_env:FootballEnv",
                  max_episode_steps=10000)

# Instantiate the registered environment with a default-constructed Config.
base_env = gym.make(env_name, config=Config())

In [None]:
# Take the first entry of what reset() returns and rebuild it as a dict whose
# items are sorted by key, so the key order is deterministic.
ob = dict(sorted(base_env.reset()[0].items()))
ob.keys()

In [None]:
# Display the full sorted observation dict for inspection.
ob

In [None]:
# Considerations: the game mode affects the situation most, the score affects how desperate a play should be, and the number of steps left also affects how desperate it should be.
# Relevant observation keys: game_mode, score, steps_left

In [None]:
# Observation keys, in the sorted order of `ob`, held as a NumPy array.
keys = np.array(list(ob))


def get_key_index(attribute):
    """Return the positions in `keys` of every key that contains `attribute`."""
    matches = []
    for position, key in enumerate(keys):
        if attribute in key:
            matches.append(position)
    return matches

In [None]:
# Show the observation keys again for reference.
ob.keys()

In [None]:
# Static edge list shared by every observation graph.
# graph_it lays node features out as left team (rows 0-10), ball (row 11),
# right team (rows 12-22) and measures distances relative to row 11, so the hub
# of the star must be node 11 and the sources are every other node in ascending
# order. The previous hub of 12 was off by one: it pointed at the first
# right-team player and made the ball a source.
edges = np.array([(x, 11) for x in np.concatenate((np.arange(11), np.arange(12, 23)))])
src, dst = edges[:, 0], edges[:, 1]
#test case
# G = dgl.graph((src, dst))

In [None]:
# Tensorflow
# left attributes --> ball attribute --> right attributes
def graph_it(obs):
    """Build a DGL graph with TensorFlow-tensor features from a raw observation.

    NOTE(review): a later cell in this notebook defines ``graph_it`` again with
    torch tensors, and the import cell selects the 'pytorch' DGL backend, so
    under Run All this definition is shadowed.

    Args:
        obs: sequence whose first element is the observation dict. Only that
            first element is used. It is read by key: 'left_team', 'ball',
            'right_team', the three '*direction' entries, and every key that
            contains one of the team attribute names below.

    Returns:
        A graph built from the module-level ``src``/``dst`` edge arrays with
        node data 'positions', 'direction', 'team_tired_factor', 'team_roles',
        'yellow_card', 'team_active' and edge data 'distance'. Node rows are
        ordered left team, ball, right team; the ball is row 11.
    """
    ball_row = 11  # row of the ball once the left-team rows precede it
    obs = dict(sorted(obs[0].items()))
    G = dgl.graph((src, dst))

    # Values are looked up by key instead of by position in
    # np.array(list(obs.values())): the values are ragged, which recent NumPy
    # versions refuse to pack into one array, and positional indices break as
    # soon as the key set changes.
    positions = np.concatenate((
        np.asarray(obs['left_team']).reshape(-1, 2),
        np.asarray(obs['ball'])[:2].reshape(-1, 2),  # keep only the first two ball coordinates
        np.asarray(obs['right_team']).reshape(-1, 2),
    ))
    G.ndata['positions'] = tf.convert_to_tensor(positions)

    # Distance from every non-ball node to the ball, in ascending node order.
    # Computed once here; it does not depend on the attribute loop below.
    # NOTE(review): assumes the edge list enumerates the non-ball nodes in that
    # same order - verify against src/dst.
    distances = np.linalg.norm(
        np.delete(positions, ball_row, axis=0) - positions[ball_row, :], axis=1)
    G.edata['distance'] = tf.convert_to_tensor(distances)

    for attribute in ['team_tired_factor', 'team_roles', 'yellow_card', 'team_active']:
        # obs is key-sorted, so the 'left_*' entry precedes the 'right_*' one.
        per_team = [np.asarray(value) for key, value in obs.items() if attribute in key]
        values = np.concatenate(per_team, axis=-1)
        if values.dtype == np.bool_:
            # np.insert casts the inserted value to the array dtype, so a bool
            # array would store the -1 placeholder as True.
            values = values.astype(np.int64)
        # The ball has no team attribute; its row holds the placeholder -1.
        G.ndata[attribute] = tf.convert_to_tensor(np.insert(values, ball_row, -1))

    directions = np.concatenate((
        np.asarray(obs['left_team_direction']),
        np.asarray(obs['ball_direction'])[:2].reshape(1, 2),
        np.asarray(obs['right_team_direction']),
    ))
    G.ndata['direction'] = tf.convert_to_tensor(directions)
    return G

In [None]:
# Pytorch 
# left attributes --> ball attribute --> right attributes
def graph_it(obs):
    """Build a DGL graph with torch-tensor features from a raw observation.

    Args:
        obs: sequence whose first element is the observation dict. Only that
            first element is used. It is read by key: 'left_team', 'ball',
            'right_team', the three '*direction' entries, and every key that
            contains one of the team attribute names below.

    Returns:
        A graph built from the module-level ``src``/``dst`` edge arrays with
        node data 'positions', 'direction', 'team_tired_factor', 'team_roles',
        'yellow_card', 'team_active' and edge data 'distance'. Node rows are
        ordered left team, ball, right team; the ball is row 11.
    """
    ball_row = 11  # row of the ball once the left-team rows precede it
    obs = dict(sorted(obs[0].items()))
    G = dgl.graph((src, dst))

    # Values are looked up by key instead of by position in
    # np.array(list(obs.values())): the values are ragged, which recent NumPy
    # versions refuse to pack into one array, and positional indices break as
    # soon as the key set changes.
    positions = np.concatenate((
        np.asarray(obs['left_team']).reshape(-1, 2),
        np.asarray(obs['ball'])[:2].reshape(-1, 2),  # keep only the first two ball coordinates
        np.asarray(obs['right_team']).reshape(-1, 2),
    ))
    G.ndata['positions'] = torch.tensor(positions)

    # Distance from every non-ball node to the ball, in ascending node order.
    # Computed once here; it does not depend on the attribute loop below.
    # NOTE(review): assumes the edge list enumerates the non-ball nodes in that
    # same order - verify against src/dst.
    distances = np.linalg.norm(
        np.delete(positions, ball_row, axis=0) - positions[ball_row, :], axis=1)
    G.edata['distance'] = torch.tensor(distances)

    for attribute in ['team_tired_factor', 'team_roles', 'yellow_card', 'team_active']:
        # obs is key-sorted, so the 'left_*' entry precedes the 'right_*' one.
        per_team = [np.asarray(value) for key, value in obs.items() if attribute in key]
        values = np.concatenate(per_team, axis=-1)
        if values.dtype == np.bool_:
            # np.insert casts the inserted value to the array dtype, so a bool
            # array would store the -1 placeholder as True.
            values = values.astype(np.int64)
        # The ball has no team attribute; its row holds the placeholder -1.
        G.ndata[attribute] = torch.tensor(np.insert(values, ball_row, -1))

    directions = np.concatenate((
        np.asarray(obs['left_team_direction']),
        np.asarray(obs['ball_direction'])[:2].reshape(1, 2),
        np.asarray(obs['right_team_direction']),
    ))
    G.ndata['direction'] = torch.tensor(directions)
    return G

# graph_it([ob])

In [None]:
# Seeds that make_env passes to env.seed(): one for the training environment
# and a different one for the test environment.
train_seed = 42
test_seed  = 43
class TransEnv(gym.ObservationWrapper):
    """Observation wrapper that converts each observation with ``graph_it``."""

    def __init__(self, env):
        super().__init__(env)

    def observation(self, obs):
        """Return the graph that ``graph_it`` builds from ``obs``."""
        graph = graph_it(obs)
        return graph

def make_env(test):
    """Create the seeded 11-vs-11 GFootball environment wrapped in ``TransEnv``.

    Args:
        test: truthy to build the evaluation environment (seeded with
            ``test_seed`` and wrapped in ``pfrl.wrappers.RandomizeAction``),
            falsy to build the training environment (seeded with
            ``train_seed``).

    Returns:
        The wrapped, seeded environment.
    """
    # Train and test environments get different random seeds.
    if test:
        env_seed = test_seed
    else:
        env_seed = train_seed

    # Alternative that was tried: gym.make('GFootball-11_vs_11_kaggle-SMM-v0')
    football_kwargs = dict(
        env_name='11_vs_11_easy_stochastic',  # easy mode
        stacked=False,
        representation='raw',  # raw observations, which graph_it consumes
        rewards='scoring,checkpoints',
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=False,
        write_video=False,
        dump_frequency=1,
        logdir='./',
        extra_players=None,
        number_of_left_players_agent_controls=1,
        number_of_right_players_agent_controls=0,
    )
    env = TransEnv(football_env.create_environment(**football_kwargs))

    env.seed(int(env_seed))
    if not test:
        return env
    # The evaluation env is wrapped in RandomizeAction with random_fraction=0.0;
    # presumably that fraction means no action is ever randomized - confirm
    # against the pfrl documentation.
    return pfrl.wrappers.RandomizeAction(env, random_fraction=0.0)

# Build the training environment: test=False selects train_seed and skips the
# RandomizeAction wrapper.
env = make_env(test=False)

In [None]:
# Reset the training environment and display the initial wrapped observation.
env.reset()

In [None]:
import dgl.nn as dglnn

In [None]:
class Classifier(nn.Module):
    """Two GraphConv layers, a mean-over-nodes readout and a linear output layer.

    Args:
        in_dim: size of each input node feature vector.
        hidden_dim: output size of both graph-convolution layers.
        n_classes: number of outputs of the final linear layer.

    NOTE(review): DGL's GraphConv rejects graphs containing zero-in-degree
    nodes unless told otherwise; graphs passed to this model may need
    self-loops - confirm against the graphs actually built in this notebook.
    """

    def __init__(self, in_dim, hidden_dim, n_classes):
        super(Classifier, self).__init__()
        self.conv1 = dglnn.GraphConv(in_dim, hidden_dim)
        self.conv2 = dglnn.GraphConv(hidden_dim, hidden_dim)
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g, feat):
        """Return one row of ``n_classes`` outputs per graph in ``g``.

        Args:
            g: DGL graph (or batch of graphs) to convolve over.
            feat: node feature tensor fed to the first graph convolution.
        """
        # Apply graph convolution and activation.
        # The first layer must consume the caller-supplied `feat`; it previously
        # read the not-yet-assigned local `h`, which raises UnboundLocalError.
        # nn.functional.relu is used because no `F` alias is imported in the
        # visible part of this file.
        h = nn.functional.relu(self.conv1(g, feat))
        h = nn.functional.relu(self.conv2(g, h))
        # local_scope keeps the temporary 'h' node field from leaking onto g.
        with g.local_scope():
            g.ndata['h'] = h
            # Calculate graph representation by average readout.
            hg = dgl.mean_nodes(g, 'h')
            return self.classify(hg)

In [None]:
# Instantiate the graph classifier (in_dim=23, hidden_dim=22, n_classes=19) and
# an Adam optimizer over its parameters.
# NOTE(review): 19 presumably matches env.action_space.n, and in_dim must equal
# the width of the node features passed to forward - confirm both.
model = Classifier(23, 22, 19)
opt = torch.optim.Adam(model.parameters())
# Disabled sketch of a supervised training loop over batched graphs:
# for epoch in range(20):
#     for batched_graph, labels in dataloader:
#         feats = batched_graph.ndata['feats']
#         logits = model(batched_graph, feats)
#         loss = F.cross_entropy(logits, labels)
#         opt.zero_grad()
#         loss.backward()
#         opt.step()

In [None]:
# Wrap the model and optimizer in a pfrl A2C agent with discount 0.99 and
# num_processes=4.
# NOTE(review): training below passes a single env, and Classifier.forward takes
# (g, feat) - confirm both match what pfrl's A2C expects of its model and envs.
agent = A2C(model,optimizer = opt,gamma = 0.99,num_processes=4)

In [None]:
# Train the agent on the training environment for num_steps steps, writing
# outputs to the current directory.
# checkpoint_freq equals num_steps; presumably this means a checkpoint is only
# written at the end of training - confirm against pfrl's train_agent.
num_steps = 100000
experiments.train_agent(
    agent=agent,
    env=env,
    steps=num_steps,
    outdir="./",
    checkpoint_freq=100000)

In [None]:
# Display the `n` attribute of the environment's action space (presumably the
# number of discrete actions).
env.action_space.n