In [None]:
!pip install gdown
import gdown
url = #Google drive link for saved model
output = 'qrdqn_3M_steps_scenario_11.zip'
gdown.download(url, output, quiet=False)

In [None]:
%%capture
%%bash
# System package dependencies (apt's stdout is discarded to keep the cell quiet).
apt-get -y update > /dev/null
apt-get -y install libsdl2-gfx-dev libsdl2-ttf-dev > /dev/null

# cloudpickle, pytorch, gym -- each pinned to an exact version.
pip3 install "cloudpickle==1.3.0"
pip3 install "torch==1.5.1"
pip3 install "gym==0.17.2"

# gfootball: clone the tagged release, download the matching prebuilt engine
# library into the source tree, then install from that tree.
GRF_VER=v2.8
GRF_PATH=football/third_party/gfootball_engine/lib
GRF_URL=https://storage.googleapis.com/gfootball/prebuilt_gameplayfootball_${GRF_VER}.so
git clone -b ${GRF_VER} https://github.com/google-research/football.git
mkdir -p ${GRF_PATH}
wget -q ${GRF_URL} -O ${GRF_PATH}/prebuilt_gameplayfootball.so
# GFOOTBALL_USE_PREBUILT_SO=1 presumably makes the install use the downloaded
# library instead of compiling the engine -- confirm against the gfootball docs.
cd football && GFOOTBALL_USE_PREBUILT_SO=1 pip3 install . && cd ..

# kaggle-environments: installed from the repository's default branch (not pinned).
git clone https://github.com/Kaggle/kaggle-environments.git
cd kaggle-environments && pip3 install . && cd ..

# stable-baselines3: installed from the repository's default branch (not pinned).
git clone https://github.com/DLR-RM/stable-baselines3.git
cd stable-baselines3 && pip3 install . && cd ..

# housekeeping: remove the three cloned source trees once they are installed.
rm -rf football kaggle-environments stable-baselines3

In [None]:
# sb3-contrib supplies the QRDQN algorithm imported in the next cell.
# No version is pinned here, so whichever release is current gets installed.
!pip install sb3-contrib

In [None]:
import os
from collections import OrderedDict
import base64
import pickle
import zlib
import gym
import numpy as np
import pandas as pd
import torch as th
from torch import nn, tensor
from collections import deque
from gym.spaces import Box, Discrete
from kaggle_environments import make
from kaggle_environments.envs.football.helpers import *
from gfootball.env import create_environment, observation_preprocessing
from sb3_contrib import QRDQN
from sb3_contrib.qrdqn import CnnPolicy
from stable_baselines3.common import results_plotter
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines3.common.vec_env import VecTransposeImage
from IPython.display import HTML
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
#Google-Football
class FootballGym(gym.Env):
    """Single-agent Gym environment around a Google Research Football scenario.

    The wrapped environment is created with the "raw" representation; every
    raw observation is converted with ``generate_smm`` and the most recent
    frames are concatenated along the channel axis to form the observation
    handed to the agent.

    Args:
        config: Optional dict. Recognised keys are ``"env_name"`` (scenario
            name, default ``"11_vs_11_easy_stochastic"``) and ``"rewards"``
            (reward spec string, default ``"scoring,checkpoints"``).
    """
    # Class-level overrides of the gym.Env attributes of the same name.
    spec = None
    metadata = None

    # Number of consecutive frames concatenated into one observation.
    STACK_DEPTH = 4

    def __init__(self, config=None):
        super(FootballGym, self).__init__()
        env_name = "11_vs_11_easy_stochastic"
        rewards = "scoring,checkpoints"
        if config is not None:
            env_name = config.get("env_name", env_name)
            rewards = config.get("rewards", rewards)
        self.env = create_environment(
            env_name=env_name,
            stacked=False,
            representation="raw",
            rewards=rewards,
            write_goal_dumps=False,
            write_full_episode_dumps=False,
            render=False,
            write_video=False,
            dump_frequency=1,
            logdir=".",
            extra_players=None,
            number_of_left_players_agent_controls=1,
            number_of_right_players_agent_controls=0)
        self.action_space = Discrete(19)
        # 16 channels = STACK_DEPTH frames concatenated on the last axis
        # (i.e. 4 channels per converted frame).
        self.observation_space = Box(low=0, high=255, shape=(72, 96, 16), dtype=np.uint8)
        self.reward_range = (-1, 1)
        self.obs_stack = deque([], maxlen=self.STACK_DEPTH)

    def transform_obs(self, raw_obs):
        """Convert a raw observation into the stacked array given to the agent.

        Only the first entry of ``raw_obs`` is used. When the frame stack is
        empty (first call after ``reset``) it is filled with copies of the
        current frame; otherwise the new frame pushes out the oldest one.

        Returns:
            The stacked frames concatenated along the last axis, with
            singleton dimensions squeezed out.
        """
        obs = raw_obs[0]
        obs = observation_preprocessing.generate_smm([obs])
        if not self.obs_stack:
            self.obs_stack.extend([obs] * self.STACK_DEPTH)
        else:
            self.obs_stack.append(obs)
        obs = np.concatenate(list(self.obs_stack), axis=-1)
        obs = np.squeeze(obs)
        return obs

    def reset(self):
        """Start a new episode and return its first stacked observation."""
        # Drop frames of the previous episode so they do not leak into this one.
        self.obs_stack.clear()
        obs = self.env.reset()
        obs = self.transform_obs(obs)
        return obs

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``.

        The wrapped environment receives the action as a one-element list;
        the reward is converted to a plain ``float``.
        """
        obs, reward, done, info = self.env.step([action])
        obs = self.transform_obs(obs)
        return obs, float(reward), done, info

    def close(self):
        """Close the wrapped football environment.

        Without this override closing the wrapper would leave the underlying
        environment open.
        """
        self.env.close()
    
# Validate the wrapper with Stable-Baselines3's environment checker, printing
# warnings (warn=True). The instance built here is not kept.
check_env(env=FootballGym(), warn=True)

In [None]:
def conv3x3(in_channels, out_channels, stride=1):
    """Create a 3x3 ``nn.Conv2d`` with padding 1 and a bias term.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        stride: Convolution stride (default 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    layer_options = dict(kernel_size=3, stride=stride, padding=1, bias=True)
    return nn.Conv2d(in_channels, out_channels, **layer_options)

class ResidualBlock(nn.Module):
    """Residual block: ReLU -> conv -> ReLU -> conv, added to a skip path.

    With the defaults used in this notebook (``stride=1`` and
    ``in_channels == out_channels``) the skip path is the identity and the
    block's only parameters are ``conv1`` and ``conv2``. For any other
    configuration the input is projected with a strided 1x1 convolution so
    that its shape matches the convolution output before the addition.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        stride: Spatial stride of the block (default 1).
    """
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        # Down-sample only once: a second strided convolution would shrink the
        # output again and it could no longer be added to the skip path.
        self.conv2 = conv3x3(out_channels, out_channels)
        if stride != 1 or in_channels != out_channels:
            # Projection so the skip path has the same shape as the conv output.
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
        else:
            # Parameter-free, so existing state dicts keep loading unchanged.
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return ``conv2(relu(conv1(relu(x)))) + shortcut(x)``."""
        residual = self.shortcut(x)
        out = self.relu(x)
        out = self.conv1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out += residual
        return out
    
class FootballCNN(BaseFeaturesExtractor):
    """Feature extractor: 3x3 conv, max-pool, two residual blocks, linear head.

    Args:
        observation_space: Channel-first image space; ``shape[0]`` is taken as
            the number of input channels.
        features_dim: Size of the feature vector produced by ``forward``.
    """
    def __init__(self, observation_space, features_dim=256):
        super().__init__(observation_space, features_dim)
        in_channels = observation_space.shape[0]  # channels x height x width
        self.cnn = nn.Sequential(
            conv3x3(in_channels=in_channels, out_channels=32),
            nn.MaxPool2d(kernel_size=3, stride=2, dilation=1, ceil_mode=False),
            ResidualBlock(in_channels=32, out_channels=32),
            ResidualBlock(in_channels=32, out_channels=32),
            nn.ReLU(),
            nn.Flatten(),
        )
        # Infer the flattened size from a dummy forward pass instead of
        # hard-coding it, so other image sizes work too. For a 16 x 72 x 96
        # input this evaluates to 32 * 35 * 47 = 52640.
        with th.no_grad():
            dummy_obs = th.zeros((1,) + tuple(observation_space.shape))
            n_flatten = self.cnn(dummy_obs).shape[1]
        self.linear = nn.Sequential(
            nn.Linear(in_features=n_flatten, out_features=features_dim, bias=True),
            nn.ReLU(),
        )

    def forward(self, obs):
        """Map a batch of observations to ``features_dim``-sized feature vectors."""
        return self.linear(self.cnn(obs))

In [None]:
# Scenario names addressable by a small integer index (0..14).
scenarios = dict(enumerate([
    "academy_empty_goal_close",                 # academy_difficulty = 0.6
    "academy_empty_goal",
    "academy_run_to_score",
    "academy_run_to_score_with_keeper",
    "academy_pass_and_shoot_with_keeper",
    "academy_run_pass_and_shoot_with_keeper",
    "academy_3_vs_1_with_keeper",
    "academy_corner",
    "academy_counterattack_easy",
    "academy_counterattack_hard",
    "academy_single_goal_versus_lazy",
    "11_vs_11_easy_stochastic",                 # difficulty: 0.05
    "11_vs_11_stochastic",                      # difficulty: 0.6
    "11_vs_11_hard_stochastic",                 # difficulty: 0.95
    "11_vs_11_kaggle",                          # difficulty: 1
]))

scenario_index = 11
# Indices 0-9 use the short episode length, every other index the long one.
scenario_length = 401 if 0 <= scenario_index <= 9 else 3001

scenario_name = scenarios[scenario_index]
rewards = "scoring,checkpoints"

In [None]:
def make_env(config=None, rank=0):
    """Return a zero-argument factory that builds a monitored ``FootballGym``.

    Args:
        config: Passed unchanged to ``FootballGym``.
        rank: Used (as a string) for the Monitor log path under the current
            directory.

    Returns:
        A callable that creates the environment wrapped in ``Monitor`` with
        early resets allowed.
    """
    monitor_path = os.path.join(".", str(rank))

    def _init():
        return Monitor(FootballGym(config), monitor_path, allow_early_resets=True)

    return _init

In [None]:
n_envs = 1
env_config = {"env_name": scenario_name, "rewards": rewards}

# The training env logs under rank 0 (the 0.monitor.csv copied later on).
train_env = DummyVecEnv([make_env(env_config, rank=0)])
# The evaluation env gets its own rank so that its Monitor does not write to
# the same log file as the training env.
eval_env = VecTransposeImage(DummyVecEnv([make_env(env_config, rank=1)]))

In [None]:
# Seed NumPy's global RNG, the training env and its action space.
RANDOM_SEED = 0
# The two disabled lines below use the names `torch` and `random`, neither of
# which the import cell binds (torch is imported as `th`).
#torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
#random.seed(RANDOM_SEED)
train_env.seed(RANDOM_SEED)
train_env.action_space.seed(RANDOM_SEED)

In [None]:
# Build a QRDQN agent that uses FootballCNN as feature extractor and 200 quantiles.
# Note: the following cell rebinds `model` to a checkpoint loaded from disk.
# NOTE(review): tau=1e-3 together with target_update_interval=2500 -- if tau is
# the soft-update coefficient applied at each target update, the target network
# would track the online network very slowly; confirm this is intended.
# NOTE(review): learning_starts (16) is smaller than batch_size (32) -- confirm.
policy_kwargs = dict(features_extractor_class=FootballCNN,
                     features_extractor_kwargs=dict(features_dim=256),
                     n_quantiles=200)
model = QRDQN(CnnPolicy, train_env, 
            policy_kwargs=policy_kwargs,learning_rate=1e-5, 
            buffer_size=10000, learning_starts=16, 
            batch_size=32, 
            tau= 1e-3, gamma=0.99, 
            train_freq=4, gradient_steps=1, 
            optimize_memory_usage=False, 
            target_update_interval=2500, 
            exploration_fraction=0.1, 
            exploration_initial_eps=1.0, 
            exploration_final_eps=0.01, 
            max_grad_norm=0.5,
            verbose=1,  
            tensorboard_log ="./tensorboard/",
            seed=0)

In [None]:
# Reset the training env; the returned observation is not used again in this cell.
obs=train_env.reset()
# Alternative checkpoint location (disabled).
#model = QRDQN.load("../input/qrdqn-resnet/qrdqn_1116372_steps.zip",verbose = 1)
# Resume from a checkpoint on disk, replacing the model built in the previous cell.
# NOTE(review): the file must already exist; presumably it was written by the
# checkpoint callback of an earlier run (same qrdqn_<steps>_steps naming) -- confirm.
model = QRDQN.load("./qrdqn_2500000_steps",env=train_env,verbose=1)
model.set_random_seed(seed=0)
# Point TensorBoard logging of the loaded model at ./tensorboard/.
model.tensorboard_log='./tensorboard/'
#model.learning_starts

In [None]:
%%time
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback

# Evaluate once every ten episode-lengths of training steps.
eval_freq = 10 * scenario_length
eval_callback = EvalCallback(
    eval_env=eval_env,
    n_eval_episodes=1,
    eval_freq=eval_freq,
    best_model_save_path='./models/',
    log_path='./logs/',
    deterministic=True,
    render=False,
    verbose=1,
)

# Write a checkpoint named with the 'qrdqn' prefix into the working directory
# every 250000 calls.
checkpoint_callback = CheckpointCallback(
    save_freq=250000,
    save_path='./',
    name_prefix='qrdqn',
)

# Training budget: one thousand episode-lengths.
total_timesteps = 1000 * scenario_length

In [None]:
# Continue training with both callbacks attached. reset_num_timesteps=False
# keeps the step counter of the loaded model instead of restarting it.
model.learn(total_timesteps=total_timesteps, callback=[eval_callback,checkpoint_callback], reset_num_timesteps=False)

In [None]:
# Persist the trained model.
# NOTE(review): later cells load "./qrdqn_3M_steps_scenario_11" and
# "./qrdqn_4M_steps_scenario_11", not this file name -- confirm where those
# files come from.
model.save("./qrdqn_4000500_steps")

In [None]:
# Pack the whole working directory into qrdqn.tar.gz (the archive itself is
# created inside that same directory) and show a download link for it.
!tar -zcvf qrdqn.tar.gz ./
from IPython.display import FileLink
FileLink(r'qrdqn.tar.gz')

In [None]:
# Load the saved model for evaluation, attached to the evaluation env.
# This file name matches the `output` name used by the download cell at the top.
model = QRDQN.load("./qrdqn_3M_steps_scenario_11",env=eval_env,verbose=1)

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy
# Evaluate over 5 episodes with deterministic action selection and report
# the mean and standard deviation of the episode reward.
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=5, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

In [None]:
# Same evaluation as the previous cell but with deterministic=False;
# `mean_reward` and `std_reward` are overwritten.
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=5, deterministic=False)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

In [None]:
# (Re)load the TensorBoard notebook extension and open it on the log directory
# that the model writes to.
%reload_ext tensorboard
%tensorboard --logdir "./tensorboard/"

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Read the evaluation log written under log_path='./logs/'; the context
# manager closes the archive once the arrays are extracted.
with np.load('./logs/evaluations.npz') as logs:
    # Evaluation timesteps expressed in units of episodes.
    episodes = logs['timesteps'] / scenario_length
    # First evaluation episode of every evaluation round.
    eval_scores = logs['results'][:, 0]

# A distinct name is used for the scores so that the `rewards` reward-spec
# string consumed by the environment cell is not overwritten.
scores = pd.DataFrame({"Episodes": episodes, "Scores": eval_scores})

fig, ax = plt.subplots()
fig.suptitle('Validation scores')
sns.lineplot(x="Episodes", y="Scores", data=scores, ax=ax)
# Save through the figure object and before plt.show(): saving the "current"
# figure after it has been shown writes an empty image.
fig.savefig('validation.png')
plt.show()

In [None]:
%%writefile submission.py
import base64
import pickle
import zlib
import numpy as np
import torch as th
from torch import nn, tensor
from collections import deque
from gfootball.env import observation_preprocessing

# The right-hand side below is a placeholder token. The notebook rewrites this
# file after training and substitutes the encoded weights (a bytes literal) for
# every occurrence of that token, so the token must not be repeated elsewhere.
state_dict = _STATE_DICT_

# Undo the export encoding: base64 -> zlib -> pickle.
# pickle.loads must only ever see data produced by this notebook's export
# cell; unpickling untrusted bytes can execute arbitrary code.
state_dict = pickle.loads(zlib.decompress(base64.b64decode(state_dict)))

def conv3x3(in_channels, out_channels, stride=1):
    """Create a 3x3 ``nn.Conv2d`` with padding 1 and a bias term.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        stride: Convolution stride (default 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    layer_options = dict(kernel_size=3, stride=stride, padding=1, bias=True)
    return nn.Conv2d(in_channels, out_channels, **layer_options)

class ResidualBlock(nn.Module):
    """Residual block: ReLU -> conv -> ReLU -> conv, added to a skip path.

    With the defaults used in this file (``stride=1`` and
    ``in_channels == out_channels``) the skip path is the identity and the
    block's only parameters are ``conv1`` and ``conv2``. For any other
    configuration the input is projected with a strided 1x1 convolution so
    that its shape matches the convolution output before the addition.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        stride: Spatial stride of the block (default 1).
    """
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        # Down-sample only once: a second strided convolution would shrink the
        # output again and it could no longer be added to the skip path.
        self.conv2 = conv3x3(out_channels, out_channels)
        if stride != 1 or in_channels != out_channels:
            # Projection so the skip path has the same shape as the conv output.
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
        else:
            # Parameter-free, so the stored weights keep loading unchanged.
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return ``conv2(relu(conv1(relu(x)))) + shortcut(x)``."""
        residual = self.shortcut(x)
        out = self.relu(x)
        out = self.conv1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out += residual
        return out
    
class PyTorchCnnPolicy(nn.Module):
    """Stand-alone inference network for the submission.

    Rebuilds the convolutional feature extractor plus a 19-way action head
    and loads the weights from the module-level ``state_dict``.
    """
    def __init__(self):
        super().__init__()
        self.cnn = nn.Sequential(
            conv3x3(in_channels=16, out_channels=32),
            nn.MaxPool2d(kernel_size=3, stride=2, dilation=1, ceil_mode=False),
            ResidualBlock(in_channels=32, out_channels=32),
            ResidualBlock(in_channels=32, out_channels=32),
            nn.ReLU(),
            nn.Flatten(),
        )
        self.linear = nn.Sequential(
            nn.Linear(in_features=52640, out_features=256, bias=True),
            nn.ReLU(),
        )
        # Kept as a one-element Sequential so the parameter names stay
        # "action_net.0.*". No activation follows the layer: clamping the
        # outputs at zero would make every action tie whenever all outputs
        # are negative, and argmax would then always return action 0.
        self.action_net = nn.Sequential(
            nn.Linear(in_features=256, out_features=19, bias=True),
        )
        self.out_activ = nn.Softmax(dim=1)
        self.load_state_dict(state_dict)

    def forward(self, x):
        """Return the index of the highest-scoring action as a Python int.

        Args:
            x: Array laid out as batch x height x width x channels with values
                on a 0-255 scale (it is divided by 255 and permuted to
                channel-first below).
        """
        # Inference only: no autograd graph is needed.
        with th.no_grad():
            x = tensor(x).float() / 255.0  # normalize
            x = x.permute(0, 3, 1, 2).contiguous()  # 1 x channels x height x width
            x = self.cnn(x)
            x = self.linear(x)
            x = self.action_net(x)
            x = self.out_activ(x)
        return int(x.argmax())
    
# Rolling window holding the four most recent converted frames.
obs_stack = deque([], maxlen=4)
def transform_obs(raw_obs):
    """Convert an incoming observation into the stacked network input.

    Uses the first entry of ``raw_obs['players_raw']``. On the first call the
    window is filled with four copies of the frame; afterwards each new frame
    replaces the oldest one.

    Returns:
        The frames in the window concatenated along the last axis.
    """
    frame = observation_preprocessing.generate_smm([raw_obs['players_raw'][0]])
    if obs_stack:
        obs_stack.append(frame)
    else:
        obs_stack.extend([frame] * 4)
    return np.concatenate(list(obs_stack), axis=-1)

# Build the network once at import time: float weights, on the CPU, eval mode.
policy = PyTorchCnnPolicy().float().to('cpu').eval()

def agent(raw_obs):
    """Entry point: return the chosen action as a one-element list."""
    return [policy(transform_obs(raw_obs))]

In [None]:
# Load the trained model and pull the policy weights onto the CPU.
model = QRDQN.load("./qrdqn_4M_steps_scenario_11")
#model = QRDQN.load("./models/best_model")
_state_dict = model.policy.to('cpu').state_dict()

# List every parameter with its shape.
for param_name, param_value in _state_dict.items():
    print(param_name, "\t", param_value.size())

# Parameters copied from the feature extractor; the submission network uses
# the same names without the 'features_extractor.' prefix.
# NOTE(review): the source key names used here ('features_extractor.*' and
# 'action_net.*' at the top level) should be checked against the list printed
# above -- confirm that a QRDQN policy actually exposes them.
extractor_params = (
    "cnn.0.weight", "cnn.0.bias",
    "cnn.2.conv1.weight", "cnn.2.conv1.bias",
    "cnn.2.conv2.weight", "cnn.2.conv2.bias",
    "cnn.3.conv1.weight", "cnn.3.conv1.bias",
    "cnn.3.conv2.weight", "cnn.3.conv2.bias",
    "linear.0.weight", "linear.0.bias",
)
state_dict = {name: _state_dict['features_extractor.' + name] for name in extractor_params}
# The action head sits inside a Sequential in the submission, hence the ".0.".
state_dict["action_net.0.weight"] = _state_dict['action_net.weight']
state_dict["action_net.0.bias"] = _state_dict['action_net.bias']

# Encode as pickle -> zlib -> base64 and splice the resulting bytes literal
# into submission.py in place of the placeholder token.
state_dict = base64.b64encode(zlib.compress(pickle.dumps(state_dict)))
with open('submission.py', 'r') as file:
    src = file.read()
src = src.replace("_STATE_DICT_", str(state_dict))
with open('submission.py', 'w') as file:
    file.write(src)

In [None]:
from shutil import copyfile
# TODO: set this to the Google Drive folder the artefacts should be copied to.
gdrive_dir = ""
if gdrive_dir:
    for artefact in ("submission.py", "0.monitor.csv"):
        # copyfile needs a full destination file path, not just a directory.
        copyfile(artefact, os.path.join(gdrive_dir, artefact))
else:
    print("No Google Drive folder configured; skipping backup of submission.py and 0.monitor.csv")

In [None]:
from kaggle_environments import make
# Play one match in the Kaggle football environment and render it.
env = make("football", configuration={"save_video": True, "scenario_name": scenario_name, "running_in_notebook": True}, debug=True)
agent = "submission.py"
# TODO: point these at the two agent files to pit against each other. Both
# default to the submission written above so that the cell is runnable.
left_agent = agent
right_agent = agent
# env.run(...)[-1] is the last entry of the run, with one item per player.
output = env.run([left_agent, right_agent])[-1]
print('Left player: action = %s, reward = %s, status = %s, info = %s' % (output[0]["action"], output[0]['reward'], output[0]['status'], output[0]['info']))
print('Right player: action = %s, reward = %s, status = %s, info = %s' % (output[1]["action"], output[1]['reward'], output[1]['status'], output[1]['info']))
env.render(mode="human", width=800, height=600)