In [1]:
# Notebook for training and testing a Player in Pokemon Showdown using Monte Carlo control with function approximation in a stochastic environment

In [2]:
"""
Comparative Table: https://prnt.sc/1ytqrzm
------
neptune
Action space: 4 moves + 5 switches
poke-env installed in C:\\Users\\-\\anaconda3\\envs\\poke_env\\lib\\site-packages
"""


'\nComparative Table: https://prnt.sc/1ytqrzm\n------\nneptune\nAction space: 4 moves + 5 switches\npoke-env installed in C:\\Users\\-\\anaconda3\\envs\\poke_env\\lib\\site-packages\n'

In [3]:
# imports

import asyncio
import json
import os
import matplotlib
import neptune.new as neptune
import nest_asyncio
import numpy as np
import pandas as pd
import time

from collections import defaultdict
from datetime import date
from itertools import product
from matplotlib import pyplot
from poke_env.environment.abstract_battle import AbstractBattle
from poke_env.player.battle_order import ForfeitBattleOrder
from poke_env.player.player import Player
from poke_env.player.random_player import RandomPlayer
from scipy.interpolate import griddata

from poke_env.data import GenData

import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.append('../')



In [4]:
from src.playerMC_FA import Player as PlayerMC_FA

In [5]:
# global configs

# Feature flags read by the cells below.
debug = True               # print a training summary after each configuration
save_to_json_file = False  # persist the training results to a JSON file
use_validation = True      # run the validation cells
use_neptune = False        # log the run to Neptune

# Patch asyncio via nest_asyncio before the battle cells call
# loop.run_until_complete(...) on the event loop returned by asyncio.get_event_loop().
nest_asyncio.apply()
# Fixed seed for NumPy's global random generator.
np.random.seed(0)

if use_neptune:
    # Credentials are taken from the environment so they do not have to be
    # pasted into the notebook; the original placeholders remain the fallback
    # values, so behaviour is unchanged when the variables are not set.
    run = neptune.init(
        name='MCControlFAStochastic',
        tags=['Function Approximation', 'MC Control', 'Stochastic', 'Train'],
        project=os.environ.get('NEPTUNE_PROJECT', 'your_project'),
        api_token=os.environ.get('NEPTUNE_API_TOKEN', 'your_api_token'),
    )

In [6]:
from py.variables.variables import our_team_sto as OUR_TEAM
from py.variables.variables import op_team_sto as OP_TEAM

In [7]:
from py.variables.utils import name_to_id_sto as name_to_id


In [8]:
# Size of the state part of a feature vector.
N_STATE_COMPONENTS = 12

# A feature vector is the state components followed by one slot for the action.
N_FEATURES = 1 + N_STATE_COMPONENTS

# Action space: 4 moves followed by 5 switches.
N_OUR_MOVE_ACTIONS = 4
N_OUR_SWITCH_ACTIONS = 5
N_OUR_ACTIONS = sum((N_OUR_MOVE_ACTIONS, N_OUR_SWITCH_ACTIONS))

# Every action index, 0 .. N_OUR_ACTIONS - 1, as a NumPy array.
ALL_OUR_ACTIONS = np.arange(N_OUR_ACTIONS)



In [9]:
from py.agents.MaxDamagePlayer import MaxDamagePlayer as MaxDamagePlayer

In [10]:
from py.agents.MonteCarloPlayerFA import MCPlayer_FA as MCPlayerFA


In [11]:
# global parameters

# candidate values for the number of battles (episodes) per configuration
n_battles_array = [10000]
# candidate values for N0 in the MC exploration schedule
# epsilon(t) = N0 / (N0 + N(S(t)))
n0_array = [0.0001, 0.001, 0.01]

# candidate values for the discount factor gamma
gamma_array = [0.75]


# One dict per point of the cartesian product of the three value lists,
# keyed 'n_battles', 'n0', 'gamma' (in that order).
list_of_params = [
    dict(zip(('n_battles', 'n0', 'gamma'), combination))
    for combination in product(n_battles_array, n0_array, gamma_array)
]


In [12]:
from py.variables.utils import save_to_json_file as save_to_json_file


In [13]:
# main (let's battle!)

# training
async def do_battle_training():
    for params in list_of_params:
        start = time.time()
        params['player'] = MCPlayerFA(battle_format="gen8ou", team=OUR_TEAM, n0=params['n0'], gamma=params['gamma'])
        params['opponent'] = MaxDamagePlayer(battle_format="gen8ou", team=OP_TEAM)
        await params['player'].battle_against(opponent=params['opponent'], n_battles=params['n_battles'])
        if debug:
            print("training: num battles (episodes)=%d, N0=%.4f, gamma=%.2f, wins=%d, winning %%=%.2f, total time=%s sec" %
                  (
                      params['n_battles'],
                      round(params['n0'], 4),
                      round(params['gamma'], 2),
                      params['player'].n_won_battles,
                      round((params['player'].n_won_battles / params['n_battles']) * 100, 2),
                      round(time.time() - start, 2)
                  ))

        # save w to json file
        if save_to_json_file:
            save_to_json_file("MC_Control_FA", params)
            
        


# Schedule the training coroutine on the event loop and block until it is done.
loop = asyncio.get_event_loop()
training_task = loop.create_task(do_battle_training())
loop.run_until_complete(training_task)


KeyboardInterrupt: 

In [14]:
from py.agents.ValidationPlayer import ValidationPlayerMCFA as ValidationPlayer

In [None]:
# validation  - vs MaxPlayer

async def do_battle_validation_params(params, opponent_cls=None):
    """Validate every trained configuration and print its win rate.

    Args:
        params: iterable of dicts with the keys ``'n_battles'``, ``'n0'`` and
            ``'gamma'``, plus ``'player'`` (the trained player, whose ``w`` and
            ``N`` attributes are passed to the ValidationPlayer).
        opponent_cls: class used to build the opponent; it is called with
            ``battle_format`` and ``team``. Defaults to ``MaxDamagePlayer``.

    Configurations without a ``'player'`` entry (for example because training
    was interrupted before reaching them) and configurations whose validation
    share ``int(n_battles / 3)`` is zero are reported and skipped instead of
    raising ``KeyError`` / ``ZeroDivisionError``.
    """
    for parm in params:
        trained_player = parm.get('player')
        if trained_player is None:
            print("validation: skipping N0=%s, gamma=%s (no trained player found)" %
                  (parm.get('n0'), parm.get('gamma')))
            continue

        # learned weight vector and N, both taken from the trained player
        w = trained_player.w
        N = trained_player.N
        # params: n_battles, n0, gamma
        n_battles = parm['n_battles']
        n0 = parm['n0']
        gamma = parm['gamma']

        # validation plays 1/3 of the training battles using the learned weights
        n_battles_validation = int(n_battles / 3)
        if n_battles_validation < 1:
            print("validation: skipping N0=%s, gamma=%s (n_battles=%s leaves no validation battles)" %
                  (n0, gamma, n_battles))
            continue

        # Resolved lazily so that the default is only looked up when an opponent is needed.
        opponent_factory = MaxDamagePlayer if opponent_cls is None else opponent_cls

        start = time.time()
        validation_player = ValidationPlayer(battle_format="gen8ou", team=OUR_TEAM, w=w, N=N, n0=n0)
        opponent = opponent_factory(battle_format="gen8ou", team=OP_TEAM)
        await validation_player.battle_against(opponent=opponent, n_battles=n_battles_validation)
        print("validation: num battles (episodes)=%d, N0=%.4f, gamma=%.2f, wins=%d, winning %%=%.2f, total time=%s sec" %
              (
                  n_battles_validation,
                  n0,
                  gamma,
                  validation_player.n_won_battles,
                  round((validation_player.n_won_battles / n_battles_validation) * 100, 2),
                  round(time.time() - start, 2)
              ))


# Run the validation coroutine to completion, but only when validation is enabled.
if use_validation:
    loop = asyncio.get_event_loop()
    validation_task = loop.create_task(do_battle_validation_params(list_of_params))
    loop.run_until_complete(validation_task)


In [None]:
# validation - vs RandomPlayer

async def do_battle_validation_params(params, opponent_cls=None):
    """Validate every trained configuration and print its win rate.

    Note: this shares its name with the validation function of the
    vs-MaxDamagePlayer cell, so running this cell rebinds the name.

    Args:
        params: iterable of dicts with the keys ``'n_battles'``, ``'n0'`` and
            ``'gamma'``, plus ``'player'`` (the trained player, whose ``w`` and
            ``N`` attributes are passed to the ValidationPlayer).
        opponent_cls: class used to build the opponent; it is called with
            ``battle_format`` and ``team``. Defaults to ``RandomPlayer``.

    Configurations without a ``'player'`` entry (for example because training
    was interrupted before reaching them) and configurations whose validation
    share ``int(n_battles / 3)`` is zero are reported and skipped instead of
    raising ``KeyError`` / ``ZeroDivisionError``.
    """
    for parm in params:
        trained_player = parm.get('player')
        if trained_player is None:
            print("validation: skipping N0=%s, gamma=%s (no trained player found)" %
                  (parm.get('n0'), parm.get('gamma')))
            continue

        # learned weight vector and N, both taken from the trained player
        w = trained_player.w
        N = trained_player.N
        # params: n_battles, n0, gamma
        n_battles = parm['n_battles']
        n0 = parm['n0']
        gamma = parm['gamma']

        # validation plays 1/3 of the training battles using the learned weights
        n_battles_validation = int(n_battles / 3)
        if n_battles_validation < 1:
            print("validation: skipping N0=%s, gamma=%s (n_battles=%s leaves no validation battles)" %
                  (n0, gamma, n_battles))
            continue

        # Resolved lazily so that the default is only looked up when an opponent is needed.
        opponent_factory = RandomPlayer if opponent_cls is None else opponent_cls

        start = time.time()
        validation_player = ValidationPlayer(battle_format="gen8ou", team=OUR_TEAM, w=w, N=N, n0=n0)
        opponent = opponent_factory(battle_format="gen8ou", team=OP_TEAM)
        await validation_player.battle_against(opponent=opponent, n_battles=n_battles_validation)
        print("validation: num battles (episodes)=%d, N0=%.4f, gamma=%.2f, wins=%d, winning %%=%.2f, total time=%s sec" %
              (
                  n_battles_validation,
                  n0,
                  gamma,
                  validation_player.n_won_battles,
                  round((validation_player.n_won_battles / n_battles_validation) * 100, 2),
                  round(time.time() - start, 2)
              ))


# Run the validation coroutine to completion, but only when validation is enabled.
if use_validation:
    loop = asyncio.get_event_loop()
    validation_task = loop.create_task(do_battle_validation_params(list_of_params))
    loop.run_until_complete(validation_task)
