In [1]:
# Notebook for training and testing a Player in Pokemon Showdown using Monte Carlo in a Stochastic Environment

In [2]:
"""
Comparative Table: https://prnt.sc/1ytqrzm
------
neptune
Action space: 4 moves + 5 switches
poke-env installed in C:\\Users\\-\\anaconda3\\envs\\poke_env\\lib\\site-packages
"""


'\nComparative Table: https://prnt.sc/1ytqrzm\n------\nneptune\nAction space: 4 moves + 5 switches\npoke-env installed in C:\\Users\\-\\anaconda3\\envs\\poke_env\\lib\\site-packages\n'

In [3]:
# Imports

import numpy as np

import sys
from gym import spaces
import asyncio
import time

from collections import defaultdict


from poke_env.player.player import Player 
from poke_env.player.random_player import RandomPlayer

from poke_env.environment.abstract_battle import AbstractBattle
from poke_env.data import GenData

import matplotlib
import pandas as pd
from collections import namedtuple
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Seed NumPy's global RNG so repeated runs of the notebook draw the same numbers.
np.random.seed(0)

# Optional Neptune experiment tracking; set to True to create a run.
use_neptune = False
if use_neptune:
    import os

    import neptune.new as neptune
    # Read credentials from the environment so a real token never has to be
    # saved inside the notebook; the placeholders remain as the fallback.
    run = neptune.init(project=os.environ.get('NEPTUNE_PROJECT', 'your_project_here'),
                       api_token=os.environ.get('NEPTUNE_API_TOKEN', 'your api token here'))

In [4]:
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
# Append (rather than insert at index 0) the parent directory to the module
# search path; presumably this is what lets the project-local `src` and `py`
# packages imported in the following cells resolve — TODO confirm repo layout.
sys.path.append('../')

In [5]:
from src.playerMC import Player as PlayerMC


In [6]:
from py.variables.utils import name_to_id_sto as name_to_id


In [9]:
from py.agents.MaxDamagePlayer import MaxDamagePlayer as MaxDamagePlayer

In [12]:
from py.agents.MonteCarloPlayer import MCPlayer as MCPlayer

In [15]:
from py.variables.variables import our_team_sto as our_team
from py.variables.variables import op_team_sto as op_team

In [16]:
# Global variables

# 3 sets of tests; 10k battles each.
# Every list below must have the same length: entry i of each list describes test i.
n0_array = [0.0001, 0.001, 0.01]
gamma_array = [0.75, 0.75, 0.75]
n_battles = [10000] * len(n0_array)          # all tests have 10k battles; can be modified per test
our_team_array = [our_team] * len(n0_array)  # all teams are the same; can be modified per test
op_team_array = [op_team] * len(n0_array)    # all opponents are the same; can be modified per test

# zip() silently truncates to the shortest list, which would drop tests without
# any warning, so fail loudly when the configuration lists disagree in length.
_config_lengths = {
    'n0_array': len(n0_array),
    'gamma_array': len(gamma_array),
    'n_battles': len(n_battles),
    'our_team_array': len(our_team_array),
    'op_team_array': len(op_team_array),
}
if len(set(_config_lengths.values())) != 1:
    raise ValueError("Test configuration lists must have the same length: %s" % _config_lengths)

# One dict per test run. The comprehension variables are named differently from
# the imported `our_team` / `op_team` so they do not shadow them.
tests = [
    {'n0': n0,
     'gamma': gamma,
     'n_battles': battle_count,
     'team': team,
     'against': opponent_team,
     'battle_format': "gen8ou"}
    for n0, gamma, battle_count, team, opponent_team
    in zip(n0_array, gamma_array, n_battles, our_team_array, op_team_array)
]


In [17]:
# Sanity check: number of configured test runs (one per entry of n0_array).
len(tests)

3

In [18]:
for test in tests:
    start = time.time()
    if use_neptune: run['params'] = test
    test['opponent'] = MaxDamagePlayer(battle_format="gen8ou", team=test['against'])
    test['player'] = MCPlayer(battle_format="gen8ou", team=test['team'], n0=test['n0'], gamma=test['gamma'])
    await test['player'].battle_against(test['opponent'], n_battles=test['n_battles'])
    
    print(
        "Player with N0=%f and gamma=%f won %d / %d battles [this is %f percent and took %f seconds]"
        % (
            round(test['n0'], 8), 
            round(test['gamma'], 8),
            test['player'].n_won_battles,
            len(test['player']._reward_buffer),
            round(test['player'].n_won_battles/len(test['player']._reward_buffer)*100, 2),
            round(time.time() - start, 2)
        )
    )
    


CancelledError: 

In [None]:
# Close the Neptune run, if one was opened.
if use_neptune:
    run.stop()

In [19]:
import os
import json
import re
from datetime import date
# Current local date. Not referenced by any cell visible in this notebook;
# presumably intended for naming dump folders — TODO confirm or remove.
today = date.today()


In [20]:

from py.variables.utils import save_to_json as save_to_json

In [21]:
# Dump the Q and N attributes of every trained player to json files under ./dump.
# Requires the training cell to have completed, since it is the one that stores
# the 'player' entry in each test dict.
for params in tests:
    for table_name in ("Q", "N"):
        save_to_json("./dump", params, table_name, getattr(params['player'], table_name))

KeyError: 'player'

In [23]:
# Get values from json files
def _load_json_tables(directory):
    """Load every JSON file in `directory` as a dict of numpy arrays.

    Args:
        directory: folder containing the JSON files.

    Returns:
        A list with one dict per file; each value of the decoded JSON object
        is converted with `np.array`. Files are visited in sorted filename
        order so the result is deterministic across platforms.

    Raises:
        FileNotFoundError: if `directory` does not exist.
    """
    tables = []
    # os.listdir returns entries in arbitrary order; sorting keeps the Q and N
    # lists (which are later zipped together with `tests`) consistently ordered.
    for filename in sorted(os.listdir(directory)):
        # The context manager closes the handle even if decoding fails.
        with open(os.path.join(directory, filename)) as json_file:
            table = json.load(json_file)
        tables.append({key: np.array(values) for key, values in table.items()})
    return tables


directoryQ = r'path\211029\qvalues'
directoryN = r'path\211029\nvalues'
Qarray = _load_json_tables(directoryQ)
Narray = _load_json_tables(directoryN)
    

FileNotFoundError: [Errno 2] No such file or directory: 'path\\211029\\qvalues'

In [27]:
from py.agents.ValidationPlayer import ValidationPlayerMC as ValidationPlayer

In [31]:
# Validate with values from json - vs RandomPlayer
for test, qvalue, nvalue in zip(tests, Qarray, Narray):
    start = time.time()
    test['opponent'] = RandomPlayer(battle_format="gen8ou", team=test['against'])
    test['player_val'] = ValidationPlayer(battle_format="gen8ou", team=test['team'], Q=qvalue, N=nvalue, n0=test['n0'])
    await test['player_val'].battle_against(test['opponent'], n_battles=int(test['n_battles']/3))
    
    print(
        "Player with N0=%f and gamma=%f won %d / %d battles [this is %f percent and took %f seconds]"
        % (
            round(test['n0'], 8),
            round(test['gamma'], 8),
            test['player_val'].n_won_battles,
            int(test['n_battles']/3),
            round(test['player_val'].n_won_battles/int(test['n_battles']/3), 2),
            round(time.time() - start, 2)
        )
    )

In [29]:
# Validate with values from json - vs MaxPlayer
for test, qvalue, nvalue in zip(tests, Qarray, Narray):
    start = time.time()
    test['opponent'] = MaxDamagePlayer(battle_format="gen8ou", team=test['against'])
    test['player_val'] = ValidationPlayer(battle_format="gen8ou", team=test['team'], Q=qvalue, N=nvalue, n0=test['n0'])
    await test['player_val'].battle_against(test['opponent'], n_battles=int(test['n_battles']/3))
    
    print(
        "Player with N0=%f and gamma=%f won %d / %d battles [this is %f percent and took %f seconds]"
        % (
            round(test['n0'], 8),
            round(test['gamma'], 8),
            test['player_val'].n_won_battles,
            int(test['n_battles']/1000),
            round(test['player_val'].n_won_battles/int(test['n_battles']/3), 2),
            round(time.time() - start, 2)
        )
    )

In [None]:
# Base output location used by the plotting cells below when saving the
# value-function figures (relative to the notebook's working directory).
output_folder = "images/vfunction"


In [None]:
# state is [index_pokemon, 4 moves_base_power, 4 moves_dmg_multiplier, remaining_mon_team, remaining_mon_opponent]
# 3D graph: X: sum(moves_base_power * moves_dmg_multiplier), Y: remaining_mon_team - remaining_mon_opponent, Z: value

In [None]:
# Data for plotting: create the value function from the action-value function
# by picking the best action at each state.
#
# x: index_pokemon*20+sum(moves_base_power * moves_dmg_multiplier)
# y: remaining_mon_team - remaining_mon_opponent
# z: value function

v_array = []
directoryQ = r'path\211029\qvalues'

# The listing order is intentionally left exactly as os.listdir returns it:
# the plotting cell pairs v_array with a fresh os.listdir(directoryQ) call.
for filenameQ in os.listdir(directoryQ):
    # The context manager closes the file even if decoding fails.
    with open(directoryQ+'/'+filenameQ) as Qjson_file:
        Qjson = json.load(Qjson_file)

    for key in Qjson:
        Qjson[key] = np.array(Qjson[key])

    z_values = []
    x_values = []
    y_values = []
    for state, actions in Qjson.items():
        # V(s) = max over actions of Q(s, a)
        z_values.append(np.max(actions))
        # The state key is a stringified vector: strip the punctuation so that
        # only whitespace-separated numbers remain.
        state = re.sub(r"[,!?><:'\[\]()@*~#]","", state)
        key_float = [float(k) for k in state.split()]
        # Entry 0 is the pokemon index, 1-4 the base powers, 5-8 the damage multipliers.
        x_emb = key_float[0]*20+key_float[1]*key_float[5]+key_float[2]*key_float[6]+key_float[3]*key_float[7]+key_float[4]*key_float[8]
        x_values.append(x_emb)
        # The remaining-pokemon counts are the last two entries of the state.
        # The previous indices 8 and 9 overlapped with the 4th damage
        # multiplier that x_emb already reads from index 8.
        y_emb = key_float[-2]-key_float[-1]
        y_values.append(y_emb)
    v_array.append((x_values, y_values, z_values))


In [None]:
import pandas as pd
from scipy.interpolate import griddata

# x: index_pokemon*20+sum(moves_base_power * moves_dmg_multiplier)
# y: remaining_mon_team - remaining_mon_opponent
# z: value function

# Draw one 3D value-function surface per Q-value file and save it as a PDF
# inside <output_folder>/MCControl.
for vvalue, filenameQ in zip(v_array, os.listdir(directoryQ)):
    print(filenameQ.split('.')[0])
    x_values, y_values, z_values = vvalue
    xyz = {'x': np.array(x_values), 'y': np.array(y_values), 'z': np.array(z_values)}
    df = pd.DataFrame(xyz, index=range(len(xyz['x'])))
    # Regular grid spanning the observed x/y range, with one grid line per
    # distinct observed value; scattered values are resampled by nearest neighbour.
    x1 = np.linspace(df['x'].min(), df['x'].max(), len(df['x'].unique()))
    y1 = np.linspace(df['y'].min(), df['y'].max(), len(df['y'].unique()))
    x2, y2 = np.meshgrid(x1, y1)
    z2 = griddata((df['x'], df['y']), df['z'], (x2, y2), method='nearest')

    fig = plt.figure(figsize=(20, 10))
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot creates the 3D axes on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('index_pokemon*20+sum(moves_base_power * moves_dmg_multiplier)')
    ax.set_ylabel('remaining_mon_team - remaining_mon_opponent')
    ax.set_zlabel('Value')
    ax.set_title('Value - Index for x axis: 0 venusaur;  1*20 pikachuoriginal; 2*20 tauros, 3*20 sirfetchd, 4*20 blastoise, 5*20 charizard')

    surf = ax.plot_surface(x2, y2, z2, rstride=1, cstride=1, cmap=matplotlib.cm.coolwarm,
        linewidth=0, antialiased=False)
    fig.colorbar(surf)
    filename = filenameQ.split('.')[0]+'_Stoc_Index.pdf'
    path_plot = output_folder+'/MCControl'
    os.makedirs(path_plot, exist_ok=True)
    # Join with a separator so the file lands inside the MCControl folder
    # rather than next to it with an "MCControl" filename prefix.
    plt.savefig(os.path.join(path_plot, filename))
    plt.show()
    # Release the figure so that looping over many files does not accumulate memory.
    plt.close(fig)


In [None]:
# Value function derived from the saved Q tables, without the pokemon index.
#
# x: sum(moves_base_power * moves_dmg_multiplier)
# y: remaining_mon_team - remaining_mon_opponent
# z: value function

v_array = []
directoryQ = r'path\211029\qvalues'

# The listing order is intentionally left exactly as os.listdir returns it:
# the plotting cell pairs v_array with a fresh os.listdir(directoryQ) call.
for filenameQ in os.listdir(directoryQ):
    # The context manager closes the file even if decoding fails.
    with open(directoryQ+'/'+filenameQ) as Qjson_file:
        Qjson = json.load(Qjson_file)

    for key in Qjson:
        Qjson[key] = np.array(Qjson[key])

    z_values = []
    x_values = []
    y_values = []

    for state, actions in Qjson.items():
        # V(s) = max over actions of Q(s, a)
        z_values.append(np.max(actions))
        # The state key is a stringified vector: strip the punctuation so that
        # only whitespace-separated numbers remain.
        state = re.sub(r"[,!?><:'\[\]()@*~#]","", state)
        key_float = [float(k) for k in state.split()]
        # Entries 1-4 are the base powers and 5-8 the damage multipliers;
        # entry 0 (the pokemon index) is deliberately left out here.
        x_emb = key_float[1]*key_float[5]+key_float[2]*key_float[6]+key_float[3]*key_float[7]+key_float[4]*key_float[8]
        x_values.append(x_emb)
        # The remaining-pokemon counts are the last two entries of the state.
        # The previous indices 8 and 9 overlapped with the 4th damage
        # multiplier that x_emb already reads from index 8.
        y_emb = key_float[-2]-key_float[-1]
        y_values.append(y_emb)
    v_array.append((x_values, y_values, z_values))


In [None]:
# x: sum(moves_base_power * moves_dmg_multiplier)
# y: remaining_mon_team - remaining_mon_opponent
# z: value function

import pandas as pd
from scipy.interpolate import griddata
# Draw one 3D value-function surface per Q-value file and save it as a PDF
# inside <output_folder>/MCControl.
for vvalue, filenameQ in zip(v_array, os.listdir(directoryQ)):
    print(filenameQ)
    x_values, y_values, z_values = vvalue
    xyz = {'x': np.array(x_values), 'y': np.array(y_values), 'z': np.array(z_values)}
    df = pd.DataFrame(xyz, index=range(len(xyz['x'])))
    # Regular grid spanning the observed x/y range, with one grid line per
    # distinct observed value; scattered values are resampled by nearest neighbour.
    x1 = np.linspace(df['x'].min(), df['x'].max(), len(df['x'].unique()))
    y1 = np.linspace(df['y'].min(), df['y'].max(), len(df['y'].unique()))
    x2, y2 = np.meshgrid(x1, y1)
    z2 = griddata((df['x'], df['y']), df['z'], (x2, y2), method='nearest')

    fig = plt.figure(figsize=(20, 10))
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot creates the 3D axes on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('sum(moves_base_power * moves_dmg_multiplier)')
    ax.set_ylabel('remaining_mon_team - remaining_mon_opponent')
    ax.set_zlabel('Value')
    ax.set_title('Value - No Index')

    surf = ax.plot_surface(x2, y2, z2, rstride=1, cstride=1, cmap=matplotlib.cm.coolwarm,
        linewidth=0, antialiased=False)
    fig.colorbar(surf)
    filename = filenameQ.split('.')[0]+'_Stoc_noIndex.pdf'
    path_plot = output_folder+'/MCControl'
    os.makedirs(path_plot, exist_ok=True)
    # Join with a separator so the file lands inside the MCControl folder
    # rather than next to it with an "MCControl" filename prefix.
    plt.savefig(os.path.join(path_plot, filename))
    plt.show()
    # Release the figure so that looping over many files does not accumulate memory.
    plt.close(fig)

In [None]:
# Value function derived from the saved Q tables, plotted against the greedy action.
#
# x: (remaining_mon_team - remaining_mon_opponent)*sum(moves_base_power * moves_dmg_multiplier)
# y: action
# z: value function

v_array = []
directoryQ = r'path\211029\qvalues'

# The listing order is intentionally left exactly as os.listdir returns it:
# the plotting cell pairs v_array with a fresh os.listdir(directoryQ) call.
for filenameQ in os.listdir(directoryQ):
    # The context manager closes the file even if decoding fails.
    with open(directoryQ+'/'+filenameQ) as Qjson_file:
        Qjson = json.load(Qjson_file)

    for key in Qjson:
        Qjson[key] = np.array(Qjson[key])

    z_values = []
    x_values = []
    y_values = []
    for state, actions in Qjson.items():
        # V(s) = max over actions of Q(s, a)
        z_values.append(np.max(actions))
        # The state key is a stringified vector: strip the punctuation so that
        # only whitespace-separated numbers remain.
        state = re.sub(r"[,!?><:'\[\]()@*~#]","", state)
        key_float = [float(k) for k in state.split()]
        # The remaining-pokemon counts are the last two entries of the state.
        # The previous indices 8 and 9 overlapped with the 4th damage
        # multiplier that the sum below already reads from index 8.
        remaining_diff = key_float[-2]-key_float[-1]
        x_emb = remaining_diff*(key_float[1]*key_float[5]+key_float[2]*key_float[6]+key_float[3]*key_float[7]+key_float[4]*key_float[8])
        x_values.append(x_emb)
        # Index of the greedy (highest-valued) action in this state.
        y_emb = np.argmax(actions)
        y_values.append(y_emb)
    v_array.append((x_values, y_values, z_values))

In [None]:
# x: (remaining_mon_team - remaining_mon_opponent)*sum(moves_base_power * moves_dmg_multiplier)
# y: action
# z: value function

import pandas as pd
from scipy.interpolate import griddata
# Draw one 3D value-function surface per Q-value file and save it as a PDF
# inside <output_folder>/MCControl.
for vvalue, filenameQ in zip(v_array, os.listdir(directoryQ)):
    print(filenameQ)
    x_values, y_values, z_values = vvalue
    xyz = {'x': np.array(x_values), 'y': np.array(y_values), 'z': np.array(z_values)}
    df = pd.DataFrame(xyz, index=range(len(xyz['x'])))
    # Regular grid spanning the observed x/y range, with one grid line per
    # distinct observed value; scattered values are resampled by nearest neighbour.
    x1 = np.linspace(df['x'].min(), df['x'].max(), len(df['x'].unique()))
    y1 = np.linspace(df['y'].min(), df['y'].max(), len(df['y'].unique()))
    x2, y2 = np.meshgrid(x1, y1)
    z2 = griddata((df['x'], df['y']), df['z'], (x2, y2), method='nearest')

    fig = plt.figure(figsize=(20, 10))
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot creates the 3D axes on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('(remaining_mon_team - remaining_mon_opponent)*sum(moves_base_power * moves_dmg_multiplier)')
    ax.set_ylabel('action')
    ax.set_zlabel('Value')
    ax.set_title('Value Function - No index')

    surf = ax.plot_surface(x2, y2, z2, rstride=1, cstride=1, cmap=matplotlib.cm.coolwarm,
        linewidth=0, antialiased=False)
    fig.colorbar(surf)
    filename = filenameQ.split('.')[0]+'_Stoc_noIndex_action.pdf'
    path_plot = output_folder+'/MCControl'
    os.makedirs(path_plot, exist_ok=True)
    # Join with a separator so the file lands inside the MCControl folder
    # rather than next to it with an "MCControl" filename prefix.
    plt.savefig(os.path.join(path_plot, filename))
    plt.show()
    # Release the figure so that looping over many files does not accumulate memory.
    plt.close(fig)