In [1]:
from bson import ObjectId
import numpy as np
from pokeai.util import json_load, pickle_load, ROOT_DIR, DATASET_DIR, yaml_dump
from collections import Counter
import json
import yaml
import os
import copy

In [2]:
# Template for the per-iteration training parameter file.
# `trainer_id`, `party_tags` and `tags` below are placeholders: the
# script-generation cell overwrites them on a deep copy for every iteration.
_trainer_param_yaml = """
trainer_id: 601e7868853660e1ba22c78e
battles: 100000
checkpoint_per_battles: 10000
party_tags:
- random_200917_1
tags:
- testrun_210206_2
trainer:
  dqn_params:
    epsilon: 0.3
    epsilon_decay: 2.0e-06
    epsilon_min: 0.01
    gamma: 0.95
    lr: 1.0e-05
  feature_params:
    party_size: 3
  model_params:
    bn: false
    n_channels: 64
    n_layers: 3
surrogate_reward:
  hp_ratio: 0.25
  alive_ratio: 0.25
match_config:
  random_std: -1.0
"""
trainer_param_tmpl = yaml.safe_load(_trainer_param_yaml)

In [3]:
# Template for the per-iteration hill-climb parameter file.
# `trainer_id` and `dst_tags` are placeholders: the script-generation cell
# overwrites them on a deep copy for every iteration.
_hillclimb_param_yaml = """
random_party_generator:
  regulation: "finalgoodmove3vs3"
  neighbor_poke_change_rate: 0.1
  neighbor_item_change_rate: 0.0
fitness_weight:
  party_feature_names: ["P", "M", "PM", "MM", "PP"]
  party_feature_penalty: 0.1
n: 871
generations: 100
populations: 10
trainer_id: 601e7868853660e1ba22c78e
dst_tags: []
"""
hillclimb_param_tmpl = yaml.safe_load(_hillclimb_param_yaml)

In [4]:
# Common prefix for every party tag, trainer tag and generated file name
# produced by this notebook.
tag_prefix = "rl_loop_210227_1_"

In [5]:
# Directory that receives the generated YAML parameter files and the batch script.
# NOTE(review): machine-specific absolute Windows path; consider deriving it from
# a configurable base directory (e.g. the imported ROOT_DIR) -- confirm what
# ROOT_DIR points to before changing.
file_base_dir = r"D:\dev\pokeai\pokeai\experiment\team\rl\rl_loop_210227_1"

In [6]:
# Regulation name passed to the generate_party commands through the -r option.
regulation = "finalgoodmove3vs3"

In [7]:
# Build a Windows batch script that chains 10 iterations of:
#   pokeai.ai.generate_party -> pokeai.ai.generic_move_model.rl_train
#   -> pokeai.ai.party_feature.hillclimb_party
# For each iteration a training YAML and a hill-climb YAML are written into
# file_base_dir, and the commands that reference them are appended to `cmds`.
cmds = ""
last_trainer_id = None
# makedirs(..., exist_ok=True) rather than mkdir: re-running this cell, or
# running it when parent directories are missing, must not raise.
os.makedirs(file_base_dir, exist_ok=True)
for iternum in range(10):
    # Party tags used for training in this iteration.
    party_allonce = f"{tag_prefix}allonce_{iternum}"
    # For iternum > 0 this tag equals the `party_gen` tag of the previous
    # iteration (the hill-climb destination tag).
    party_prev_gen = f"{tag_prefix}hc_{iternum}"
    if iternum == 0:
        # No previous hill-climb result exists yet, so the first "hc" party
        # set is created by generate_party instead.
        cmds += f"""
python -m pokeai.ai.generate_party {party_prev_gen} -n 871 -r {regulation}
"""
    # Training parameters: deep copy so the template itself is never mutated.
    train_param = copy.deepcopy(trainer_param_tmpl)
    trainer_id = ObjectId()  # fresh id per iteration
    train_param["party_tags"] = [party_allonce, party_prev_gen]
    train_param["tags"] = [f"{tag_prefix}{iternum}"]
    train_param["trainer_id"] = str(trainer_id)
    train_param_file = os.path.join(file_base_dir, f"{tag_prefix}{iternum}_train.yaml")
    yaml_dump(train_param, train_param_file)
    # Hill-climb parameters: same trainer id, results tagged for the next iteration.
    party_gen = f"{tag_prefix}hc_{iternum+1}"
    hillclimb_param = copy.deepcopy(hillclimb_param_tmpl)
    hillclimb_param["trainer_id"] = str(trainer_id)
    hillclimb_param["dst_tags"] = [party_gen]
    hillclimb_param_file = os.path.join(file_base_dir, f"{tag_prefix}{iternum}_hc.yaml")
    yaml_dump(hillclimb_param, hillclimb_param_file)
    # From the second iteration on, pass the previous iteration's trainer id
    # to rl_train via --initialize_by_trainer.
    resume_trainer_id = f"--initialize_by_trainer {last_trainer_id}" if last_trainer_id else ""
    # JSON summary of this iteration, emitted as a batch `rem` comment line.
    comment_info = json.dumps({"iternum": iternum, "trainer_id": str(trainer_id), "train_party_tags": train_param["party_tags"], "gen_party_tag": party_gen})
    cmds += f"""
python -m pokeai.ai.generate_party {party_allonce} --all_pokemon_once -r {regulation}
python -m pokeai.ai.generic_move_model.rl_train {train_param_file} {resume_trainer_id}
python -m pokeai.ai.party_feature.hillclimb_party {hillclimb_param_file}
rem {comment_info}
"""
    last_trainer_id = trainer_id

print(cmds)
with open(os.path.join(file_base_dir, f"{tag_prefix}run.bat"), "w") as f:
    f.write(cmds)



python -m pokeai.ai.generate_party rl_loop_210227_1_hc_0 -n 871 -r finalgoodmove3vs3

python -m pokeai.ai.generate_party rl_loop_210227_1_allonce_0 --all_pokemon_once -r finalgoodmove3vs3
python -m pokeai.ai.generic_move_model.rl_train D:\dev\pokeai\pokeai\experiment\team\rl\rl_loop_210227_1\rl_loop_210227_1_0_train.yaml 
python -m pokeai.ai.party_feature.hillclimb_party D:\dev\pokeai\pokeai\experiment\team\rl\rl_loop_210227_1\rl_loop_210227_1_0_hc.yaml
rem {"iternum": 0, "trainer_id": "6039caccaf28292a4eae380a", "train_party_tags": ["rl_loop_210227_1_allonce_0", "rl_loop_210227_1_hc_0"], "gen_party_tag": "rl_loop_210227_1_hc_1"}

python -m pokeai.ai.generate_party rl_loop_210227_1_allonce_1 --all_pokemon_once -r finalgoodmove3vs3
python -m pokeai.ai.generic_move_model.rl_train D:\dev\pokeai\pokeai\experiment\team\rl\rl_loop_210227_1\rl_loop_210227_1_1_train.yaml --initialize_by_trainer 6039caccaf28292a4eae380a
python -m pokeai.ai.party_feature.hillclimb_party D:\dev\pokeai\pokeai\exp