In [19]:
from sys import path
path.append("C:\\Users\\kenng\\Desktop\\Coding\\CustomModules")

from stage import eval_stage, train_stage, get_scores_data
from utility import get_existing_models

import os
import pandas as pd
import random

In [2]:
def clear_eval_files(stage_path='pool/stage_0/'):
    '''
    Delete the evaluation artefacts of every model listed in a stage.

    For each model id returned by get_existing_models() for the stage's
    'modellist.txt', the model's 'eval_replay' directory and its 'eval.json'
    file are removed if they exist.

    Parameters
    ----------
    stage_path : str, optional
        Stage directory. Defaults to 'pool/stage_0/', the location this
        function was previously hard-wired to.
    '''
    import shutil

    # Auto-adjust stage_path to end with '/'
    if not stage_path.endswith(('/', '\\')):
        stage_path += '/'

    model_ids = get_existing_models(stage_path + 'modellist.txt')
    for model_id in model_ids:
        model_dir = stage_path + model_id
        replay_dir = model_dir + '/eval_replay'
        eval_file = model_dir + '/eval.json'

        # Guard the directory removal the same way as the file removal, so a
        # model that was never evaluated (or a second run of this cell) does
        # not abort the clean-up half-way through.
        if os.path.isdir(replay_dir):
            shutil.rmtree(replay_dir)
        if os.path.exists(eval_file):
            os.remove(eval_file)

In [29]:
def get_best_stats(stage_path):
    '''
    Build a leaderboard of the best models of a stage.

    Reads '<stage_path>/best_models.txt' (a mapping of model id -> score) and
    each listed model's '<stage_path>/<model_id>/info.json', then returns one
    row per model.

    The policy is looked up differently depending on the stage: for stage 0
    it is read from info['policy'], for every other stage from
    info['train_params']['policy']. The stage number is taken from the
    trailing digits of the stage directory name.

    Parameters
    ----------
    stage_path : str
        Stage directory; a trailing '/' is appended when missing.

    Returns
    -------
    pandas.DataFrame
        Columns: id, score, parent, n_parents, last_policy, last_lr,
        last_gamma, last_gl, last_opponent, last_train_date, train_history;
        sorted by score, highest first.
    '''
    import re

    BEST_MODELS_FILE_NAME = "best_models.txt"
    COLUMNS = ['id', 'score', 'parent', 'n_parents', 'last_policy', 'last_lr',
               'last_gamma', 'last_gl', 'last_opponent', 'last_train_date',
               'train_history']

    # Auto-adjust stage_path to end with '/'
    if not stage_path.endswith(('/', '\\')):
        stage_path += '/'

    # Compare the whole trailing number, not just its last character:
    # looking only at stage_path[-2] would also treat stage_10, stage_20, ...
    # as stage 0.
    stage_number = re.search(r'(\d+)$', stage_path.rstrip('/\\'))
    is_stage_zero = stage_number is not None and int(stage_number.group(1)) == 0

    # NOTE(review): eval() executes whatever these files contain. Fine for
    # files this project wrote itself; consider ast.literal_eval if they can
    # ever come from elsewhere.
    with open(stage_path + BEST_MODELS_FILE_NAME, 'r') as f:
        best_models = eval(f.read())

    records = []
    for model_id, score in best_models.items():
        with open(stage_path + model_id + '/info.json', 'r') as f:
            model_info = eval(f.read())

        parents = model_info['parents']
        if is_stage_zero:
            last_policy = model_info['policy']
        else:
            last_policy = model_info['train_params']['policy']

        train_history = model_info['train_history']
        last_run = train_history[-1]
        last_run_params = last_run['train_params']
        opponent_model = last_run['opponent']['model']

        records.append({
            'id': model_id,
            'score': score,
            'parent': parents[-1] if parents else None,
            'n_parents': len(parents),
            'last_policy': last_policy,
            'last_lr': last_run_params['learning_rate'],
            'last_gamma': last_run_params['gamma'],
            'last_gl': last_run_params['gae_lambda'],
            # A missing opponent model is reported as 'agent_blank'.
            'last_opponent': 'agent_blank' if opponent_model is None else opponent_model,
            'last_train_date': model_info['last_train'],
            'train_history': train_history,
        })

    data = pd.DataFrame(records, columns=COLUMNS)
    data = data.sort_values('score', ascending=False)
    return data

In [None]:
def get_benchmarks(stage_path):
    '''
    Return the benchmark model paths recorded for a stage.

    The list is read from '<stage_path>/benchmark_models.txt'. When that file
    does not exist an empty list is returned.

    Parameters
    ----------
    stage_path : str
        Stage directory; a trailing '/' is appended when missing.
    '''
    BENCHMARK_MODELS_FILE_NAME = 'benchmark_models.txt'

    # Make sure the directory path carries a trailing separator.
    if stage_path[-1] not in ('/', '\\'):
        stage_path = stage_path + '/'

    benchmark_file = stage_path + BENCHMARK_MODELS_FILE_NAME
    if not os.path.exists(benchmark_file):
        return []

    # NOTE(review): eval() executes the file's contents; only safe for files
    # written by this project.
    with open(benchmark_file, 'r') as f:
        return eval(f.read())

In [None]:
def gen_run_ids(params, stage_path=None):
    '''
    Return a copy of the training parameter list with a random 'run_id' on
    every entry.

    Each entry is copied before its id is assigned, so the caller's dicts are
    left untouched and entries that alias the same dict (for example a list
    built with `template * n`) still receive independent ids.

    Parameters
    ----------
    params : list of dict
        One training parameter dict per run.
    stage_path : str, optional
        When given, the resulting list is also written, as its str()
        representation, to '<stage_path>/train_params.txt'.

    Returns
    -------
    list of dict
        New list of new dicts, each carrying an integer 'run_id' in
        [0, RAN_RANGE].
    '''
    RAN_RANGE = 99999

    p = []
    for param in params:
        entry = dict(param)
        # random.randint needs both bounds; a single argument raises TypeError.
        entry['run_id'] = random.randint(0, RAN_RANGE)
        p.append(entry)

    if stage_path:
        # Auto-adjust stage_path to end with '/'
        if not stage_path.endswith(('/', '\\')):
            stage_path += '/'
        with open(stage_path + 'train_params.txt', 'w') as f:
            f.write(str(p))
    return p

In [3]:
# Configuration for the staged training run
stage_size = 30            # models per stage
select = 8                 # how many top models are carried into the next stage
spawn_new = 5              # new models spawned by each selected model
ini_steps = 1_000_000      # training steps for the first stage (stage 0)
stage_steps = 100_000      # training steps between consecutive stages
n_stages = 10              # number of stages before the algorithm stops
policies = ['agent1', 'agent2', 'agent3']
stage_name_prefix = 'stage'
directory = None
default_model_params = dict(
    learning_rate=0.001,
    gamma=0.995,
    gae_lambda=0.95,
)

'''
train_params = {
    'run_id': optional = None,
    'model_path': ,  either model_path or model_policy must be provided
    'model_policy': ,
    'opponent': optional = 'agent_blank',
    'step_count': optional = 100000,
    'learning_rate': optional = 0.001,
    'gamma': optional = 0.995,
    'gae_lambda': optional = 0.95,
    'n_copies': optional = 1,
    'replay_freq': optional = step_count,
}
'''

Stage 0 has already been completed (training and evaluation).

In [17]:
# One directory per stage: '../modelcp/pool/<prefix>_<i>/' for i in 0 .. n_stages - 1.
stage_paths = [
    f'../modelcp/pool/{stage_name_prefix}_{i}/'
    for i in range(n_stages)
]

In [30]:
# Display the best models of stage 0, sorted by score (highest first).
get_best_stats(stage_paths[0])

Unnamed: 0,id,score,parent,n_parents,last_policy,last_lr,last_gamma,last_gl,last_opponent,last_train_date,train_history
2,17307,17.0,,0,agent1,0.001,0.995,0.95,agent_blank,09/11/2021-09:17:46,"[{'step_count': 1000000, 'train_params': {'lea..."
8,62633,15.0,,0,agent2,0.0005,0.995,0.95,agent_blank,10/11/2021-02:36:47,"[{'step_count': 1000000, 'train_params': {'lea..."
12,22598,15.0,,0,agent2,0.0001,0.995,0.95,agent_blank,10/11/2021-05:00:26,"[{'step_count': 1000000, 'train_params': {'lea..."
4,43075,14.666667,,0,agent2,0.0001,0.995,0.95,agent_blank,10/11/2021-06:33:19,"[{'step_count': 1000000, 'train_params': {'lea..."
16,11148,14.0,,0,agent1,0.001,0.995,0.95,agent_blank,09/11/2021-07:03:42,"[{'step_count': 1000000, 'train_params': {'lea..."
13,47097,11.333333,,0,agent2,0.001,0.995,0.95,agent_blank,10/11/2021-01:09:29,"[{'step_count': 1000000, 'train_params': {'lea..."
0,29464,9.0,,0,agent2,0.001,0.999,0.95,agent_blank,10/11/2021-04:13:43,"[{'step_count': 1000000, 'train_params': {'lea..."
3,48898,8.666667,,0,agent1,0.001,0.995,0.99,agent_blank,09/11/2021-22:27:49,"[{'step_count': 1000000, 'train_params': {'lea..."
1,67644,8.333333,,0,agent1,0.001,0.995,0.95,agent_blank,09/11/2021-01:53:28,"[{'step_count': 1000000, 'train_params': {'lea..."
5,34129,7.666667,,0,agent2,0.001,0.99,0.95,agent_blank,10/11/2021-08:23:12,"[{'step_count': 1000000, 'train_params': {'lea..."


In [None]:
# Training-parameter templates, one per branch. The branches differ only in
# policy, learning rate and GAE lambda; 'model_path' is left as None here.
tree_branch_template = [
    {
        'model_path': None,
        'model_policy': branch_policy,
        'step_count': 100000,
        'n_copies': 1,
        'learning_rate': branch_lr,
        'gamma': 0.995,
        'gae_lambda': branch_gl,
    }
    for branch_policy, branch_lr, branch_gl in [
        ('agent1', 0.001, 0.95),
        ('agent1', 0.0004, 0.95),
        ('agent1', 0.0001, 0.95),
        ('agent1', 0.001, 0.99),
        ('agent2', 0.001, 0.95),
        ('agent2', 0.0004, 0.95),
        ('agent2', 0.0001, 0.95),
        ('agent3', 0.001, 0.95),
        ('agent3', 0.0004, 0.95),
        ('agent3', 0.0001, 0.95),
    ]
]

In [None]:
# Train and evaluate stages 1 .. n_stages - 1, each seeded from the best
# models of the previous stage. stage_paths has exactly n_stages entries
# (indices 0 .. n_stages - 1), so the loop must stop before n_stages.
for i in range(1, n_stages):
    prev_stage_path = stage_paths[i-1]
    prev_best_models = get_best_stats(prev_stage_path)['id']
    prev_best_models_paths = [prev_stage_path + model_id for model_id in prev_best_models]
    benchmark_models = get_benchmarks(prev_stage_path)

    # One run per (parent, template) pair. A fresh dict is built for every
    # pair: `tree_branch_template * k` would only repeat references to the
    # same dicts, so assigning 'model_path' would overwrite earlier parents
    # and mutate the template itself.
    params = [
        dict(branch, model_path=parent_path)
        for parent_path in prev_best_models_paths
        for branch in tree_branch_template
    ]
    params = gen_run_ids(params, stage_path=stage_paths[i])
    train_stage(params, stage_paths[i], replay=False)
    best_models = eval_stage(stage_paths[i], n_select=select, benchmark_models=benchmark_models, resume=True)