# テストの実行

In [77]:
import os

import time
import datetime

import subprocess
import logging
from concurrent import futures

import numpy as np
import pandas as pd

import optuna
from ipywidgets import Play, IntSlider, jslink, HBox, interactive_output
from matplotlib import pyplot as plt


In [78]:
# Common root of the notebook's working tree.
_WORK_ROOT = os.path.join('/home', 'jovyan', 'work')

# Test-set master CSVs and the per-seed input files live under this directory.
TESTSET_DIR = os.path.join(_WORK_ROOT, '01_testset')
# Directory the solver executables are resolved against.
PRJ_DIR = os.path.join(_WORK_ROOT, '03_min_block')

## マスタの読み込み

In [79]:
# Load only the 'seed' column of each test-set master file
# (pre / sys / stress, in that order).
pre_seed_df, sys_seed_df, stress_seed_df = (
    pd.read_csv(os.path.join(TESTSET_DIR, master_csv), usecols=['seed'])
    for master_csv in (
        '01_testset_pre_master.csv',
        '02_testset_sys_master.csv',
        '03_testset_stress_master.csv',
    )
)

In [80]:
# Snapshot of champion scores to compare against, selected by its tag.
CHAMP_TAG = '20230327_1858'
CHAMP_DIR = os.path.join('/home', 'jovyan', 'work', 'result', 'champion')

champ_path = os.path.join(CHAMP_DIR, 'champ_all_{}.csv'.format(CHAMP_TAG))
champ_df = pd.read_csv(champ_path)

# Correction factor applied to the champion score, chosen with reference to
# the standings (as of 03/28).
top_rate = 0.65

# seed -> champion_score lookup used when computing relative scores.
# Built column-wise instead of with iterrows(): iterrows() creates a Series
# per row and coerces every value in the row to a common dtype, which can
# turn integer seeds into floats. If a seed appears more than once, the last
# occurrence wins.
champ_score_dict = dict(zip(champ_df['seed'], champ_df['champion_score']))

## 実行するロジックの指定

In [81]:
# Tag identifying the program under test; get_summary_df copies it into the
# 'tag' column of every summary row.
PROG_TAG = 'trans_tune'

In [82]:
def solve(seed, D, problem, prog_path, invalid_shift, invalid_clear):
    """Run the solver once for a single seed and measure how long it takes.

    The solver is started directly (no intermediate shell) and is fed one
    line on stdin: ``"<problem_path> <invalid_shift> <invalid_clear>"``,
    where ``problem_path`` is ``<TESTSET_DIR>/in/<seed>.txt`` with the seed
    left-padded with zeros to four characters.

    Args:
        seed: Test-case seed; selects the input file.
        D: Passed through unchanged into the return value.
        problem: Unused; kept so that existing callers keep working.
        prog_path: Path (or name on PATH) of the solver executable.
        invalid_shift: Tuning parameter forwarded to the solver.
        invalid_clear: Tuning parameter forwarded to the solver.

    Returns:
        Tuple ``(seed, D, e_time, res)``: ``e_time`` is the elapsed
        wall-clock time in whole milliseconds and ``res`` is the
        ``subprocess.CompletedProcess`` with ``stdout``/``stderr`` captured
        as bytes.

    Raises:
        OSError: If ``prog_path`` cannot be executed.
    """
    problem_path = os.path.join(TESTSET_DIR, 'in', '{:0>4}.txt'.format(seed))

    # Same line the former ``echo ... | prog`` pipeline produced, but passed
    # via ``input=`` so that paths containing spaces or shell metacharacters
    # are not re-interpreted by a shell.
    stdin_line = '{} {} {}\n'.format(problem_path, invalid_shift, invalid_clear)

    start_time = time.perf_counter()
    res = subprocess.run(
        [prog_path],
        input=stdin_line.encode('utf-8'),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Elapsed time in milliseconds.
    e_time = int(1000 * (time.perf_counter() - start_time))

    return (seed, D, e_time, res)

In [83]:
# Labels of the trailing stderr tokens, in the order run_test expects them
# at the very end of the solver's stderr output.
_STDERR_LABELS = (
    'Result=', 'Score=', 'BlockCnt=', 'DisuseBlockSize=',
    'Size01=', 'Size04=', 'Size09=', 'Size29=', 'Size30=',
)

# Column order of the frame returned by run_test.
_RESULT_COLUMNS = [
    'testset', 'seed', 'D', 'time', 'score', 'rel_score', 'block_cnt',
    'disuse_block_size', 'size01', 'size04', 'size09', 'size29', 'size30',
    'result',
]


def _parse_solver_stderr(stderr_bytes):
    """Split the solver's stderr into its result label and integer metrics.

    Only the last ``len(_STDERR_LABELS)`` whitespace-separated tokens are
    used; anything printed before them is ignored.

    Returns:
        ``(result, metrics)`` where ``result`` is the text after ``Result=``
        and ``metrics`` is the list ``[score, block_cnt, disuse_block_size,
        size01, size04, size09, size29, size30]`` as ints.

    Raises:
        ValueError: If there are too few tokens, the bytes are not valid
            UTF-8, or a metric is not an integer.
    """
    tokens = stderr_bytes.decode('utf-8').split()
    field_cnt = len(_STDERR_LABELS)
    if len(tokens) < field_cnt:
        raise ValueError(
            'expected at least {} stderr tokens, got {}'.format(field_cnt, len(tokens))
        )

    values = [
        token.replace(label, '')
        for label, token in zip(_STDERR_LABELS, tokens[-field_cnt:])
    ]
    return values[0], [int(value) for value in values[1:]]


def run_test(testset_name, invalid_shift, invalid_clear, max_workers=24):
    """Run the solver on every case of a test set and tabulate the results.

    Reads ``<TESTSET_DIR>/<testset_name>_master.csv`` (three columns, unpacked
    as seed, D, problem), runs ``solve`` for each row on a thread pool and
    parses the metrics each run printed at the end of its stderr.

    Relies on module-level names set elsewhere in the notebook: ``logger``,
    ``TESTSET_DIR``, ``prog_path`` (solver executable), ``solve``,
    ``top_rate`` and ``champ_score_dict``.

    Args:
        testset_name: Master file prefix, e.g. ``'01_testset_pre'``.
        invalid_shift: Tuning parameter forwarded to the solver.
        invalid_clear: Tuning parameter forwarded to the solver.
        max_workers: Number of solver processes run concurrently.

    Returns:
        A DataFrame with one row per case, in master-file order, and the
        columns listed in ``_RESULT_COLUMNS`` (numeric columns get a numeric
        dtype; an empty test set yields an empty frame with those columns).
        Returns ``None`` if any run's output cannot be parsed or scored; the
        first such failure is printed and aggregation stops, so that callers
        never see a partial (and therefore misleading) total.
    """
    logger.info('Start')

    testset_path = os.path.join(TESTSET_DIR, testset_name + '_master.csv')
    testset_df = pd.read_csv(testset_path)

    # itertuples keeps each column's own dtype (iterrows would coerce a row
    # to a single dtype, which can turn integer seeds into floats).
    with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_list = [
            executor.submit(
                solve,
                seed=seed,
                D=D,
                problem=problem,
                prog_path=prog_path,
                invalid_shift=invalid_shift,
                invalid_clear=invalid_clear,
            )
            for seed, D, problem in testset_df.itertuples(index=False, name=None)
        ]
    # Leaving the ``with`` block waits for every submitted run to finish, so
    # the result() calls below return immediately.

    rows = []
    for future in future_list:
        seed, D, e_time, res = future.result()

        try:
            result, metrics = _parse_solver_stderr(res.stderr)
            score = metrics[0]

            # Relative score against the (corrected) champion score.
            rel_score = int(10 ** 9 * top_rate * champ_score_dict[seed] / score)
        except (ValueError, KeyError, ArithmeticError) as e:
            print('Error: seed={}'.format(seed))
            print(e)
            return

        rows.append(
            [testset_name, seed, D, e_time, score, rel_score]
            + metrics[1:]
            + [result]
        )

    logger.info('finish!')

    # TODO(review): the original left a placeholder here for a "self-relative
    # score"; nothing of that kind is computed.
    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)

In [84]:
# Log INFO and above as "<timestamp>: <message>".
fmt = "%(asctime)s: %(message)s"
logging.basicConfig(format=fmt, level=logging.INFO)

# Module-level logger; run_test uses it for its 'Start' / 'finish!' messages.
logger = logging.getLogger(__name__)

In [85]:
# Column order of one summary row (the 'tag' column is prepended on return).
_SUMMARY_COLUMNS = [
    'testset',
    'time_mean',
    'score_mean', 'score_min', 'score_max',
    'rel_score_mean', 'rel_score_min', 'rel_score_max',
    'block_cnt_mean', 'block_cnt_min', 'block_cnt_max',
    'disuse_block_size_mean', 'disuse_block_size_min', 'disuse_block_size_max',
    'size01_mean', 'size04_mean', 'size09_mean', 'size29_mean', 'size30_mean',
    'time_max',
]


def get_summary_df(result_df):
    """Aggregate per-case results into one summary row per test set.

    Args:
        result_df: Frame with the columns produced by ``run_test``
            (``testset``, ``time``, ``score``, ``rel_score``, ``block_cnt``,
            ``disuse_block_size``, ``size01`` ... ``size30``).

    Returns:
        A DataFrame whose first column is ``tag`` (the module-level
        ``PROG_TAG``) followed by ``_SUMMARY_COLUMNS``, with one row per
        distinct ``testset`` value in sorted order. Every row keeps index
        label 0, as before. An empty ``result_df`` yields an empty frame
        with the same columns instead of raising.
    """
    out_cols = ['tag'] + _SUMMARY_COLUMNS

    # Nothing to summarise: return the right shape rather than failing.
    if result_df.empty:
        return pd.DataFrame(columns=out_cols)

    frames = []
    for testset in np.unique(result_df['testset']):
        # Boolean mask instead of query(): safe for names containing quotes.
        rows = result_df[result_df['testset'] == testset]

        frames.append(pd.DataFrame({
            'testset': [testset],

            'time_mean': [int(rows['time'].mean())],

            'score_mean': [int(rows['score'].mean())],
            'score_min': [rows['score'].min()],
            'score_max': [rows['score'].max()],

            'rel_score_mean': [int(rows['rel_score'].mean())],
            'rel_score_min': [rows['rel_score'].min()],
            'rel_score_max': [rows['rel_score'].max()],

            'block_cnt_mean': [rows['block_cnt'].mean()],
            'block_cnt_min': [rows['block_cnt'].min()],
            'block_cnt_max': [rows['block_cnt'].max()],

            'disuse_block_size_mean': [rows['disuse_block_size'].mean()],
            'disuse_block_size_min': [rows['disuse_block_size'].min()],
            'disuse_block_size_max': [rows['disuse_block_size'].max()],

            'size01_mean': [rows['size01'].mean()],
            'size04_mean': [rows['size04'].mean()],
            'size09_mean': [rows['size09'].mean()],
            'size29_mean': [rows['size29'].mean()],
            'size30_mean': [rows['size30'].mean()],

            'time_max': [rows['time'].max()],
        }))

    summary_all_df = pd.concat(frames, axis=0)
    summary_all_df['tag'] = PROG_TAG

    # Selecting by the explicit list puts 'tag' first and fails loudly if the
    # row dict and _SUMMARY_COLUMNS ever drift apart.
    return summary_all_df[out_cols]

In [59]:
# Solver executables (file names under PRJ_DIR) to evaluate.
PROG_NAME_LIST = ['main_tune']

# Test set to run; the commented-out names are the alternatives.
testset_name = '01_testset_pre'
#testset_name = '04_testset_param'
#testset_name = '02_testset_sys'
#testset_name = '03_testset_stress'

# Per-program outputs, keyed by program name: raw results and their summary.
result_dict = {}
summary_all_dict = {}

for PROG_NAME in PROG_NAME_LIST:
    # run_test reads this module-level prog_path, so it must be assigned
    # before run_test is called.
    prog_path = os.path.join(PRJ_DIR, PROG_NAME)
    
    result_df = pd.DataFrame()
    
    # Fixed parameters for this run: invalid_shift=68, invalid_clear=1.
    testset_result_df = run_test(testset_name, 68, 1)
    result_df = pd.concat([result_df, testset_result_df], axis=0)
    
    result_dict[PROG_NAME] = result_df
    summary_all_dict[PROG_NAME] = get_summary_df(result_df)    

2023-03-30 15:46:03,187: Start
2023-03-30 15:46:15,029: finish!


In [60]:
# Total block count over the test set just run (the same quantity that
# calc_score sums for the tuning objective).
sum(testset_result_df['block_cnt'])

538

In [67]:
def calc_score(param_list, testset_name='04_testset_param'):
    """Return the total block count of one solver configuration.

    Args:
        param_list: Two-element sequence ``[invalid_shift, invalid_clear]``.
        testset_name: Test set to evaluate on; defaults to the parameter
            tuning set used so far.

    Returns:
        Sum of the ``block_cnt`` column over all cases of the test set.

    Raises:
        RuntimeError: If ``run_test`` returned ``None``, which it does when a
            solver run's output could not be parsed. Raised explicitly so the
            failure is not reported as an opaque ``TypeError`` on ``None``.
    """
    invalid_shift, invalid_clear = param_list

    testset_result_df = run_test(
        testset_name,
        invalid_shift=invalid_shift,
        invalid_clear=invalid_clear,
    )
    if testset_result_df is None:
        raise RuntimeError(
            'run_test produced no result for invalid_shift={}, invalid_clear={}'.format(
                invalid_shift, invalid_clear
            )
        )

    return sum(testset_result_df['block_cnt'])

In [70]:
def objective(trial):
    """Optuna objective: score one sampled (invalid_shift, invalid_clear) pair.

    Samples ``invalid_shift`` as an integer from 30 to 99 and
    ``invalid_clear`` from 1 to 20, then returns ``calc_score`` for that
    pair. The valid_add / valid_shift / valid_redundant / valid_clear
    parameters are not tuned by this objective.
    """
    sampled_params = [
        trial.suggest_int('invalid_shift', 30, 99),
        trial.suggest_int('invalid_clear', 1, 20),
    ]
    return calc_score(sampled_params)

In [75]:
# Lower objective values (total block count from calc_score) are better.
study = optuna.create_study(direction="minimize")
# Queue the parameters already used for the fixed run_test(testset_name, 68, 1)
# call so they are evaluated by the study as well.
study.enqueue_trial({'invalid_shift': 68, 'invalid_clear': 1})

# Each trial runs the whole tuning test set once.
study.optimize(objective, n_trials=10)

[I 2023-03-30 16:05:06,867] A new study created in memory with name: no-name-104bcd88-f1a7-4cd6-a818-2336e91665e1
2023-03-30 16:05:06,868: Start
2023-03-30 16:05:28,749: finish!
[I 2023-03-30 16:05:28,750] Trial 0 finished with value: 1834.0 and parameters: {'invalid_shift': 68, 'invalid_clear': 1}. Best is trial 0 with value: 1834.0.
2023-03-30 16:05:28,751: Start
2023-03-30 16:05:49,202: finish!
[I 2023-03-30 16:05:49,204] Trial 1 finished with value: 2043.0 and parameters: {'invalid_shift': 97, 'invalid_clear': 13}. Best is trial 0 with value: 1834.0.
2023-03-30 16:05:49,205: Start
2023-03-30 16:06:10,889: finish!
[I 2023-03-30 16:06:10,890] Trial 2 finished with value: 1969.0 and parameters: {'invalid_shift': 65, 'invalid_clear': 12}. Best is trial 0 with value: 1834.0.
2023-03-30 16:06:10,891: Start
2023-03-30 16:06:30,843: finish!
[I 2023-03-30 16:06:30,844] Trial 3 finished with value: 2047.0 and parameters: {'invalid_

In [76]:
# Parameters of the best trial found by the study.
study.best_params

{'invalid_shift': 68, 'invalid_clear': 1}

In [74]:
# Objective value of the best trial, as an int.
# NOTE(review): the recorded output below (1944) does not match the best value
# in the optimize log (1834); this cell's counter (In [74]) is lower than the
# optimize cell's (In [75]), so the output presumably comes from an earlier
# run - re-run the cell to refresh it.
int(study.best_value)

1944