# テストの実行

In [1]:
import os

import time
import datetime

import subprocess
import logging
from concurrent import futures

import numpy as np
import pandas as pd

from tqdm import tqdm

In [2]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

In [3]:
# Project root directory.
PRJ_DIR = os.path.join('/home', 'jovyan', 'work')
# Directory holding the test-set master CSVs and the per-seed input files.
TESTSET_DIR = os.path.join(PRJ_DIR, '01_testset')

# Path of the program that solve() launches.
PROG_PATH = os.path.join(PRJ_DIR, 'main')

## マスタの読み込み

In [9]:
# Load only the 'seed' column of each test-set master CSV, in the order
# pre, sys, stress, param.
pre_seed_df, sys_seed_df, stress_seed_df, param_seed_df = [
    pd.read_csv(os.path.join(TESTSET_DIR, master_csv), usecols=['seed'])
    for master_csv in (
        '01_testset_pre_master.csv',
        '02_testset_sys_master.csv',
        '03_testset_stress_master.csv',
        '04_testset_param_master.csv',
    )
]

## 相対スコア用にChampionDataの読み込み

In [10]:
if False:
    # Disabled by default: change the condition to load the champion scores
    # that the (commented-out) relative-score computation refers to.
    CHAMP_TAG = 'yyyymmdd_hhmm'
    CHAMP_DIR = os.path.join('/home', 'jovyan', 'work', 'result', 'champion')

    champ_path = os.path.join(CHAMP_DIR, 'champ_all_{}.csv'.format(CHAMP_TAG))
    champ_df = pd.read_csv(champ_path)

    # Correction factor applied to the champion score (tune it against the
    # standings).
    top_rate = 1.00

    # Map each seed to its champion score.
    champ_score_dict = {
        row['seed']: row['champion_score']
        for _, row in champ_df.iterrows()
    }

## 実行するロジックの指定

In [12]:
# Tag attached to the program being run.
## Separate words with - (hyphen); the tag is used for champion management.
PROG_TAG = 'first-prog'

In [13]:
def solve(seed):
    """Run the program at PROG_PATH for one seed and time the run.

    The path of the seed's input file (``<TESTSET_DIR>/in/<seed>.txt``, with
    the seed left-padded with zeros to four characters) is echoed into the
    program's standard input through a shell pipeline.

    Args:
        seed: Seed identifying the input file.

    Returns:
        Tuple ``(seed, elapsed_ms, completed)``: ``elapsed_ms`` is the elapsed
        time measured with ``time.perf_counter`` and truncated to whole
        milliseconds; ``completed`` is the ``subprocess.CompletedProcess``
        carrying the captured stdout and stderr as bytes.
    """
    input_path = os.path.join(TESTSET_DIR, 'in', '{:0>4}.txt'.format(seed))
    pipeline = 'echo {} | {}'.format(input_path, PROG_PATH)

    # Stack-overflow workaround: raise the soft stack limit before running.
    # pipeline = 'ulimit -S -s 1048576 && echo {} | {}'.format(input_path, PROG_PATH)

    began = time.perf_counter()
    completed = subprocess.run(
        pipeline,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Elapsed time in milliseconds.
    elapsed_ms = int(1000 * (time.perf_counter() - began))

    return (seed, elapsed_ms, completed)

In [60]:
def run_test(testset_name):
    """Run ``solve`` on every seed of a test set and tabulate the results.

    Seeds are read from ``<TESTSET_DIR>/<testset_name>_master.csv`` and
    solved on a pool of 24 worker threads. The last two whitespace-separated
    tokens of each run's stderr are parsed as ``Result=<text>`` and
    ``Score=<int>``.

    Args:
        testset_name: Test-set name without the ``_master.csv`` suffix,
            e.g. ``'01_testset_pre'``.

    Returns:
        DataFrame with columns ``testset``, ``seed``, ``time`` (milliseconds),
        ``score`` and ``result``, one row per seed in master-file order and
        with dtypes inferred per column. An empty test set yields an empty
        frame that still has these columns. Returns ``None`` if the output
        of any seed cannot be parsed; the seed and the error are printed.
    """
    cols = ['testset', 'seed', 'time', 'score', 'result']

    logger.info('Start')

    testset_path = os.path.join(TESTSET_DIR, testset_name + '_master.csv')
    testset_df = pd.read_csv(testset_path)
    seed_list = testset_df['seed'].to_list()

    # 24 parallel workers; executor.map yields results in seed_list order.
    with futures.ThreadPoolExecutor(max_workers=24) as executor:
        solved_list = list(tqdm(executor.map(solve, seed_list), total=len(seed_list)))

    # Collect plain rows and build the frame once at the end, instead of
    # concatenating a one-row frame per seed (quadratic, and it forced every
    # column to object dtype).
    rows = []

    for seed, e_time, res in solved_list:
        # Row layout: test set name, problem parameter (seed), elapsed time.
        solve_result = [testset_name, seed, e_time]

        try:
            # -- start -- paste target for generated code
            elem_cnt = 2

            result = str(res.stderr.decode('utf-8').split()[-elem_cnt + 0].replace('Result=', ''))
            score = int(res.stderr.decode('utf-8').split()[-elem_cnt + 1].replace('Score=', ''))

            solve_result.append(score)
            solve_result.append(result)
            # -- end -- paste target for generated code

            # Relative score
            # rel_score = int(10 ** 9 * top_rate * champ_score_dict[seed] / score)
            # solve_result.append(rel_score)

        except Exception as e:
            # Abort the whole run on the first seed whose output cannot be
            # parsed; callers receive None.
            print('Error: seed={}'.format(seed))
            print(e)
            return

        rows.append(solve_result)

    logger.info('finish!')

    return pd.DataFrame(rows, columns=cols)

In [61]:
# Log records are rendered as "<timestamp>: <message>" at INFO level and above.
fmt = "%(asctime)s: %(message)s"
logging.basicConfig(format=fmt, level=logging.INFO)

# Module-level logger used by run_test().
logger = logging.getLogger(__name__)

In [63]:
def get_summary_df(result_df):
    """Summarise time and score statistics per test set.

    Args:
        result_df: DataFrame with at least the columns ``testset``, ``time``
            and ``score``.

    Returns:
        DataFrame with one row per distinct ``testset`` value (in sorted
        order) and the columns ``tag``, ``testset``, ``time_mean``,
        ``score_mean``, ``score_min``, ``score_max``, ``time_max``. ``tag``
        is the module-level ``PROG_TAG``; ``time_mean`` is truncated to an
        int. When ``result_df`` has no rows the result is an empty frame
        with the same columns.
    """
    cols = ['tag', 'testset', 'time_mean', 'score_mean', 'score_min', 'score_max', 'time_max']

    # One dict per test set; the frame is built once at the end so the index
    # is unique and an input without rows still yields the expected columns.
    summary_rows = []

    for testset in np.unique(result_df['testset']):
        # A boolean mask avoids interpolating the name into a query string.
        test_result_df = result_df[result_df['testset'] == testset]

        summary_rows.append(
        {
            'tag': PROG_TAG,
            'testset': testset,

            'time_mean': int(np.mean(test_result_df['time'])),

            # -- start -- paste target for generated code
            'score_mean': np.mean(test_result_df['score']),
            'score_min': min(test_result_df['score']),
            'score_max': max(test_result_df['score']),
            # -- end -- paste target for generated code

            'time_max': max(test_result_df['time']),
        })

    return pd.DataFrame(summary_rows, columns=cols)

In [64]:
# Programs to evaluate (file names under PRJ_DIR).
PROG_NAME_LIST = ['main']
#PROG_NAME_LIST = ['main', 'main_off']

# Test set to run; switch by (un)commenting.
testset_name = '01_testset_pre'
#testset_name = '02_testset_sys'
#testset_name = '03_testset_stress'
#testset_name = '04_testset_param'

# Per-program results and summaries, keyed by program name.
result_dict = {}
summary_all_dict = {}

for PROG_NAME in PROG_NAME_LIST:
    prog_path = os.path.join(PRJ_DIR, PROG_NAME)

    # solve() launches whatever PROG_PATH points at, so repoint it at the
    # current program; otherwise every entry of PROG_NAME_LIST would run
    # the same binary.
    PROG_PATH = prog_path

    testset_result_df = run_test(testset_name)
    if testset_result_df is None:
        # run_test() returns None when a program's output cannot be parsed.
        raise RuntimeError(
            'run_test failed: prog={}, testset={}'.format(PROG_NAME, testset_name))

    result_df = testset_result_df

    result_dict[PROG_NAME] = result_df
    summary_all_dict[PROG_NAME] = get_summary_df(result_df)

2023-06-24 15:58:30,117: Start
2023-06-24 15:58:30,385: finish!


In [80]:
# Pre test: summarise only the seeds listed in the pre master.
result_sub_df = pre_seed_df.merge(result_df, on='seed')
summary_df = get_summary_df(result_sub_df).assign(testset='01_test_pre')

summary_df

Unnamed: 0,tag,testset,time_mean,score_mean,score_min,score_max,time_max
0,first-prog,01_test_pre,10,0.0,0,0,21


In [81]:
# Param test: runs only when testset_name is the param, sys or stress test set.
if testset_name in ('04_testset_param', '02_testset_sys', '03_testset_stress'):
    summary_df = pd.DataFrame()

    # Summarise only the seeds listed in the param master.
    result_sub_df = param_seed_df.merge(result_df, on='seed')
    summary_df = get_summary_df(result_sub_df).assign(testset='04_test_param')

summary_df

In [82]:
# Sys test: runs only when testset_name is the sys or stress test set.
if testset_name in ('02_testset_sys', '03_testset_stress'):
    summary_df = pd.DataFrame()

    # Summarise only the seeds listed in the sys master.
    result_sub_df = sys_seed_df.merge(result_df, on='seed')
    summary_df = get_summary_df(result_sub_df).assign(testset='02_test_sys')

summary_df

In [84]:
# Stress test: runs only when testset_name is the stress test set.
if '03_testset_stress' == testset_name:
    summary_df = pd.DataFrame()

    # Summarise only the seeds listed in the stress master.
    result_sub_df = stress_seed_df.merge(result_df, on='seed')
    summary_df = get_summary_df(result_sub_df).assign(testset='03_test_stress')

summary_df

In [86]:
# Show the per-seed results ordered by elapsed time, shortest first.
result_df.sort_values('time', ascending=True)

Unnamed: 0,testset,seed,time,score,result
1,01_testset_pre,1,3,0,dummy
3,01_testset_pre,3,3,0,dummy
9,01_testset_pre,9,3,0,dummy
10,01_testset_pre,10,3,0,dummy
70,01_testset_pre,70,4,0,dummy
...,...,...,...,...,...
40,01_testset_pre,40,19,0,dummy
39,01_testset_pre,39,19,0,dummy
33,01_testset_pre,33,19,0,dummy
48,01_testset_pre,48,20,0,dummy


In [87]:
#result_df.sort_values('rel_score', ascending=True).head(n=10)

# 結果ログの保存

In [88]:
# Timestamp for the file names, in JST (UTC+9). An explicit timezone keeps
# it correct whatever the host's local timezone is; the previous naive
# "now() + 9 hours" was only JST when the host clock ran in UTC.
t_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9)))
time_str = t_now.strftime('%Y%m%d_%H%M')

# Make sure the output directory exists before writing into it.
result_out_dir = os.path.join(PRJ_DIR, 'result')
os.makedirs(result_out_dir, exist_ok=True)

for PROG_NAME in PROG_NAME_LIST:
    result_df = result_dict[PROG_NAME]

    # One CSV per (program, test set):
    # <timestamp>_<PROG_TAG>_<testset>_<PROG_NAME>.csv
    for testset in np.unique(result_df['testset']):
        csv_df = result_df[result_df['testset'] == testset]
        csv_name = '{}_{}_{}_{}.csv'.format(time_str, PROG_TAG, testset, PROG_NAME)
        csv_df.to_csv(os.path.join(result_out_dir, csv_name), index=False)

In [None]:
# Copy the 'main' binary to result/bin, named after PROG_TAG.
! cp $PRJ_DIR/main $PRJ_DIR/result/bin/$PROG_TAG