# テストの実行

In [2]:
import os

import time
import datetime

import subprocess
import logging
from concurrent import futures

import numpy as np
import pandas as pd

In [3]:
TESTSET_DIR = os.path.join('/home', 'jupyter', 'work', '01_testset')
PRJ_DIR = os.path.join('/home', 'jupyter', 'work')

## マスタの読み込み

In [4]:
pre_seed_df = pd.read_csv(os.path.join(TESTSET_DIR, '01_test_pre.csv'), usecols=['seed'])
sys_seed_df = pd.read_csv(os.path.join(TESTSET_DIR, '02_test_sys.csv'), usecols=['seed'])
stress_seed_df = pd.read_csv(os.path.join(TESTSET_DIR, '03_test_stress.csv'), usecols=['seed'])

## 実行するロジックの指定

In [5]:
# 実行プログラムにタグをつけておく
PROG_TAG = 'face_group_min_max_bet_1k'

In [6]:
def solve(seed, N, M, D, K, problem, prog_path):
    start_time = time.perf_counter()
    
    command_str = 'echo {} | {}'.format(problem, prog_path)
    res = subprocess.run(command_str, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)

    # 経過時間(ミリ秒単位)
    e_time = time.perf_counter() - start_time
    e_time = int(1000 * e_time)    
    #print('{}'.format(prob_id))    
    return (seed, N, M, D, K, e_time, res)

In [7]:
def run_test(testset_name):
    result_df = pd.DataFrame()
    future_list = []

    logger.info('Start')

    testset_path = os.path.join(TESTSET_DIR, testset_name+'.csv')
    testset_df = pd.read_csv(testset_path)

    with futures.ThreadPoolExecutor(max_workers=9) as executor:
        for _, row in testset_df.iterrows():
            seed, N, M, D, K, problem = row

            # バッチ実行
            future = executor.submit(solve, seed=seed,  N=N, M=M, D=D, K=K, problem=problem, prog_path=prog_path)
            future_list.append(future)

        _ = futures.as_completed(fs=future_list)

    for future in future_list:
        seed, N, M, D, K, e_time, res = future.result()

        # 結果をまとめる
        solve_result = []
        
        solve_result.append(testset_name)

        # 問題パラメタ
        solve_result.append(seed)
        solve_result.append(N)
        solve_result.append(M)
        solve_result.append(D)
        solve_result.append(K)

        # 経過時間
        solve_result.append(e_time)
        
        try:
            # コスト
            cost = int(res.stderr.decode('utf-8').split()[-4].replace('Cost=', ''))

            # 非連結なノードペア数
            discon_cnt = int(res.stderr.decode('utf-8').split()[-3].replace('DisconCnt=', ''))

            # 工事辺数のオーバー
            over_k_cnt = int(res.stderr.decode('utf-8').split()[-2].replace('OverK=', ''))
            
            # 迂回路中の辺の数
            in_bypass_cnt = int(res.stderr.decode('utf-8').split()[-1].replace('InBypass=', ''))
                       
        except Exception as e:
            print('Error: seed={}'.format(seed))
            print(e)
            return

        solve_result.append(cost)
        solve_result.append(discon_cnt)
        solve_result.append(over_k_cnt)
        solve_result.append(in_bypass_cnt)

        result_df = pd.concat([result_df, pd.DataFrame(solve_result).T], axis=0)

    logger.info('finish!')
    
    # 結果を整形
    result_df.index = range(result_df.shape[0])
    result_df.columns = ['testset', 'seed', 'N', 'M', 'D', 'K', 'time', 'cost', 'discon_cnt', 'over_k_cnt', 'in_bypass_cnt']

    return result_df

In [8]:
logger = logging.getLogger(__name__)

fmt = "%(asctime)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=fmt)

In [25]:
def get_summary_df(result_df):
    # 全体サマリ
    summary_all_df = pd.DataFrame()

    for testset in np.unique(result_df['testset']):
        test_result_df = result_df.query('testset == "{}"'.format(testset))

        summary_df = pd.DataFrame(
        {
            'testset': [testset],
            
            'time_mean': [int(np.mean(test_result_df['time']))],
            
            'cost_mean': [int(np.mean(test_result_df['cost']))],
            'cost_min': [min(test_result_df['cost'])],
            'cost_max': [max(test_result_df['cost'])],

            'discon_cnt_mean': [np.mean(test_result_df['discon_cnt'])],
            'discon_cnt_max': [max(test_result_df['discon_cnt'])],

            'over_k_cnt_max': [max(test_result_df['over_k_cnt'])],

            'in_bypass_cnt_mean': [np.mean(test_result_df['in_bypass_cnt'])],
            'in_bypass_cnt_max': [max(test_result_df['in_bypass_cnt'])],

            'time_max': [max(test_result_df['time'])],
        })

        summary_all_df = pd.concat([summary_all_df, summary_df], axis=0)   

    summary_all_df['tag'] = PROG_TAG
    summary_all_df = summary_all_df[['tag', 'testset', 'time_mean', 
                                     'cost_mean', 'cost_min', 'cost_max',
                                     'discon_cnt_mean', 'discon_cnt_max',
                                     'over_k_cnt_max',
                                     'in_bypass_cnt_mean', 'in_bypass_cnt_max',
                                     'time_max']]
    
    return summary_all_df

In [26]:
PROG_NAME_LIST = ['main_adj_k']
#PROG_NAME_LIST = ['main', 'main_off']

#testset_name = '00_test_mini'
#testset_name = '01_test_pre'
testset_name = '02_test_sys'
#testset_name = '03_test_stress'

result_dict = {}
summary_all_dict = {}

for PROG_NAME in PROG_NAME_LIST:
    prog_path = os.path.join(PRJ_DIR, PROG_NAME)
    
    result_df = pd.DataFrame()
    
    testset_result_df = run_test(testset_name)
    result_df = pd.concat([result_df, testset_result_df], axis=0)
    
    result_dict[PROG_NAME] = result_df
    summary_all_dict[PROG_NAME] = get_summary_df(result_df)    

2023-02-05 05:27:52,672: Start


Error: seed=2641
invalid literal for int() with base 10: '1:'


KeyError: 'testset'

In [None]:
# pre test
result_sub_df = pd.merge(pre_seed_df, result_df, on='seed')
summary_df = get_summary_df(result_sub_df)
summary_df['testset'] = '01_test_pre'

summary_df

In [None]:
# sys test
summary_df = pd.DataFrame()

if testset_name == '02_test_sys' or testset_name == '03_test_stress':
    result_sub_df = pd.merge(sys_seed_df, result_df, on='seed')
    summary_df = get_summary_df(result_sub_df)
    summary_df['testset'] = '02_test_sys'

summary_df

In [15]:
# sys test
summary_df = pd.DataFrame()

if testset_name == '02_test_sys' or testset_name == '03_test_stress':
    result_sub_df = pd.merge(sys_seed_df, result_df, on='seed')
    summary_df = get_summary_df(result_sub_df)
    summary_df['testset'] = '02_test_sys'

summary_df

Unnamed: 0,tag,testset,time_mean,cost_mean,cost_min,cost_max,discon_cnt_mean,discon_cnt_max,over_k_cnt_max,in_bypass_cnt_mean,in_bypass_cnt_max,time_max
0,face_group_min_max_bet_1k,02_test_sys,1900,32585309,7264339,190856266,0.0,0,5,44.946,122,5297


In [100]:
# stress test
summary_df = pd.DataFrame()

if testset_name == '03_test_stress':
    result_sub_df = pd.merge(stress_seed_df, result_df, on='seed')
    summary_df = get_summary_df(result_sub_df)
    summary_df['testset'] = '03_test_stress'

summary_df

In [101]:
result_df.sort_values('N')

Unnamed: 0,testset,seed,N,M,D,K,time,cost,discon_cnt,over_k_cnt,in_bypass_cnt
1502,02_test_sys,7427,500,957,20,52,642,13808992,0,0,0
1207,02_test_sys,4739,500,1334,8,202,475,17163812,0,0,0
1816,02_test_sys,292,501,961,23,59,648,14828943,0,0,0
264,02_test_sys,5148,501,921,16,65,505,22920121,0,0,0
1834,02_test_sys,2718,501,913,19,51,630,19024381,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
1599,02_test_sys,6254,999,2133,5,602,5290,45190397,0,0,9
103,02_test_sys,6401,1000,1992,22,95,2602,11331751,0,0,0
682,02_test_sys,1419,1000,1692,16,206,1672,29209255,0,0,0
1929,02_test_sys,6899,1000,1713,13,261,1282,32970926,0,0,0


# 結果ログの保存

In [102]:
t_now = datetime.datetime.now() + datetime.timedelta(hours=9)
time_str = t_now.strftime('%Y%m%d_%H%M')

for PROG_NAME in PROG_NAME_LIST:
    result_df = result_dict[PROG_NAME]
    
    for testset in np.unique(result_df['testset']):
        csv_df = result_df.query('testset == "{}"'.format(testset))
        csv_df.to_csv(PRJ_DIR+'/result/{}_{}_{}_{}.csv'.format(time_str,PROG_TAG, testset, PROG_NAME), index=False)