In [1]:
import os
import glob
import json
import tqdm
import numpy as np

import sys
import importlib

# Put the parent directory on the module search path before loading the
# game implementation.
sys.path.append('..')

# The module name starts with a digit, so it is not a valid identifier for a
# regular `import` statement; it has to be loaded by name through importlib.
app = importlib.import_module('411551039')
check_game_end = app.check_game_end

In [2]:
# check all saved games in LOG_DIR
LOG_DIR = '/vol/08822801/shawn.chen/subtraction_game/FINAL/minimaxV2_minimax3/'
REWARD = -15
PENALTY = 7

win = 0
tie = 0
max_p1_times = []
max_p2_times = []
json_fns = sorted(glob.glob(os.path.join(LOG_DIR, '*.json')))
with tqdm.tqdm(total=len(json_fns)) as pbar:
    for json_fn in json_fns:
        
        # parse
        with open(json_fn, 'r') as f:
            datum = json.load(f)
        board = np.array(datum['end_board'])
        #print(datum['start_board'])
        start_board = np.array(datum['start_board'])
        cost_p1 = datum['start_cost_p1']
        cost_p2 = datum['start_cost_p2']
        end_cost_p1 = datum['end_cost_p1']
        end_cost_p2 = datum['end_cost_p2']
        actions_p1 = datum['actions_p1']
        actions_p2 = datum['actions_p2']
        act_times_p1 = datum['act_times_p1'] if 'act_times_p1' in datum else []
        act_times_p2 = datum['act_times_p2'] if 'act_times_p2' in datum else []
        num_rows = board.shape[0]
        
        # check if end board is really a end game
        is_end, is_dead_end = check_game_end(board, False)
        assert is_end
        # check if end board contains non-negative numbers
        assert len(board[board < 0]) == 0
        
        # apply all actions
        for row_col_idx, sub_val in actions_p1:
            assert sub_val <= 3 and sub_val >= 0  # check action
            cost_p1 += sub_val
            if row_col_idx < num_rows:
                board[row_col_idx] += sub_val
            else:
                board[:, row_col_idx - num_rows] += sub_val
        for row_col_idx, sub_val in actions_p2:
            assert sub_val <= 3 and sub_val >= 0  # check action
            cost_p2 += sub_val
            if row_col_idx < num_rows:
                board[row_col_idx] += sub_val
            else:
                board[:, row_col_idx - num_rows] += sub_val

        # apply reward / penalty
        if is_dead_end:
            if len(actions_p2) == len(actions_p1):
                cost_p2 += PENALTY
            else:
                cost_p1 += PENALTY
        else:
            if len(actions_p2) == len(actions_p1):
                cost_p2 += REWARD
            else:
                cost_p1 += REWARD
                
        # compare board with start board
        assert np.sum(np.abs(board - start_board)) == 0
        
        # check cost
        assert cost_p1 == end_cost_p1
        assert cost_p2 == end_cost_p2
        
        if cost_p1 < cost_p2:
            win += 1
        else:
            #print(json_fn, cost_p1, cost_p2)
            pass
        if cost_p1 == cost_p2:
            tie += 1
        
        
        #
        if len(act_times_p1) > 0:
            max_p1_times.append(np.max(act_times_p1))
            max_p2_times.append(np.max(act_times_p2))
        
        pbar.update()
        
print('win_rate:', win / len(json_fns))
print('tie_rate:', tie / len(json_fns))
if len(max_p1_times) > 0:
    print('avg max_p1_times:', np.mean(max_p1_times))
    print('avg max_p2_times:', np.mean(max_p2_times))
    print('max max_p1_times:', np.max(max_p1_times))
    print('max max_p2_times:', np.max(max_p2_times))

100%|██████████| 1000/1000 [00:01<00:00, 755.46it/s]

win_rate: 0.999
tie_rate: 0.0
avg max_p1_times: 1.9914410901069641
avg max_p2_times: 0.36306853938102723
max max_p1_times: 15.299367189407349
max max_p2_times: 0.6760430335998535



