In [3]:
import time
import logging
import os
import random
import csv

import numpy as np
import coloredlogs
from FAdo.conversions import *

from utils.data_loader import *
from utils.heuristics import *

from alpha_zero.Coach import Coach
from alpha_zero.MCTS import MCTS
from alpha_zero.utils import *
from alpha_zero.state_elimination.StateEliminationGame import StateEliminationGame as Game
from alpha_zero.state_elimination.pytorch.NNet import NNetWrapper as nn


In [4]:
log = logging.getLogger(__name__)
coloredlogs.install(level='INFO')

# AlphaZero hyperparameters and checkpoint locations.
args = dotdict(dict(
    numIters=1000,
    numEps=100,             # complete self-play games simulated in each new iteration
    tempThreshold=0,        # temperature hyperparameter
    updateThreshold=0.6,    # arena: new net is accepted at this win fraction or more
    maxlenOfQueue=200000,   # number of game examples used to train the networks
    numMCTSSims=25,         # number of game moves for MCTS to simulate
    arenaCompare=40,        # arena games played to decide whether the new net is accepted
    cpuct=1,
    checkpoint='./alpha_zero/models/',
    load_model=True,
    load_folder_file=('./alpha_zero/models/', 'best.pth.tar'),
    numItersForTrainExamplesHistory=20,
))

# Benchmark grid. The experiment functions index the data set as
# data[n][k][d][i] with n in range(n_range), k over `alphabet`,
# d over `density` and i in range(sample_size).
min_n, max_n = 3, 5
n_range = 1 + max_n - min_n
alphabet = [2, 5, 10]   # presumably alphabet sizes -- TODO confirm against load_data
density = [0.2, 0.5]    # presumably transition densities -- TODO confirm against load_data
sample_size = 10


In [5]:
def test_heuristics():
    if os.path.isfile('./result/heuristics_experiment_result.pkl'):
        with open('./result/heuristics_experiment_result.pkl', 'rb') as fp:
            exp = load(fp)
            return exp
    else:
        data = load_data()
        exp = [[[[[0, 0] for d in range(len(density))] for k in range(
            len(alphabet))] for n in range(n_range)] for c in range(6)]
        for n in range(n_range):
            for k in range(len(alphabet)):
                for d in range(len(density)):
                    for i in range(sample_size):
                        random.seed(i)
                        print('n' + str(n + min_n) + 'k' + ('2' if not k else ('5' if k == 1 else '10')) + (
                            's' if not d else 'd') + '\'s ' + str(i + 1) + ' sample')
                        # eliminate_randomly
                        gfa = data[n][k][d][i].dup()
                        start_time = time.time()
                        result = eliminate_randomly(gfa)
                        end_time = time.time()
                        result_time = end_time - start_time
                        result_size = result.treeLength()
                        exp[0][n][k][d][0] += result_time
                        exp[0][n][k][d][1] += result_size

                        # decompose with eliminate_randomly
                        gfa = data[n][k][d][i].dup()
                        start_time = time.time()
                        result = decompose(gfa, False, False)
                        end_time = time.time()
                        result_time = end_time - start_time
                        result_size = result.treeLength()
                        exp[1][n][k][d][0] += result_time
                        exp[1][n][k][d][1] += result_size

                        # eliminate_by_state_weight_heuristic
                        gfa = data[n][k][d][i].dup()
                        start_time = time.time()
                        result = eliminate_by_state_weight_heuristic(gfa)
                        end_time = time.time()
                        result_time = end_time - start_time
                        result_size = result.treeLength()
                        exp[2][n][k][d][0] += result_time
                        exp[2][n][k][d][1] += result_size

                        # decompose + eliminate_by_state_weight_heuristic
                        gfa = data[n][k][d][i].dup()
                        start_time = time.time()
                        result = decompose(gfa, True, False)
                        end_time = time.time()
                        result_time = end_time - start_time
                        result_size = result.treeLength()
                        exp[3][n][k][d][0] += result_time
                        exp[3][n][k][d][1] += result_size

                        # eliminate_by_repeated_state_weight_heuristic
                        gfa = data[n][k][d][i].dup()
                        start_time = time.time()
                        result = eliminate_by_repeated_state_weight_heuristic(
                            gfa)
                        end_time = time.time()
                        result_time = end_time - start_time
                        result_size = result.treeLength()
                        exp[4][n][k][d][0] += result_time
                        exp[4][n][k][d][1] += result_size

                        # decompose + eliminate_by_repeated_state_weight_heuristic
                        gfa = data[n][k][d][i].dup()
                        start_time = time.time()
                        result = decompose(gfa, True, True)
                        end_time = time.time()
                        result_time = end_time - start_time
                        result_size = result.treeLength()
                        exp[5][n][k][d][0] += result_time
                        exp[5][n][k][d][1] += result_size
        with open('./result/heuristics_experiment_result.pkl', 'wb') as fp:
            dump(exp, fp)


In [6]:
# exp = test_heuristics()

In [7]:
import pandas as pd

2023-03-14 12:36:02 ksk numexpr.utils[1410428] INFO Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2023-03-14 12:36:02 ksk numexpr.utils[1410428] INFO NumExpr defaulting to 8 threads.


In [8]:
def test_alpha_zero():
    if not os.path.isfile('./result/alpha_zero_experiment_result.pkl'):
        with open('./result/alpha_zero_experiment_result.pkl', 'rb') as fp:
            exp = load(fp)
        with open('./result/c7.csv', 'w', newline='') as fp:
            writer = csv.writer(fp)
            for n in range(5 - 3, 11 - 3):
                size_value = exp[n][1][0][1] / 100
                writer.writerow([size_value])
    else:
        data = load_data()
        exp = [[[[0, 0] for d in range(len(density))] for k in range(
            len(alphabet))] for n in range(n_range)]
        g = Game()
        nnet = nn(g)
        mcts = MCTS(g, nnet, args)
        def player(x): return np.argmax(mcts.getActionProb(x, temp=0))
        curPlayer = 1
        if args.load_model:
            nnet.load_checkpoint(args.checkpoint, args.load_folder_file[1])
        else:
            print("Can't test without pre-trained model")
            exit()
        for n in range(n_range):
            for k in range(len(alphabet)):
                for d in range(len(density)):
                    for i in range(sample_size):
                        #print('n' + str(n + min_n) + 'k' + ('2' if not k else ('5' if k == 1 else '10')) + (
                        #    's' if not d else 'd') + '\'s ' + str(i + 1) + ' sample')
                        gfa = data[n][k][d][i].dup()
                        board = g.getInitBoard(
                            gfa, n + min_n, alphabet[k], density[d])
                        order = []
                        start_time = time.time()
                        while g.getGameEnded(board, curPlayer) == -1:
                            action = player(
                                g.getCanonicalForm(board, curPlayer))
                            valids = g.getValidMoves(
                                g.getCanonicalForm(board, curPlayer), 1)
                            if valids[action] == 0:
                                assert valids[action] > 0
                            board, curPlayer = g.getNextState(
                                board, curPlayer, action)
                            order.append(action)
                            
                        result = g.gfaToBoard(board)[0][n + min_n + 1].treeLength()
                        end_time = time.time()
                        gfa.eliminateAll(order)
                        '''
                        if (result != gfa.delta[0][n + min_n + 1].treeLength()):
                            print('order', order)
                            print('result length', result)
                            print('valid length',
                                  gfa.delta[0][n + min_n + 1].treeLength())
                            print('Something is wrong')
                            exit()
                        '''
                        result_time = end_time - start_time
                        exp[n][k][d][0] += result_time
                        exp[n][k][d][1] += result
        with open('./result/alpha_zero_experiment_result.pkl', 'wb') as fp:
            dump(exp, fp)

In [9]:
# Run the AlphaZero experiment cell: test_alpha_zero reads or writes
# ./result/alpha_zero_experiment_result.pkl depending on its cache check.
test_alpha_zero()

order [1, 3, 2]
result length 7
valid length 13
Something is wrong
order [1, 2, 3]
result length 7
valid length 15
Something is wrong
order [1, 3]
result length 3
valid length 9
Something is wrong
order [3, 2, 1]
result length 9
valid length 15
Something is wrong
order [3, 1, 2]
result length 7
valid length 15
Something is wrong
order [3, 2, 1]
result length 8
valid length 12
Something is wrong
order [2, 3, 1]
result length 10
valid length 14
Something is wrong
order [3, 2, 1]
result length 8
valid length 14
Something is wrong
order [3, 1, 2]
result length 12
valid length 18
Something is wrong
order [2, 1, 3]
result length 1
valid length 5
Something is wrong
order [2, 3, 1]
result length 27
valid length 31
Something is wrong
order [3, 2, 1]
result length 31
valid length 35
Something is wrong
order [2, 3, 1]
result length 46
valid length 50
Something is wrong
order [2, 3, 1]
result length 25
valid length 29
Something is wrong
order [3, 1, 2]
result length 40
valid length 48
Something is

KeyboardInterrupt: 

: 

In [11]:
# Reload both stored result tables for side-by-side inspection.
# NOTE(review): `load` comes from a wildcard import and is presumably
# pickle.load -- only open result files that were produced locally.
with open('./result/alpha_zero_experiment_result.pkl', 'rb') as fp:
    exp_alpha = load(fp)

with open('./result/heuristics_experiment_result.pkl', 'rb') as fp:
    exp_heuristic = load(fp)

In [12]:
# Show heuristic slot 5 next to the AlphaZero table. In test_heuristics,
# slot 5 accumulates decompose(gfa, True, True); each leaf is
# [total_time, total_size].
exp_heuristic[5], exp_alpha

([[[[0.005059957504272461, 110], [0.0067386627197265625, 463]],
   [[0.0071070194244384766, 404], [0.012964010238647461, 1303]],
   [[0.011895179748535156, 980], [0.02732706069946289, 2682]]],
  [[[0.00681304931640625, 259], [0.01198124885559082, 1837]],
   [[0.011904239654541016, 1472], [0.03433036804199219, 6085]],
   [[0.024356365203857422, 3899], [0.0637514591217041, 13106]]],
  [[[0.008751392364501953, 672], [0.023267507553100586, 6312]],
   [[0.02171802520751953, 4400], [0.07539701461791992, 23845]],
   [[0.0574953556060791, 14637], [0.17096233367919922, 49070]]]],
 [[[[0.1249995231628418, 116], [0.1600053310394287, 437]],
   [[0.1589818000793457, 424], [0.17643022537231445, 1313]],
   [[0.1768794059753418, 998], [0.17255425453186035, 2916]]],
  [[[0.2143995761871338, 255], [0.3462491035461426, 1710]],
   [[0.32443714141845703, 1479], [0.3667898178100586, 5438]],
   [[0.3663046360015869, 4535], [0.37824249267578125, 12188]]],
  [[[0.38141417503356934, 657], [0.5702736377716064, 6