In [1]:
import os
import random
import itertools
import math

from __future__ import print_function
import pickle
import trueskill
from trueskill import Rating, quality_1vs1, rate_1vs1, rate

from game import Board, Game
from mcts_pure import MCTSPlayer as MCTS_Pure
from mcts_alphaZero import MCTSPlayer
from policy_value_net_numpy import PolicyValueNetNumpy
from policy_value_net_pytorch import PolicyValueNet  # Pytorch

In [2]:
print(os.path)

<module 'posixpath' from '/home/qz99@drexel.edu/.pyenv/versions/3.9.6/lib/python3.9/posixpath.py'>


In [3]:
model_path = os.getcwd() + '/PyTorch_models'

In [4]:
for root, dirs, files in os.walk(model_path):
    print(files)
#     print(type(files))
#     print(len(files))

['best_policy_885_pt_600.model', 'best_policy_885_pt_25300.model', 'best_policy_885_pt_14050.model', 'best_policy_885_pt_23500.model', 'best_policy_885_pt_10500.model', 'best_policy_885_pt_19600.model', 'best_policy_885_pt_11000.model', 'best_policy_885_pt_19250.model', 'best_policy_885_pt_18450.model', 'best_policy_885_pt_50.model', 'best_policy_885_pt_17750.model', 'best_policy_885_pt_3000.model', 'best_policy_885_pt_18050.model', 'best_policy_885_pt_12950.model', 'best_policy_885_pt_5200.model']
[]


In [5]:
def readModel():
    model_path = os.getcwd() + '/PyTorch_models'
    model_list = [files for root, dirs, files in os.walk(model_path)][0] # have two lists, second is empty
    model_dict = {}
    for model in model_list:
        model_dict['model' + '_' + model.split('.')[0].split('_')[-1]] = model
    return model_dict

In [6]:
model_dict = readModel()

In [7]:
model_dict

{'model_600': 'best_policy_885_pt_600.model',
 'model_25300': 'best_policy_885_pt_25300.model',
 'model_14050': 'best_policy_885_pt_14050.model',
 'model_23500': 'best_policy_885_pt_23500.model',
 'model_10500': 'best_policy_885_pt_10500.model',
 'model_19600': 'best_policy_885_pt_19600.model',
 'model_11000': 'best_policy_885_pt_11000.model',
 'model_19250': 'best_policy_885_pt_19250.model',
 'model_18450': 'best_policy_885_pt_18450.model',
 'model_50': 'best_policy_885_pt_50.model',
 'model_17750': 'best_policy_885_pt_17750.model',
 'model_3000': 'best_policy_885_pt_3000.model',
 'model_18050': 'best_policy_885_pt_18050.model',
 'model_12950': 'best_policy_885_pt_12950.model',
 'model_5200': 'best_policy_885_pt_5200.model'}

In [8]:
def compete(model_file_1, model_file_2): # take two arguments, model_file_1, model_file_2
    
    # build the board
    n = 5
    width, height = 8, 8

    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)
        
        # for pyTorch
        best_policy_1 = PolicyValueNet(width, height, model_file_1, use_gpu=True)
        mcts_player_1 = MCTSPlayer(best_policy_1.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)  # set larger n_playout for better performance
        
        best_policy_2 = PolicyValueNet(width, height, model_file_2, use_gpu=True)
        mcts_player_2 = MCTSPlayer(best_policy_2.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)  # set larger n_playout for better performance

        winner = game.start_play(mcts_player_1, mcts_player_2, start_player=1, is_shown=0)
        return winner
#         print('winner is {}'.format(winner))
            
    except KeyboardInterrupt:
        print('\n\rquit')

In [9]:
model_dict = readModel()

In [10]:
model_dict

{'model_600': 'best_policy_885_pt_600.model',
 'model_25300': 'best_policy_885_pt_25300.model',
 'model_14050': 'best_policy_885_pt_14050.model',
 'model_23500': 'best_policy_885_pt_23500.model',
 'model_10500': 'best_policy_885_pt_10500.model',
 'model_19600': 'best_policy_885_pt_19600.model',
 'model_11000': 'best_policy_885_pt_11000.model',
 'model_19250': 'best_policy_885_pt_19250.model',
 'model_18450': 'best_policy_885_pt_18450.model',
 'model_50': 'best_policy_885_pt_50.model',
 'model_17750': 'best_policy_885_pt_17750.model',
 'model_3000': 'best_policy_885_pt_3000.model',
 'model_18050': 'best_policy_885_pt_18050.model',
 'model_12950': 'best_policy_885_pt_12950.model',
 'model_5200': 'best_policy_885_pt_5200.model'}

In [11]:
test_models = [[model, Rating()] for model in model_dict.values()]

In [12]:
test_models

[['best_policy_885_pt_600.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_10500.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_19600.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_11000.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_50.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_17750.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_18050.model', trueskill.Rating(mu=25

In [13]:
model_1 = random.choice(test_models)

In [14]:
model_1

['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.000, sigma=8.333)]

In [15]:
model_2 = sorted([(quality_1vs1(model_1[1], m[1]), m) for m in test_models if m[0] != model_1[0]])[-1][1]

In [16]:
model_2

['best_policy_885_pt_600.model', trueskill.Rating(mu=25.000, sigma=8.333)]

In [17]:
def test():
    model_dict = readModel()
    test_models = [[model, Rating()] for model in model_dict.values()]
    for _ in range(5):
        model_1 = random.choice(test_models)
        model_2 = sorted([(quality_1vs1(model_1[1], m[1]), m) for m in test_models if m[0] != model_1[0]])[-1][1]
        
#         model_1 = random.choice(list(model_dict.values()))
#         model_2 = random.choice(list(m2 for m2 in model_dict.values() if m2 != m1))
        #print(model_file_1, model_file_2)
        
        model_file_1 = os.getcwd() + '/PyTorch_models/' + model_1[0]
        model_file_2 = os.getcwd() + '/PyTorch_models/' + model_2[0]
        
        winner = compete(model_file_1, model_file_2)
        game_result = []
        game_result.append(winner)
        model1_wins = winner == 1
        model2_wins = winner == 2
        tie = winner == -1

        if model1_wins:
            model_1[1], model_2[1] = rate_1vs1(model_1[1], model_2[1])
        if model2_wins:
            model_2[1], model_1[1] = rate_1vs1(model_2[1], model_1[1])
        if tie:
            model_1[1], model_2[1] = rate_1vs1(model_1[1], model_2[1], drawn=True)
        
        print('game {} completed'.format(_))
        
    return test_models, game_result

In [18]:
test()

  x_act = F.log_softmax(self.act_fc1(x_act))


game 0 completed
game 1 completed
game 2 completed
game 3 completed
game 4 completed


([['best_policy_885_pt_600.model', trueskill.Rating(mu=25.000, sigma=3.855)],
  ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.000, sigma=8.333)],
  ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.000, sigma=4.610)],
  ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.000, sigma=6.036)],
  ['best_policy_885_pt_10500.model', trueskill.Rating(mu=25.000, sigma=5.460)],
  ['best_policy_885_pt_19600.model', trueskill.Rating(mu=25.000, sigma=8.333)],
  ['best_policy_885_pt_11000.model', trueskill.Rating(mu=25.000, sigma=8.333)],
  ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.000, sigma=8.333)],
  ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.000, sigma=8.333)],
  ['best_policy_885_pt_50.model', trueskill.Rating(mu=25.000, sigma=6.458)],
  ['best_policy_885_pt_17750.model', trueskill.Rating(mu=25.000, sigma=8.333)],
  ['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.000, sigma=8.333)],
  ['best_policy_885_pt_18050.model', trueskill

### Testing algo

In [15]:
def winProb(model_file_1, model_file_2):
    delta_mu = sum(r.mu for r in model_file_1) - sum(r.mu for r in model_file_2)
    sum_sigma = sum(r.sigma ** 2 for r in itertools.chain(model_file_1, model_file_2))
    size = len(model_file_1) + len(model_file_2)
    denom = math.sqrt(size * (1 * 1) + sum_sigma)
    ts = trueskill.global_env()
    return ts.cdf(delta_mu / denom)

In [16]:
for _ in range(100):
    model_file_1 = random.choice(test_models)
    model_file_2 = sorted([(quality_1vs1(model_file_1[1], m[1]), m) for m in test_models if m[0] != model_file_1[0]])[-1][1]
#     print(model_file_1[0], model_file_2[0])
    model1_wins = random.random() < winProb([model_file_1[1]], [model_file_2[1]])
#     print(model1_wins)
    if model1_wins:
        model_file_1[1], model_file_2[1] = rate_1vs1(model_file_1[1], model_file_2[1])
    else:
        model_file_2[1], model_file_1[1] = rate_1vs1(model_file_2[1], model_file_1[1])

In [19]:
def run():
    n = 5
    width, height = 8, 8
    model_file_1 = os.getcwd() + '/PyTorch_models/best_policy_885_pt_50.model'
    model_file_2 = os.getcwd() + '/PyTorch_models/best_policy_885_pt_10500.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)
        
        # for pyTorch
        best_policy_1 = PolicyValueNet(width, height, model_file_1, use_gpu=True)
        mcts_player_1 = MCTSPlayer(best_policy_1.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)  # set larger n_playout for better performance
        
        best_policy_2 = PolicyValueNet(width, height, model_file_2, use_gpu=True)
        mcts_player_2 = MCTSPlayer(best_policy_2.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)  # set larger n_playout for better performance

        # set start_player=0 for human first  
        winner = game.start_play(mcts_player_1, mcts_player_2, start_player=1, is_shown=0)
#             print('winner is {}'.format(winner))
        return winner
    except KeyboardInterrupt:
        print('\n\rquit')

In [20]:
run()

2