In [1]:
from __future__ import print_function

import itertools
import math
import os
import pickle
import random

import trueskill
from trueskill import Rating, quality_1vs1, rate_1vs1, rate

from game import Board, Game
from mcts_pure import MCTSPlayer as MCTS_Pure
from mcts_alphaZero import MCTSPlayer
from policy_value_net_numpy import PolicyValueNetNumpy
from policy_value_net_pytorch import PolicyValueNet  # Pytorch

In [2]:
print(os.path)

<module 'posixpath' from '/home/qz99@drexel.edu/.pyenv/versions/3.9.6/lib/python3.9/posixpath.py'>


In [3]:
# Directory, under the current working directory, that holds the saved models.
model_path = '{}/PyTorch_models'.format(os.getcwd())

In [4]:
# Print the list of file names for every directory at or below model_path
# (one printed list per directory visited by os.walk).
for _dirpath, _subdirs, file_names in os.walk(model_path):
    print(file_names)

['best_policy_885_pt_600.model', 'best_policy_885_pt_25300.model', 'best_policy_885_pt_14050.model', 'best_policy_885_pt_23500.model', 'best_policy_885_pt_10500.model', 'best_policy_885_pt_19600.model', 'best_policy_885_pt_11000.model', 'best_policy_885_pt_19250.model', 'best_policy_885_pt_18450.model', 'best_policy_885_pt_50.model', 'best_policy_885_pt_17750.model', 'best_policy_885_pt_3000.model', 'best_policy_885_pt_18050.model', 'best_policy_885_pt_12950.model', 'best_policy_885_pt_5200.model']
[]


In [5]:
def readModel(model_dir=None):
    """Map short model names to the file names found in the model directory.

    Only files directly inside the directory are listed; subdirectories are
    not descended into. Each key is ``'model_'`` followed by the text after
    the last underscore of the file name's stem (the part before the first
    dot), so ``'best_policy_885_pt_600.model'`` is stored under
    ``'model_600'``. If two files produce the same key, the one listed later
    overwrites the earlier one.

    Args:
        model_dir: directory to scan. Defaults to ``<cwd>/PyTorch_models``.

    Returns:
        dict: ``{short_name: file_name}``; empty when the directory does not
        exist or holds no files.
    """
    if model_dir is None:
        model_dir = os.getcwd() + '/PyTorch_models'

    # os.walk yields the top directory first, so one step is enough and the
    # rest of the tree is never traversed. For a missing directory it yields
    # nothing at all; fall back to an empty listing instead of failing on an
    # index lookup.
    _, _, model_list = next(os.walk(model_dir), (model_dir, [], []))

    return {
        'model' + '_' + file_name.split('.')[0].split('_')[-1]: file_name
        for file_name in model_list
    }

In [6]:
model_dict = readModel()

In [7]:
model_dict

{'model_600': 'best_policy_885_pt_600.model',
 'model_25300': 'best_policy_885_pt_25300.model',
 'model_14050': 'best_policy_885_pt_14050.model',
 'model_23500': 'best_policy_885_pt_23500.model',
 'model_10500': 'best_policy_885_pt_10500.model',
 'model_19600': 'best_policy_885_pt_19600.model',
 'model_11000': 'best_policy_885_pt_11000.model',
 'model_19250': 'best_policy_885_pt_19250.model',
 'model_18450': 'best_policy_885_pt_18450.model',
 'model_50': 'best_policy_885_pt_50.model',
 'model_17750': 'best_policy_885_pt_17750.model',
 'model_3000': 'best_policy_885_pt_3000.model',
 'model_18050': 'best_policy_885_pt_18050.model',
 'model_12950': 'best_policy_885_pt_12950.model',
 'model_5200': 'best_policy_885_pt_5200.model'}

In [8]:
def compete(model_file_1, model_file_2, n_playout=400, c_puct=5,
            use_gpu=True, start_player=1, width=8, height=8, n_in_row=5):
    '''Play one game between two saved policy-value networks.

    Each model file is loaded into a PolicyValueNet and wrapped in an
    MCTSPlayer; the two players then play a single game on a fresh board
    without displaying it.

    Args:
        model_file_1: path of the model file used by the first player.
        model_file_2: path of the model file used by the second player.
        n_playout: MCTS playouts per move (set larger for better performance).
        c_puct: forwarded to MCTSPlayer as ``c_puct``.
        use_gpu: forwarded to PolicyValueNet.
        start_player: forwarded to Game.start_play.
            NOTE(review): its exact meaning is defined in game.py; confirm
            which side moves first for a given value.
        width: board width.
        height: board height.
        n_in_row: forwarded to Board as ``n_in_row``.

    Returns:
        The value returned by Game.start_play. The callers in this notebook
        read 1 / 2 as a win for the first / second player and -1 as a tie.
        None if the game is interrupted with Ctrl-C.
    '''
    try:
        board = Board(width=width, height=height, n_in_row=n_in_row)
        game = Game(board)

        # for pyTorch: build both players the same way, first model first
        players = []
        for model_file in (model_file_1, model_file_2):
            policy = PolicyValueNet(width, height, model_file, use_gpu=use_gpu)
            players.append(MCTSPlayer(policy.policy_value_fn,
                                      c_puct=c_puct,
                                      n_playout=n_playout))

        return game.start_play(players[0], players[1],
                               start_player=start_player, is_shown=0)

    except KeyboardInterrupt:
        print('\n\rquit')
        # Make the "no result" outcome explicit for callers.
        return None

In [9]:
# test_models = [[model, Rating()] for model in model_dict.keys()]

In [10]:
# One mutable record per model: [games played, random tie-break, model name, rating].
test_models = [
    [0, random.random(), model_name, Rating()]
    for model_name in model_dict
]

In [11]:
# Lists compare element by element, so this orders the records by games played
# first and by the random tie-break value second.
test_models = sorted(test_models)

In [12]:
test_models

[[0,
  0.0072591591706265834,
  'model_14050',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.0692102197236486,
  'model_12950',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.2327762628980069,
  'model_17750',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.37283039810491936,
  'model_50',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.407879445419541,
  'model_25300',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.43574199648558243,
  'model_18050',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.4508288586393796,
  'model_10500',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.5404522543867997,
  'model_600',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.617979257402781,
  'model_18450',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.7106370030031306,
  'model_11000',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.7429738891444317,
  'model_19250',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.75111750863787

In [14]:
# model_1 = random.choice(test_models)

In [15]:
# model_1

In [16]:
# model_2 = sorted([(quality_1vs1(model_1[1], m[1]), m) for m in test_models if m[0] != model_1[0]])[-1][1]

In [17]:
# model_2

In [18]:
def test(n_games=5):
    """Play a series of games between saved models and update their TrueSkill ratings.

    Every model returned by readModel() is tracked as a mutable record
    ``[games_played, tie_break, model_name, rating]``. Before each game the
    records are ordered by games played and then by the random tie-break, and
    the first two records play each other through compete(). Ratings are
    updated with rate_1vs1 according to the reported winner (1, 2, or -1 for
    a tie).

    Args:
        n_games: number of games to schedule. Defaults to 5.

    Returns:
        list: the model records, in the order produced by the most recent
        sort. Returned unchanged if fewer than two models are available.
    """
    model_dict = readModel()
    test_models = [[0, random.random(), model_name, Rating()] for model_name in model_dict]

    if len(test_models) < 2:
        print('need at least two models to play, found {}'.format(len(test_models)))
        return test_models

    model_dir = os.getcwd() + '/PyTorch_models/'

    for i in range(n_games):
        # Least-played models first. The explicit key keeps names and Rating
        # objects out of the comparison.
        test_models = sorted(test_models, key=lambda record: (record[0], record[1]))
        model_1, model_2 = test_models[0], test_models[1]

        # find relevant model files
        model_file_1 = model_dir + model_dict[model_1[2]]
        model_file_2 = model_dir + model_dict[model_2[2]]

        winner = compete(model_file_1, model_file_2)
        if winner is None:
            # compete() yields None when the game was interrupted: stop here
            # instead of counting an unplayed game and starting the next one.
            print('interrupted, stopping after {} completed games'.format(i))
            break

        # count the number of games each model has played
        model_1[0] += 1
        model_2[0] += 1

        # Draw fresh tie-break values for the two models that just played.
        # With values fixed at creation the schedule is static and the same
        # neighbours keep meeting each other.
        model_1[1] = random.random()
        model_2[1] = random.random()

        # rate_1vs1 takes (winner, loser) and returns them in the same order
        if winner == 1:
            model_1[-1], model_2[-1] = rate_1vs1(model_1[-1], model_2[-1])
        elif winner == 2:
            model_2[-1], model_1[-1] = rate_1vs1(model_2[-1], model_1[-1])
        elif winner == -1:
            model_1[-1], model_2[-1] = rate_1vs1(model_1[-1], model_2[-1], drawn=True)
        else:
            print("unknown winner type ", winner)

        print('game {} completed: {} vs {}, winner is {}'.format(i, model_1[2], model_2[2], winner))

    print(test_models)
    return test_models

In [19]:
test()

  x_act = F.log_softmax(self.act_fc1(x_act))


game 0 completed: model_10500 vs model_3000, winner is 2
game 1 completed: model_18450 vs model_12950, winner is 1
game 2 completed: model_17750 vs model_23500, winner is 2
game 3 completed: model_18050 vs model_19250, winner is -1
game 4 completed: model_5200 vs model_50, winner is -1
[[1, 0.5204261977481448, 'model_5200', trueskill.Rating(mu=25.000, sigma=6.458)], [1, 0.5615293014988033, 'model_50', trueskill.Rating(mu=25.000, sigma=6.458)], [0, 0.6422192977260516, 'model_19600', trueskill.Rating(mu=25.000, sigma=8.333)], [0, 0.7382583343403695, 'model_14050', trueskill.Rating(mu=25.000, sigma=8.333)], [0, 0.766139795054323, 'model_600', trueskill.Rating(mu=25.000, sigma=8.333)], [0, 0.9314452719639532, 'model_11000', trueskill.Rating(mu=25.000, sigma=8.333)], [0, 0.9346586206471538, 'model_25300', trueskill.Rating(mu=25.000, sigma=8.333)], [1, 0.06216122152776349, 'model_10500', trueskill.Rating(mu=20.604, sigma=7.171)], [1, 0.1061179760372215, 'model_3000', trueskill.Rating(mu=29.3

[[1,
  0.5204261977481448,
  'model_5200',
  trueskill.Rating(mu=25.000, sigma=6.458)],
 [1, 0.5615293014988033, 'model_50', trueskill.Rating(mu=25.000, sigma=6.458)],
 [0,
  0.6422192977260516,
  'model_19600',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.7382583343403695,
  'model_14050',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0, 0.766139795054323, 'model_600', trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.9314452719639532,
  'model_11000',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [0,
  0.9346586206471538,
  'model_25300',
  trueskill.Rating(mu=25.000, sigma=8.333)],
 [1,
  0.06216122152776349,
  'model_10500',
  trueskill.Rating(mu=20.604, sigma=7.171)],
 [1,
  0.1061179760372215,
  'model_3000',
  trueskill.Rating(mu=29.396, sigma=7.171)],
 [1,
  0.23768319461551957,
  'model_18450',
  trueskill.Rating(mu=29.396, sigma=7.171)],
 [1,
  0.24657046914822955,
  'model_12950',
  trueskill.Rating(mu=20.604, sigma=7.171)],
 [1,
  0.27213421700941043,
  'model_1

### Results from terminal

In [14]:
tochee1_result = [['best_policy_885_pt_600.model', trueskill.Rating(mu=27.427, sigma=2.842)], ['best_policy_885_pt_25300.model', trueskill.Rating(mu=15.621, sigma=2.088)], ['best_policy_885_pt_14050.model', trueskill.Rating(mu=26.730, sigma=2.513)], ['best_policy_885_pt_23500.model', trueskill.Rating(mu=27.328, sigma=1.640)], ['best_policy_885_pt_10500.model', trueskill.Rating(mu=26.291, sigma=2.292)], ['best_policy_885_pt_19600.model', trueskill.Rating(mu=26.679, sigma=3.157)], ['best_policy_885_pt_11000.model', trueskill.Rating(mu=15.585, sigma=5.268)], ['best_policy_885_pt_19250.model', trueskill.Rating(mu=18.190, sigma=2.473)], ['best_policy_885_pt_18450.model', trueskill.Rating(mu=27.981, sigma=1.501)], ['best_policy_885_pt_50.model', trueskill.Rating(mu=26.969, sigma=0.936)], ['best_policy_885_pt_17750.model', trueskill.Rating(mu=23.381, sigma=4.099)], ['best_policy_885_pt_3000.model', trueskill.Rating(mu=26.799, sigma=2.373)], ['best_policy_885_pt_18050.model', trueskill.Rating(mu=20.567, sigma=2.358)], ['best_policy_885_pt_12950.model', trueskill.Rating(mu=26.906, sigma=2.613)], ['best_policy_885_pt_5200.model', trueskill.Rating(mu=19.208, sigma=1.484)]]

In [15]:
tochee1_result

[['best_policy_885_pt_600.model', trueskill.Rating(mu=27.427, sigma=2.842)],
 ['best_policy_885_pt_25300.model', trueskill.Rating(mu=15.621, sigma=2.088)],
 ['best_policy_885_pt_14050.model', trueskill.Rating(mu=26.730, sigma=2.513)],
 ['best_policy_885_pt_23500.model', trueskill.Rating(mu=27.328, sigma=1.640)],
 ['best_policy_885_pt_10500.model', trueskill.Rating(mu=26.291, sigma=2.292)],
 ['best_policy_885_pt_19600.model', trueskill.Rating(mu=26.679, sigma=3.157)],
 ['best_policy_885_pt_11000.model', trueskill.Rating(mu=15.585, sigma=5.268)],
 ['best_policy_885_pt_19250.model', trueskill.Rating(mu=18.190, sigma=2.473)],
 ['best_policy_885_pt_18450.model', trueskill.Rating(mu=27.981, sigma=1.501)],
 ['best_policy_885_pt_50.model', trueskill.Rating(mu=26.969, sigma=0.936)],
 ['best_policy_885_pt_17750.model', trueskill.Rating(mu=23.381, sigma=4.099)],
 ['best_policy_885_pt_3000.model', trueskill.Rating(mu=26.799, sigma=2.373)],
 ['best_policy_885_pt_18050.model', trueskill.Rating(mu=20

In [16]:
pc_result = [['best_policy_885_pt_10500.model', trueskill.Rating(mu=22.704, sigma=2.076)], ['best_policy_885_pt_11000.model', trueskill.Rating(mu=17.412, sigma=1.650)], ['best_policy_885_pt_12950.model', trueskill.Rating(mu=25.548, sigma=3.220)], ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.741, sigma=1.002)], ['best_policy_885_pt_17750.model', trueskill.Rating(mu=25.896, sigma=2.279)], ['best_policy_885_pt_18050.model', trueskill.Rating(mu=27.748, sigma=2.179)], ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.857, sigma=2.249)], ['best_policy_885_pt_19250.model', trueskill.Rating(mu=21.463, sigma=4.564)], ['best_policy_885_pt_19600.model', trueskill.Rating(mu=18.651, sigma=2.223)], ['best_policy_885_pt_23500.model', trueskill.Rating(mu=24.059, sigma=1.620)], ['best_policy_885_pt_25300.model', trueskill.Rating(mu=17.885, sigma=2.409)], ['best_policy_885_pt_3000.model', trueskill.Rating(mu=26.476, sigma=1.695)], ['best_policy_885_pt_50.model', trueskill.Rating(mu=21.351, sigma=3.800)], ['best_policy_885_pt_5200.model', trueskill.Rating(mu=25.924, sigma=1.888)], ['best_policy_885_pt_600.model', trueskill.Rating(mu=35.422, sigma=3.303)]]

In [17]:
pc_result

[['best_policy_885_pt_10500.model', trueskill.Rating(mu=22.704, sigma=2.076)],
 ['best_policy_885_pt_11000.model', trueskill.Rating(mu=17.412, sigma=1.650)],
 ['best_policy_885_pt_12950.model', trueskill.Rating(mu=25.548, sigma=3.220)],
 ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.741, sigma=1.002)],
 ['best_policy_885_pt_17750.model', trueskill.Rating(mu=25.896, sigma=2.279)],
 ['best_policy_885_pt_18050.model', trueskill.Rating(mu=27.748, sigma=2.179)],
 ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.857, sigma=2.249)],
 ['best_policy_885_pt_19250.model', trueskill.Rating(mu=21.463, sigma=4.564)],
 ['best_policy_885_pt_19600.model', trueskill.Rating(mu=18.651, sigma=2.223)],
 ['best_policy_885_pt_23500.model', trueskill.Rating(mu=24.059, sigma=1.620)],
 ['best_policy_885_pt_25300.model', trueskill.Rating(mu=17.885, sigma=2.409)],
 ['best_policy_885_pt_3000.model', trueskill.Rating(mu=26.476, sigma=1.695)],
 ['best_policy_885_pt_50.model', trueskill.Rating(mu=

In [18]:
tochee1_result2 = [['best_policy_885_pt_600.model', trueskill.Rating(mu=25.519, sigma=0.887)], ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.536, sigma=1.961)], ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.569, sigma=2.199)], ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.443, sigma=3.058)], ['best_policy_885_pt_10500.model', trueskill.Rating(mu=25.438, sigma=2.438)], ['best_policy_885_pt_19600.model', trueskill.Rating(mu=30.584, sigma=2.817)], ['best_policy_885_pt_11000.model', trueskill.Rating(mu=9.636, sigma=3.690)], ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.595, sigma=2.273)], ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.593, sigma=2.119)], ['best_policy_885_pt_50.model', trueskill.Rating(mu=25.570, sigma=2.844)], ['best_policy_885_pt_17750.model', trueskill.Rating(mu=18.731, sigma=2.595)], ['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.515, sigma=2.388)], ['best_policy_885_pt_18050.model', trueskill.Rating(mu=19.871, sigma=2.591)], ['best_policy_885_pt_12950.model', trueskill.Rating(mu=25.592, sigma=1.398)], ['best_policy_885_pt_5200.model', trueskill.Rating(mu=35.190, sigma=3.007)]]

In [19]:
tochee1_result2

[['best_policy_885_pt_600.model', trueskill.Rating(mu=25.519, sigma=0.887)],
 ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.536, sigma=1.961)],
 ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.569, sigma=2.199)],
 ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.443, sigma=3.058)],
 ['best_policy_885_pt_10500.model', trueskill.Rating(mu=25.438, sigma=2.438)],
 ['best_policy_885_pt_19600.model', trueskill.Rating(mu=30.584, sigma=2.817)],
 ['best_policy_885_pt_11000.model', trueskill.Rating(mu=9.636, sigma=3.690)],
 ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.595, sigma=2.273)],
 ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.593, sigma=2.119)],
 ['best_policy_885_pt_50.model', trueskill.Rating(mu=25.570, sigma=2.844)],
 ['best_policy_885_pt_17750.model', trueskill.Rating(mu=18.731, sigma=2.595)],
 ['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.515, sigma=2.388)],
 ['best_policy_885_pt_18050.model', trueskill.Rating(mu=19.

In [20]:
pc_result2 = [['best_policy_885_pt_10500.model', trueskill.Rating(mu=25.035, sigma=2.602)], ['best_policy_885_pt_11000.model', trueskill.Rating(mu=10.202, sigma=3.687)], ['best_policy_885_pt_12950.model', trueskill.Rating(mu=25.069, sigma=3.210)], ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.129, sigma=1.418)], ['best_policy_885_pt_17750.model', trueskill.Rating(mu=18.876, sigma=1.740)], ['best_policy_885_pt_18050.model', trueskill.Rating(mu=20.555, sigma=1.793)], ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.059, sigma=2.600)], ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.170, sigma=2.237)], ['best_policy_885_pt_19600.model', trueskill.Rating(mu=30.376, sigma=2.861)], ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.130, sigma=2.084)], ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.149, sigma=3.390)], ['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.180, sigma=2.169)], ['best_policy_885_pt_50.model', trueskill.Rating(mu=25.114, sigma=2.154)], ['best_policy_885_pt_5200.model', trueskill.Rating(mu=34.701, sigma=3.126)], ['best_policy_885_pt_600.model', trueskill.Rating(mu=25.101, sigma=0.917)]]

In [21]:
pc_result2

[['best_policy_885_pt_10500.model', trueskill.Rating(mu=25.035, sigma=2.602)],
 ['best_policy_885_pt_11000.model', trueskill.Rating(mu=10.202, sigma=3.687)],
 ['best_policy_885_pt_12950.model', trueskill.Rating(mu=25.069, sigma=3.210)],
 ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.129, sigma=1.418)],
 ['best_policy_885_pt_17750.model', trueskill.Rating(mu=18.876, sigma=1.740)],
 ['best_policy_885_pt_18050.model', trueskill.Rating(mu=20.555, sigma=1.793)],
 ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.059, sigma=2.600)],
 ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.170, sigma=2.237)],
 ['best_policy_885_pt_19600.model', trueskill.Rating(mu=30.376, sigma=2.861)],
 ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.130, sigma=2.084)],
 ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.149, sigma=3.390)],
 ['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.180, sigma=2.169)],
 ['best_policy_885_pt_50.model', trueskill.Rating(mu=

In [23]:
tochee1_result3 = [['best_policy_885_pt_600.model', trueskill.Rating(mu=28.700, sigma=3.779)], ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.878, sigma=1.654)], ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.456, sigma=3.410)], ['best_policy_885_pt_23500.model', trueskill.Rating(mu=21.285, sigma=1.414)], ['best_policy_885_pt_10500.model', trueskill.Rating(mu=18.545, sigma=3.992)], ['best_policy_885_pt_19600.model', trueskill.Rating(mu=25.948, sigma=2.315)], ['best_policy_885_pt_11000.model', trueskill.Rating(mu=29.108, sigma=1.982)], ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.863, sigma=1.340)], ['best_policy_885_pt_18450.model', trueskill.Rating(mu=17.783, sigma=3.234)], ['best_policy_885_pt_50.model', trueskill.Rating(mu=18.001, sigma=3.614)], ['best_policy_885_pt_17750.model', trueskill.Rating(mu=17.162, sigma=2.249)], ['best_policy_885_pt_3000.model', trueskill.Rating(mu=20.095, sigma=1.618)], ['best_policy_885_pt_18050.model', trueskill.Rating(mu=17.366, sigma=2.946)], ['best_policy_885_pt_12950.model', trueskill.Rating(mu=17.201, sigma=1.279)], ['best_policy_885_pt_5200.model', trueskill.Rating(mu=28.795, sigma=2.718)]]

In [24]:
tochee1_result3

[['best_policy_885_pt_600.model', trueskill.Rating(mu=28.700, sigma=3.779)],
 ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.878, sigma=1.654)],
 ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.456, sigma=3.410)],
 ['best_policy_885_pt_23500.model', trueskill.Rating(mu=21.285, sigma=1.414)],
 ['best_policy_885_pt_10500.model', trueskill.Rating(mu=18.545, sigma=3.992)],
 ['best_policy_885_pt_19600.model', trueskill.Rating(mu=25.948, sigma=2.315)],
 ['best_policy_885_pt_11000.model', trueskill.Rating(mu=29.108, sigma=1.982)],
 ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.863, sigma=1.340)],
 ['best_policy_885_pt_18450.model', trueskill.Rating(mu=17.783, sigma=3.234)],
 ['best_policy_885_pt_50.model', trueskill.Rating(mu=18.001, sigma=3.614)],
 ['best_policy_885_pt_17750.model', trueskill.Rating(mu=17.162, sigma=2.249)],
 ['best_policy_885_pt_3000.model', trueskill.Rating(mu=20.095, sigma=1.618)],
 ['best_policy_885_pt_18050.model', trueskill.Rating(mu=17

In [26]:
tochee1_result4 = [['best_policy_885_pt_600.model', trueskill.Rating(mu=30.239, sigma=1.691)], ['best_policy_885_pt_25300.model', trueskill.Rating(mu=26.272, sigma=1.323)], ['best_policy_885_pt_14050.model', trueskill.Rating(mu=16.370, sigma=2.467)], ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.669, sigma=2.297)], ['best_policy_885_pt_10500.model', trueskill.Rating(mu=23.250, sigma=1.833)], ['best_policy_885_pt_19600.model', trueskill.Rating(mu=21.303, sigma=2.508)], ['best_policy_885_pt_11000.model', trueskill.Rating(mu=17.358, sigma=2.388)], ['best_policy_885_pt_19250.model', trueskill.Rating(mu=26.334, sigma=1.526)], ['best_policy_885_pt_18450.model', trueskill.Rating(mu=12.902, sigma=2.813)], ['best_policy_885_pt_50.model', trueskill.Rating(mu=21.223, sigma=1.666)], ['best_policy_885_pt_17750.model', trueskill.Rating(mu=15.859, sigma=3.032)], ['best_policy_885_pt_3000.model', trueskill.Rating(mu=30.217, sigma=1.405)], ['best_policy_885_pt_18050.model', trueskill.Rating(mu=13.071, sigma=3.014)], ['best_policy_885_pt_12950.model', trueskill.Rating(mu=21.187, sigma=3.217)], ['best_policy_885_pt_5200.model', trueskill.Rating(mu=30.181, sigma=2.136)]]

In [27]:
tochee1_result4

[['best_policy_885_pt_600.model', trueskill.Rating(mu=30.239, sigma=1.691)],
 ['best_policy_885_pt_25300.model', trueskill.Rating(mu=26.272, sigma=1.323)],
 ['best_policy_885_pt_14050.model', trueskill.Rating(mu=16.370, sigma=2.467)],
 ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.669, sigma=2.297)],
 ['best_policy_885_pt_10500.model', trueskill.Rating(mu=23.250, sigma=1.833)],
 ['best_policy_885_pt_19600.model', trueskill.Rating(mu=21.303, sigma=2.508)],
 ['best_policy_885_pt_11000.model', trueskill.Rating(mu=17.358, sigma=2.388)],
 ['best_policy_885_pt_19250.model', trueskill.Rating(mu=26.334, sigma=1.526)],
 ['best_policy_885_pt_18450.model', trueskill.Rating(mu=12.902, sigma=2.813)],
 ['best_policy_885_pt_50.model', trueskill.Rating(mu=21.223, sigma=1.666)],
 ['best_policy_885_pt_17750.model', trueskill.Rating(mu=15.859, sigma=3.032)],
 ['best_policy_885_pt_3000.model', trueskill.Rating(mu=30.217, sigma=1.405)],
 ['best_policy_885_pt_18050.model', trueskill.Rating(mu=13

In [28]:
tochee1_result5 = [['best_policy_885_pt_600.model', trueskill.Rating(mu=25.154, sigma=0.928)], ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.180, sigma=2.696)], ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.137, sigma=1.914)], ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.135, sigma=2.824)], ['best_policy_885_pt_10500.model', trueskill.Rating(mu=25.158, sigma=2.242)], ['best_policy_885_pt_19600.model', trueskill.Rating(mu=30.930, sigma=1.979)], ['best_policy_885_pt_11000.model', trueskill.Rating(mu=16.874, sigma=2.349)], ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.171, sigma=2.398)], ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.190, sigma=2.136)], ['best_policy_885_pt_50.model', trueskill.Rating(mu=25.152, sigma=2.208)], ['best_policy_885_pt_17750.model', trueskill.Rating(mu=25.000, sigma=8.333)], ['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.171, sigma=2.263)], ['best_policy_885_pt_18050.model', trueskill.Rating(mu=18.070, sigma=2.554)], ['best_policy_885_pt_12950.model', trueskill.Rating(mu=25.178, sigma=1.925)], ['best_policy_885_pt_5200.model', trueskill.Rating(mu=32.229, sigma=1.974)]]

In [29]:
tochee1_result5

[['best_policy_885_pt_600.model', trueskill.Rating(mu=25.154, sigma=0.928)],
 ['best_policy_885_pt_25300.model', trueskill.Rating(mu=25.180, sigma=2.696)],
 ['best_policy_885_pt_14050.model', trueskill.Rating(mu=25.137, sigma=1.914)],
 ['best_policy_885_pt_23500.model', trueskill.Rating(mu=25.135, sigma=2.824)],
 ['best_policy_885_pt_10500.model', trueskill.Rating(mu=25.158, sigma=2.242)],
 ['best_policy_885_pt_19600.model', trueskill.Rating(mu=30.930, sigma=1.979)],
 ['best_policy_885_pt_11000.model', trueskill.Rating(mu=16.874, sigma=2.349)],
 ['best_policy_885_pt_19250.model', trueskill.Rating(mu=25.171, sigma=2.398)],
 ['best_policy_885_pt_18450.model', trueskill.Rating(mu=25.190, sigma=2.136)],
 ['best_policy_885_pt_50.model', trueskill.Rating(mu=25.152, sigma=2.208)],
 ['best_policy_885_pt_17750.model', trueskill.Rating(mu=25.000, sigma=8.333)],
 ['best_policy_885_pt_3000.model', trueskill.Rating(mu=25.171, sigma=2.263)],
 ['best_policy_885_pt_18050.model', trueskill.Rating(mu=18

### Testing algo

In [15]:
def winProb(model_file_1, model_file_2):
    """Estimate the probability that side 1 beats side 2 under TrueSkill.

    Despite the parameter names, both arguments are sequences of rating
    objects (anything exposing ``mu`` and ``sigma``), one sequence per side.
    They are iterated more than once and passed to ``len``, so they must be
    real sequences such as lists, not generators.

    Args:
        model_file_1: ratings of the first side.
        model_file_2: ratings of the second side.

    Returns:
        float: the environment's normal CDF evaluated at the difference of
        the summed means divided by the combined standard deviation.
    """
    ts = trueskill.global_env()
    delta_mu = sum(r.mu for r in model_file_1) - sum(r.mu for r in model_file_2)
    sum_sigma = sum(r.sigma ** 2 for r in itertools.chain(model_file_1, model_file_2))
    size = len(model_file_1) + len(model_file_2)
    # The per-player performance variance is beta**2 of the active TrueSkill
    # environment, the same environment the rating functions use. A literal 1
    # here would make the probability inconsistent with the rating updates.
    denom = math.sqrt(size * (ts.beta * ts.beta) + sum_sigma)
    return ts.cdf(delta_mu / denom)

In [16]:
# Simulate 100 matches with randomly drawn outcomes to exercise the rating update.
# The loop needs [model name, rating] records. It builds its own list because the
# 4-field `test_models` created earlier in the notebook has a different layout
# (index 0 is a game counter, index 1 a float), which would leave the opponent
# filter below with no candidates.
sim_models = [[model_name, Rating()] for model_name in model_dict]

for _ in range(100):
    model_file_1 = random.choice(sim_models)
    # Opponent: the other model with the highest match quality; ties on
    # quality go to the largest model name.
    model_file_2 = max(
        (m for m in sim_models if m[0] != model_file_1[0]),
        key=lambda m: (quality_1vs1(model_file_1[1], m[1]), m[0]),
    )
    # Draw the outcome from the predicted win probability of the first model.
    model1_wins = random.random() < winProb([model_file_1[1]], [model_file_2[1]])
    # rate_1vs1 takes (winner, loser) and returns the updated pair in that order.
    if model1_wins:
        model_file_1[1], model_file_2[1] = rate_1vs1(model_file_1[1], model_file_2[1])
    else:
        model_file_2[1], model_file_1[1] = rate_1vs1(model_file_2[1], model_file_1[1])

In [19]:
def run():
    """Play one game between two fixed model files and return the result.

    The first player uses ``best_policy_885_pt_50.model`` and the second
    ``best_policy_885_pt_10500.model``, both read from ``<cwd>/PyTorch_models``.
    The board setup, network loading and the game itself are delegated to
    compete(), so the match configuration is defined in one place only.

    Returns:
        The winner value reported by compete(), or None if the game was
        interrupted with Ctrl-C.
    """
    model_dir = os.getcwd() + '/PyTorch_models/'
    model_file_1 = model_dir + 'best_policy_885_pt_50.model'
    model_file_2 = model_dir + 'best_policy_885_pt_10500.model'
    return compete(model_file_1, model_file_2)

In [20]:
run()

2