In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline
from tqdm.notebook import tqdm, trange

In [11]:
%load_ext autoreload
%autoreload 2

from hex_show import *
from hex_pathfinding import *
from hex_helpers import *
from hex_minmax import *
from hex_train_helpers import *
from hex_transfer import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Selfplay

In [24]:
from hex_minmax import *
from hex_nnet import *
from hex_mcts import *

# Board size handed to filledboard() by selfplay_batched() further down.
bsize = 9
# NOTE(review): N is not referenced in any of the visible cells - confirm it is
# still needed (presumably a leftover duplicate of bsize).
N = 9


import pickle

# Pool of in-progress boards. It lives at module level so that games which are
# still unfinished when selfplay_batched() returns are continued by the next call.
boards = None


def selfplay_batched(ai, ngames=1000, verbose=0):
    """Play batched self-play games with `ai` and return the recorded positions.

    `ai.nparallel` boards are advanced in lock-step: every pass asks
    `ai.findmove(boards)` for one move per board, applies the moves, and
    replaces each finished board with a fresh one.

    Parameters
    ----------
    ai : object
        Search object providing `nparallel`, `clear()` and `findmove(boards)`.
        Its `eta` attribute is overwritten with 0.3.
    ngames : int
        Number of games to complete before returning. Several boards can
        finish in the same pass, so slightly more games may be completed.
    verbose : int
        If >= 1, a tqdm progress bar counts completed games.

    Returns
    -------
    list of (board, move, outcome)
        One entry per move of every game completed during this call.
        For a positive turn value the board copy and the move are stored
        as-is; otherwise the negated, transposed board and the move with
        swapped coordinates are stored (presumably to present every sample
        from the same player's perspective - confirm against the trainer).
        `outcome` is the recorded turn value multiplied by the value that
        `winner()` returned for the finished board.

    Notes
    -----
    * The module-level `boards` pool persists between calls. Per-board move
      records are local, so a game carried over from an earlier call only
      contributes the moves played during the current call, and moves of
      games still unfinished on return are not part of the result.
    * If the pool size does not match `ai.nparallel` (e.g. a previous call
      used a different batch size) the pool is rebuilt from scratch instead
      of failing on mismatched list lengths.
    * Relies on the notebook globals `bsize` and `net`.
    """
    global boards

    bnum = ai.nparallel
    ai.eta = 0.3

    def newboard():
        """Generate start boards until one is found that has no winner yet."""
        frame = np.random.choice([2, 1, 1, 0, 0, 0])
        while True:
            # NOTE(review): the original additionally built a board with a
            # random fill argument (np.random.rand()*.5) and overwrote it
            # immediately with this call, so only this call ever took effect.
            # The dead call was dropped; confirm that 2 is the intended value.
            board = filledboard(bsize, 2, frame=frame)
            if not winner(board):
                return board

    # (Re)build the pool when it does not exist yet or was created for a
    # different batch size; otherwise continue the games already in progress.
    if boards is None or len(boards) != bnum:
        boards = [newboard() for _ in range(bnum)]
    turns = [getturn(brd) for brd in boards]
    records = [[] for _ in range(bnum)]

    game_records = []
    completedgames = 0
    pbar = tqdm(total=ngames) if verbose >= 1 else None

    try:
        while completedgames < ngames:
            # NOTE(review): this refreshes the notebook-global `net`, which is
            # not necessarily the network held by `ai` - confirm both refer
            # to the same object.
            check_new_model(net, 'net_temp.dat')
            ai.clear()
            moves = ai.findmove(boards)

            for b in range(bnum):
                turn = turns[b]
                x, y = moves[b]

                # Record the position *before* the move is applied.
                if turn > 0:
                    records[b].append((boards[b].copy(), (x, y), turn))
                else:
                    records[b].append((-boards[b].T.copy(), (y, x), turn))

                boards[b][x, y] = turn
                turns[b] = -turn

                won = winner(boards[b])
                if won:
                    # Label every recorded move of this game with its outcome.
                    game_records.extend(
                        (position, move, mover * won)
                        for position, move, mover in records[b]
                    )
                    completedgames += 1
                    records[b] = []
                    boards[b] = newboard()
                    turns[b] = getturn(boards[b])
                    if pbar is not None:
                        pbar.update(1)
    finally:
        # Close the bar even when the run is interrupted or a helper raises.
        if pbar is not None:
            pbar.close()

    return game_records




In [128]:
#newgames = selfplay(MultiMCTS(nparallel=1, nsearches=50, net=net), ngames=200, verbose=1)  # 25 min for 200 games!
#newgames = selfplay(MultiMCTS(nparallel=10, nsearches=50, net=net), ngames=200, verbose=1)  #  5 min for 200 games! Batching slightly degrades playing strength!
#newgames = selfplay_batched(BatchMCTS(nparallel=20, nsearches=50, net=net), ngames=200, verbose=1) # 3 min for 200 games!

HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))




In [28]:
from hex_net9x9 import NetAdaptive as Net

# Notebook-global network instance. It is passed to BatchMCTS in the self-play
# loop and is also read as the global `net` inside selfplay_batched() when that
# function calls check_new_model().
net = Net()

In [None]:

while True:
    # Runs until the kernel is interrupted: each round builds a fresh BatchMCTS
    # around the shared `net`, completes a batch of 100 self-play games and
    # hands the resulting records to save_selfplay().
    game_records = selfplay_batched(
        BatchMCTS(nparallel=20, nsearches=150, net=net),
        ngames=100,
        verbose=1,
    )
    save_selfplay(game_records)

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 04:22:17!

Loaded new model parameters at time 04:24:54!
Loaded new model parameters at time 04:24:59!

Saved 2710 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 04:38:42!

Saved 2923 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 04:48:02!
Loaded new model parameters at time 04:48:54!
Loaded new model parameters at time 04:50:09!
Loaded new model parameters at time 04:50:59!
Loaded new model parameters at time 04:51:47!
Loaded new model parameters at time 04:52:38!
Loaded new model parameters at time 04:53:30!
Loaded new model parameters at time 04:56:29!

Saved 2658 boards to temp/selfplay2.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 05:01:29!
Loaded new model parameters at time 05:07:00!

Saved 2783 boards to temp/selfplay0.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Saved 2811 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 05:19:58!

Saved 2820 boards to temp/selfplay3.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 05:32:33!

Saved 2611 boards to temp/selfplay0.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 05:44:35!

Saved 2784 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 05:57:21!

Saved 2622 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 06:09:07!

Saved 2644 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 06:20:41!

Saved 2460 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 06:32:11!

Saved 2681 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 06:43:29!

Saved 2796 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Saved 2833 boards to temp/selfplay0.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 06:55:31!

Saved 2597 boards to temp/selfplay0.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 07:07:12!

Saved 2633 boards to temp/selfplay2.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 07:19:06!

Saved 2754 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 07:29:59!

Saved 2495 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 07:42:42!

Saved 2791 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 07:52:30!

Saved 2645 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 08:04:21!

Saved 2490 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 08:15:55!

Saved 2383 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 08:25:37!

Saved 2752 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 08:36:44!

Saved 2465 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 08:46:28!

Saved 2423 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 08:57:10!

Saved 2483 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 09:07:17!

Saved 2567 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 09:19:02!

Saved 2673 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 09:30:16!

Saved 2709 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 09:41:04!

Saved 2488 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 09:52:56!

Saved 2256 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Saved 2435 boards to temp/selfplay0.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 10:05:26!

Saved 2516 boards to temp/selfplay2.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Couldnt load new model parameters at time 10:17:18!
Loaded new model parameters at time 10:17:22!

Saved 2542 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 10:29:14!

Saved 2277 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 10:41:11!

Saved 2521 boards to temp/selfplay0.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Saved 2337 boards to temp/selfplay0.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 10:52:20!

Saved 2493 boards to temp/selfplay0.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 11:04:25!

Saved 2663 boards to temp/selfplay2.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 11:16:44!

Saved 2246 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 11:29:16!

Saved 2288 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Saved 2193 boards to temp/selfplay1.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 11:41:05!

Saved 2374 boards to temp/selfplay3.dat.


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Loaded new model parameters at time 11:52:46!
