In [1]:
from othello import Othello
import numpy as np
from tqdm import tqdm

In [2]:
def pos_to_token(i,j):
    """Convert a board coordinate ``(i, j)`` into a move token.

    The coordinate is flattened to ``8*i + j``. Flat indices above 28 are
    shifted down by 2 and those above 36 by 4, which closes the gaps left by
    flat indices 27, 28, 35 and 36. For ``i, j`` in ``0..7`` the remaining
    60 squares therefore map one-to-one onto ``0..59``.

    Args:
        i: First board coordinate (multiplied by 8).
        j: Second board coordinate.

    Returns:
        The token for the given square.
    """
    flat = 8*i + j
    # At or before the first skipped pair (27, 28): nothing to compensate for.
    if flat <= 28:
        return flat
    # Between the two skipped pairs: two indices have been dropped so far.
    if flat <= 36:
        return flat - 2
    # Past the second skipped pair (35, 36): four indices have been dropped.
    return flat - 4

In [3]:
# Seed convention: 9 for the training set, 10 for the test set.

def generate_data(data_size, seed):
    """Play ``data_size`` random Othello games and record their move tokens.

    All games draw from one NumPy generator seeded with ``seed``, so the
    output is reproducible for a given ``(data_size, seed)`` pair.

    Args:
        data_size: Number of games to play.
        seed: Seed passed to ``np.random.default_rng``.

    Returns:
        Integer array of shape ``(data_size, 60)``. Each entry is either the
        ``pos_to_token`` value of the move played or 60 where the side to
        move had no valid move and passed.
    """
    generator = np.random.default_rng(seed)
    games = np.zeros((data_size,60), dtype=int)
    for game_idx in tqdm(range(data_size)):
        board = Othello()
        # Always record exactly 60 steps; steps without a valid move are passes.
        for ply in range(60):
            legal = board.valid_moves()
            if legal:
                # Pick one of the currently valid moves at random.
                chosen = generator.choice(legal)
                board.move(*chosen)
                games[game_idx,ply] = pos_to_token(*chosen)
            else:
                board.move_pass()
                games[game_idx,ply] = 60

    return games
#np.save('mini_training_moves.npy', moves)
#loaded = np.load('test_moves.npy')

In [4]:
def run_game(run):
    """Play a single random Othello game and return its 60 move tokens.

    The random generator is seeded with ``9 + 25*run``, so the same ``run``
    index always reproduces the same game.

    Args:
        run: Index of the game; used only to derive the seed.

    Returns:
        Integer array of shape ``(60,)``. Each entry is either the
        ``pos_to_token`` value of the move played or 60 where the side to
        move had no valid move and passed.
    """
    board = Othello()
    tokens = np.zeros((60,), dtype=int)
    generator = np.random.default_rng(9 + 25*run)
    # Always record exactly 60 steps; steps without a valid move are passes.
    for ply in range(60):
        legal = board.valid_moves()
        if legal:
            # Pick one of the currently valid moves at random.
            chosen = generator.choice(legal)
            board.move(*chosen)
            tokens[ply] = pos_to_token(*chosen)
        else:
            board.move_pass()
            tokens[ply] = 60
    return tokens

In [5]:
from multiprocessing import Pool

# Number of self-play games in the large training set (2**20).
N_TRAIN_GAMES = 1048576

# run_game seeds each game from its own index, so gathering the results in
# submission order (imap instead of imap_unordered) gives the same array on
# every run. A chunksize > 1 batches the many short tasks to reduce
# inter-process overhead.
pool = Pool()
try:
    results = list(tqdm(pool.imap(run_game, range(N_TRAIN_GAMES), chunksize=256),
                        total = N_TRAIN_GAMES))
    pool.close()
    pool.join()
finally:
    # Harmless after a clean close/join; on an error it stops the workers
    # instead of leaving them running in the background.
    pool.terminate()

100%|██████████| 1048576/1048576 [17:13<00:00, 1014.30it/s]


In [7]:
# Sanity check: show only the dimensions (games, moves per game) rather than
# rendering the whole array.
np.array(results).shape

(1048576, 60)

In [8]:
# Stack the per-game token sequences into one array and write it to disk.
np.save(file='large_training.npy', arr=np.array(results))

In [9]:
# Validation set: 4096 games drawn from a single generator seeded with 1000.
np.save(file='large_val.npy', arr=generate_data(data_size=4096, seed=1000))

100%|██████████| 4096/4096 [00:51<00:00, 79.28it/s]
