In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
# Seeding is disabled, so every run generates different boards and weights.
# torch.manual_seed(0)
# np.random.seed(0)

# NOTE(review): this only constructs a device object (echoed as the cell output);
# no model or tensor in the visible cells is ever moved onto it.
torch.device("cuda")

device(type='cuda')

In [3]:
def sigmoid(x):
  """Logistic function 1 / (1 + e^-x); accepts a scalar or a NumPy array."""
  decay = np.exp(-x)
  denominator = 1 + decay
  return 1 / denominator

In [4]:
# American checkers: wikipedia.org/wiki/English_draughts
#	on an 8x8 checkerboard, both players start with 12 pawns
#	Black plays the first move
#	all pieces can only move and capture diagonally
#	pawns can only move/capture diagonally forward
#	kings can move/capture in any diagonal direction
#	if a man reaches the other side of the board, the turn ends and it becomes a king
#	captures are made by jumping any piece diagonally over an opponent's piece
#	if a capture can be made, it must be taken
#	multiple captures can be made in a single turn and with a single piece
#	the game ends when a player captures all the opponent's pieces
#	a player also wins when the opponent cannot make a legal move

#	example board: 
#	/b/b/b/b	b/w = Black/White man {1, -1}
#	b/b/b/b/	B/W = Black/White king {3, -3}
#	/b/b/b/b	_ = empty square {0}
#	_/_/_/_/	/ = unusable square
#	/_/_/_/_
#	w/w/w/w/
#	/w/w/w/w
#	w/w/w/w/	* since pieces only move diagonally, only 32 squares are used

# number of opponent pieces captured (max = 12)
def num_captured(board):
	"""Return 12 minus the number of negative (opponent) squares on `board`."""
	opponents_left = (board < 0).sum()
	return 12 - opponents_left

def num_branches(board, x, y):
	"""Count the capture steps available to the piece at (x, y), following multi-jumps.

	Every jump adds 1 plus the count obtained recursively from the landing
	square. Pieces with value >= 1 capture towards increasing row index;
	kings (value 3) additionally capture towards decreasing row index.

	`board` is modified while exploring and restored before returning.

	Args:
		board: 8x8 array; positive = own pieces, negative = opponent, 0 = empty.
		x, y: row and column of the piece to examine.

	Returns:
		Number of capture steps found (0 if the square holds no own piece).
	"""
	piece = board[x, y]
	if piece < 1:
		return 0

	row_steps = (1, -1) if piece == 3 else (1,)
	count = 0
	for dx in row_steps:
		for dy in (1, -1):
			land_x, land_y = x + 2*dx, y + 2*dy
			# Explicit bounds check on both sides: a negative index would
			# silently wrap around to the opposite edge of the NumPy array.
			if not (0 <= land_x < 8 and 0 <= land_y < 8):
				continue
			mid_x, mid_y = x + dx, y + dy
			if board[mid_x, mid_y] < 0 and board[land_x, land_y] == 0:
				captured = board[mid_x, mid_y]
				# Play the jump, count what follows, then undo it.
				board[land_x, land_y] = piece
				board[x, y] = 0
				board[mid_x, mid_y] = 0
				count += num_branches(board, land_x, land_y) + 1
				board[mid_x, mid_y] = captured
				board[x, y] = piece
				board[land_x, land_y] = 0
	return count

def possible_moves(board):
	"""Count the moves available to the positive side of `board`.

	If any capture exists, the total from num_branches over all own pieces is
	returned and plain steps are ignored (captures are mandatory). Otherwise
	the number of single diagonal steps onto empty squares is returned:
	towards increasing row index for every own piece, and additionally
	towards decreasing row index for kings (value 3).

	Args:
		board: 8x8 array; positive = own pieces, negative = opponent, 0 = empty.

	Returns:
		Number of available moves (0 when the side cannot move).
	"""
	count = 0
	for i in range(8):
		for j in range(8):
			if board[i, j] > 0:
				count += num_branches(board, i, j)
	if count > 0:
		return count

	for i in range(8):
		for j in range(8):
			piece = board[i, j]
			if piece < 1:
				continue
			row_steps = (1, -1) if piece == 3 else (1,)
			for di in row_steps:
				# Both columns are checked independently for every direction,
				# so a king's backward-left step is counted like the others.
				for dj in (1, -1):
					ti, tj = i + di, j + dj
					if 0 <= ti < 8 and 0 <= tj < 8 and board[ti, tj] == 0:
						count += 1
	return count


def game_winner(board):
	"""Return 1 if the positive side has won, -1 if it has lost, 0 otherwise.

	A side loses when it has no pieces left or when possible_moves reports no
	move for it. The positive side's conditions are tested before the
	opponent's in the no-move check.
	"""
	if not (board < 0).any():
		return 1
	if not (board > 0).any():
		return -1
	if possible_moves(board) == 0:
		return -1
	if possible_moves(reverse(board)) == 0:
		return 1
	return 0

def at_enemy(board):
	"""Count own pawns (1) and kings (3) standing on rows 5 to 7."""
	far_rows = board[5:8]
	return 0 + np.sum(far_rows == 1) + np.sum(far_rows == 3)

def at_middle(board):
	"""Count own pawns (1) and kings (3) standing on rows 3 and 4."""
	centre_rows = board[3:5]
	return 0 + np.sum(centre_rows == 1) + np.sum(centre_rows == 3)

def num_pawn(board):
	"""Count squares holding an own pawn (value 1)."""
	is_pawn = board == 1
	return np.sum(is_pawn)

def num_kings(board):
	"""Count squares holding an own king (value 3)."""
	is_king = board == 3
	return np.sum(is_king)

def capturables(board): # possible number of unsupported enemies
	"""Count interior opponent pieces with no opponent piece on any diagonal neighbour.

	Only rows and columns 1 to 6 are examined; pieces on the border are never counted.
	"""
	enemy_here = board[1:7, 1:7] < 0
	# Shifted views line each interior square up with its four diagonal neighbours.
	no_enemy_around = (
		(board[2:8, 2:8] >= 0)
		& (board[2:8, 0:6] >= 0)
		& (board[0:6, 2:8] >= 0)
		& (board[0:6, 0:6] >= 0)
	)
	return np.sum(enemy_here & no_enemy_around)

def semicapturables(board): # number of own units with at least one support
	"""Return 12 - uncapturables(board) - capturables(reverse(board))."""
	fully_protected = uncapturables(board)
	unsupported = capturables(reverse(board))
	# NOTE(review): 12 is the starting piece count, not the number of own
	# pieces currently on the board; confirm this is intended.
	return 12 - fully_protected - unsupported

def uncapturables(board): # number of own units that can't be captured
	"""Count own pieces treated as safe from capture.

	An interior piece (rows/columns 1 to 6, value > 0) counts when two of its
	diagonal neighbours that share a row or a column both hold own pieces.
	Every pawn (1) or king (3) on the outer border counts as well.
	"""
	count = 0
	for i in range(1, 7):
		for j in range(1, 7):
			if board[i, j] <= 0:
				continue
			below_right = board[i+1, j+1] > 0
			below_left = board[i+1, j-1] > 0
			above_right = board[i-1, j+1] > 0
			above_left = board[i-1, j-1] > 0
			covered = (
				(below_right and below_left)
				or (above_right and above_left)
				or (below_right and above_right)
				or (below_left and above_left)
			)
			if covered:
				count += 1

	own = (board == 1) | (board == 3)
	# Border squares: full first and last rows, plus the side columns of rows 1 to 6.
	count += own[0].sum() + own[1:7, 0].sum() + own[7].sum() + own[1:7, 7].sum()
	return count

def reverse(board):
	"""Return the board from the other side's view: signs negated, rows and columns both flipped."""
	return np.flipud(np.fliplr(-board))

def get_metrics(board): # returns [label, 10 labeling metrics]
	'''
	Score a compressed 32-square board with hand-written heuristics.

	Each metric except the last is the value for the positive side minus the
	value of the same metric on the reversed board.

	return [score, captured, potential, num_pawn, num_king, num_captureable, semi_capturable, num_uncaptureable, num_piece at middle, num_piece at enemy, game_state]
	where score is the sigmoid of the weighted sum of the ten metrics.
	'''
	own_view = expand(board)

	def advantage(metric):
		# Own value minus the opponent's value for one metric function.
		return metric(own_view) - metric(reverse(own_view))

	capped = advantage(num_captured)
	potential = advantage(possible_moves)
	pawn = advantage(num_pawn)
	kings = advantage(num_kings)
	caps = advantage(capturables)
	semicaps = advantage(semicapturables)
	uncaps = advantage(uncapturables)
	mid = advantage(at_middle)
	far = advantage(at_enemy)
	won = game_winner(own_view)

	weighted = (4*capped + potential + pawn + 3*kings + 3*caps + 2*semicaps + 2*uncaps + 2*mid + 3*far + 100*won)
	score = sigmoid(weighted)

	return np.array([score, capped, potential, pawn, kings, caps, semicaps, uncaps, mid, far, won])

	# if score > 10:
		# score = 10
	
	# if score < 0:
		# score = 0

	# if score > 0:
	# 	return np.array([1, capped, potential, pawn, kings, caps, semicaps, uncaps, mid, far, won])

	# else:
	# 	return np.array([-1, capped, potential, pawn, kings, caps, semicaps, uncaps, mid, far, won])


def np_board():
	"""Return the starting position from get_board() as a NumPy array."""
	start = get_board()
	return np.array(start)

def get_board():
	"""Return the starting position as a new 32-element list: twelve 1s, eight 0s, twelve -1s."""
	return [1] * 12 + [0] * 8 + [-1] * 12

def expand(board):
	"""Spread a 32-square board over an 8x8 int8 grid.

	Row r takes squares 4r to 4r+3. Even rows place them on columns 1, 3, 5, 7;
	odd rows on columns 0, 2, 4, 6. All other squares stay 0.
	"""
	grid = np.zeros((8, 8), dtype='b')
	for row in range(8):
		first_col = 1 if row % 2 == 0 else 0
		grid[row, first_col::2] = board[row*4 : row*4 + 4]
	return grid

def compress(board):
	"""Pack the 32 playable squares of an 8x8 board into a (1, 32) int8 array.

	Even rows contribute columns 1, 3, 5, 7 and odd rows columns 0, 2, 4, 6,
	row after row.
	"""
	packed = np.zeros((1, 32), dtype='b')
	for row in range(8):
		first_col = 1 - row % 2
		packed[0, row*4 : row*4 + 4] = board[row, first_col::2]
	return packed

def generate_branches(board, x, y):
	"""Enumerate the positions reachable by captures with the piece at (x, y).

	The first returned row is always compress(board) for the position as
	passed in; callers drop it with [1:]. Each capture contributes the rows of
	the recursive call from the landing square, so the position after every
	jump of a multi-jump appears in the result.

	A pawn (1) that lands on row 7 becomes a king (3) and its sequence stops
	there. Pieces with value >= 1 capture towards increasing row index; kings
	also capture towards decreasing row index.

	`board` is modified while exploring and restored before returning.

	Args:
		board: 8x8 array; positive = own pieces, negative = opponent, 0 = empty.
		x, y: row and column of the capturing piece.

	Returns:
		Array of shape (k, 32) with k >= 1 compressed boards.
	"""
	bb = compress(board)
	piece = board[x, y]
	if piece < 1:
		return bb

	row_steps = (1, -1) if piece == 3 else (1,)
	for dx in row_steps:
		for dy in (1, -1):
			land_x, land_y = x + 2*dx, y + 2*dy
			# Explicit bounds check on both sides: a negative index would
			# silently wrap around to the opposite edge of the NumPy array.
			if not (0 <= land_x < 8 and 0 <= land_y < 8):
				continue
			mid_x, mid_y = x + dx, y + dy
			if not (board[mid_x, mid_y] < 0 and board[land_x, land_y] == 0):
				continue

			captured = board[mid_x, mid_y]
			promoted = (piece != 3 and land_x == 7)
			board[land_x, land_y] = 3 if land_x == 7 else piece
			board[mid_x, mid_y] = 0
			board[x, y] = 0
			if promoted:
				# Promotion ends the turn: record this position only.
				bb = np.vstack((bb, compress(board)))
			else:
				bb = np.vstack((bb, generate_branches(board, land_x, land_y)))
			# Undo the jump before trying the next direction.
			board[mid_x, mid_y] = captured
			board[x, y] = piece
			board[land_x, land_y] = 0
	return bb

def generate_next(board):
	"""Return every position the positive side can reach in one move.

	If any capture exists, only capture results (from generate_branches) are
	returned. Otherwise each single diagonal step onto an empty square is
	returned: towards increasing row index for every own piece, and also
	towards decreasing row index for kings (3). A piece stepping onto row 7 is
	stored as a king.

	`board` is modified while exploring and restored before returning.

	Args:
		board: 8x8 array; positive = own pieces, negative = opponent, 0 = empty.

	Returns:
		Integer array of shape (k, 32), one compressed board per move; k is 0
		when there is no move.
	"""
	# The empty seed keeps the (0, 32) shape and integer dtype when nothing is found.
	captures = [np.zeros((0, 32), dtype=int)]
	for i in range(8):
		for j in range(8):
			if board[i, j] > 0:
				# Row 0 of generate_branches is the unchanged position; skip it.
				captures.append(generate_branches(board, i, j)[1:])
	capture_rows = np.vstack(captures)
	if len(capture_rows) > 0:
		return capture_rows

	steps = [np.zeros((0, 32), dtype=int)]
	for i in range(8):
		for j in range(8):
			piece = board[i, j]
			if piece < 1:
				continue
			row_steps = (1, -1) if piece == 3 else (1,)
			for di in row_steps:
				# Both columns are tried independently for every direction,
				# so a king's backward-left step is generated like the others.
				for dj in (1, -1):
					ti, tj = i + di, j + dj
					if not (0 <= ti < 8 and 0 <= tj < 8) or board[ti, tj] != 0:
						continue
					board[ti, tj] = 3 if ti == 7 else piece
					board[i, j] = 0
					steps.append(compress(board))
					board[i, j] = piece
					board[ti, tj] = 0
	return np.vstack(steps)

def random_board():
	"""Return the starting position with pieces randomly promoted, removed or added.

	Squares are visited in order. A non-empty square is promoted (value * 3)
	when the draw exceeds `promote`, or emptied when it is below `remove`; an
	empty square receives a 1 or -1 when the draw is below `add`. The
	thresholds shift after each promotion or removal.

	Returns:
		A new 32-element list in the compressed board layout.
	"""
	b = get_board()
	promote = 0.9
	remove = 0.4
	add = 0
	for idx, piece in enumerate(b):
		# randomly promote, remove, or add piece
		rand = np.random.random()
		# Changes are written to b[idx]; rebinding `piece` would not alter the list.
		if piece != 0 and rand > promote:
			b[idx] = piece * 3
			promote = promote + 0.005
		elif piece != 0 and rand < remove:
			b[idx] = 0
			remove = remove - 0.025
			add = add + 0.05
		elif piece == 0 and rand < add:
			if np.random.random() > 0.5:
				b[idx] = 1
			else:
				b[idx] = -1
	return b

  if piece is not 0 and rand > promote:
  elif piece is not 0 and rand < remove:
  elif piece is 0 and rand < add:


In [5]:
# Display the compressed 32-square starting position as a sanity check.
np_board()

array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0,
        0,  0,  0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])

In [216]:
# Build the training positions: expand the game tree layer by layer from the
# starting position until at least nmbr_generated_game boards exist.
# Each stored board is seen from the side that just moved, so it is reversed
# before generating the replies.
start_board = expand(np_board())
boards_list = generate_next(start_board)
branching_position = 0
nmbr_generated_game = 10000

while len(boards_list) < nmbr_generated_game:
	frontier_end = len(boards_list)
	layers = [boards_list]
	for i in range(branching_position, frontier_end):
		opponent_view = reverse(expand(boards_list[i]))
		if possible_moves(opponent_view) > 0:
			layers.append(generate_next(opponent_view))
	if len(layers) == 1:
		# Nothing new was generated; stop instead of looping forever.
		break
	# One concatenation per layer instead of one per expanded board.
	boards_list = np.vstack(layers)
	# The next layer starts exactly where this one ended, so no board is expanded twice.
	branching_position = frontier_end

np.random.shuffle(boards_list)
boards_list = boards_list[:nmbr_generated_game]

# Balance the two classes: alternately ask for a board scoring <= .5 (slot 0)
# and one scoring > .5 (slot 1); a board on the wrong side of .5 is replaced
# by the same position seen from the other player.
count = [0, 0]
for i in range(len(boards_list)):
	score = get_metrics(boards_list[i])[0]
	needed = count.index(min(count))
	wrong_side = (score > .5) if needed == 0 else (score <= .5)
	if wrong_side:
		boards_list[i] = compress(reverse(expand(boards_list[i])))
	count[needed] += 1

np.random.shuffle(boards_list)


In [217]:
import pickle

# Write boards_list to data.pickle in the current working directory.
with open('data.pickle', 'wb') as f:
	pickle.dump(boards_list, f)

In [218]:
import pickle

# Reload boards_list from data.pickle.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# a data.pickle that this notebook wrote.
with open('data.pickle', 'rb') as f:
	boards_list = pickle.load(f)

In [219]:
# Compute the heuristic label and the ten metrics for every board in one pass.
# get_metrics returns 11 values: column 0 is the sigmoid score, the rest are metrics.
metric_rows = np.array([get_metrics(b) for b in boards_list], dtype=float).reshape(-1, 11)
metrics = metric_rows[:, 1:].copy()    # shape (N, 10): network inputs
winning = metric_rows[:, :1].copy()    # shape (N, 1): target score

	# if temp[0] == -1:
		# winning = np.vstack((winning, [1, 0]))
	# else:
		# winning = np.vstack((winning, [0, 1]))

In [220]:
class Network(nn.Module):
	"""Fully connected net 10 -> 4 -> 4 -> 1 with ReLU between layers and a sigmoid output."""

	def __init__(self) -> None:
		super().__init__()

		# Trainable layers; the attribute names are the state_dict keys.
		self.hidden1 = nn.Linear(10, 4)
		self.hidden2 = nn.Linear(4, 4)
		self.output = nn.Linear(4, 1)

		# Parameter-free activations.
		self.ReLu = nn.ReLU()
		self.sigmoid = nn.Sigmoid()
		self.softmax = nn.Softmax()  # not used by forward()

	def forward(self, x):
		"""Map a tensor whose last dimension is 10 to a value in (0, 1)."""
		first = self.ReLu(self.hidden1(x))
		second = self.ReLu(self.hidden2(first))
		return self.sigmoid(self.output(second))

In [221]:
# Freshly initialised metrics network (10 metric inputs -> 1 score).
model = Network()

In [222]:
# Module-level loss and optimizer read by train()/test(); plain SGD over `model`'s parameters.
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001)


In [223]:
# Shared training settings.
epochs = 32          # number of passes used for the board-model training loop
batch_size = 64      # only enters the train_counter bookkeeping; updates are made one sample at a time
log_interval = 100   # samples between progress prints / checkpoints
train_losses = []    # loss of the sample at each logging step
train_counter = []   # x-axis value recorded alongside each entry of train_losses

In [224]:
def train(epoch):
	"""Run one pass of per-sample SGD of `model` on (metrics, winning).

	Reads the module-level `model`, `optimizer`, `criterion`, `metrics`,
	`winning`, `log_interval`, `batch_size`, `train_losses` and
	`train_counter`. Every `log_interval` samples it prints running loss and
	accuracy, appends to the two logging lists and checkpoints the model and
	optimizer under results/.

	Args:
		epoch: index of the pass, used for logging only.
	"""
	import os

	model.train()
	total_loss = 0.0
	correct = 0
	for batch_idx, (data, target) in enumerate(zip(metrics, winning)):
		optimizer.zero_grad()
		output = model(torch.Tensor(data))
		loss = criterion(output, torch.FloatTensor(target))
		loss.backward()
		optimizer.step()

		pred = output.tolist()
		res = target.tolist()
		# A prediction is correct when it falls on the same side of 0.5 as the target.
		correct += int((pred[0] > .5) == (res[0] > .5))

		# Accumulate a Python float, not the loss tensor.
		total_loss += loss.item()

		if (batch_idx+1) % log_interval == 0:
			print(f'Train Epoch: {epoch} [{batch_idx+1}/{len(metrics)} ({100. * (batch_idx+1) / len(metrics):.0f}%)]\tLoss: {total_loss / (batch_idx+1):.6}\tAccuracy: {correct}/{batch_idx+1} ({100.*correct/(batch_idx+1):.0f}%)')

			train_losses.append(loss.item())
			train_counter.append(
				(batch_idx*batch_size) + ((epoch-1)*len(metrics)))

			# torch.save does not create missing directories.
			os.makedirs('results', exist_ok=True)
			torch.save(model.state_dict(), 'results/model.pth')
			torch.save(optimizer.state_dict(), 'results/optimizer.pth')

In [225]:
# Train the metrics network for 5 passes (a fixed count, not the `epochs` constant).
for i in range(5):
	train(i)
# train(0)



In [226]:
# Replace `model` with a new Network loaded from the checkpoint file that
# train() writes to results/model.pth.
model = Network()
model.load_state_dict(torch.load('results/model.pth'))

<All keys matched successfully>

In [233]:
def test():
	"""Print the average loss and accuracy of `model` over (metrics, winning).

	Reads the module-level `model`, `criterion`, `metrics` and `winning`. A
	prediction counts as correct when it lies on the same side of 0.5 as its
	target.
	"""
	model.eval()
	loss_sum = 0
	hits = 0
	with torch.no_grad():
		for features, label in zip(metrics, winning):
			prediction: torch.Tensor = model(torch.Tensor(features))
			loss_sum += criterion(prediction, torch.FloatTensor(label))
			predicted_high = prediction.tolist()[0] > .5
			target_high = label.tolist()[0] > .5
			hits += predicted_high == target_high

	mean_loss = loss_sum / len(metrics)
	print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
		mean_loss, hits, len(metrics),
		100. * hits / len(metrics)))

In [234]:
# NOTE: test() iterates over the same metrics/winning arrays that train() used,
# so the figure below is accuracy on the training data.
test()


Test set: Avg. loss: 0.2857, Accuracy: 9809/10000 (98%)



In [235]:
class Board_Network(nn.Module):
	"""Fully connected net 32 -> 16 -> 8 -> 4 -> 1 with ReLU between layers and a sigmoid output."""

	def __init__(self) -> None:
		super().__init__()

		# Trainable layers; the attribute names are the state_dict keys.
		self.hidden1 = nn.Linear(32, 16)
		self.hidden2 = nn.Linear(16, 8)
		self.hidden3 = nn.Linear(8, 4)
		self.output = nn.Linear(4, 1)

		# Parameter-free activations.
		self.ReLu = nn.ReLU()
		self.sigmoid = nn.Sigmoid()

	def forward(self, x):
		"""Map a tensor whose last dimension is 32 (a compressed board) to a value in (0, 1)."""
		activation = x
		for layer in (self.hidden1, self.hidden2, self.hidden3):
			activation = self.ReLu(layer(activation))
		return self.sigmoid(self.output(activation))

In [236]:
# Freshly initialised network that scores a compressed 32-square board directly.
board_model = Board_Network()

In [237]:
# Rebinds the module-level criterion/optimizer: from here on the optimizer
# updates board_model's parameters.
criterion = nn.BCELoss()
optimizer = optim.SGD(board_model.parameters(), lr = 0.001)

In [238]:
# Inputs for the board network are the compressed boards; targets are the
# metrics network's predictions for the same positions.
data = boards_list

# Pure inference: no autograd graph is needed for the label pass.
with torch.no_grad():
	probabilistic = model(torch.Tensor(metrics)).numpy()


In [239]:
def train(epoch):
	"""Run one pass of per-sample SGD of `board_model` on (data, probabilistic).

	Reads the module-level `board_model`, `optimizer`, `criterion`, `data`,
	`probabilistic`, `log_interval`, `batch_size`, `train_losses` and
	`train_counter`. Every `log_interval` samples it prints running loss and
	accuracy, appends to the two logging lists and checkpoints the model and
	optimizer under results/.

	Args:
		epoch: index of the pass, used for logging only.
	"""
	import os

	board_model.train()
	total_loss = 0.0
	correct = 0
	for batch_idx, (_data, target) in enumerate(zip(data, probabilistic)):
		optimizer.zero_grad()
		output = board_model(torch.Tensor(_data))
		loss = criterion(output, torch.Tensor(target))
		loss.backward()
		optimizer.step()

		pred = output.tolist()
		res = target.tolist()

		# A prediction is correct when it falls on the same side of 0.5 as the target.
		correct += int((pred[0] > .5) == (res[0] > .5))

		# Accumulate a Python float, not the loss tensor.
		total_loss += loss.item()

		if (batch_idx+1) % log_interval == 0:
			print(f'Train Epoch: {epoch} [{batch_idx+1}/{len(data)} ({100. * (batch_idx+1) / len(data):.0f}%)]\tLoss: {total_loss / (batch_idx+1):.6}\tAccuracy: {correct}/{batch_idx+1} ({100.*correct/(batch_idx+1):.0f}%)')

			train_losses.append(loss.item())
			train_counter.append(
				(batch_idx*batch_size) + ((epoch-1)*len(data)))

			# torch.save does not create missing directories.
			os.makedirs('results', exist_ok=True)
			torch.save(board_model.state_dict(), 'results/board_model.pth')
			torch.save(optimizer.state_dict(), 'results/board_optimizer.pth')

In [242]:
# Train the board network for `epochs` passes with the train() defined in the cell above.
for i in range(epochs):
	train(i)



In [243]:
import timeit

def sec_to_time(sec):
	"""Format a duration in seconds (truncated to an int) as HH:MM:SS."""
	hours, remainder = divmod(int(sec), 3600)
	minutes, seconds = divmod(remainder, 60)
	return f'{hours:02}:{minutes:02}:{seconds:02}'


In [245]:
# Start the reinforcement phase from the weights saved in results/board_model.pth.
reinforced_model = Board_Network()
reinforced_model.load_state_dict(torch.load('results/board_model.pth'))

<All keys matched successfully>

In [246]:
# Rebinds the module-level criterion/optimizer: MSE loss, and an Adadelta
# optimizer that holds reinforced_model's parameters.
criterion = nn.MSELoss()
optimizer = optim.Adadelta(reinforced_model.parameters(), lr = 0.001)

In [247]:
def train(epoch, data, results):
	"""Run one pass of per-sample updates of `reinforced_model` on (data, results).

	Reads the module-level `reinforced_model`, `optimizer` and `criterion`.
	The forward and backward pass go through `reinforced_model`, the model
	whose parameters the optimizer was built over, so each step changes the
	network that plays the games and gets saved.

	Args:
		epoch: index of the pass; not used in the computation.
		data: iterable of 32-value compressed boards.
		results: iterable of targets, one per board, each of length 1.
	"""
	reinforced_model.train()
	for _data, target in zip(data, results):
		optimizer.zero_grad()
		output = reinforced_model(torch.Tensor(_data))
		loss = criterion(output, torch.Tensor(target))
		loss.backward()
		optimizer.step()


In [248]:
# Self-play reinforcement loop: 500 generations of 200 games each, followed by
# 16 training passes over the positions collected in that generation.
# data/labels start with a dummy first row that is sliced off ([1:]) before training.
data = np.zeros((1, 32))
labels = np.zeros(1)
# These counters are never reset, so the rates printed below are cumulative
# over all generations played so far.
win = lose = draw = 0
winrates = []
learning_rate = 0.01
discount_factor = 0.95

runtimes = []

for gen in range(0, 500):
	starttime = timeit.default_timer()
	for game in range(0, 200):
		# Positions reached after player +1's moves in this game (dummy first row).
		temp_data = np.zeros((1, 32))
		board = expand(np_board())
		# Randomly choose which side moves first (+1 or -1).
		player = np.sign(np.random.random() - 0.5)
		turn = 0
		while (True):
			# `moved` and this initial `boards` value are never read afterwards.
			moved = False
			boards = np.zeros((0, 32))
			# `board` is kept in player +1's orientation; for player -1 it is
			# reversed so the move generator always sees the mover as positive.
			if (player == 1):
				boards = generate_next(board)
			else:
				boards = generate_next(reverse(board))

			# Both sides pick the candidate position that reinforced_model scores highest.
			scores = reinforced_model(torch.Tensor(boards))
			scores = scores.detach().numpy()

			max_index = np.argmax(scores)
			best = boards[max_index]

			if (player == 1):
				board = expand(best)
				# Only player +1's resulting positions are recorded for training.
				temp_data = np.vstack((temp_data, compress(board)))
			else:
				# Flip the chosen position back into player +1's orientation.
				board = reverse(expand(best))

			player = -player

			# punish losing games, reward winners  & drawish games reaching more than 200 turns
			winner = game_winner(board)
			if (winner == 1 or (winner == 0 and turn >= 200) ):
				if winner == 1:
					win = win + 1
					reward = 10

				else:
					draw = draw + 1

					# Unfinished game: the sign of the reward follows the heuristic score of the final position.
					score = get_metrics(compress(board)[0])[0]
					if score >= 0.5:
						reward = 5
					else:
						reward = -5

				# Target for every recorded position: the current prediction moved by
				# learning_rate towards (reward + discount_factor * assumed future value of +1).
				old_prediction = reinforced_model(torch.Tensor(temp_data[1:]))
				old_prediction = old_prediction.detach().numpy()
				optimal_futur_value = np.ones(old_prediction.shape)
				temp_labels = old_prediction + learning_rate * (reward + discount_factor * optimal_futur_value - old_prediction )
				data = np.vstack((data, temp_data[1:]))
				labels = np.vstack((labels, temp_labels))
				break

			elif (winner == -1):
				lose = lose + 1
				reward = -10
				# Same update as above with reward -10 and an assumed future value of -1.
				old_prediction = reinforced_model(torch.Tensor(temp_data[1:]))
				old_prediction = old_prediction.detach().numpy()
				optimal_futur_value = -1*np.ones(old_prediction.shape)
				temp_labels = old_prediction + learning_rate * (reward + discount_factor * optimal_futur_value - old_prediction )
				data = np.vstack((data, temp_data[1:]))
				labels = np.vstack((labels, temp_labels))
				break

			turn = turn + 1

		# game runs 0..199, so this is true only after the last game of a generation.
		if ((game+1) % 200 == 0):
			for i in range(16):
				train(i, data[1:], labels[1:])

			# Start the next generation with empty buffers.
			data = np.zeros((1, 32))
			labels = np.zeros(1)

	endtime = timeit.default_timer()
	runtime = endtime - starttime

	runtimes.append(runtime)


	winrate = int((win)/(win+draw+lose)*100)

	drawrate = int((draw)/(win+draw+lose)*100)

	meantime = sum(runtimes) / len(runtimes)

	print(f'Generation: {gen+1} [{gen+1}/500] \tWinrate: {winrate}%\tDrawrate: {drawrate}%\ttime: {sec_to_time(runtime)}, estimate: {sec_to_time((500-gen-1)*meantime)}')


	# Stored value is the combined win + draw percentage.
	winrates.append(winrate+drawrate)

	# Checkpoint after every generation.
	torch.save(reinforced_model.state_dict(), 'results/reinforced_model.pth')
	# reinforced_model.save_weights('reinforced_model.h5')

print('Checkers Board Model updated by reinforcement learning & saved to: reinforced_model.pth')

Generation: 1 [1/500] 	Winrate: 0%	Drawrate: 100%	time: 00:02:31, estimate: 21:03:56
Generation: 2 [2/500] 	Winrate: 0%	Drawrate: 100%	time: 00:02:39, estimate: 21:31:20
Generation: 3 [3/500] 	Winrate: 0%	Drawrate: 100%	time: 00:02:38, estimate: 21:36:12
Generation: 4 [4/500] 	Winrate: 0%	Drawrate: 100%	time: 00:02:37, estimate: 21:35:41
Generation: 5 [5/500] 	Winrate: 0%	Drawrate: 100%	time: 00:02:52, estimate: 21:59:48


KeyboardInterrupt: 

In [172]:
def best_move(board):
  """Return the successor position that `reinforced_model` scores highest.

  Args:
    board: 8x8 array seen from the side to move (own pieces positive).

  Returns:
    The chosen position as a 32-value compressed board (one row of
    generate_next's result).

  Raises:
    ValueError: if generate_next finds no move for the side to move.
  """
  boards = generate_next(board)
  if len(boards) == 0:
    raise ValueError("best_move: no legal move available for the side to move")
  # Scoring is pure inference, so no autograd graph is needed.
  with torch.no_grad():
    scores = reinforced_model(torch.Tensor(boards)).numpy()
  return boards[np.argmax(scores)]

def print_board(board):
  """Print the board one row per line: o/x for pawns (1/-1), O/X for kings (3/-3), blank otherwise."""
  glyphs = {1: "|o", -1: "|x", -3: "|X", 3: "|O"}
  for row in board:
    cells = [glyphs.get(square, "| ") for square in row]
    print(''.join(cells) + '|')

In [206]:
# Hand-made endgame position used to inspect the move the model chooses.
start_board = np.array([[ 0,  3,  0,  3,  0,  0,  0,  0],
						[ 1,  0,  0,  0,  0,  0,  0,  0],
						[ 0,  0,  0,  0,  0,  0,  0, -1],
						[ 0,  0,  0,  0,  0,  0,  0,  0],
						[ 0,  0,  0,  0,  0,  0,  0, -1],
						[ 0,  0,  0,  0,  0,  0,  0,  0],
						[ 0, -3,  0,  0,  0, -3,  0,  0],
						[ 0,  0,  0,  0,  0,  0,  0,  0]])

# Swap the sign of every piece ...
start_board = - start_board


# ... then reverse(), which negates again and flips rows and columns: the net
# effect of the two steps is the literal above rotated by 180 degrees with
# its signs unchanged.
start_board = reverse(start_board)

next_board = expand(best_move(start_board))

# Boards are passed through reverse() once more before printing.
print("Starting position : ")
print_board(reverse(start_board))

print("\nBest next move : ")
print_board(reverse(next_board))

Starting position : 
| |X| |X| | | | |
|x| | | | | | | |
| | | | | | | |o|
| | | | | | | | |
| | | | | | | |o|
| | | | | | | | |
| |O| | | |O| | |
| | | | | | | | |

Best next move : 
| |X| | | | | | |
|x| |X| | | | | |
| | | | | | | |o|
| | | | | | | | |
| | | | | | | |o|
| | | | | | | | |
| |O| | | |O| | |
| | | | | | | | |
