# Reinforcement Learning

In [None]:
import chess
import numpy as np
from IPython.display import clear_output
from chessbot.chessbot import ChessBot

def play_game_and_learn(white_model, black_model, exploration_rate=0.0, should_visualise=False):
	"""Play one self-play game and update both models from a decisive result.

	Each model is wrapped in a ChessBot and the two bots alternate moves on a
	fresh board until the game is over. When one side wins, every entry in
	the winner's ``moves_made`` is used as a training sample with target 1
	and every entry in the loser's ``moves_made`` with target 0. Any other
	result leaves both models untouched.

	Args:
		white_model: Model driving the white bot; must provide ``partial_fit``.
		black_model: Model driving the black bot; must provide ``partial_fit``.
		exploration_rate: Passed unchanged to both ChessBot constructors.
		should_visualise: When truthy, display the board before the first
			move and redraw it after every move.

	Returns:
		The string returned by ``board.result()`` for the finished game.
	"""
	white = ChessBot(white_model, exploration_rate)
	black = ChessBot(black_model, exploration_rate)
	board = chess.Board()

	if should_visualise:
		display(board)

	while not board.is_game_over():
		mover = black if board.turn == chess.BLACK else white
		board.push(mover.move(board))

		if not should_visualise:
			continue
		# Replace the previous drawing rather than stacking a new one below it.
		clear_output(wait=True)
		display(board)

	outcome = board.result()

	# Target-array factories per decisive result, ordered (black, white).
	target_factories = {
		'1-0': (np.zeros, np.ones),
		'0-1': (np.ones, np.zeros),
	}
	if outcome in target_factories:
		make_black_targets, make_white_targets = target_factories[outcome]
		print('has partially fit')
		black_model.partial_fit(black.moves_made, make_black_targets(len(black.moves_made)))
		white_model.partial_fit(white.moves_made, make_white_targets(len(white.moves_made)))

	return outcome

In [None]:
import numpy as np
from sklearn.neural_network import MLPRegressor

# Number of self-play games to run in this training session.
num_games = 100
# One exploration rate per game: starts at 1, ends at 0; raising the linear
# ramp to the 4th power makes the rate fall off quickly in the early games.
exploration_rates = np.linspace(1, 0, num_games)**4

# Separate models for each colour; both start untrained.
white_model = MLPRegressor()
black_model = MLPRegressor()

# Result string of every game, in the order played.
results = []
for rate in exploration_rates:
	results.append(play_game_and_learn(white_model, black_model, exploration_rate=rate))

# Tally: white wins, black wins, draws (one line each).
print('\n'.join(
	f'{outcome}: {results.count(outcome)}'
	for outcome in ('1-0', '0-1', '1/2-1/2')
))