In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from QuantumChessGame import * 
from ChessPuzzles import *
from GameToTensor import *
from ChessPuzzles import chess_puzzles

from MCTS import MCTS_Node

import numpy as np
import pandas as pd 

import QChessNN
import MCTS_NN

In [2]:
# Seed torch's global RNG before the network is constructed
# (presumably so the initial weights are reproducible between runs).
torch.manual_seed(42)

# Declare a new model instance.
NNmodel = QChessNN.QChessNN()



# Alternative: load a previously exported model instead of starting fresh.
# 'testExport.pth' is the file the training cell writes with torch.save.
#NNmodel = QChessNN.QChessNN()
#NNmodel = torch.load('testExport.pth')

In [3]:
import pandas as pd
#import matplotlib.pyplot as plt
#%matplotlib inline

In [4]:
# Create a game (new_game() is called without options here; the self-play
# cell passes a puzzle FEN instead) and encode its state as a network input.
game = QuantumChessGame()
game.new_game()

gameData = game.get_game_data()
# Single-sample input buffer of shape (1, 12, 8, 8); later cells overwrite
# game_tensor[0] with each position they feed to the network.
game_tensor = torch.zeros(1,12,8,8)

# NOTE(review): the meaning of the second argument (0) is defined by
# gameToTensor in GameToTensor.py — confirm there.
game_tensor[0] = gameToTensor(gameData, 0)
#print(game_tensor)
# NOTE(review): y is not referenced by any other cell in the visible notebook.
y = torch.zeros(12)



In [5]:
class MCTS_AI:
    """Thin front-end that asks a plain ``MCTS_Node`` search for a move."""

    def __init__(self):
        """Nothing to initialise; a new search root is built on every query."""
        pass

    def find_best_move(self, game, simVar):
        """Return the move selected by a search rooted at ``game``.

        Args:
            game: Game object; must provide ``get_game_data()`` whose result
                has a ``ply`` attribute.
            simVar: Forwarded unchanged as the second argument of
                ``MCTS_Node.best_action`` (presumably the simulation budget —
                confirm in MCTS.py).

        Returns:
            Whatever ``MCTS_Node.best_action`` returns for the current ply.
        """
        # The root is created before the game data is read, as in the original.
        search_root = MCTS_Node(game)
        current_ply = game.get_game_data().ply
        return search_root.best_action(current_ply, simVar)

In [6]:
class NetworkMCTS:
    """Thin front-end around the network-guided search in ``MCTS_NN``."""

    def __init__(self):
        """Nothing to initialise; a new search root is built on every query."""
        pass

    def find_best_move(self, game, model, simVar):
        """Run a network-guided search from ``game`` and report its choice.

        Args:
            game: Game object; must provide ``get_game_data()`` whose result
                has a ``ply`` attribute.
            model: Network passed both to the ``MCTS_NN.MCTS_Node``
                constructor and to ``best_action``.
            simVar: Forwarded unchanged as the last argument of
                ``best_action`` (presumably the simulation budget — confirm
                in MCTS_NN.py).

        Returns:
            ``(move, value)``: the result of ``best_action`` and the result of
            ``getConcreteNodeValue()`` read from the root after the search.
        """
        # The root is created before the game data is read, as in the original.
        search_root = MCTS_NN.MCTS_Node(game, model)
        current_ply = game.get_game_data().ply
        chosen_move = search_root.best_action(current_ply, model, simVar)
        # The root value is queried only after the search has finished.
        return chosen_move, search_root.getConcreteNodeValue()


In [7]:
# Network-guided searcher; self_play_game reads this name as a global.
mcts_nn = NetworkMCTS()

# Plain-MCTS counterpart (not used by the active self-play code path).
MCTSAI =  MCTS_AI()

In [8]:

def self_play_game(model, moveMax):
    """Play one self-play game from puzzle 34 and collect its positions.

    Each turn the module-level ``mcts_nn`` searcher picks a move (10 is
    passed as its ``simVar`` argument), the pre-move game data is stored in a
    per-side list chosen by ply parity, and the move is applied to the game.

    Args:
        model: Network handed to ``mcts_nn.find_best_move``.
        moveMax: Ply at which the game is cut short and scored 0.

    Returns:
        A ``(board_data, result)`` tuple. When the game runs to its end,
        ``board_data`` is the even-ply list and ``result`` is derived from the
        move code of the last ``do_move`` call: 2 -> +1 (white win),
        3 -> -1 (black win), 5 -> 0 (draw). Any other final move code,
        including a game that is already over before the first move, yields 0
        so the caller can always unpack the return value (the previous
        version fell through and returned ``None`` in those cases).
    """
    # Final move code -> game result from white's point of view.
    outcome_by_movecode = {2: 1, 3: -1, 5: 0}

    board_data_B = []
    board_data_W = []
    game = QuantumChessGame()
    game.new_game({'initial_state_fen':get_puzzle_fen(34),  'max_split_moves':[0,1]})
    movecode = 0
    while not game.is_game_over():
        gamedata = game.get_game_data()

        # The searcher also returns a value estimate; it is not used here.
        # To use plain MCTS instead: best_move = MCTSAI.find_best_move(game, 15)
        best_move, _ = mcts_nn.find_best_move(game, model, 10)

        print(f"player # {gamedata.ply}")
        print(f"move taken {best_move}")
        print("found best move")

        # Record the pre-move state: even plies go to W, odd plies to B.
        if gamedata.ply % 2 == 0:
            board_data_W.append(gamedata)
        else:
            board_data_B.append(gamedata)

        if gamedata.ply == moveMax:
            # NOTE(review): this is the only path that returns the odd-ply (B)
            # list; every other path returns the even-ply (W) list. Kept as in
            # the original — confirm which list the trainer is meant to get.
            return board_data_B, 0

        # Apply the move to the board.
        _, movecode = game.do_move(best_move)
        game.print_board_and_probabilities()

    # Unknown or missing move codes are scored 0 instead of returning None.
    return board_data_W, outcome_by_movecode.get(movecode, 0)

In [9]:

# Training loop: generate self-play games, then fit the network's output to
# each game's final result.

game_tensor = torch.zeros(1,12,8,8)

# One entry per finished game; all_board_data[i] belongs to all_results[i].
# (Previously each game overwrote the last one and all_results was never
# defined, so the training loop below raised NameError.)
all_board_data = []
all_results = []
# Here "epoch" only controls how many self-play games are generated:
# 10 epochs x 2 games each.
for epoch in range(10):
    print(f"starting epoch {epoch + 1}")
    # game_number (not `game`) so the game object from earlier cells is not
    # clobbered by the loop counter.
    for game_number in range(2):
        board_data, result = self_play_game(NNmodel, 10)  # Play a game
        all_board_data.append(board_data)
        all_results.append(result)

        print(f"game {game_number + 1} finished")

# Train the model on the collected game data, one position per optimiser step.
optimizer = torch.optim.Adam(NNmodel.parameters(), lr=0.001)
for board_data, result in zip(all_board_data, all_results):
    # Every position of a game shares that game's result as its target.
    # Assumes NNmodel returns a (1, 1) tensor for one position — TODO confirm.
    true_value = torch.tensor([[result]], dtype=torch.float32)
    for state in board_data:
        optimizer.zero_grad()

        game_tensor[0] = gameToTensor(state, 0)

        # Forward pass
        predicted_value = NNmodel(game_tensor)

        # Compute loss (value loss only; no policy loss is computed here)
        value_loss = F.mse_loss(predicted_value, true_value)

        # Backpropagate and optimize
        value_loss.backward()
        optimizer.step()


# NOTE(review): this pickles the whole module rather than its state_dict, so
# loading the file later requires the QChessNN class to be importable.
torch.save(NNmodel, 'testExport.pth')





starting epoch 1
player # 1
move taken a8a7
found best move
 +-------------------------------------------------+
8|   .     .     .     .     .     .     .     .   |
7| 100:k 100:P   .     .     .     .     .     .   |
6| 100:P   .   100:N   .     .     .     .     .   |
5|   .     .     .     .     .     .     .     .   |
4|   .     .     .     .     .     .     .     .   |
3|   .     .     .     .     .     .     .     .   |
2|   .     .     .     .     .     .     .   100:K |
1|   .     .     .     .     .     .     .     .   |
 +-------------------------------------------------+
     a     b     c     d     e     f     g     h
player # 2
move taken b7b8N
found best move
 +-------------------------------------------------+
8|   .   100:N   .     .     .     .     .     .   |
7| 100:k   .     .     .     .     .     .     .   |
6| 100:P   .   100:N   .     .     .     .     .   |
5|   .     .     .     .     .     .     .     .   |
4|   .     .     .     .     .     .     .     .   |

KeyboardInterrupt: 

In [None]:
# Sanity check: run the network once on the position built from
# get_puzzle_fen(33) and print the raw output.
game = QuantumChessGame()
game.new_game({'initial_state_fen':get_puzzle_fen(33),  'max_split_moves':[0,1]});
# Reuses the (1, 12, 8, 8) game_tensor buffer created in an earlier cell.
game_tensor[0] = gameToTensor(game.get_game_data(), 0)

output = NNmodel(game_tensor)
print(output)

In [None]:
# Generic supervised training loop that records the loss of every epoch.
# NOTE(review): `model`, `X_train`, `y_train` and `criterion` are not defined
# in any visible cell (unless one of the star-imports supplies them), so this
# cell raises NameError on a fresh kernel. `optimizer` does exist, but the
# training cell built it over NNmodel.parameters(), not over `model`.
epochs = 100
losses = []

for i in range(epochs):
    y_pred = model.forward(X_train)

    loss = criterion(y_pred, y_train)

    # Detach so the stored value carries no autograd graph.
    losses.append(loss.detach().numpy())

    # Report progress every 10th epoch.
    if i % 10 == 0:
        print(f'Epoch: {i} and loss: {loss}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# Plot the per-epoch losses collected by the loop in the previous cell.
# NOTE(review): `plt` is never imported in the visible notebook (the
# matplotlib import near the top is commented out), so that import has to be
# enabled before this cell can run.
plt.plot(range(epochs), losses)
plt.ylabel("loss/error")
plt.xlabel('epoch')