In [None]:
from utility_for_network import *
from utility_for_boardmovement import *
from utility_for_reinforcement import *
import copy

The following Python code defines a function `L_layer_model` that implements an L-layer neural network for a chess game. The function takes a DataFrame (`df`) containing information about the chess games, pretrained parameters (`pretrained_prems`), layer dimensions (`layers_dims`), learning rate (`learning_rate`), number of iterations (`num_iterations`), and a flag to print the cost (`print_cost`).

In [None]:
def L_layer_model(df,pretrained_prems, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):#lr was 0.009
    """
    Train an L-layer network ([LINEAR->RELU]*(L-1)->LINEAR->SIGMOID) on recorded chess games.

    Every row of ``df`` is one game. The game's move list is expanded into
    board positions with ``allboards`` and each position is used as one
    training example whose label is the game's result. For every position
    ``num_iterations`` gradient-descent steps are performed.

    Arguments:
    df -- DataFrame of games; each row must provide a 'moves' and a 'result' entry
    pretrained_prems -- parameters to continue training from; pass an empty
                        value ([], {} or None) to initialise fresh parameters
                        with initialize_parameters_deep(layers_dims)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1)
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of gradient steps performed on every board position
    print_cost -- if True, prints each game's moves and the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    np.random.seed(1)
    costs = []                         # one entry per gradient step, plotted at the end

    # Start from scratch unless the caller supplied parameters to continue from.
    if not pretrained_prems:
        parameters = initialize_parameters_deep(layers_dims)
    else:
        parameters = pretrained_prems

    startingposition = [[5,3,4,9,10,4,3,5],
         [1,1,1,1,1,1,1,1],
         [0,0,0,0,0,0,0,0],
         [0,0,0,0,0,0,0,0],
         [0,0,0,0,0,0,0,0],
         [0,0,0,0,0,0,0,0],
         [-1,-1,-1,-1,-1,-1,-1,-1],
         [-5,-3,-4,-9,-10,-4,-3,-5]]

    for _, game in df.iterrows():
        # Work on a private copy so the template position can never be modified.
        board = copy.deepcopy(startingposition)

        moves = game['moves']
        if print_cost:
            print(moves)

        # NOTE(review): allboards is assumed to return the positions already in
        # the shape L_model_forward expects -- confirm in utility_for_boardmovement.
        positions = allboards(board, moves, True)
        result = game['result']

        for position in positions:
            X = position
            Y = result

            # Loop (gradient descent)
            for i in range(0, num_iterations):
                # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
                AL, caches = L_model_forward(X, parameters)

                # Compute cost.
                cost = compute_cost(AL, Y)

                # Backward propagation.
                grads = L_model_backward(AL, Y, caches)

                # Update parameters.
                parameters = update_parameters(parameters, grads, learning_rate)

                # Honour the print_cost flag (it used to be ignored).
                if print_cost and i % 100 == 0:
                    print ("Cost after iteration %i: %f" %(i, cost))

                costs.append(cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    # A cost is recorded for every gradient step, not once per hundred.
    plt.xlabel('update step')
    plt.title("Learning rate for game 1 =" + str(learning_rate))
    plt.show()

    return parameters


# Neural Network Training using L_layer_model

The following Python code uses the `L_layer_model` function to train a neural network on a DataFrame (`df`) loaded from a CSV file named 'games.csv'.

In [None]:
import pandas as pd

# First training pass: the empty list asks L_layer_model to initialise new
# parameters. The first layer has 64 units, one per board square.
df = pd.read_csv('games.csv')
parameters = L_layer_model(
    df,
    [],
    [64, 100, 50, 30, 20, 5, 1],
    learning_rate=0.085,
    num_iterations=1,
    print_cost=True,
)

In [None]:
# Second training pass: continue from the parameters learnt so far.
df = pd.read_csv('games2.csv')
parameters = L_layer_model(
    df,
    parameters,
    [64, 100, 50, 30, 20, 5, 1],
    learning_rate=0.085,
    num_iterations=1,
    print_cost=True,
)

In [None]:
# Third training pass: keep refining the same parameters on another game file.
df = pd.read_csv('games3.csv')
parameters = L_layer_model(
    df,
    parameters,
    [64, 100, 50, 30, 20, 5, 1],
    learning_rate=0.085,
    num_iterations=1,
    print_cost=True,
)

## Function Signature:

```python
def thegame(startingposition, toggle, parameters):
    """
    Simulates a chess game.

    Arguments:
    - startingposition: Initial chessboard configuration.
    - toggle: Toggle to determine the player's turn (True for white, False for black).
    - parameters: Pre-trained neural network parameters.

    Returns:
    - history: List containing the history of chessboard configurations during the game.
    """
```

In [None]:
def _find_king(board, king_value):
    """Return the (row, col) of the first square on the 8x8 board equal to king_value.

    Raises:
        ValueError: if no square holds king_value. Previously a missing king
            silently reused whatever square had been found before.
    """
    for row in range(8):
        for col in range(8):
            if board[row][col] == king_value:
                return (row, col)
    raise ValueError("no king with value %r on the board" % (king_value,))


def thegame(startingposition,toggle,parameters):
    """
    Play one game in which both sides choose their moves with the network.

    White (toggle True) plays the candidate move whose resulting position gets
    the highest network output, black (toggle False) the one with the lowest.
    The game runs while ``cheakmate`` and ``isdraw`` both return a truthy value
    and fewer than 300 white moves have been played.

    Arguments:
    startingposition -- 8x8 list of lists; 10 is the white king, -10 the black king
    toggle -- True if white is to move first, False if black is
    parameters -- network parameters passed to L_model_forward

    Returns:
    history -- list of board snapshots: the starting position followed by the
               position after every move played. Each entry is an independent copy.

    Raises:
    ValueError -- if the king of the side to move is not on the board.
    """
    WHITE_KING, BLACK_KING = 10, -10

    board = copy.deepcopy(startingposition)
    # Store snapshots, not the live board: performMove is handed `board` below
    # and would otherwise be able to rewrite the recorded opening position.
    history = [copy.deepcopy(board)]
    lastsix = [copy.deepcopy(board)]
    movecount = 0
    iswhitekingmoved = False
    isblackkingmoved = False

    moves = calculate_all_moves(board)
    if toggle:
        king_position = _find_king(board, WHITE_KING)
        moves = filter_moves(board, moves, toggle, king_position)
        # White is to move, so white's king flag is the relevant one here.
        moves = specialmoves(board, moves, iswhitekingmoved, toggle, lastsix)
    else:
        king_position = _find_king(board, BLACK_KING)
        # NOTE(review): this branch applies specialmoves before filter_moves,
        # the reverse of every other call site. Order kept as found -- confirm.
        moves = specialmoves(board, moves, isblackkingmoved, toggle, lastsix)
        moves = filter_moves(board, moves, toggle, king_position)
    white, black = dividebothmoves(moves)

    while movecount < 300 and (cheakmate(board, toggle, moves, white, black) and isdraw(board, toggle, white, black, lastsix)):
        candidates = white if toggle else black
        if len(candidates) == 0:
            # Nothing to play although the game was not declared over: stop
            # here instead of failing while picking a best move.
            break

        # Score the position reached by every candidate move.
        evaluation = {}
        for candidate in candidates:
            board_copy = [row[:] for row in board]
            board_copy = performMove(candidate, board_copy)
            ans, _ = L_model_forward(np.reshape(board_copy, (64, 1)), parameters)
            evaluation[candidate] = ans

        # max/min return the first best entry, the same move the former
        # "collect all best keys, take index 0" code selected.
        choose = max if toggle else min
        bestmove = choose(evaluation, key=evaluation.get)

        board = performMove(bestmove, board)
        if toggle:
            # Only white's moves are counted, so the limit is in full moves.
            movecount = movecount + 1
        history.append(copy.deepcopy(board))

        # Sliding window of the most recent positions, handed to isdraw/specialmoves.
        lastsix.append(copy.deepcopy(board))
        if len(lastsix) > 6:
            lastsix.pop(0)

        # A move is either a flat tuple starting with the piece value or a pair
        # of such tuples; in both forms look at the first piece value.
        own_king = WHITE_KING if toggle else BLACK_KING
        if bestmove[0] == own_king or (len(bestmove) == 2 and bestmove[0][0] == own_king):
            if toggle:
                iswhitekingmoved = True
            else:
                isblackkingmoved = True

        # Hand over to the other side and generate its moves.
        toggle = not toggle
        if toggle:
            king_position = _find_king(board, WHITE_KING)
            king_moved = iswhitekingmoved
        else:
            king_position = _find_king(board, BLACK_KING)
            king_moved = isblackkingmoved
        moves = calculate_all_moves(board)
        moves = filter_moves(board, moves, toggle, king_position)
        moves = specialmoves(board, moves, king_moved, toggle, lastsix)
        white, black = dividebothmoves(moves)

    return history


# Reinforcement Learning Self-Play Chess Game

The following Python code defines a function `selfplay` that conducts self-play chess games using a neural network in the context of reinforcement learning. This function is part of a larger reinforcement learning system designed for training a neural network to play chess.

## Function Signature:

`selfplay(num_iteration, parameters)`

**Starting Position:**

The function initializes the chessboard with the standard starting position.

**Results List:**

A list `results` is created to store the outcome of each game (win, loss, or draw).

**Main Loop for Iterations:**

The function iteratively plays chess games for the specified number of iterations, simulating the reinforcement learning process.

**Game Simulation:**

Each game is played using the `thegame` function, starting with the provided neural network parameters.

**Outcome Determination:**

The outcome of each game is determined based on whether it is a win, a loss, or a draw. This outcome is crucial for reinforcement learning, as it guides the model's learning process.

**Updating Neural Network Parameters:**

The neural network parameters are updated using the `update` function, which uses the outcome as the reward. This step is fundamental in reinforcement learning, as it adjusts the model's parameters to improve its performance over time.

**Print Results:**

The function prints the outcome of each game and the overall statistics, providing insight into the learning progress of the neural network.

In [None]:
def _locate_king(position, king_value):
    """Return the (row, col) of the first square of the 8x8 position equal to king_value.

    Raises:
        ValueError: if no square holds king_value.
    """
    for row in range(8):
        for col in range(8):
            if position[row][col] == king_value:
                return (row, col)
    raise ValueError("no king with value %r on the board" % (king_value,))


def _score_final_position(position):
    """Turn the last position of a game into a training target: 1, 0 or 0.5.

    The position is examined with white to move first and then with black to
    move. If neither view reports a finished game, the signed sum of all piece
    values decides.
    """
    # (side to move, that side's king value, score when cheakmate is falsy)
    # NOTE(review): a finished game with white to move is scored 1 and with
    # black to move 0, exactly as before -- confirm against cheakmate's
    # semantics that these are white and black wins respectively.
    for white_to_move, king_value, mate_score in ((True, 10, 1), (False, -10, 0)):
        king_position = _locate_king(position, king_value)
        moves = calculate_all_moves(position)
        moves = filter_moves(position, moves, white_to_move, king_position)
        white, black = dividebothmoves(moves)
        if not cheakmate(position, white_to_move, moves, white, black):
            return mate_score
        if not isdraw(position, white_to_move, white, black, [[]]):
            return 0.5

    # Game stopped without mate or draw: use the material balance. White
    # pieces are positive, black negative, and the two kings cancel out.
    material = sum(piece for row in position for piece in row)
    if material > 5:
        return 1
    if material < -5:
        return 0
    return 0.5


def selfplay(num_iteration,parameters, learning_rate=0.75, update_iterations=30):
    """
    Improve the network by letting it play against itself.

    For each iteration a game is played with ``thegame`` (white moves first),
    its final position is converted into a target value, and the parameters
    are trained on the game with ``update``. The per-game targets and a final
    win/draw summary are printed.

    Arguments:
    num_iteration -- number of self-play games to play
    parameters -- network parameters to start from
    learning_rate -- learning rate forwarded to update (default 0.75)
    update_iterations -- num_iterations forwarded to update (default 30)

    Returns:
    parameters -- the parameters after the last update
    """
    startingposition=[[5,3,4,9,10,4,3,5],
         [1,1,1,1,1,1,1,1],
         [0,0,0,0,0,0,0,0],
         [0,0,0,0,0,0,0,0],
         [0,0,0,0,0,0,0,0],
         [0,0,0,0,0,0,0,0],
         [-1,-1,-1,-1,-1,-1,-1,-1],
         [-5,-3,-4,-9,-10,-4,-3,-5]]

    results = []
    for _ in range(num_iteration):
        X = thegame(startingposition, True, parameters)

        # Draws used to execute `Y==0.5` (a comparison), leaving Y unset or
        # stale; the helper always returns a definite target.
        Y = _score_final_position(X[-1])

        results.append(Y)
        print(Y)
        parameters = update(X, Y, parameters, learning_rate=learning_rate,
                            num_iterations=update_iterations, print_cost=False)

    white_wins = results.count(1)
    black_wins = results.count(0)
    draws = len(results) - white_wins - black_wins
    # Use %-formatting; passing the number as a second print argument printed
    # the "%i" placeholder literally.
    print("white wins %i" % white_wins)
    print("black wins %i" % black_wins)
    print("draws %i" % draws)

    return parameters

In [None]:
# Play five self-play games starting from the parameters trained above.
# The trailing semicolon keeps the cell output limited to what selfplay prints.
selfplay(num_iteration=5, parameters=parameters);