```markdown
Math 579 Project Documentation

Date: 2025-04-21  
Name: Samisoni Palu  
Instructor: Dr. Sun
```

In [None]:
import chess
from chess import pgn
import json
import torch
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.nn.functional as F

```markdown
## Utils
Board data
---
`generate_uci_move_list`
- list all possible moves as a 2-tuple of from_square, to_square
- tweaked for promotion move counts
- will include redundant or invalid moves, for example:
    - **`a1a1`, `d4d4`**, etc. → no actual move is made
    - **Illegal under any real game condition**
    - Also includes nonsense like `h2h8` (rook-style moves for pawns)

But remember:  
> You’re not saying “these are all valid moves”  
> **this is the full move *vocabulary***—the possible *labels* in a classification task.
> That is, our **vocabulary** list is all the two-tuple pairings of squares, e.g. (`h2g3`), along with other special moves

Our total vocabulary size is the sum of
- 64 x 64 = 4096 (each ordered pairing of squares)
- 8 (number of columns) x 2 (white-black promotions) x 4 (choices of upgrade) x 3 (capture types) - 16 (edge cases: a pawn on the a- or h-file can only capture towards one side) = 176 promotion moves

We should expect our logits vector to have size 4096 + 176 = **4272**. 

**Benefits of the chosen Vocabulary**

1. Simplicity in Output Shape
    - 1-to-1 mapping: index ↔ UCI
    - You don’t need dynamic output heads or custom decoders
    - You can store logits as a tensor of shape `[4272]` and just mask out illegal ones at runtime

2. Consistency
    - Your label space is fixed across:
      - Training
      - Inference
      - Evaluation

3. Non-moves Never Get Trained On
    - No master ever plays `a1a1`
    - So those output indices **never get gradient updates**
    - They just sit in the model—harmless dead neurons

**Why You Might Remove Redundant Moves**

1. Smaller Output Space
    - Saves compute on final linear layer and softmax
    - Slightly faster training (maybe)

2. Model Capacity Allocation
    - You force the network to **only ever consider valid move templates**
    - Could lead to sharper learning curve

But you pay with **more complexity**:
- Dynamic move indexing
- Pre-mask needs to align with training mask
- Harder debugging
```


In [None]:
def generate_uci_move_list():
    """Build the fixed move vocabulary as a sorted list of UCI strings.

    Every ordered (from_square, to_square) pair contributes one plain move.
    Pairs whose (from_rank, to_rank) is (6, 7) or (1, 0) and whose files
    differ by at most one additionally contribute one entry per promotion
    piece (queen, rook, bishop, knight).  No legality check is performed:
    the result is a label space, not a move generator.

    Returns:
        list[str]: the de-duplicated UCI strings in ascending string order.
    """
    promotion_pieces = (chess.QUEEN, chess.ROOK, chess.BISHOP, chess.KNIGHT)
    # (from_rank, to_rank) pairs that count as a promotion step (white / black).
    promotion_rank_steps = ((6, 7), (1, 0))

    vocabulary = set()
    for origin in chess.SQUARES:
        origin_rank = chess.square_rank(origin)
        origin_file = chess.square_file(origin)
        for target in chess.SQUARES:
            vocabulary.add(chess.Move(origin, target).uci())

            rank_step = (origin_rank, chess.square_rank(target))
            if rank_step not in promotion_rank_steps:
                continue
            # Straight push or one-file diagonal capture only.
            if abs(origin_file - chess.square_file(target)) > 1:
                continue
            vocabulary.update(
                chess.Move(origin, target, promotion=piece).uci()
                for piece in promotion_pieces
            )
    return sorted(vocabulary)

def save_move_index_map(path="data/move_index_map.json"):
    """Write the UCI-string -> class-index mapping to *path* as JSON.

    Indices follow the order of the list returned by
    `generate_uci_move_list`.
    """
    index_by_uci = {}
    for position, uci in enumerate(generate_uci_move_list()):
        index_by_uci[uci] = position
    with open(path, "w") as out_file:
        json.dump(index_by_uci, out_file)

In [None]:
# Materialise the vocabulary once; a move's position in this list is its class index.
move_list = generate_uci_move_list()
# Report the vocabulary length, i.e. the number of logits the model must emit.
print('Logits size = ',len(move_list))

In [None]:
# Token id for each piece type; 0 is reserved for "no piece" (an empty square).
PIECE_TO_IDX = dict(
    zip(
        (
            None,
            chess.PAWN,
            chess.KNIGHT,
            chess.BISHOP,
            chess.ROOK,
            chess.QUEEN,
            chess.KING,
        ),
        range(7),
    )
)

# Turn a python-chess board into an integer grid the network can embed.
def encode_board(board: chess.Board) -> np.ndarray:
    """Encode *board* as an 8x8 ``int64`` array of piece tokens.

    Row ``r`` holds the squares with ``square // 8 == 7 - r`` and column ``c``
    the squares with ``square % 8 == c``.  Empty squares are 0, white pieces
    take their ``PIECE_TO_IDX`` value (1-6) and black pieces that value plus
    6 (7-12).
    """
    grid = np.zeros((8, 8), dtype=np.int64)
    # Only occupied squares need writing; all others keep the 0 ("empty") token.
    for square, piece in board.piece_map().items():
        rank_index, file_index = divmod(square, 8)
        colour_shift = 0 if piece.color == chess.WHITE else 6
        grid[7 - rank_index, file_index] = PIECE_TO_IDX[piece.piece_type] + colour_shift
    return grid


In [None]:
# Sanity check: encode a freshly constructed board.
board = chess.Board()
# Trailing expression so the notebook displays the resulting 8x8 token grid.
encode_board(board)

In [None]:
# A plain MLP policy head over embedded board squares.
class PolicyNet(nn.Module):
    """Map a batch of 8x8 piece-token boards to one logit per move label.

    Args:
        embedding_dim: length of the learned vector for each of the 13 tokens.
        num_moves: width of the output layer (number of move labels).
    """

    def __init__(self, embedding_dim=32, num_moves=4272):
        super().__init__()
        flat_features = 8 * 8 * embedding_dim
        hidden_units = 512

        # Token id (0-12) -> dense vector.
        self.embed = nn.Embedding(num_embeddings=13, embedding_dim=embedding_dim)

        # Positions inside `fc` stay Flatten, Linear, Tanh, Linear.
        layers = [
            nn.Flatten(),                            # [B, 8, 8, D] -> [B, 64*D]
            nn.Linear(flat_features, hidden_units),
            nn.Tanh(),
            nn.Linear(hidden_units, num_moves),      # raw logits
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``[B, num_moves]`` logits for token boards ``x`` of shape ``[B, 8, 8]``."""
        embedded = self.embed(x)  # [B, 8, 8, D]
        logits = self.fc(embedded)
        return logits


In [None]:
# Instantiate the policy net with 2-dimensional square embeddings; num_moves keeps its default (4272).
mlp = PolicyNet(embedding_dim=2)

### What this do frfr ##


We want a neural network that takes:
- Input: an `8×8` grid of piece tokens (entries are integers from 0 to 12)
- Output: a **4272-dimensional logits vector**

---

### CLASS STRUCTURE: `PolicyNet`

```python
class PolicyNet(nn.Module):
    def __init__(self, embedding_dim=32, num_moves=4272):
```

- **`embedding_dim=32`**: each board square will be represented by a **32-dimensional vector**.
- **`num_moves=4272`**: the size of **output layer**, corresponding to all possible moves.

--- 

```python
        super().__init__()
```

- Standard for initializing the parent class (`nn.Module`).

---

### Embedding Layer

```python
        self.embed = nn.Embedding(13, embedding_dim)
```

- This layer turns each square’s integer (0–12) into a vector of dimension `[embedding_dim]`.
- `[8,8]` board → `[8,8,32]` tensor.

---

### Fully Connected Network (MLP)
These are our hidden and output logits layer. 
```python
        self.fc = nn.Sequential(
            nn.Flatten(),                
            nn.Linear(8*8*embedding_dim, 512),
            nn.Tanh(),
            nn.Linear(512, num_moves)
        )
```

#### `nn.Flatten()`
- Converts `[B, 8, 8, 32]` into `[B, 2048]`, similar to .view
- Needed to feed into `Linear` layers

#### `nn.Linear(2048, 512)`
- Fully connected layer reducing 2048 features to 512 neurons

#### `nn.Tanh()`
- Non-linear activation to let the model learn more complex patterns

#### `nn.Linear(512, num_moves)`
- Final layer: predicts logit value for each of the 4272 moves

---

### `forward` Function

```python
    def forward(self, x):  # x: [B, 8, 8]
        x = self.embed(x)  # [B, 8, 8, D]
        return self.fc(x)  # nn.Flatten inside fc accepts [B, 8, 8, D] directly, so no permute is needed
```

- **Input**: `x` is a batch of boards, shape `[batch_size, 8, 8]`
- **Embedding**: turns each square into a vector: `[B, 8, 8, 32]`
- **FC Network**: outputs a `[B, 4272]` tensor of logits

---

## Summary

- network **understands piece identity** through embeddings.
- it **flattens** the board to make a prediction using fully connected layers.
- The output is a **score for every possible move**, and later **mask illegal ones**.

## DATA SETS

Documentation of *X* (the **input**) and *Y* (the **target**).

*X* is a **board state**.

> **Board state:**  
> - *8×8 matrix*  
> - Entries in `[0, 12]`  
> - Entries map to **piece type**  

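*Y* is a list of **move indices**.

>**Move indices (class labels for the `logits`):**
> - each entry is an integer in $\{0, \dots, 4271\}$
> - each index maps to one **move**, i.e. one coordinate of the logits vector in $\mathbb{R}^{4272}$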

A reminder of how moves are defined follows. 

>**Move:**
> - pair of grid values (Why?)
> - e.g. *d3d2*

Recall that the policy net (MLP) outputs raw **logits**; the softmax is applied inside `cross_entropy` during training (and explicitly when sampling a move). We read move data from a text file in **PGN form**. X lists all the board states of a given game in the order they were played. Y lists the move made from each of those board states. Let $S_i$ be the $i$-th board state of the given game, and let $a_i$ be the action (move) made from this state. Then, we may visualize $X$ and $Y$ as the following:

$$\text{Input:}\ S_1,\ S_2,\ ...,\ S_n$$

$$\text{Output:}\ a_1,\ a_2,\ ...,\ a_n$$



In [None]:
# Raw string: the Windows path contains backslash sequences (\U, \n, \r) that a
# normal string literal would interpret as escapes (\U is even a syntax error).
in_data = r'C:\Users\samip\Documents\quick-maffs\neural_nets\makemorechessmoves\data\raw_games\Carlsen.pgn'


def load_dataset(path=None):
    """Build (board, move) training pairs from the first game of a PGN file.

    Args:
        path: PGN file to read.  Defaults to the module-level ``in_data``.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: ``X`` of shape ``[N, 8, 8]`` (int64
        piece tokens, the position *before* each move) and ``Y`` of shape
        ``[N]`` (int64 index of the played move in ``move_list``).

    Raises:
        ValueError: if the file contains no readable game.
    """
    if path is None:
        path = in_data

    with open(path, "r") as file:
        game = pgn.read_game(file)
    if game is None:
        raise ValueError(f"No game found in {path!r}")

    # One dict lookup per move instead of a linear `list.index` scan.
    index_of = {uci: i for i, uci in enumerate(move_list)}

    board = game.board()
    X, Y = [], []
    last_move = None
    for move in game.mainline_moves():
        move_index = index_of.get(move.uci())
        if move_index is None:
            print(f"Move {move.uci()} not found in move_list. Skipping this move.")
        else:
            X.append(encode_board(board))  # position BEFORE the move
            Y.append(move_index)
        # Always advance the board, even when the sample is skipped, so the
        # following positions stay in sync with the game.
        board.push(move)
        last_move = move

    if last_move is not None:
        print(last_move)  # final move of the game

    # Stack through NumPy (fast) and keep a well-defined shape/dtype even when
    # no sample was collected.
    X = torch.as_tensor(np.asarray(X, dtype=np.int64).reshape(-1, 8, 8))
    Y = torch.tensor(Y, dtype=torch.long)
    return X, Y
#what happens if we remove winning move (movelist[-1]), will the model still choose victory? >:P

In [None]:
# `load_dataset()` reads the file named by `in_data`.
X, Y = load_dataset()

# Shuffle X and Y with one shared permutation so every (board, move) pair stays
# aligned and appears exactly once.  (`torch.randint` samples with replacement,
# which duplicates some positions and drops others.)
newInd = torch.randperm(X.shape[0])
X = X[newInd]
Y = Y[newInd]

# Boundaries for an 80% train / 10% dev / 10% test split.
n1 = int(0.8*len(X))
n2 = int(0.9*len(X))

Xtr, Ytr = X[:n1], Y[:n1]
Xdev, Ydev = X[n1:n2], Y[n1:n2]
Xte, Yte = X[n2:], Y[n2:]

**A Note on Initialization**

> We must initialize our layers carefully in order to optimize training speed in the gradient step. 

Consider $tanh$ activation for instance: 


In [None]:
# Fresh, untrained network whose initial weights are inspected in the next cells.
mlp = PolicyNet(embedding_dim=2)
# NOTE(review): `g` is created after the model and is not passed to any call in the
# visible cells, so it does not make the initialisation above reproducible — confirm intent.
g = torch.Generator().manual_seed(2147483647)

In [None]:
# Extract weights from the first linear layer
# (`fc[1]` is nn.Linear(8*8*embedding_dim, 512); `fc[0]` is the Flatten).
weights = mlp.fc[1].weight.detach().numpy()

# Plot the weights as a heatmap
# (rows are output neurons, columns are input features, as the axis labels state).
plt.figure(figsize=(10, 8))
plt.imshow(weights, aspect='auto', cmap='viridis')
plt.colorbar()
plt.title("Weights of the First Linear Layer")
plt.xlabel("Input Features")
plt.ylabel("Output Neurons")
plt.show()

# Also dump the raw values for inspection.
print(weights)

In [None]:
# Plot tanh on [-10, 10] to show how flat (saturated) it becomes away from 0.
xs = np.linspace(-10, 10, 100)
plt.plot(xs, np.tanh(xs))
plt.show()

Note that the gradient of `tanh` is close to 0 for large $|x|$. In essence, neurons may saturate (and effectively stop learning) too early with a sloppy initialization. 

Due to this fact, you might consider `ReLU` for its beneficial properties, including: 

- no upper bound
- no vanishing gradient for positive inputs
- easier initialization

allowing for faster training and increased stability. Our policy net (MLP) already initializes efficiently using layers defined by the `nn` package, but the point is clear, 

> We should fix our initializations depending on what activation function is used. 

This is useful later when we compare different bot models in competition. 

**Training Step**

We start with a very minimal training loop.
- static learning rate
- no optimizers
- no batch normalization (in fact, we won't need it)

In [None]:
# Minimal hand-written SGD: fixed learning rate, random mini-batches, no optimizer object.
count_step = 10   # print the loss every `count_step` iterations
lossi = []        # per-step loss history
lr = 0.1
batch_size = 16
for i in range(1000):
    # Draw a random mini-batch of row indices (sampled with replacement).
    batch = torch.randint(low=0, high=Y.shape[0], size=(batch_size,))

    # Forward pass and loss on that mini-batch.
    logits = mlp(X[batch])
    loss = F.cross_entropy(logits, Y[batch])

    # Drop stale gradients, then backpropagate.
    for p in mlp.parameters():
        p.grad = None
    loss.backward()

    # Plain gradient-descent step, kept out of the autograd graph.
    with torch.no_grad():
        for p in mlp.parameters():
            p -= lr * p.grad

    if i % count_step == 0:
        print(f'Step[{i}]: Current loss value = {loss}')
    lossi.append(loss.item())


In [None]:

# Sample one move index from the model's (unmasked) output distribution.
with torch.no_grad():
    logits = mlp(X[:16])            # [up to 16, num_moves]; num_moves is 4272 with PolicyNet's default
    logits=logits[1]                # Take the board at batch index 1, shape [num_moves]

    probs = F.softmax(logits, dim=0)    # Turn logits into probabilities

    move_index = torch.multinomial(probs, num_samples=1).item()  # class index; illegal moves are NOT masked here

# Intended game flow:
# start: white, update board state, I make move, update board state,

## Tests

### Independents
**Optimizers**
**LRs**
**Activation Functions**
**vs Stockfish**
**Model Dimensions**
- layer no
- layer size
**embed vs one hot**
**embedding dimensions**
**batch size in training**
**argmax vs softmax**
**different training data**


### Metrics
**Win Rate**
**Training Speed**
**Stability**

### To do 
- embedding visualization

### Functionality

With our first test, we should gauge how well the base model plays. We first define a move selecting function...

In [None]:
def select_move_softmax(model, board, move_index_to_uci, uci_to_index, temperature=1.0):
    """Sample a legal move for *board* from the model's masked softmax policy.

    Args:
        model: module mapping a ``[1, 8, 8]`` long tensor to ``[1, num_moves]``
            logits; it is switched to eval mode.
        board: ``chess.Board`` to choose a move for.
        move_index_to_uci: mapping from class index to UCI string.
        uci_to_index: mapping from UCI string to class index.
        temperature: positive softmax temperature; values below 1 sharpen the
            distribution, values above 1 flatten it.

    Returns:
        chess.Move: the sampled move.

    Raises:
        ValueError: if ``temperature`` is not positive, or if none of the
            board's legal moves is present in ``uci_to_index`` (for example
            when the game is already over).
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be positive, got {temperature}")

    # Class indices of the legal moves that exist in the vocabulary.
    legal_move_indices = [
        uci_to_index[move.uci()]
        for move in board.legal_moves
        if move.uci() in uci_to_index
    ]
    if not legal_move_indices:
        # Without this guard the softmax would be uniform over the whole
        # vocabulary and an arbitrary (illegal) move would be returned.
        raise ValueError("no legal move available in the move vocabulary")

    model.eval()
    with torch.no_grad():
        # Encode the board and add a batch dimension.
        board_tensor = torch.as_tensor(encode_board(board), dtype=torch.long).unsqueeze(0)
        logits = model(board_tensor)[0]  # drop batch dim -> [num_moves]

        # -inf (rather than a large finite offset) gives illegal moves exactly
        # zero probability at every temperature.
        masked = torch.full_like(logits, float("-inf"))
        legal = torch.tensor(legal_move_indices, dtype=torch.long, device=logits.device)
        masked[legal] = logits[legal]

        probs = F.softmax(masked / temperature, dim=0)
        move_index = torch.multinomial(probs, num_samples=1).item()

    return chess.Move.from_uci(move_index_to_uci[move_index])



In [None]:
# Two-way lookup between class indices and UCI strings, following `move_list` order.
index_to_uci = dict(enumerate(move_list))
uci_to_index = {uci: idx for idx, uci in index_to_uci.items()}

... as well as a game loop. 

In [None]:
def model_vs_itself():
    """Let the global ``mlp`` play both sides on a fresh ``chess.Board()``.

    Moves are sampled with `select_move_softmax` at temperature 1.0 until
    ``board.is_game_over()`` is true.

    Returns:
        list[str]: the UCI strings of the moves played, in order.
    """
    board = chess.Board()
    played = []
    while True:
        if board.is_game_over():
            return played
        chosen = select_move_softmax(mlp, board, index_to_uci, uci_to_index, temperature=1.0)
        board.push(chosen)
        played.append(chosen.uci())

In [None]:
# Three games
game1 = model_vs_itself()
game2 = model_vs_itself()
game3 = model_vs_itself()

print(len(game1))
print(len(game2))
print(len(game3))

Typically, well-played games range between 40-80 moves. 

In [None]:
# Rebuild the first self-play game as a PGN tree so it can be printed and exported.
pgn_game = chess.pgn.Game()

# `node` walks down the main line.  It is deliberately not called `pgn`: that
# name is the `chess.pgn` module imported at the top of the notebook, and
# rebinding it would break later `pgn.read_game(...)` calls.
node = pgn_game
for move in game1:
    node = node.add_variation(chess.Move.from_uci(move))

print(pgn_game)

In [None]:
# Save the PGN text of `pgn_game` to "output_game.pgn" in the current working directory.
with open("output_game.pgn", "w") as pgn_file:
    pgn_text = str(pgn_game)
    pgn_file.write(pgn_text)