```python
Math 579 Project Documentation

Date: 2025-04-21  
Name: Samisoni Palu  
Instructor: Dr. Sun
```

In [54]:
import chess
import json
import torch
import numpy as np
import torch.nn as nn

```pyton
## Utils
Board data
---
`generate_uci_move_list`
- list all possible moves as a 2-tuple of from_square, to_square
- tweaked for promotion move counts
- will Include Redundant or Invalid Moves
    - **`a1a1`, `d4d4`**, etc. → no actual move is made
    - **Illegal under any real game condition**
    - Also includes nonsense like `h2h8` (rook-style moves for pawns)

But remember:  
> You’re not saying “these are all valid moves”  
> **this is the full move *vocabulary***—the possible *labels* in a classification task.
> That is, our **vocabulary** list is all the two-tuple pairings of squares, e.g. (`h2g3`), along with other special moves

Our total vocabulary size count includes the sum of
- 64x64 (each pairing of squares)
- 8 (number of columns) x 2 (white-black promotions) x 4 (choices of upgrade) x 3 (capture types) - 16 (edge cases)

We should expect our logits vector to have size **4272**. 

**Benefits of the chosen Vocabulary**

1. Simplicity in Output Shape
    - 1-to-1 mapping: index ↔ UCI
    - You don’t need dynamic output heads or custom decoders
    - You can store logits as `torch.tensor([4672])` and just mask out illegal ones at runtime

2. Consistency
    - Your label space is fixed across:
      - Training
      - Inference
      - Evaluation

3. Non-moves Never Get Trained On
    - No master ever plays `a1a1`
    - So those output indices **never get gradient updates**
    - They just sit in the model—harmless dead neurons

**Why You Might Remove Redundant Moves**

1. Smaller Output Space
    - Saves compute on final linear layer and softmax
    - Slightly faster training (maybe)

2. Model Capacity Allocation
    - You force the network to **only ever consider valid move templates**
    - Could lead to sharper learning curve

But you pay with **more complexity**:
- Dynamic move indexing
- Pre-mask needs to align with training mask
- Harder debugging
```


In [55]:
def generate_uci_move_list():
    all_moves = set()
    for from_sq in chess.SQUARES:
        for to_sq in chess.SQUARES:
            move = chess.Move(from_sq, to_sq)
            all_moves.add(move.uci())
            # Add promotions
            for promo in [chess.QUEEN, chess.ROOK, chess.BISHOP, chess.KNIGHT]:
                from_rank = chess.square_rank(from_sq)
                to_rank = chess.square_rank(to_sq)
                from_file = chess.square_file(from_sq)
                to_file = chess.square_file(to_sq)
                # Only allow forward promotion (white or black)
                if (from_rank , to_rank) in [(6, 7), (1, 0)]:  # white/black promotion ranks
                    if abs(from_file - to_file) <= 1:         # straight or diagonal
                        promo_move = chess.Move(from_sq, to_sq, promotion=promo)
                        all_moves.add(promo_move.uci())
    return sorted(all_moves)


def save_move_index_map(path="data/move_index_map.json"):
    moves = generate_uci_move_list()
    uci_to_index = {uci: i for i, uci in enumerate(moves)}
    with open(path, "w") as f:
        json.dump(uci_to_index, f)

In [56]:
move_list = generate_uci_move_list()
print('Logits size = ',len(move_list))

Logits size =  4272


In [57]:
# board state gets entries from cuhh
PIECE_TO_IDX = {
    None: 0,
    chess.PAWN: 1,
    chess.KNIGHT: 2,
    chess.BISHOP: 3,
    chess.ROOK: 4,
    chess.QUEEN: 5,
    chess.KING: 6,
}

# cuhh make the board go mathematical
def encode_board(board: chess.Board):
    board_array = np.zeros((8, 8), dtype=np.int64)
    
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        row = 7 - (square // 8)
        col = square % 8

        if piece is not None:
            base = PIECE_TO_IDX[piece.piece_type]
            offset = 0 if piece.color == chess.WHITE else 6
            board_array[row][col] = base + offset
        else:
            board_array[row][col] = 0  # empty

    return board_array  # shape: [8,8] of ints in [0,12]


In [58]:
# Here it go right here
board = chess.Board()
encode_board(board)

array([[10,  8,  9, 11, 12,  9,  8, 10],
       [ 7,  7,  7,  7,  7,  7,  7,  7],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 1,  1,  1,  1,  1,  1,  1,  1],
       [ 4,  2,  3,  5,  6,  3,  2,  4]])

In [80]:
# this just a mlp frfr
class PolicyNet(nn.Module):
    def __init__(self, embedding_dim=32, num_moves=4272):
        super().__init__()
        self.embed = nn.Embedding(13, embedding_dim)  # 13 tokens -> vector
        self.fc = nn.Sequential(
            nn.Flatten(),                # [8,8,32] -> [2048]
            nn.Linear(8*8*embedding_dim, 512),
            nn.Tanh(),
            nn.Linear(512, num_moves)   # Final logits
        )


    def forward(self, x):  # x: [B, 8, 8]
        x = self.embed(x)  # [B, 8, 8, D]
        return self.fc(x)


In [84]:
mlp = PolicyNet(embedding_dim=2)

### What this do frfr ##


We want a neural network that takes:
- Input: an `8×8` grid of piece tokens (entries are integers from 0 to 12)
- Output: a **4672-dimensional logits vector**

---

### CLASS STRUCTURE: `PolicyNet`

```python
class PolicyNet(nn.Module):
    def __init__(self, embedding_dim=32, num_moves=4272):
```

- **`embedding_dim=32`**: each board square will be represented by a **32-dimensional vector**.
- **`num_moves=4272`**: the size of **output layer**, corresponding to all possible moves.

---

```python
        super().__init__()
```

- Standard for initializing the parent class (`nn.Module`).

---

### Embedding Layer

```python
        self.embed = nn.Embedding(13, embedding_dim)
```

- This layer turns each square’s integer (0–12) into a vector of dimension `[embedding_dim]`.
- `[8,8]` board → `[8,8,32]` tensor.

---

### Fully Connected Network (MLP)
These are our hidden and output logits layer. 
```python
        self.fc = nn.Sequential(
            nn.Flatten(),                
            nn.Linear(8*8*embedding_dim, 512),
            nn.ReLU(),
            nn.Linear(512, num_moves)
        )
```

#### `nn.Flatten()`
- Converts `[B, 8, 8, 32]` into `[B, 2048]`, similar to .view
- Needed to feed into `Linear` layers

#### `nn.Linear(2048, 512)`
- Fully connected layer reducing 2048 features to 512 neurons

#### `nn.ReLU()`
- Non-linear activation to let the model learn more complex patterns

#### `nn.Linear(512, num_moves)`
- Final layer: predicts logit value for each of the 4672 moves

---

### `forward` Function

```python
    def forward(self, x):  # x: [B, 8, 8]
        x = self.embed(x)  # [B, 8, 8, D]
        x = x.permute(0, 3, 1, 2)  # Optional: [B, D, 8, 8]
        return self.fc(x)
```

- **Input**: `x` is a batch of boards, shape `[batch_size, 8, 8]`
- **Embedding**: turns each square into a vector: `[B, 8, 8, 32]`
- **FC Network**: outputs a `[B, 4272]` tensor of logits

---

## Summary

- network **understands piece identity** through embeddings.
- it **flattens** the board to make a prediction using fully connected layers.
- The output is a **score for every possible move**, and later **mask illegal ones**.

## DATA SETS

Documentation *X*:**INPUT** and *Y*:**TARGET** 

*X* is a **board state**.

> **Board state:**  
> - *8×8 matrix*  
> - Entries in `[0, 12]`  
> - Entries map to **piece type**  

*Y* is a list of **move indices**.

>**Move Indices (`logits`):**
> - vector in $\mathbb{R}^{4272}$
> - entries map to a **move**

A reminder of how moves are defined follows. 

>**Move:**
> - pair of grid values (Why?)
> - e.g. *d3d2*

Recall that the policy net (MLP) outputs a **softmaxed** list of (logits). Training occurs via `cross_entropy`. We read move data from text file in **PGN form**. X lists all the board states of a given game in the order it was played. Y lists all the moves made from a given board state. Let $S_i$ be the $i$ -th board state of the given game, and let $a_i$ be the action (move) made from this state. Then, we may visualize $X$ and $Y$ as the following:

$$\text{Input:}\ S_1,\ S_2,\ ...,\ S_n$$

$$\text{Output:}\ a_1,\ a_2,\ ...,\ a_n$$



In [None]:
with open(r"C:\Users\samip\Documents\quick-maffs\neural_nets\makemorechessmoves\data\raw_games\Carlsen.pgn", "r") as file:
    game = chess.pgn.read_game(file)
    board = game.board()
    X, Y = [], []

    for move in game.mainline_moves():
       board_state = encode_board(board)  # BEFORE the move
       try:
           move_index = move_list.index(move.uci()) # move is INDEXED here, move list starts from first move
           X.append(board_state) # first board state is neutral board, it must be appended to have good beginning game
           Y.append(move_index)
       except ValueError:
           print(f"Move {move.uci()} not found in move_list. Skipping this move.")
       board.push(move)  # Move AFTER data capture

X = torch.tensor(X)
Y = torch.tensor(Y)

#what happens if we remove winning move (movelist[-1]), will the model still choose victory? >:P

In [100]:
for param in mlp.parameters():
    print(param)

Parameter containing:
tensor([[ 1.2769,  0.3354],
        [-0.0834, -0.2623],
        [-0.5078,  0.5190],
        [ 0.3147,  0.6602],
        [ 0.3148,  0.5528],
        [-0.5301, -0.8638],
        [-0.6249,  0.3751],
        [-1.0270, -1.0949],
        [-1.7253,  0.1751],
        [-1.9656,  0.3177],
        [ 0.1604,  0.2982],
        [-0.6572, -0.2269],
        [-0.2176, -0.1560]], requires_grad=True)
Parameter containing:
tensor([[ 0.0188,  0.0023,  0.0845,  ...,  0.0628, -0.0132,  0.0742],
        [-0.0514, -0.0034,  0.0845,  ...,  0.0316, -0.0465, -0.0582],
        [ 0.0131,  0.0725,  0.0181,  ..., -0.0165, -0.0463,  0.0703],
        ...,
        [-0.0679,  0.0563, -0.0597,  ...,  0.0255, -0.0558,  0.0443],
        [ 0.0596,  0.0063, -0.0364,  ..., -0.0352,  0.0788, -0.0103],
        [-0.0258, -0.0659,  0.0808,  ..., -0.0689,  0.0096, -0.0495]],
       requires_grad=True)
Parameter containing:
tensor([-0.0622, -0.0828, -0.0115, -0.0009, -0.0131, -0.0626,  0.0041,  0.0690,
        

In [None]:
import torch.nn.functional as F
lossi = []
lr = 0.1
batch_size = 16
for i in range(1000): 

    batch = torch.randint(low=0,high=Y.shape[0], size=(batch_size,))

    logits = mlp(X[batch])
    loss = F.cross_entropy(logits, Y[batch])
    for p in mlp.parameters():
        p.grad = None
    loss.backward()
    for p in mlp.parameters(): 
        p.data += -lr*p.grad
    lossi.append(loss.item())


In [None]:
# Convert the board state to a tensor if it's not already
board_tensor = torch.tensor(board_state, dtype=torch.long)

# Add a batch dimension to the board tensor
board_tensor = board_tensor.unsqueeze(0)  # Shape: [1, 8, 8]

# Pass the board tensor through the model to get logits
output_logits = mlp(board_tensor)

# apply softmax to get probabilities
output_probs = torch.softmax(output_logits, dim=1)
print("Logits:", output_logits)
print("Probabilities:", output_probs)

# Sample a move index from the probabilities using torch.multinomial
sampled_move_index = torch.multinomial(output_probs.squeeze(0), num_samples=1).item()

# Retrieve the corresponding UCI move from the move list
sampled_move = move_list[sampled_move_index]

print("Sampled Move Index:", sampled_move_index)
print("Sampled Move (UCI):", sampled_move)

Logits: tensor([[ 0.0077,  0.7184,  0.0869,  ..., -0.3247,  0.2151,  0.2154]],
       grad_fn=<AddmmBackward0>)
Probabilities: tensor([[0.0002, 0.0005, 0.0002,  ..., 0.0002, 0.0003, 0.0003]],
       grad_fn=<SoftmaxBackward0>)


In [61]:
## specify some training parameters
N_steps = 100000
print_step = 1000
batch_size = 32
eps = 1e-5
## training the neural net
lossi = []
for i in range(N_steps):
    lr = 0.1 if i<(N_steps/2) else 0.01
    batch = torch.randint(low=0, high=Ytr.shape[0], size=(batch_size,))
    emb = C[X[batch]]
    # h before the activation
    hpreact = emb.view(-1,block_size*emd_dim)@W1+b1
    hmean = hpreact.mean(0, keepdim=True)
    hstd = hpreact.std(0, keepdim=True)
    hpreact2 = gamma * (hpreact-hmean)/(hstd+eps) + beta
    with torch.no_grad():
        mean_runing = 0.999*mean_runing+0.001*hmean
        std_runing = 0.999*std_runing+0.001*hstd
    # tanh action
    h = torch.tanh(hpreact2)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ytr[batch])
    for p in parameters:
        p.grad = None
    loss.backward()
    for p in parameters:
        p.data += -lr*p.grad
    if i%print_step == 0:
        print(f"step: {i}; loss: {loss.item()}")
    lossi.append(loss.item())

NameError: name 'Ytr' is not defined