In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import statistics

In [2]:
# Location of the raw CSV, relative to the notebook's working directory.
filepath = "data/chessData.csv"  
# Read the whole file into memory; later cells use its "FEN" and "Evaluation" columns.
df = pd.read_csv(filepath)

In [3]:
def process_evaluation(value):
    """Turn one raw evaluation entry into a number.

    The entry is stringified and stripped of surrounding whitespace. Text that
    begins with "#" (a mate score) collapses to a fixed sentinel: 20000 when
    the text contains "+", otherwise -20000. Any other text is parsed with
    ``float``, which raises ``ValueError`` for non-numeric input.
    """
    text = str(value).strip()
    if not text.startswith("#"):
        return float(text)
    # Mate score: only the sign survives, the distance to mate is discarded.
    mate_sign = 1 if "+" in text else -1
    return mate_sign * 20000

In [4]:
# Parse every raw entry with process_evaluation and store the column as float32.
df["Evaluation"] = df["Evaluation"].apply(process_evaluation).astype(np.float32)  # stored as float32 (not float16)

In [5]:
# Scale Evaluation to [-1000, 1000]
# Every value is divided by the single largest absolute value in the column, so
# when a mate sentinel (+/-20000 from process_evaluation) is present, ordinary
# scores are compressed by that same factor.
# NOTE(review): an all-zero column would make eval_max 0 and produce NaNs here.
eval_max = df["Evaluation"].abs().max()
df["Evaluation"] = (df["Evaluation"] / eval_max) * 1000

In [2]:
# Encoding
def encoding(fen):
    """Encode the piece placement of a FEN string as a 256-element float32 vector.

    Squares are numbered 0..63 in the order they appear in the FEN placement
    field. The result is two 128-element halves laid end to end:

    * ``[0:128]``   - every piece: slot ``2*i`` holds the signed piece code
      (pawn=1, knight=2, bishop=3, rook=4, queen=5, king=6; negative for
      lowercase/black pieces) and slot ``2*i + 1`` holds the colour flag
      (+1 for uppercase/white, -1 for lowercase/black).
    * ``[128:256]`` - the same two slots, filled for pawns only.

    Empty squares stay 0. Only the first FEN field is read, so the side to
    move, castling rights and counters do not influence the output.

    Args:
        fen: FEN string. Fields after the piece placement are optional and ignored.

    Returns:
        ``np.ndarray`` of shape ``(256,)`` and dtype ``float32``.

    Raises:
        ValueError: if ``fen`` is empty or contains only whitespace.
        KeyError: if the placement contains a character that is neither a
            digit nor one of the twelve piece letters.
        IndexError: if the placement describes more than 64 squares.
    """
    piece_map = {"p": -1, "n": -2, "b": -3, "r": -4, "q": -5, "k": -6,
                 "P": 1, "N": 2, "B": 3, "R": 4, "Q": 5, "K": 6}

    fields = fen.split()
    if not fields:
        raise ValueError("FEN string is empty")
    board = fields[0]

    all_pieces = np.zeros(128, dtype=np.float32)
    pawns_only = np.zeros(128, dtype=np.float32)

    square = 0
    for row in board.split("/"):
        for char in row:
            if char.isdigit():
                # A digit is a run of empty squares: skip ahead, leaving zeros.
                square += int(char)
                continue

            code = piece_map[char]
            colour = 1.0 if char.isupper() else -1.0
            slot = square * 2

            all_pieces[slot] = code
            all_pieces[slot + 1] = colour
            if char in ("p", "P"):
                pawns_only[slot] = code
                pawns_only[slot + 1] = colour
            square += 1

    return np.concatenate([all_pieces, pawns_only])

In [3]:
# Smoke test: print the feature vector produced for one sample position.
print(encoding("1qr1r1k1/pb1ppp1p/1pnb2p1/7n/2PNP3/1PN1BPP1/P2Q3P/2RR1BK1 w - - 1 17"))

[ 0.  0. -5. -1. -4. -1.  0.  0. -4. -1.  0.  0. -6. -1.  0.  0. -1. -1.
 -3. -1.  0.  0. -1. -1. -1. -1. -1. -1.  0.  0. -1. -1.  0.  0. -1. -1.
 -2. -1. -3. -1.  0.  0.  0.  0. -1. -1.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0. -2. -1.  0.  0.  0.  0.  1.  1.  2.  1.
  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  2.  1.  0.  0.  3.  1.
  1.  1.  1.  1.  0.  0.  1.  1.  0.  0.  0.  0.  5.  1.  0.  0.  0.  0.
  0.  0.  1.  1.  0.  0.  0.  0.  4.  1.  4.  1.  0.  0.  3.  1.  6.  1.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 -1. -1.  0.  0.  0.  0. -1. -1. -1. -1. -1. -1.  0.  0. -1. -1.  0.  0.
 -1. -1.  0.  0.  0.  0.  0.  0.  0.  0. -1. -1.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.
  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.
  0.  0.  1.  1.  1.  1.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0

In [7]:
import json

# Assuming x_data is your list of vectors
# Example: x_data = [np.array([1,2,3]), np.array([4,5,6])]

# Save the encodings to a file
def save_encodings(x_data, filename='encodings.txt'):
    """Write a collection of encodings to ``filename`` as a single JSON document.

    Every numpy array in ``x_data`` is converted with ``tolist()``; any other
    element is handed to ``json.dump`` unchanged. A confirmation line is
    printed after the file has been written.
    """
    payload = []
    for item in x_data:
        # JSON cannot serialise ndarrays, so turn them into plain lists first.
        payload.append(item.tolist() if isinstance(item, np.ndarray) else item)

    with open(filename, 'w') as handle:
        json.dump(payload, handle)

    print(f"Encodings saved to {filename}")


In [8]:

# Load the encodings from a file
def load_encodings(filename='encodings.txt'):
    """Read a JSON file of encodings and return them as a list of numpy arrays.

    The file is expected to hold a JSON list; each element is passed through
    ``np.array``. The number of loaded entries is printed before returning.
    """
    with open(filename, 'r') as handle:
        stored_rows = json.load(handle)

    # Rebuild one numpy array per stored entry.
    x_data = list(map(np.array, stored_rows))

    print(f"Loaded {len(x_data)} encodings from {filename}")
    return x_data


In [9]:
# Encode every position once, collecting targets and the per-feature min/max
# that the later min-max scaling cell needs.
x_data = []
y_data = []

# Progress is reported every `drop_interval` rows (name kept from the earlier
# version of this cell, which dropped processed rows at the same interval).
drop_interval = 500000
x_min, x_max = None, None

# Iterate the two columns directly instead of df.iterrows(): no per-row Series
# is built, and the frame is no longer mutated while it is being iterated.
for count, (fen, evaluation) in enumerate(zip(df["FEN"], df["Evaluation"]), start=1):
    encoded = encoding(fen)
    x_data.append(encoded)
    y_data.append(evaluation)

    if x_min is None:
        # Copy so the running extrema never alias a stored feature vector.
        x_min, x_max = encoded.copy(), encoded.copy()
    else:
        x_min = np.minimum(x_min, encoded)
        x_max = np.maximum(x_max, encoded)

    # Count rows actually processed rather than relying on the index labels.
    if count % drop_interval == 0:
        print(f"Processed {count} rows")

df = None  # Free memory

Processed 500000 rows
Processed 1000000 rows
Processed 1500000 rows
Processed 2000000 rows
Processed 2500000 rows
Processed 3000000 rows
Processed 3500000 rows
Processed 4000000 rows
Processed 4500000 rows
Processed 5000000 rows
Processed 5500000 rows
Processed 6000000 rows
Processed 6500000 rows
Processed 7000000 rows
Processed 7500000 rows
Processed 8000000 rows
Processed 8500000 rows
Processed 9000000 rows
Processed 9500000 rows
Processed 10000000 rows
Processed 10500000 rows
Processed 11000000 rows
Processed 11500000 rows
Processed 12000000 rows
Processed 12500000 rows


In [10]:
# # Convert x_data to a numpy array for easier manipulation
# x_data = np.array(x_data, dtype=np.float32)

# # Compute the mean and std of each feature (axis 0)
# mean = np.mean(x_data, axis=0)
# std = np.std(x_data, axis=0)

# # Avoid dividing by 0 when std is too small
# std[std == 0] = 1e-8  

# # Standardize the data
# x_data = (x_data - mean) / std


In [11]:

# Example usage:
# Save encodings
# save_encodings(x_data)

# Later in your code or in another session, load the encodings
# loaded_data = load_encodings()

In [12]:
# Stack the per-position vectors into one 2-D float16 array and turn the
# targets into a float16 column vector of shape (n, 1).
# (No tensor conversion or scaling happens in this cell.)
x_data = np.vstack(x_data).astype(np.float16)
y_data = np.array(y_data, dtype=np.float16).reshape(-1, 1)


In [13]:
# Report whether any NaN slipped into the features before they are scaled.
print(
    "Warning: x_data contains NaN values!"
    if np.isnan(x_data).any()
    else "No NaN values found in x_data."
)

No NaN values found in x_data.


In [14]:
# Min-max scale each feature column using the extrema collected while encoding.
# The 1e-3 in the denominator prevents division by zero for columns whose min
# equals max; it also means a column's maximum maps to slightly below 1.
# NOTE(review): x_min / x_max are not written to disk in the cells visible here;
# the same values would be needed to scale inputs at inference time.
x_data = (x_data - x_min) / (x_max - x_min + 1e-3)

In [15]:
# Take the data from RAM as float16 arrays (done only once, before saving)
x_data = np.array(x_data, dtype=np.float16)
y_data = np.array(y_data, dtype=np.float16)

# Shuffle the data before saving
# One permutation is applied to both arrays so each feature row stays paired
# with its target. No random seed is set in this cell.
indices = np.arange(len(y_data))
np.random.shuffle(indices)

x_data = x_data[indices]
y_data = y_data[indices]


In [16]:
# Save to disk so training can read the arrays from the drive
np.save("x_data.npy", x_data)
np.save("y_data.npy", y_data)

In [17]:
# Free memory: drop the in-memory arrays now that they are saved, then run the
# garbage collector.
del x_data, y_data
import gc
gc.collect()

20

In [18]:
# # Check for NaN values before scaling
# if np.isnan(x_data).any():
#     print("Warning: x_data contains NaN values!")
# else:
#     print("No NaN values found in x_data.")

In [19]:
# # Convert to PyTorch Tensor
# x_data = torch.tensor(x_data, dtype=torch.float16)
# y_data = torch.tensor(y_data, dtype=torch.float16)

In [10]:
class ChessDataset(Dataset):
    """Dataset backed by two ``.npy`` files opened as read-only memory maps.

    ``x_file`` holds the features and ``y_file`` the targets. Items are read
    on access and returned as a pair of float32 tensors.
    """

    def __init__(self, x_file, y_file):
        # mmap_mode='r' maps the files instead of reading them into memory.
        self.x_data, self.y_data = (
            np.load(path, mmap_mode='r') for path in (x_file, y_file)
        )

    def __len__(self):
        # The target array defines the number of samples.
        return len(self.y_data)

    def __getitem__(self, idx):
        # Cast each looked-up entry to float32, then copy it into a tensor.
        features, target = (
            torch.tensor(source[idx].astype(np.float32))
            for source in (self.x_data, self.y_data)
        )
        return features, target

# Create the DataLoader
# The whole saved dataset is used for training (no split is made here);
# batches of 128 are reshuffled each epoch and loaded with num_workers=2.
batch_size = 128
dataset = ChessDataset("x_data.npy", "y_data.npy")
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=2)


In [11]:
class NNUE(nn.Module):
    """Fully connected regressor: input -> 1024 -> 512 -> 256 -> 128 -> 1.

    Each of the four hidden linear layers is followed by ``BatchNorm1d`` and
    ReLU; the final linear layer returns one raw value per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        widths = (input_size, 1024, 512, 256, 128)
        # Registers fc1, bn1, fc2, bn2, fc3, bn3, fc4, bn4 in that order, so
        # attribute names and state_dict keys stay exactly as before.
        for layer_no, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{layer_no}", nn.Linear(fan_in, fan_out))
            setattr(self, f"bn{layer_no}", nn.BatchNorm1d(fan_out))
        self.fc5 = nn.Linear(widths[-1], 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        hidden_blocks = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
            (self.fc4, self.bn4),
        )
        # Linear -> BatchNorm -> ReLU for every hidden block.
        for linear, norm in hidden_blocks:
            x = self.relu(norm(linear(x)))
        # Output layer has no activation.
        return self.fc5(x)
# Initialise the model with the input size taken from the saved feature file:
# the first stored row is read through a memory map and its length is used.
x_sample = np.load("x_data.npy", mmap_mode='r')[0]
model = NNUE(input_size=len(x_sample)).to("cuda" if torch.cuda.is_available() else "cpu").float()

In [4]:
import numpy as np

# Open the saved features as a read-only memory map rather than reading the
# whole file into RAM up front
x_data = np.load("x_data.npy", mmap_mode='r')

# Count, for every feature column, how many samples hold exactly 0
# NOTE(review): `x_data == 0` builds a full-size boolean array in memory before
# it is summed.
zero_counts = np.sum(x_data == 0, axis=0)

# Print the result
print("Số lượng giá trị 0 trên từng chiều của vector:\n", zero_counts)

Số lượng giá trị 0 trên từng chiều của vector:
 [   45434  4155546   140813  1851819   309110  3181555    88056  3452432
   934260  3141716   391262  4280386  6081824  6479623   710162  1990693
    27836  4582928    59672  5522287   117918  3063382   184248  3132908
   327471  3851872   406830  7821089   849773  7803385   498150  6768671
    15282  3723906    42329  2906374    80266  3421127   135929  4640880
   178299  4234874   238038  4541000   175192  5534493    97310  3470321
     9880  1985013    23899  1661620    45020  3767617    73593  2490921
    90987  3092336    87984  2015484    63846  1319801    33599  1608650
     7890   619680    17959  1283534    26813  1060191    35387  1067387
    39060  1017914    38304   772455    32596   762514    17548   527772
     5748   283462    12361   313831    14472   374922    17287   381355
    17251   262012    18808   239385    16980   177762     8931   216667
     3237   228371     6264   289869     6714   219775     6326   194037
   

In [3]:
# Count zeros per feature column; x_data must already be loaded in the current
# kernel or this line raises NameError.
zero_counts = np.sum(x_data == 0, axis=0)

NameError: name 'x_data' is not defined

In [12]:
# Configure the loss function and optimizer
# NOTE(review): `model` is not moved to `device` in this cell; it is presumably
# already on the matching device from where it was created — confirm.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

# Train the model
num_epochs = 100
best_loss = float("inf")

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Standard step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        # Accumulate the per-batch mean loss as a Python float.
        total_loss += loss.item()
    
    # Mean of the per-batch losses over this epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}")
    
    # Save the best model
    # "Best" is judged on the training loss computed above; this cell runs no
    # validation pass.
    if avg_loss < best_loss:
        best_loss = avg_loss
        torch.save(model.state_dict(), "best_nnue.pth")
        print("Model saved!")

print("Training complete!")
