In [10]:
# Import necessary libraries
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification, BertTokenizer, AdamW
from sklearn.model_selection import train_test_split
from transformers import get_linear_schedule_with_warmup
import numpy as np
import pandas as pd
import time
import datetime
import random

In [11]:
# Function to format time
def format_time(elapsed):
    return str(datetime.timedelta(seconds=int(round((elapsed)))))

# Function for fixed random seed
def set_seed(seed_value=42):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

# Load the data
def load_data(file_path):
    data = pd.read_csv(file_path)
    data['input_sequence'].fillna('[START]', inplace=True)
    return data

# Tokenize the chess moves
def tokenize_chess_moves(data, tokenizer, max_length=64):
    input_ids = []
    attention_masks = []

    for seq in data['input_sequence']:
        encoded_dict = tokenizer.encode_plus(
            seq,
            add_special_tokens=True,
            max_length=max_length,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(data['target_move'].apply(lambda x: tokenizer.convert_tokens_to_ids([x])[0]).tolist())

    return input_ids, attention_masks, labels

# Prepare the dataset and dataloader
def create_data_loader(input_ids, attention_masks, labels):
    dataset = TensorDataset(input_ids, attention_masks, labels)
    sampler = RandomSampler(dataset)
    dataloader = DataLoader(dataset, sampler=sampler, batch_size=32)
    return dataloader

# Training function
def train(model, train_dataloader, validation_dataloader, device, epochs=20):
    total_steps = len(train_dataloader) * epochs
    optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    for epoch_i in range(0, epochs):
        print(f'======== Epoch {epoch_i + 1} / {epochs} ========')
        total_train_loss = 0
        model.train()

        for step, batch in enumerate(train_dataloader):
            b_input_ids, b_input_mask, b_labels = batch
            b_input_ids = b_input_ids.to(device)
            b_input_mask = b_input_mask.to(device)
            b_labels = b_labels.to(device)  # Ensure this is aligned with your batch size

            model.zero_grad()        
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            loss = outputs.loss
            total_train_loss += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

        avg_train_loss = total_train_loss / len(train_dataloader)
        print(f"  Average training loss: {avg_train_loss:.2f}")

        print("  Running Validation...")
        model.eval()
        total_eval_accuracy = 0
        total_eval_loss = 0
        for batch in validation_dataloader:
            b_input_ids, b_input_mask, b_labels = batch
            b_input_ids = b_input_ids.to(device)
            b_input_mask = b_input_mask.to(device)
            b_labels = b_labels.to(device)  # Ensure this is aligned with your batch size
            
            with torch.no_grad():        
                outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            
            loss = outputs.loss
            total_eval_loss += loss.item()
            logits = outputs.logits
            total_eval_accuracy += (logits.argmax(axis=-1) == b_labels).sum().item()

        avg_val_loss = total_eval_loss / len(validation_dataloader)
        avg_acc = total_eval_accuracy / len(validation_dataloader)

        print(f"  Validation Loss: {avg_val_loss:.2f}")
        print(f"  Validation Accuracy: {avg_acc:.2f}")

    print("Training complete!")

# Prediction function
def predict(model, sentence, tokenizer, device):
    model.eval()
    inputs = tokenizer(sentence, return_tensors="pt", max_length=64, pad_to_max_length=True, add_special_tokens=True)
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask)
        logits = outputs.logits

    predicted_index = logits.argmax(axis=-1).item()
    predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
    return predicted_token

# Main function to execute the tasks
def main():
    set_seed()  # Set seed for reproducibility
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    lichess_username = "rootsec1"
    # Load and preprocess the dataset
    file_path = f'../data/processed/sequence_target_map_{lichess_username}.csv'  # Change this to your dataset path
    data = load_data(file_path)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    input_ids, attention_masks, labels = tokenize_chess_moves(data, tokenizer)

    # Split the data into train and validation sets
    train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(
        input_ids,
        attention_masks,
        random_state=0,
        test_size=0.2,
        shuffle=False
    )
    train_masks, validation_masks, _, _ = train_test_split(
        attention_masks,
        input_ids,
        random_state=0,
        test_size=0.2,
        shuffle=False
    )

    # Assuming 'labels' is a one-dimensional tensor from earlier in your code
    train_data = TensorDataset(train_inputs, train_masks, train_labels)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=32)

    validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
    validation_sampler = SequentialSampler(validation_data)
    validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=32)

    # Load the BERT model
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(tokenizer.vocab))
    model.to(device)

    # Train and validate
    train(model, train_dataloader, validation_dataloader, device)

    # Save the model
    model.save_pretrained('models/bert_chess_model.keras')

    # Test the model with a new sentence
    new_moves = 'e4 d5 exd5 Qxd5'  # Change this to your new chess moves
    predicted_move = predict(model, new_moves, tokenizer, device)
    print(f'Predicted move: {predicted_move}')

In [12]:
main()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['input_sequence'].fillna('[START]', inplace=True)
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh



ValueError: Expected input batch_size (32) to match target batch_size (2048).