### Modules

In [1]:
import board_feature_extractor
import game_state_classifier_data_entry

### Gathering data

In [2]:
# Player whose games are requested and how many games to ask for
# (the captured output of this cell shows chess.com game links).
player_name = "BIG_TONKA_T"
game_count = 50

# Hand both settings to the project's data-entry helper, which loads game data
# into JSON. The captured "Data entry stopped by user." output suggests the step
# is interactive and can be interrupted — TODO confirm how partial runs are saved.
game_state_classifier_data_entry.data_entry(player_name, game_count)

https://www.chess.com/game/live/93590969817


Data entry stopped by user.


### Compiling data into dataframes

In [3]:
import pandas as pd
import json
import chess
import chess.pgn
from chess import Board
import io


# Load the hand-labelled games. The explicit encoding keeps the read independent
# of the platform's default locale.
with open('game_state_classifier_data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# One row per position reached in every labelled game.
rows = []
skipped_games = 0

for game in data['games']:
    pgn = game['pgn']
    # Labelled boundaries: the move counts at which the middle game and the
    # end game begin for this particular game.
    move_count_middle_game = game['move_count_middle_game']
    move_count_end_game = game['move_count_end_game']

    # read_game() returns None when the text holds no game; skip such entries
    # instead of crashing on `None.board()` below.
    game_obj = chess.pgn.read_game(io.StringIO(pgn))
    if game_obj is None:
        skipped_games += 1
        continue

    board = game_obj.board()

    for move in game_obj.mainline_moves():
        board.push(move)

        # Move number in half-move steps: 0.5 after White's first move, 1.0
        # after Black's reply, 1.5 after White's second move, and so on.
        # push() has already flipped board.turn, so WHITE to move means Black
        # just moved and fullmove_number has already been incremented.
        move_count = board.fullmove_number - (1.0 if board.turn == chess.WHITE else 0.5)

        pieces_count = board_feature_extractor.count_pieces(board)
        developed_pieces_count = board_feature_extractor.count_developed_pieces(board)
        open_files_count = board_feature_extractor.count_open_files(board)
        passed_pawns_count = board_feature_extractor.count_passed_pawns(board)

        # Label the position from the per-game boundaries.
        if move_count < move_count_middle_game:
            game_state = 'OPEN'
        elif move_count < move_count_end_game:
            game_state = 'MID'
        else:
            game_state = 'END'

        rows.append({
            'move_count': move_count,
            'white_pawns': pieces_count['pawns']['white'],
            'black_pawns': pieces_count['pawns']['black'],
            'white_pieces': pieces_count['pieces']['white'],
            'black_pieces': pieces_count['pieces']['black'],
            'white_developed_pieces': developed_pieces_count['white'],
            'black_developed_pieces': developed_pieces_count['black'],
            'open_files': open_files_count,
            'white_passed_pawns': passed_pawns_count['white'],
            'black_passed_pawns': passed_pawns_count['black'],
            'game_state': game_state
        })

# Make dropped entries visible rather than silently shrinking the dataset.
if skipped_games:
    print(f"Skipped {skipped_games} game(s) whose PGN could not be read.")

df = pd.DataFrame(rows)
df

Unnamed: 0,move_count,white_pawns,black_pawns,white_pieces,black_pieces,white_developed_pieces,black_developed_pieces,open_files,white_passed_pawns,black_passed_pawns,game_state
0,0.5,8,8,7,7,0,0,0,0,0,OPEN
1,1.0,8,8,7,7,0,1,0,0,0,OPEN
2,1.5,8,8,7,7,1,1,0,0,0,OPEN
3,2.0,8,8,7,7,1,1,0,0,0,OPEN
4,2.5,8,8,7,7,2,1,0,0,0,OPEN
...,...,...,...,...,...,...,...,...,...,...,...
2680,16.0,7,7,5,6,5,6,0,0,0,MID
2681,16.5,7,7,5,5,5,5,0,0,0,MID
2682,17.0,7,7,5,5,5,5,0,0,0,MID
2683,17.5,7,7,5,5,5,5,0,0,0,MID


### Building Classifier

In [4]:
# Separate the predictors from the label, then hold out a test partition.

from sklearn.model_selection import train_test_split

# The label column is the target; every remaining column is a predictor.
y = df['game_state']
X = df.drop(columns='game_state')

# 80 % of the rows train the models and 20 % are held out; the fixed seed makes
# the shuffle repeatable.
# NOTE(review): rows are split individually, so consecutive positions from one
# game can land in both partitions — the reported scores may be optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [5]:
## KNN Classifier
from sklearn.neighbors import KNeighborsClassifier

# Build a 5-nearest-neighbour model and fit it on the training partition in a
# single chained call (fit() returns the estimator itself).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Keep the fitted estimator as the cell's last expression so it is displayed.
knn

In [6]:
## Binary Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier

# Seeded so repeated fits give the same tree; fit() returns the estimator, so
# construction and training are chained.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Keep the fitted estimator as the cell's last expression so it is displayed.
clf

### Testing Classifier

In [7]:
# Score the KNN model on the held-out rows

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Predicted labels for the test partition
y_pred = knn.predict(X_test)

# Compute every metric up front, then report them
knn_confusion = confusion_matrix(y_test, y_pred)
knn_report = classification_report(y_test, y_pred)
knn_accuracy = accuracy_score(y_test, y_pred)

print("Confusion Matrix:")
print(knn_confusion)
print("\nClassification Report:")
print(knn_report)
print("KNN Accuracy Score:", knn_accuracy)

Confusion Matrix:
[[110   1   0]
 [  3 246   4]
 [  0   4 169]]

Classification Report:
              precision    recall  f1-score   support

         END       0.97      0.99      0.98       111
         MID       0.98      0.97      0.98       253
        OPEN       0.98      0.98      0.98       173

    accuracy                           0.98       537
   macro avg       0.98      0.98      0.98       537
weighted avg       0.98      0.98      0.98       537

KNN Accuracy Score: 0.9776536312849162


In [8]:
# Score the decision tree on the held-out rows

# Predicted labels for the test partition (rebinds the shared `y_pred` name)
y_pred = clf.predict(X_test)

# Compute the tabular metrics up front, then report them
tree_confusion = confusion_matrix(y_test, y_pred)
tree_report = classification_report(y_test, y_pred)

print("Confusion Matrix:")
print(tree_confusion)
print("\nClassification Report:")
print(tree_report)
print(f"Binary Tree Model Accuracy: {accuracy_score(y_test, y_pred)}")

Confusion Matrix:
[[111   0   0]
 [  2 245   6]
 [  0   7 166]]

Classification Report:
              precision    recall  f1-score   support

         END       0.98      1.00      0.99       111
         MID       0.97      0.97      0.97       253
        OPEN       0.97      0.96      0.96       173

    accuracy                           0.97       537
   macro avg       0.97      0.98      0.97       537
weighted avg       0.97      0.97      0.97       537

Binary Tree Model Accuracy: 0.9720670391061452


In [9]:
# Optional: render the fitted tree's decision rules as indented text
from sklearn.tree import export_text

# Label each split with the matching predictor column name
feature_labels = X.columns.tolist()
tree_rules = export_text(clf, feature_names=feature_labels)
print(tree_rules)

|--- black_pieces <= 6.50
|   |--- black_pieces <= 2.50
|   |   |--- white_pieces <= 2.50
|   |   |   |--- class: END
|   |   |--- white_pieces >  2.50
|   |   |   |--- move_count <= 32.00
|   |   |   |   |--- class: MID
|   |   |   |--- move_count >  32.00
|   |   |   |   |--- white_pawns <= 3.50
|   |   |   |   |   |--- black_pieces <= 1.50
|   |   |   |   |   |   |--- class: END
|   |   |   |   |   |--- black_pieces >  1.50
|   |   |   |   |   |   |--- class: MID
|   |   |   |   |--- white_pawns >  3.50
|   |   |   |   |   |--- class: END
|   |--- black_pieces >  2.50
|   |   |--- move_count <= 7.75
|   |   |   |--- white_pawns <= 7.50
|   |   |   |   |--- black_developed_pieces <= 3.50
|   |   |   |   |   |--- white_developed_pieces <= 1.50
|   |   |   |   |   |   |--- class: OPEN
|   |   |   |   |   |--- white_developed_pieces >  1.50
|   |   |   |   |   |   |--- class: MID
|   |   |   |   |--- black_developed_pieces >  3.50
|   |   |   |   |   |--- class: OPEN
|   |   |   |--- wh

### Saving Classifier

In [16]:
from joblib import dump

# Destination file for each fitted model; written in insertion order
model_files = {
    'game_state_classifier_knn.joblib': knn,
    'game_state_classifier_bst.joblib': clf,
}
for model_path, fitted_model in model_files.items():
    dump(fitted_model, model_path)

print("saved classifiers")

saved classifiers


### Loading Classifier

In [11]:
from joblib import load
from sklearn.neighbors import KNeighborsClassifier

# Reload the KNN model from the file the "Saving Classifier" cell writes
# ('game_state_classifier_knn.joblib'). The previous name,
# 'game_state_classifier.joblib', is not produced anywhere in this notebook, so
# a fresh Restart & Run All would fail or pick up a stale file.
# NOTE: joblib files are pickle-based — only load files you created yourself.
loaded_knn_model = load('game_state_classifier_knn.joblib')

# Display the restored estimator
loaded_knn_model

### Using Classifier

In [12]:
import chess
import random

def random_board(n):
    """Play up to ``n`` uniformly random legal half-moves from the start position.

    Args:
        n: Maximum number of half-moves (plies) to play.

    Returns:
        The resulting ``chess.Board``. Fewer than ``n`` moves are played if a
        position with no legal moves is reached first.
    """
    position = chess.Board()
    for _ in range(n):
        candidates = list(position.legal_moves)
        if candidates:
            position.push(random.choice(candidates))
        else:
            # Nothing left to play: return the position as it stands.
            break
    return position

# Number of random half-moves (plies) to play from the starting position
n_half_moves = 10  # Change this number as needed
randomized_board = random_board(n_half_moves)
# Printing a board shows it as an 8x8 text diagram (see the output below)
print(randomized_board)

r . b q k b . r
p p p p . p p .
. . n . p . . n
. . . . . . . .
Q . P . . . p .
. . . . P . . .
P P . P N P . P
R N B . K B . R


In [13]:
def _predictor_row(board):
    """Return the predictor columns for one position, in training-column order."""
    # board.turn is the side to move next: WHITE means Black has just moved, so
    # a whole move is complete; otherwise only White's half has been played.
    move_count = board.fullmove_number - (1.0 if board.turn == chess.WHITE else 0.5)

    piece_totals = board_feature_extractor.count_pieces(board)
    developed = board_feature_extractor.count_developed_pieces(board)
    open_files = board_feature_extractor.count_open_files(board)
    passed_pawns = board_feature_extractor.count_passed_pawns(board)

    return {
        'move_count': move_count,
        'white_pawns': piece_totals['pawns']['white'],
        'black_pawns': piece_totals['pawns']['black'],
        'white_pieces': piece_totals['pieces']['white'],
        'black_pieces': piece_totals['pieces']['black'],
        'white_developed_pieces': developed['white'],
        'black_developed_pieces': developed['black'],
        'open_files': open_files,
        'white_passed_pawns': passed_pawns['white'],
        'black_passed_pawns': passed_pawns['black'],
    }


# Three random positions, each reached after 4 to 25 random half-moves.
boards = []
rows = []
for sample_idx in range(3):
    sample_board = random_board(random.randint(4, 25))
    rows.append(_predictor_row(sample_board))
    boards.append(sample_board)

# Predict on the predictor-only frame, then attach the labels as a new column.
df = pd.DataFrame(rows)
df = df.assign(game_state=loaded_knn_model.predict(df))

df

Unnamed: 0,move_count,white_pawns,black_pawns,white_pieces,black_pieces,white_developed_pieces,black_developed_pieces,open_files,white_passed_pawns,black_passed_pawns,game_state
0,6.5,8,8,7,7,2,2,0,0,0,OPEN
1,10.5,8,7,7,7,4,2,0,0,0,OPEN
2,3.5,8,8,7,7,1,0,0,0,0,OPEN


In [14]:
# Show each predicted row next to the position it was computed from. Iterating
# over `boards` itself, rather than a hard-coded count, keeps this cell in step
# with however many positions were generated.
for row_idx, shown_board in enumerate(boards):
    print(df.iloc[row_idx])
    print('')
    print(shown_board)
    print('\n\n')

move_count                 6.5
white_pawns                  8
black_pawns                  8
white_pieces                 7
black_pieces                 7
white_developed_pieces       2
black_developed_pieces       2
open_files                   0
white_passed_pawns           0
black_passed_pawns           0
game_state                OPEN
Name: 0, dtype: object

r . . q k b n r
p . p n . p p .
b p . . p . . p
. . . p . . . .
. Q P . P . . .
. . . . . P . .
P P . P N . P P
R N B . K B . R



move_count                10.5
white_pawns                  8
black_pawns                  7
white_pieces                 7
black_pieces                 7
white_developed_pieces       4
black_developed_pieces       2
open_files                   0
white_passed_pawns           0
black_passed_pawns           0
game_state                OPEN
Name: 1, dtype: object

r . b q k b . r
. . p p n p p p
. p . . . . . .
. n . . . . . .
N P . . p P . .
. . . P . . P .
P . P Q P . . P
. R B K . B N R



move_cou

### Using game_state_classifier.py

In [15]:
import game_state_classifier

# Five random positions, each reached after 4 to 25 random half-moves.
boards = [random_board(random.randint(4, 25)) for _ in range(5)]

# Let the packaged module build the predictor rows and classify them.
rows = list(map(game_state_classifier.extract_predictors_from_board, boards))
df = pd.DataFrame(rows)
df = df.assign(game_state=game_state_classifier.predict(df))

df

Unnamed: 0,move_count,white_pawns,black_pawns,white_pieces,black_pieces,white_developed_pieces,black_developed_pieces,open_files,white_passed_pawns,black_passed_pawns,game_state
0,9.5,8,8,7,7,0,1,0,0,0,MID
1,3.5,8,8,7,7,2,0,0,0,0,OPEN
2,11.0,7,8,7,7,2,2,0,0,0,MID
3,9.5,8,6,7,7,3,2,0,0,0,MID
4,6.0,8,8,6,7,1,3,0,0,0,OPEN
