# UCITAVANJE RADNOM POTEZA

In [1]:
import os
import re

# Flatten every PGN file in `input_dir` into a one-game-per-line text file of
# the same name inside `output_dir`. Header (tag) lines are dropped and the
# movetext lines belonging to one game are joined with single spaces.
input_dir = "Lichess"
output_dir = "Data"

# Matches PGN tag-pair lines, i.e. a whole line wrapped in square brackets.
tag_line_pattern = re.compile(r"^\[.*\]$")

# Create the destination up front; opening a file for writing inside a
# directory that does not exist raises FileNotFoundError.
os.makedirs(output_dir, exist_ok=True)

for filename in os.listdir(input_dir):
    path = os.path.join(input_dir, filename)
    # os.listdir also returns sub-directories; only regular files can be read.
    if not os.path.isfile(path):
        continue

    output_file = os.path.join(output_dir, filename)
    print(f"Obradjuje: {filename}...")

    with open(path, "r", encoding="utf-8") as f, \
            open(output_file, "w", encoding="utf-8") as out:
        current_game_moves = []

        for line in f:
            line = line.strip()

            if tag_line_pattern.match(line):
                continue

            if line:
                current_game_moves.append(line)
            elif current_game_moves:
                # A blank line after movetext marks the end of the game.
                out.write(" ".join(current_game_moves) + "\n")
                current_game_moves = []

        # The last game is not necessarily followed by a blank line.
        if current_game_moves:
            out.write(" ".join(current_game_moves) + "\n")

    # Report each written file, not just the last one of the loop.
    print(f"Sacuvano u: {output_file}")


Processing lichess_db_standard_rated_2017-02.pgn...
✅ Done. Cleaned games saved to: Data/lichess_db_standard_rated_2017-02.pgn


# KOD ZA PRONALAZENJE NAJBOLJEG POTEZA ZA RANDOM POZICIJU U SVAKOJ PARTIJI

In [None]:
import chess
import chess.pgn
import chess.engine
import random
import os
import io

# For every flattened game, pick a random position (after at least 5 plies),
# ask Stockfish for its best move there and write "<FEN> <best move>" lines.

# Raw string: "\s" is not a valid escape sequence, so the non-raw form only
# worked by accident and triggers an invalid-escape warning on newer Pythons.
STOCKFISH_PATH = r"C:\stockfish\stockfish-windows-x86-64-avx2.exe"
# NOTE(review): this name shadows the built-in dir(); kept unchanged in case
# other cells read it.
dir = "Data"
output_file = "output.txt"
br = 0

# br is a debugging aid: only the first 30 files listed are analysed.

with open(output_file, "w") as out:
    with chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH) as engine:
        for filename in os.listdir(dir):
            print(f"Čita: {filename}")
            br += 1
            if br > 30:
                continue

            # The flattened game files are written as UTF-8, so read them the
            # same way instead of relying on the platform default encoding.
            with open(os.path.join(dir, filename), encoding="utf-8") as file:
                for line in file:
                    if not line.strip():
                        continue

                    game = chess.pgn.read_game(io.StringIO(line))
                    if game is None:
                        continue

                    board = game.board()
                    moves = list(game.mainline_moves())

                    # Too short to contain an interesting sampled position.
                    if len(moves) < 10:
                        continue

                    # Stop at least one move before the end so the sampled
                    # position still has a continuation.
                    move_index = random.randint(5, len(moves) - 1)
                    for move in moves[:move_index]:
                        board.push(move)

                    result = engine.analyse(board, chess.engine.Limit(time=0.1))
                    # Skip the position instead of crashing when the engine
                    # reported no principal variation.
                    pv = result.get("pv")
                    if not pv:
                        continue
                    best_move = pv[0]
                    fen1 = board.fen()

                    out.write(f"{fen1} {best_move}\n")

: 

# PRIPREMA PODATAKA ZA PRETVARANJE U TENZOR

In [9]:
# Convert the space-separated "<FEN fields...> <move>" lines into
# "<FEN>,<move>" CSV rows; blank lines are dropped.
input_file = "combinedRawOutput.txt"
output_file = "fixedData.csv"

with open(input_file, "r") as infile:
    with open(output_file, "w") as outfile:
        for line in infile:
            tokens = line.split()
            if not tokens:
                continue

            # The last token is the move; everything before it is the FEN.
            move = tokens[-1]
            fen = " ".join(tokens[:-1])
            outfile.write(fen + "," + move + "\n")


# PRETVARANJE PODATAKA U TENZORE

In [3]:
import torch
from torch.utils.data import Dataset
import chess
import numpy as np

class ChessDataset(Dataset):
    """Dataset of (board tensor, move index) pairs read from a "fen,move" file.

    Each non-blank line of ``data_path`` must contain exactly one comma that
    separates the FEN string from the move label. Moves are mapped to integer
    indices through ``move_to_idx``; ``idx_to_move`` holds the reverse mapping.

    Args:
        data_path: Path of the text file to read.
        move_vocab: Optional existing ``move -> index`` mapping. It is copied,
            so the caller's dict is never modified. New moves get the index
            ``len(move_to_idx)``, which assumes the supplied indices are the
            contiguous range ``0..n-1``.
    """

    def __init__(self, data_path, move_vocab=None):
        self.positions = []
        self.moves = []
        # Copy the supplied vocabulary: extending the caller's dict in place
        # is a hidden side effect (and only happened for non-empty dicts).
        self.move_to_idx = dict(move_vocab) if move_vocab else {}
        # Seed the reverse mapping from the supplied vocabulary so it is
        # complete, not just populated with moves first seen in this file.
        self.idx_to_move = {idx: move for move, idx in self.move_to_idx.items()}

        with open(data_path, "r") as f:
            for line in f:
                line = line.strip()
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line:
                    continue

                fen, move = line.split(",")
                self.positions.append(fen)
                self.moves.append(move)

                if move not in self.move_to_idx:
                    idx = len(self.move_to_idx)
                    self.move_to_idx[move] = idx
                    self.idx_to_move[idx] = move

    def __len__(self):
        """Return the number of positions loaded."""
        return len(self.positions)

    def __getitem__(self, idx):
        """Return ``(board_tensor, move_idx)`` for the sample at ``idx``."""
        fen = self.positions[idx]
        move = self.moves[idx]

        board_tensor = self.fen_to_tensor(fen)
        move_idx = self.move_to_idx[move]

        return board_tensor, move_idx

    def fen_to_tensor(self, fen):
        """Encode a FEN string as a float32 tensor of shape (18, 8, 8).

        Planes 0-5 hold the white pieces and planes 6-11 the black pieces
        (indexed by ``piece_type - 1``), plane 12 is the side to move,
        planes 13-16 are the castling rights (white kingside, white
        queenside, black kingside, black queenside) and plane 17 marks the
        en-passant square. Row 0 corresponds to the highest square indices.
        """
        board = chess.Board(fen)
        tensor = np.zeros((18, 8, 8), dtype=np.float32)

        for square, piece in board.piece_map().items():
            row = 7 - (square // 8)
            col = square % 8
            tensor[self.piece_to_plane(piece), row, col] = 1.0

        # Whole-plane flags: the value is broadcast over all 64 cells.
        tensor[12, :, :] = int(board.turn)

        tensor[13, :, :] = int(board.has_kingside_castling_rights(chess.WHITE))
        tensor[14, :, :] = int(board.has_queenside_castling_rights(chess.WHITE))
        tensor[15, :, :] = int(board.has_kingside_castling_rights(chess.BLACK))
        tensor[16, :, :] = int(board.has_queenside_castling_rights(chess.BLACK))

        if board.ep_square is not None:
            row = 7 - (board.ep_square // 8)
            col = board.ep_square % 8
            tensor[17, row, col] = 1.0

        # The array was created in this call, so sharing its memory with the
        # tensor is safe and avoids an extra copy.
        return torch.from_numpy(tensor)

    def piece_to_plane(self, piece):
        """Return the plane index (0-11) for ``piece``: white 0-5, black 6-11."""
        piece_type = piece.piece_type - 1
        color_offset = 0 if piece.color == chess.WHITE else 6
        return piece_type + color_offset


# SKLANJANJE EVALUACIONIH METRIKA U ODREDJENIM PARTIJAMA

In [1]:
import re

def clean_pgn_game(game):
    """Remove ``{ [%eval ...] }`` comments from a game and normalise spacing.

    Every comment block that starts with ``[%eval`` is replaced by a single
    space (together with the whitespace around it); afterwards each run of
    whitespace is collapsed to one space and the ends are trimmed.
    """
    eval_comment = re.compile(r'\s*\{\s*\[\%eval[^\}]+\}\s*')
    whitespace_run = re.compile(r'\s+')

    without_evals = eval_comment.sub(' ', game)
    return whitespace_run.sub(' ', without_evals).strip()

def process_pgn_file(input_file, output_file):
    """Write a cleaned copy of ``input_file`` to ``output_file``.

    Each non-blank input line is passed through ``clean_pgn_game`` and
    written on its own line; blank lines are skipped.
    """
    with open(input_file, 'r') as source:
        with open(output_file, 'w') as sink:
            for raw_line in source:
                if not raw_line.strip():
                    continue
                sink.write(clean_pgn_game(raw_line) + '\n')

# Strip the engine evaluations from the combined game file and report where
# the cleaned copy was written.
input_filename, output_filename = 'Data/Big.pgn', 'cleaned_chess_games.pgn'

process_pgn_file(input_file=input_filename, output_file=output_filename)
print(f"Sacuvano u: {output_filename}")

Cleaned games saved to cleaned_chess_games.pgn


# DELJENJE FAJLA SVIH PARTIJA U MANJE DELOVE

In [3]:
import os

def split_large_game_file(input_file, output_prefix, games_per_file=1000000):
    """Split a one-game-per-line file into numbered part files.

    Every line of ``input_file`` counts as one game. Lines are copied
    unchanged into ``{output_prefix}1.txt``, ``{output_prefix}2.txt``, ...,
    starting a new part after every ``games_per_file`` lines.

    Args:
        input_file: Path of the UTF-8 text file to split.
        output_prefix: Path prefix of the part files to create.
        games_per_file: Maximum number of lines per part file.

    Returns:
        True once the whole input has been copied.

    Raises:
        ValueError: If ``games_per_file`` is not positive.
    """
    if games_per_file <= 0:
        raise ValueError("games_per_file must be a positive integer")

    # How often (in games) a progress line is printed.
    progress_interval = 1000000

    file_count = 0
    game_count = 0
    outfile = None

    with open(input_file, 'r', encoding='utf-8') as infile:
        try:
            for line in infile:
                # Roll over to the next part file on every chunk boundary.
                if game_count % games_per_file == 0:
                    if outfile is not None:
                        outfile.close()
                    file_count += 1
                    outfile = open(f"{output_prefix}{file_count}.txt", 'w', encoding='utf-8')

                outfile.write(line)
                game_count += 1

                if game_count % progress_interval == 0:
                    print(f"Uradio {game_count} partija...")
        finally:
            # Close the current part even when reading or writing fails.
            if outfile is not None:
                outfile.close()

    # "\n" (not the invalid escape "\P") starts the summary on a blank line.
    print(f"\nPodelio {game_count} partija u {file_count} fajlova")
    return True


# Split the combined game file into numbered part files inside GameSpliter/.
input_filename, output_prefix = "GameSpliter/10mil.pgn", "GameSpliter/games_part_"

split_large_game_file(input_file=input_filename, output_prefix=output_prefix)

Processed 1000000 games...
Processed 2000000 games...
Processed 3000000 games...
Processed 4000000 games...
Processed 5000000 games...
Processed 6000000 games...
Processed 7000000 games...
Processed 8000000 games...
Processed 9000000 games...
Processed 10000000 games...

Successfully split 10194939 games into 11 files


True

# PARALELIZOVAN KOD ZA NOVU (fen, eval, 5 poteza) OBRADU FAJLOVA

In [6]:
import chess
import chess.pgn
import chess.engine
import random
import os
import io
from multiprocessing import Pool, cpu_count
from functools import partial

# Path of the Stockfish UCI executable started by analyze_fen_chunk; the raw
# string keeps the Windows backslashes literal.
STOCKFISH_PATH = r"C:\stockfish\stockfish-windows-x86-64-avx2.exe"
# Directory whose .pgn/.txt files are listed by the main block and read by
# process_file.
DATA_DIR = "GameSpliter"
# NOTE(review): not referenced by any code visible in this cell; process_file
# hard-codes its own chunk sizes (1000 and 100) - confirm whether it is needed.
CHUNK_SIZE = 1000
# File the main block writes the analysis lines to.
OUTPUT_FILE = "output1.txt"

def parse_and_select_position(line):
    """Parse one game given as text and return the FEN of a sampled position.

    Returns None when the text cannot be parsed as a game or the game has
    fewer than 10 moves. Otherwise the number of moves to play is drawn from
    a triangular distribution over [5, total] with its mode at 80% of the
    game length, capped so that at least one move remains unplayed.
    """
    game = chess.pgn.read_game(io.StringIO(line))
    if game is None:
        return None

    mainline = list(game.mainline_moves())
    total = len(mainline)
    if total < 10:
        return None

    sampled = int(random.triangular(5, total, total * 0.8))
    cutoff = min(total - 1, sampled)

    board = game.board()
    for ply in range(cutoff):
        board.push(mainline[ply])

    return board.fen()

def analyze_fen_chunk(fen_chunk):
    """Analyse a batch of positions with Stockfish.

    Starts one engine process for the whole batch and, for every FEN, runs a
    depth-10 search with 5 principal variations.

    Args:
        fen_chunk: Iterable of FEN strings.

    Returns:
        A list with exactly one string per FEN, formatted as
        ``"<fen>,<eval>,<m1> <m2> <m3> <m4> <m5>"``. ``<eval>`` is the score
        of the first variation from White's point of view divided by 100
        (a forced mate is reported as 100.0 or -100.0), or ``NULL`` when the
        engine reported no score. Missing moves are written as ``NULL``.
    """
    num_moves = 5
    results = []

    # The context manager shuts the engine process down even when an
    # analysis call raises, so no Stockfish processes are left behind.
    with chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH) as engine:
        for fen in fen_chunk:
            board = chess.Board(fen)
            analysis = engine.analyse(
                board, chess.engine.Limit(depth=10), multipv=num_moves
            )
            # With multipv set, analyse() returns a list of info dicts;
            # wrap a single dict defensively so both shapes are handled.
            lines = analysis if isinstance(analysis, list) else [analysis]

            eval_score = "NULL"
            pov_score = lines[0].get("score") if lines else None
            if pov_score is not None:
                score = pov_score.white()
                if score.is_mate():
                    centipawns = 10000 if score.mate() > 0 else -10000
                else:
                    centipawns = score.score()
                eval_score = centipawns / 100

            # First move of each variation, padded to a fixed number of fields.
            top_moves = []
            for info in lines[:num_moves]:
                pv = info.get("pv")
                top_moves.append(str(pv[0]) if pv else "NULL")
            top_moves.extend(["NULL"] * (num_moves - len(top_moves)))

            # One output line per position (the padding and the append used
            # to sit inside the per-move loop, emitting five malformed lines).
            results.append(f"{fen},{eval_score},{' '.join(top_moves)}")

    return results

def process_file(filename):
    """Run the two-stage pipeline on one file from DATA_DIR.

    Stage 1 maps every non-blank line through ``parse_and_select_position``
    in a process pool and drops the None results. Stage 2 groups the FENs
    into batches of 100 and maps ``analyze_fen_chunk`` over the batches in a
    second pool. Returns the flattened list of result lines.
    """
    source_path = os.path.join(DATA_DIR, filename)
    with open(source_path, 'r') as handle:
        games = []
        for raw in handle:
            stripped = raw.strip()
            if stripped:
                games.append(stripped)

    with Pool(cpu_count()) as workers:
        sampled = workers.map(parse_and_select_position, games, chunksize=1000)

    positions = [fen for fen in sampled if fen is not None]

    batch_size = 100
    batches = []
    for start in range(0, len(positions), batch_size):
        batches.append(positions[start:start + batch_size])

    with Pool(cpu_count()) as workers:
        per_batch = workers.map(analyze_fen_chunk, batches)

    flattened = []
    for batch_lines in per_batch:
        flattened.extend(batch_lines)
    return flattened

if __name__ == "__main__":
    # Lists every .pgn/.txt file in DATA_DIR with its running number, but
    # only the 12th listed file is actually processed and written out.
    all_results = []

    files = [f for f in os.listdir(DATA_DIR) if f.endswith(('.pgn', '.txt'))]
    br = 0
    for br, filename in enumerate(files, start=1):
        print(filename, br)
        if br != 12:
            continue

        print(f"Obradjuje {filename}...")
        results = process_file(filename)
        all_results.extend(results)

        with open(OUTPUT_FILE, 'w') as out:
            out.writelines(res + "\n" for res in results)

    print(f"Gotovo, sacuvano u {OUTPUT_FILE}")

games_part_1.txt 1
games_part_10.txt 2
games_part_11.txt 3
games_part_2.txt 4
games_part_3.txt 5
games_part_4.txt 6
games_part_5.txt 7
games_part_6.txt 8
games_part_7.txt 9
games_part_8.txt 10
games_part_9.txt 11
test.txt 12
Processing test.txt...
