In [15]:
import os
import random
import time

import chess
import numpy as np
import pandas as pd
from teradataml import *

In [16]:
# Connect to the Teradata database.
# Connection settings are read from the environment so credentials do not have
# to be edited into the notebook. The fallbacks are the values this notebook
# was written against, which keeps Restart & Run All working unchanged.
# NOTE(review): remove the password fallback before sharing this notebook
# outside the local sandbox.
TD_HOST = os.environ.get("TD_HOST", "192.168.56.1")
TD_USER = os.environ.get("TD_USER", "val")
TD_PASSWORD = os.environ.get("TD_PASSWORD", "val")

eng = create_context(host=TD_HOST, username=TD_USER, password=TD_PASSWORD)
conn = get_connection()

# Table queried for the last used game_id, and the table batches are loaded into.
replay_buffer_table = "replay_buffer"
replay_buffer_staging_table = "replay_buffer_stage"

In [17]:
# Empty frame with the replay-buffer columns; finished games are appended to
# it until the next bulk load.
batch_df = pd.DataFrame(
    {column: [] for column in ('game_id', 'ply', 'FEN', 'move', 'reward')}
)

# Seed Python's random module from the current wall-clock time.
random.seed(time.time())

In [18]:
def _position_key(board):
    """Return the board's FEN cut down to its first four space-separated fields."""
    return " ".join(board.fen().split(" ")[:4])


def _game_finished(board):
    """Return True as soon as one of the self-play stop conditions holds."""
    return (
        board.is_checkmate()
        or board.is_stalemate()
        or board.is_insufficient_material()
        or board.can_claim_draw()
        or board.is_seventyfive_moves()
        or board.is_fivefold_repetition()
    )


def play_game(in_board, game_id, training=None):
    """Play one game of uniformly random legal moves and optionally record it.

    Parameters
    ----------
    in_board : chess.Board or None
        Position to start from. ``None`` starts from a fresh ``chess.Board()``.
        A supplied board is mutated in place.
    game_id : int
        Identifier written to every recorded row.
    training : bool, optional
        When truthy, the starting position and the position after every move
        are recorded. When falsy, no rows are recorded.

    Returns
    -------
    (pandas.DataFrame, int)
        The frame has the columns ``game_id, ply, FEN, move, reward``: one row
        for the starting position (ply 0, empty move) plus one row per move
        played. ``FEN`` is the trimmed FEN from ``_position_key`` and ``move``
        is the SAN of the move that led to the position. ``reward`` is the
        same on every row: 1 if the game ended in checkmate and
        ``board.outcome().winner`` is truthy (White in python-chess), -1 for
        any other checkmate, 0 for every other ending. The second element of
        the tuple is that reward.

    Notes
    -----
    ``game_id`` and ``ply`` are written as strings.
    NOTE(review): confirm the staging table's column types before switching
    these to integers.
    """
    board = in_board if in_board is not None else chess.Board()
    sequence_no = 0

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end; concatenating a one-row frame per ply is quadratic.
    records = []

    if training:
        # Recorded before the loop so the starting position is kept even when
        # the supplied board is already finished.
        records.append([str(game_id), str(sequence_no), _position_key(board), "", 0])

    while not _game_finished(board):
        sequence_no += 1

        # Pick uniformly among the legal moves using the move objects rather
        # than parsing the textual repr of the generator.
        move = random.choice(list(board.legal_moves))
        next_move = board.san(move)  # SAN depends on the position before the push
        board.push(move)

        if training:
            records.append(
                [str(game_id), str(sequence_no), _position_key(board), next_move, 0]
            )

    reward = 0
    if board.is_checkmate():
        reward = 1 if board.outcome().winner else -1

    game_df = pd.DataFrame(
        records, columns=['game_id', 'ply', 'FEN', 'move', 'reward']
    )
    # Size the reward column from the rows actually recorded so it can never
    # disagree with the frame length (e.g. when nothing was recorded).
    game_df['reward'] = np.full(len(game_df), reward)

    return game_df, reward

In [19]:
num_games = 10000    # DISTINCT FEN STATES BEFORE: 28 413 240
batch_size = 200     # number of games accumulated before each bulk load

out_reward = 0
start_time = time.time()

# Set the game_id by looking up the last used game_id.
game_id_cursor = execute_sql("SELECT MAX(game_id) FROM " + replay_buffer_table + ";")
last_game_id = game_id_cursor.fetchall()[0][0]
# MAX() comes back as NULL (None) when the table is empty; numbering then
# starts at 1 instead of failing inside int().
in_game_id = (int(last_game_id) if last_game_id is not None else 0) + 1

# Frames of the games played since the last load. Kept local to this cell so a
# re-run never picks up rows left behind by an interrupted earlier run.
batch_frames = []

for counter in range(num_games):
    out_replay_buffer_df, out_reward = play_game(None, in_game_id, training=True)
    print("Reward for game " + str(in_game_id) + " was " + str(out_reward) + ".")

    # Increment the game_id
    in_game_id += 1

    batch_frames.append(out_replay_buffer_df)

    if len(batch_frames) == batch_size or counter == num_games - 1:
        games_in_batch = len(batch_frames)

        # Build the batch frame once per load instead of growing it per game.
        batch_df = pd.concat(batch_frames, ignore_index=True)

        # Load the batch to the database.
        fastload(df=batch_df, table_name=replay_buffer_staging_table, if_exists='append')

        # Empty the batch.
        batch_frames = []
        batch_df = batch_df.iloc[0:0]

        batch_duration = time.time() - start_time
        # Average over the games actually in this batch; the final batch can
        # be shorter than batch_size.
        print(str(counter+1) + " of " + str(num_games) + " completed. Last batch duration was " + str(batch_duration) + " with average game duration of " + str(batch_duration/games_in_batch) + ".")
        start_time = time.time()


Reward for game 80000 was 0.
Reward for game 80001 was 0.
Reward for game 80002 was 0.
Reward for game 80003 was -1.
Reward for game 80004 was 0.
Reward for game 80005 was 0.
Reward for game 80006 was 0.
Reward for game 80007 was 0.
Reward for game 80008 was 1.
Reward for game 80009 was 0.
Reward for game 80010 was 0.
Reward for game 80011 was 0.
Reward for game 80012 was 0.
Reward for game 80013 was 1.
Reward for game 80014 was 1.
Reward for game 80015 was 0.
Reward for game 80016 was 0.
Reward for game 80017 was 0.
Reward for game 80018 was 0.
Reward for game 80019 was -1.
Reward for game 80020 was 0.
Reward for game 80021 was 1.
Reward for game 80022 was 0.
Reward for game 80023 was 0.
Reward for game 80024 was 1.
Reward for game 80025 was 0.
Reward for game 80026 was 0.
Reward for game 80027 was -1.
Reward for game 80028 was 0.
Reward for game 80029 was 0.
Reward for game 80030 was 0.
Reward for game 80031 was 0.
Reward for game 80032 was 0.
Reward for game 80033 was 1.
Reward for 

In [20]:
# Run the UpdateProcessBatch stored procedure on the database.
# NOTE(review): presumably this moves the rows staged in replay_buffer_stage
# into replay_buffer — confirm against the procedure definition.
execute_sql("CALL UpdateProcessBatch()")

TeradataCursor uRowsHandle=4534 bClosed=False

In [21]:
# Tear down the teradataml context first.
# NOTE(review): remove_context() is expected to run its own cleanup over the
# live connection, which is why it goes before the explicit close — confirm
# against the teradataml documentation.
context_removed = remove_context()

# close() has to be called; the bare attribute reference `conn.close` is a
# no-op. Closing an already-closed connection is harmless.
conn.close()

# Keep the result of remove_context() as the cell's displayed value.
context_removed

True