In [None]:
import tflearn
import tensorflow as tf

import pandas as pd
from sklearn import preprocessing
import chess_game as cg
import numpy as np
import time


In [None]:
# Network setup.
# When restoring a saved network, the graph must be built exactly as it was when
# the checkpoint was written: restoring only loads variable values, it does not
# rebuild the graph.
# NOTE(review): the conv3 branch below was rewired (see comment there), which
# changes one kernel shape; checkpoints saved from the previous graph will not
# restore into this one.

# Placeholders fed at run time.
board = tflearn.layers.core.input_data(shape=(None, 64, 8), name='board_input')
aux = tflearn.layers.core.input_data(shape=(None, 7), name='aux_input')
Y = tflearn.layers.core.input_data(shape=(None, 3), name='labels')
wthreats = tflearn.layers.core.input_data(shape=(None, 8, 8), name='white_threats')
bthreats = tflearn.layers.core.input_data(shape=(None, 8, 8), name='black_threats')

# Lay the 64 squares out as an 8x8 grid with 8 channels, and append the two
# threat maps as extra channels (10 channels in total).
board_input_reshaped = tf.reshape(board, [-1, 8, 8, 8])
w_threats = tf.reshape(wthreats, [-1, 8, 8, 1])
b_threats = tf.reshape(bthreats, [-1, 8, 8, 1])

board_input_with_threats = tf.concat([board_input_reshaped, w_threats, b_threats], 3)

# Parallel convolutional branches with different first-layer kernel sizes.
conv4 = tflearn.conv_2d(board_input_with_threats, 64, [4,4], activation='relu')
conv4 = tflearn.conv_2d(conv4, 128, [4,4], activation='relu')

conv3 = tflearn.conv_2d(board_input_with_threats, 64, [3,3], activation='relu')
# Fixed: the second layer used to take board_input_with_threats again, which
# discarded the 3x3 layer above; it now stacks on top of it like the other branches.
conv3 = tflearn.conv_2d(conv3, 128, [4,4], activation='relu')

conv2 = tflearn.conv_2d(board_input_with_threats, 64, [2,2], activation='relu')
conv2_4 = tflearn.conv_2d(conv2, 128, [4,4], activation='relu')

# Full-width and full-height kernels over the board channels only (no threat maps).
ranks = tflearn.conv_2d(board_input_reshaped, 64, [1,8], activation='relu')
files = tflearn.conv_2d(board_input_reshaped, 64, [8,1], activation='relu')


# note: this is how to get separate branches of a larger NN and then have them converge
fc1 = tflearn.layers.core.fully_connected(tf.concat([conv4, conv3, conv2_4, ranks, files], 3), n_units=2000, activation='relu')

# The auxiliary features join the network after the first dense layer.
fc2 = tflearn.layers.core.fully_connected(tf.concat([fc1, aux], 1), n_units=1000, activation='relu')

fc3 = tflearn.layers.core.fully_connected(fc2, n_units=300, activation='relu')

# Fixed: the output layer used to apply softmax and its result was then passed as
# `logits` to softmax_cross_entropy_with_logits_v2, which applies softmax again.
# The loss now sees the raw logits; `net` is still the softmax probabilities.
logits = tflearn.layers.core.fully_connected(fc3, n_units=3, activation='linear')
net = tf.nn.softmax(logits)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.000001).minimize(loss)

init = tf.global_variables_initializer()

In [None]:
# Open a TF1 session and run the variable initializer op (`init`) created in the
# network setup cell.
sess = tf.Session()
sess.run(init)

In [None]:
def get_next_board(game_board, sess):
    """Choose the next position from ``game_board`` using the network's evaluation.

    Every position returned by ``game_board.find_all_next_board_positions()`` is
    scored by ``net`` in a single batch. The move is then drawn uniformly at
    random from the two candidates with the highest score in the output column
    belonging to the side to move.

    Args:
        game_board: current position; must provide
            ``find_all_next_board_positions()`` and ``white_tomove``.
        sess: TensorFlow session used to evaluate ``net``.

    Returns:
        Tuple ``(next_board, white_threats, black_threats)`` for the chosen
        candidate, where the threat maps are the two items returned by that
        candidate's ``count_threats()``.

    Raises:
        ValueError: if ``game_board`` has no next positions.
    """
    next_boards = game_board.find_all_next_board_positions()
    if len(next_boards) == 0:
        raise ValueError("get_next_board: no next positions available")

    next_inputs = [candidate.get_NN_inputs() for candidate in next_boards]
    threats = [candidate.count_threats() for candidate in next_boards]
    batch_wthreats = [pair[0] for pair in threats]
    batch_bthreats = [pair[1] for pair in threats]

    batch_board = np.array([inputs[0] for inputs in next_inputs])
    batch_aux = np.array([inputs[1] for inputs in next_inputs])

    evaluation = sess.run(net, feed_dict={board: batch_board,
                                          aux: batch_aux,
                                          wthreats: batch_wthreats,
                                          bthreats: batch_bthreats})

    # Output column 0 is read when white is to move, column 2 when black is.
    # Fixed: this used the notebook-global `a` instead of the `game_board` argument.
    mover_column = -2*(int(game_board.white_tomove) - 1)

    # Fixed: argsort is ascending, so taking the first entries selected the two
    # *lowest*-scored moves; reverse it to rank the best candidates first.
    ranked = np.argsort(evaluation[:, mover_column])[::-1]
    chosen_movenum = np.random.choice(ranked[:2])
    return next_boards[chosen_movenum], batch_wthreats[chosen_movenum], batch_bthreats[chosen_movenum]

In [None]:
# Self-play training loop.
# Each outer iteration plays a batch of games with the current network, labels
# every recorded position with that game's outcome class, and then trains on
# random mini-batches drawn from those positions.

def _pack_position_rows(*fields):
    """Pack parallel per-position sequences into an object array, one row per position."""
    n_rows = len(fields[0])
    rows = np.empty((n_rows, len(fields)), dtype=object)
    for col, values in enumerate(fields):
        for row in range(n_rows):
            rows[row, col] = values[row]
    return rows


for j in range(100):
    game_rows = []

    print('Playing games...')
    for gamenum in range(0, 50):
        print(gamenum)
        a = cg.game_board()
        b = a.copy()  # not read in this cell; kept because later cells use `b`
        boards, auxes, w_maps, b_maps, moves = [], [], [], [], []
        score = None
        for i in range(60):  # at most 60 plies, i.e. 30 moves for each side
            a, wt, bt = get_next_board(a, sess)

            nextboard_inputs = a.get_NN_inputs()
            boards.append(nextboard_inputs[0])
            auxes.append(nextboard_inputs[1])
            w_maps.append(wt)
            b_maps.append(bt)
            moves.append([gamenum, i])

            game_end = a.game_over()
            if game_end[0]:
                score = game_end[0]
                break

        # Collected in lists and converted once, instead of growing arrays with
        # np.append on every ply.
        board_positions_ar = np.asarray(boards, dtype=float)
        aux_ar = np.asarray(auxes, dtype=float)
        w_threats_ar = np.asarray(w_maps, dtype=float)
        b_threats_ar = np.asarray(b_maps, dtype=float)
        move_info = np.asarray(moves, dtype=float)

        if score:
            # Map the result reported by game_over() onto a class index.
            # NOTE(review): the result encoding lives in chess_game — confirm.
            score = int((-2)*score + 2)
        else:
            # Game not finished within the ply limit: decide by comparing the last
            # two entries of to_csv_format() (presumably white/black material — confirm).
            material = a.to_csv_format()[-2:]
            if material[0] > material[1]:
                score = 0
            elif material[1] > material[0]:
                score = 2
            else:
                score = 1

        n_positions = board_positions_ar.shape[0]
        score_onehot = np.zeros((n_positions, 3))
        # Fixed: the label weight was computed from column 0 (the game number),
        # which made every label of game 0 zero. Column 1 is the ply index, so the
        # weight now grows through the game.
        # NOTE(review): the first ply still gets weight 0 — confirm this is intended.
        score_onehot[:, score] = move_info[:, 1] / n_positions

        # Fixed: rows used to be built with `range(i)`, reusing the leaked ply index
        # and dropping the last position of every game; the ragged np.array([...])
        # construction also fails on recent NumPy versions.
        xp = _pack_position_rows(board_positions_ar,
                                 aux_ar,
                                 w_threats_ar,
                                 b_threats_ar,
                                 score_onehot,
                                 move_info)
        game_rows.append(xp)

    # One object row per position:
    # (board, aux, white threats, black threats, label, move info).
    all_games = np.concatenate(game_rows, axis=0)

    print("Training...")
    for epoch in range(1):  # a single pass of mini-batch updates
        for step in range(61):
            picks = np.random.choice(all_games.shape[0], 100)
            boards_selected = all_games[picks, :]
            feed = {board: np.stack(boards_selected[:, 0]),
                    aux: np.stack(boards_selected[:, 1]),
                    wthreats: np.stack(boards_selected[:, 2]),
                    bthreats: np.stack(boards_selected[:, 3]),
                    Y: np.stack(boards_selected[:, 4])}
            # Fetch the loss in the same run as the update so the batch goes
            # through the network once instead of twice.
            cost, _ = sess.run([loss, optimizer], feed_dict=feed)
            if step % 20 == 0:
                print("Epoch:", '%03d' % (epoch + 1), "Step:", '%03d' % step,
                      "Loss:", str(cost))
     

In [None]:
# Create a new game board; `a` is the position advanced by the stepping cells.
a = cg.game_board()

In [None]:
# get_next_board returns (board, white_threats, black_threats); keep only the
# board so `a` remains a position that the next call can advance.
a = get_next_board(a, sess)[0]
print(a)

In [None]:
# get_next_board returns (board, white_threats, black_threats); keep only the
# board so `a` remains a position that the next call can advance.
a = get_next_board(a, sess)[0]
print(a)

In [None]:
# get_next_board returns (board, white_threats, black_threats); keep only the
# board so `a` remains a position that the next call can advance.
a = get_next_board(a, sess)[0]
print(a)

In [None]:
# get_next_board returns (board, white_threats, black_threats); keep only the
# board so `a` remains a position that the next call can advance.
a = get_next_board(a, sess)[0]
print(a)

In [None]:
# get_next_board returns (board, white_threats, black_threats); keep only the
# board so `a` remains a position that the next call can advance.
a = get_next_board(a, sess)[0]
print(a)

In [None]:
# get_next_board returns (board, white_threats, black_threats); keep only the
# board so `a` remains a position that the next call can advance.
a = get_next_board(a, sess)[0]
print(a)

In [None]:
# get_next_board returns (board, white_threats, black_threats); keep only the
# board so `a` remains a position that the next call can advance.
a = get_next_board(a, sess)[0]
print(a)

In [None]:
# Plays a batch of games with the current network and saves every recorded
# position to a NumPy file.
# The saved layout (one object row per position: board, aux, one-hot outcome,
# [game number, ply]) probably isn't ideal for training.
# To avoid getting stuck in dumb loops, each move is picked at random from the
# five best-scoring candidates.

game_rows = []

for gamenum in range(0, 10):
    print(gamenum)
    a = cg.game_board()
    b = a.copy()
    boards, auxes, moves = [], [], []
    score = None
    for i in range(30):  # at most 30 plies, i.e. 15 moves for each side
        next_boards = a.find_all_next_board_positions()
        next_inputs = [ele.get_NN_inputs() for ele in next_boards]
        threats = [ele.count_threats() for ele in next_boards]
        batch_board = np.array([ele[0] for ele in next_inputs])
        batch_aux = np.array([ele[1] for ele in next_inputs])
        # Fixed: the graph also consumes the threat-map placeholders, so they
        # must be fed as well or the run fails.
        evaluation = sess.run(net, feed_dict={board: batch_board,
                                              aux: batch_aux,
                                              wthreats: [pair[0] for pair in threats],
                                              bthreats: [pair[1] for pair in threats]})
        # Output column 0 is read when white is to move, column 2 when black is.
        mover_scores = evaluation[:, -2*(int(a.white_tomove) - 1)]
        # Fixed: argsort is ascending, so the first five entries were the five
        # *worst* moves; reverse it to sample from the top five.
        chosen_movenum = np.random.choice(np.argsort(mover_scores)[::-1][:5])
        a = next_boards[chosen_movenum]

        nextboard_inputs = a.get_NN_inputs()
        boards.append(nextboard_inputs[0])
        auxes.append(nextboard_inputs[1])
        moves.append([gamenum, i])
        game_end = a.game_over()
        # `c` trails `b` by one ply; both are inspected by the debug cells below.
        c = b.copy()
        b = a.copy()
        if game_end[0]:
            score = game_end[0]
            break

    board_positions_ar = np.asarray(boards, dtype=float)
    aux_ar = np.asarray(auxes, dtype=float)
    move_info = np.asarray(moves, dtype=float)

    if score:
        # Map the result reported by game_over() onto a class index.
        # NOTE(review): the result encoding lives in chess_game — confirm.
        score = int((-2)*score + 2)
    else:
        # Game not finished within the ply limit: decide by comparing the last
        # two entries of to_csv_format() (presumably white/black material — confirm).
        material = a.to_csv_format()[-2:]
        if material[0] > material[1]:
            score = 0
        elif material[1] > material[0]:
            score = 2
        else:
            score = 1

    n_positions = board_positions_ar.shape[0]
    score_onehot = np.zeros((n_positions, 3))
    score_onehot[:,score] = 1

    # Fixed: rows used to be built with `range(i)`, which reused the leaked ply
    # index and dropped the last position of every game; the ragged
    # np.array([...]) construction also fails on recent NumPy versions.
    xp = np.empty((n_positions, 4), dtype=object)
    for col, values in enumerate((board_positions_ar, aux_ar, score_onehot, move_info)):
        for row in range(n_positions):
            xp[row, col] = values[row]
    game_rows.append(xp)

all_games = np.concatenate(game_rows, axis=0)

#print(a)

np.save('ai_games/ai_game_3', all_games)

In [None]:
# Debug: call check_check('B') on `b`, the copy of the latest position made in
# the game-playing cell (presumably tests whether black is in check — confirm
# against chess_game).
b.check_check('B')

In [None]:
# Debug: print every position reachable from `c`. A plain loop replaces the list
# comprehension, which was used only for its side effect and echoed a list of None.
for ele in c.find_all_next_board_positions():
    print(ele)

In [None]:
# Debug: display the result of find_all_legal_moves() for `c`, the copy that
# trails `b` by one ply in the game-playing cell.
c.find_all_legal_moves()

In [None]:
# Start a session and restore saved variables (the graph must already be built exactly as it was when the checkpoint was saved)
#saver = tf.train.Saver()

#sess = tf.Session()
#saver.restore(sess, "/Users/pschale/pythonstuff/chess_ai_project/test_saved_CNN")
#saver.restore(sess, "./test_saved_CNN")

In [None]:
# Reset all_games so it holds only the rows in `xp` (the most recently packed game).
# The (0, 4) start shape means `xp` must have four columns for the append to succeed.
all_games = np.empty((0,4))
all_games = np.append(all_games, xp, axis=0)



In [None]:
# Show (number of stored position rows, columns per row).
all_games.shape

In [None]:
# Train on random mini-batches drawn from `all_games`.
# Fixed: the graph consumes the white/black threat-map placeholders, so they must
# be fed too. They are only available in the six-column row layout
# (board, aux, white threats, black threats, label, move info); the four-column
# layout carries no threat maps and cannot be used with this network.
if all_games.shape[1] != 6:
    raise ValueError(
        "all_games must use the six-column layout "
        "(board, aux, white threats, black threats, label, move info); "
        "got %d columns" % all_games.shape[1])

for epoch in range(3):  # 3 epochs
    for i in range(1):
        picks = np.random.choice(all_games.shape[0], 100)
        boards_selected = all_games[picks, :]
        feed = {board: np.stack(boards_selected[:, 0]),
                aux: np.stack(boards_selected[:, 1]),
                wthreats: np.stack(boards_selected[:, 2]),
                bthreats: np.stack(boards_selected[:, 3]),
                Y: np.stack(boards_selected[:, 4])}
        t = time.time()
        # Loss and update are fetched in one run so the batch goes through the
        # network once; the reported loss is therefore the pre-update value.
        cost, _ = sess.run([loss, optimizer], feed_dict=feed)
        print(time.time() - t)
        if i % 20 == 0:
            print("Epoch:", '%03d' % (epoch + 1), "Step:", '%03d' % i,
                  "Loss:", str(cost))

In [None]:
# Write the current all_games array to ai_games/ai_game_1 (np.save appends the
# .npy extension).
np.save('ai_games/ai_game_1', all_games)

In [None]:
# Show (number of stored position rows, columns per row).
all_games.shape