In [None]:
import tflearn
import tensorflow as tf

import pandas as pd
from sklearn import preprocessing
import chess_game as cg
import numpy as np
import time


In [None]:
#setup for NN
#if loading saved NN, must setup network same as before
#current method of loading save only loads the values of the variables; everything has to be set up before that
# NOTE: the conv3 branch fix below changes the shape of that layer's weights, so
# checkpoints written with the previous graph will not restore into this one.

# Graph inputs: an 8x8 board with 8 feature channels, a 6-element auxiliary
# vector, and a 3-element label vector (one column per outcome class).
board = tflearn.layers.core.input_data(shape=(None, 8, 8, 8), name='board_input')
aux = tflearn.layers.core.input_data(shape=(None, 6), name='aux_input')
Y = tflearn.layers.core.input_data(shape=(None, 3), name='labels')

# Branch: two stacked 4x4 convolutions.
conv4 = tflearn.conv_2d(board, 64, [4,4], activation='relu')
conv4 = tflearn.conv_2d(conv4, 128, [4,4], activation='relu')

# Branch: 3x3 convolution followed by a 4x4 convolution.
# The second layer previously took `board` as input, which silently discarded
# the 3x3 layer; it now consumes the first layer's output like the other branches.
conv3 = tflearn.conv_2d(board, 64, [3,3], activation='relu')
conv3 = tflearn.conv_2d(conv3, 128, [4,4], activation='relu')

# Branch: 2x2 convolution followed by a 4x4 convolution.
conv2 = tflearn.conv_2d(board, 64, [2,2], activation='relu')
conv2_4 = tflearn.conv_2d(conv2, 128, [4,4], activation='relu')

# Branches with 1x8 and 8x1 kernels (one spanning a full row, one a full column).
ranks = tflearn.conv_2d(board, 64, [1,8], activation='relu')
files = tflearn.conv_2d(board, 64, [8,1], activation='relu')


# note: this is how to get separate branches of a larger NN and then have them converge
fc1 = tflearn.layers.core.fully_connected(tf.concat([conv4, conv3, conv2_4, ranks, files], 3), n_units=2000, activation='relu')

# The auxiliary vector joins the network after the first dense layer.
fc2 = tflearn.layers.core.fully_connected(tf.concat([fc1, aux], 1), n_units=1000, activation='relu')

fc3 = tflearn.layers.core.fully_connected(fc2, n_units=300, activation='relu')

# Keep the raw scores separate from the probabilities: the cross-entropy op
# applies softmax itself, so it must be given logits, while `net` (used by the
# move-selection code) stays a softmax distribution as before.
logits = tflearn.layers.core.fully_connected(fc3, n_units=3, activation='linear')
net = tf.nn.softmax(logits)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
# Fraction of rows whose most likely class matches the largest label entry.
# (tf.metrics.accuracy compared probabilities with labels element-wise and
# returned a (value, update_op) pair that needs local-variable initialization.)
percent_correct = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(net, 1), tf.argmax(Y, 1)), tf.float32))
optimizer = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(loss)

init = tf.global_variables_initializer()

In [None]:
# Open a TensorFlow session and run the variable initializer built in the graph cell.
sess = tf.Session()
sess.run(init)

In [None]:
# Self-play schedule: each round plays `num_games` games of at most
# `max_moves` plies, then trains on positions sampled from those games.
n_rounds = 1000

num_games = 100
max_moves = 30

# Training schedule per round: `n_epochs` x `n_batches` steps of `n_batch` samples.
n_epochs = 5
n_batches = 101
n_batch = 100

# playing games loop

for ii in range(n_rounds):
    # Log of this round's games; rows come from g.log, whose three columns are
    # read below as (board input, aux input, label).
    master_log = np.empty((0,3))
    for j in range(num_games):
        print(j)
        g = cg.saved_game()
        for k in range(max_moves):

            move_inputs = g.get_next_NN_inputs(dothreats=False)
            if len(move_inputs) == 0:
                # Nothing to evaluate (presumably no legal moves) - stop this game
                # instead of feeding an empty batch to the network.
                break

            batch_board = np.array([ele[0] for ele in move_inputs])
            batch_aux = np.array([ele[1] for ele in move_inputs])

            evaluation = sess.run(net, feed_dict={board: batch_board,
                                                  aux: batch_aux})

            # Column 0 is maximized when white is to move, column 2 otherwise.
            ind = 0 if g.current_position().white_tomove else 2
            chosen_movenum = np.argmax(evaluation[:, ind])

            g.make_move(chosen_movenum, dothreats=False)

        # These two statements used to sit outside the per-game loop, so only the
        # last game of every round was finished and logged; now every game is.
        g.finish_game() #calculates winner based on material if no checkmate

        master_log = np.append(master_log, g.log, axis=0)

    if master_log.shape[0] == 0:
        # np.random.choice cannot sample from an empty log.
        print("No positions logged this round; skipping training.")
        continue

    print("Training...")
    for epoch in range(n_epochs):
        for i in range(n_batches):
            # Sample a minibatch of logged positions (with replacement).
            boards_selected = master_log[np.random.choice(master_log.shape[0], n_batch), :]
            batch_board = np.stack(boards_selected[:, 0])
            batch_aux = np.stack(boards_selected[:, 1])
            batch_ys = np.stack(boards_selected[:, 2])

            if i % 20 == 0:
                # Report the loss on the current minibatch before the update step.
                cost = sess.run(loss, feed_dict={board: batch_board,
                                                 aux: batch_aux,
                                                 Y: batch_ys})
                print("Epoch:", '%03d' % (epoch + 1), "Step:", '%03d' % i,
                      "Loss:", str(cost))

            sess.run(optimizer, feed_dict={board: batch_board,
                                           aux: batch_aux,
                                           Y: batch_ys})

# Checkpoint the trained variables once all rounds have finished.
saver = tf.train.Saver()
saver.save(sess, 'trained_NN_no_threats')
    

In [None]:
# Scratch check: shape of the most recently built board batch after stacking.
np.stack(batch_board).shape

In [None]:
# Scratch check: shape of the most recently built auxiliary-input batch.
batch_aux.shape

In [None]:
# training loop
for j in range(100):
    all_games = np.empty((0,4))

    print('Playing games...')
    for gamenum in range(0, 5):
        print(gamenum)
        a = cg.game_board()
        b = a.copy()
        board_positions_ar = np.empty((0, 8, 8, 10))
        aux_ar = np.empty((0,6))
        move_info = np.empty((0,2))
        score = None
        for i in range(50): #do first 30 moves for each side
            #startingtime  = time.time()
            a = get_next_board(a, sess)
            color = 'W' if a.white_tomove else 'B'
            nextboard_inputs = a.get_NN_inputs(color)
            board_positions_ar = np.append(board_positions_ar, [nextboard_inputs[0]], axis=0)
            move_info = np.append(move_info, np.array([[gamenum, i]]), axis=0)
            aux_ar = np.append(aux_ar, [nextboard_inputs[1]], axis=0)
            game_end = a.game_over()
            
            #print(time.time() - startingtime)

            if game_end[0]:
                score = game_end[0]
                break

        if score:
            score = int((-2)*score + 2)
        else:
            material = a.to_csv_format()[-2:]
            if material[0] > material[1]:
                score = 0
            elif material[1] > material[0]:
                score = 2
            else:
                score = 1
        print(score)
        score_onehot = np.zeros((board_positions_ar.shape[0],3))
        score_onehot[:,score] = move_info[:,1]/move_info[:,1].size
        score_onehot[1::2, :] = score_onehot[1::2, ::-1]
        print(score_onehot)

        game_array = np.array([board_positions_ar, 
                               aux_ar, 
                               score_onehot, 
                               move_info, np.array([])])[:4] #adding empty array then cutting it off
                                                             # is done because otherwise numpy errors out
        xp = np.array([[game_array[j][i] for j in range(4)] for i in range(i)])

        all_games = np.append(all_games, xp, axis=0)

    print("Training...")
    for epoch in range(1):  # 1 epochs
        for i in range(61):
            boards_selected = all_games[np.random.choice(range(all_games.shape[0]), 100), :]
            batch_board = np.stack(boards_selected[:, 0])
            batch_aux = np.stack(boards_selected[:, 1])
            batch_ys = np.stack(boards_selected[:, 2])
            
            if i % 20 == 0:
                cost = sess.run(loss, feed_dict={board: batch_board, 
                                              aux: batch_aux, 
                                              Y: batch_ys})
                #acc = sess.run(percent_correct,feed_dict={board: batch_board, 
                #                              aux: batch_aux, 
                #                              wthreats: batch_wthreats, 
                #                              bthreats: batch_bthreats,
                #                              Y: batch_ys})
                print("Epoch:", '%03d' % (epoch + 1), "Step:", '%03d' % i,
                      "Loss:", str(cost))#, "accuracy: ", str(acc))
                
            sess.run(optimizer, feed_dict={board: batch_board, 
                                          aux: batch_aux, 
                                          Y: batch_ys})
     

In [None]:
def get_next_board(game_board, sess):
    """Pick the next position for ``game_board`` using the network.

    Every position returned by ``game_board.find_all_next_board_positions()``
    is evaluated in one batch through the global ``net`` tensor, and the
    candidate with the smallest value in the mover's column is returned
    (column 0 when ``game_board.white_tomove`` is true, column 2 otherwise).

    Parameters
    ----------
    game_board : position object exposing ``white_tomove`` and
        ``find_all_next_board_positions()``.
    sess : session whose ``run(fetch, feed_dict=...)`` evaluates ``net``.

    Returns
    -------
    The chosen element of the candidate position list.

    Raises
    ------
    ValueError
        If there are no candidate positions to choose from.
    """
    next_boards = game_board.find_all_next_board_positions()
    if len(next_boards) == 0:
        raise ValueError("get_next_board: no next board positions available")

    color = 'W' if game_board.white_tomove else 'B'
    next_inputs = [ele.get_NN_inputs(color) for ele in next_boards]

    batch_board = np.array([ele[0] for ele in next_inputs])
    batch_aux = np.array([ele[1] for ele in next_inputs])
    evaluation = sess.run(net, feed_dict={board: batch_board,
                                          aux: batch_aux})

    # Use the position passed in, not the notebook-global `a` the previous
    # version read here (which only worked when callers happened to pass `a`).
    column = 0 if game_board.white_tomove else 2
    # NOTE(review): argsort is ascending, so this selects the LOWEST-scoring
    # candidate in that column - confirm this is the intended direction.
    chosen_movenum = np.argsort(evaluation[:, column])[0]
    return next_boards[chosen_movenum]

In [None]:
# Save the current session's variables to a checkpoint.
# NOTE(review): the target is an absolute, machine-specific path; it will only
# work where that directory exists.
saver = tf.train.Saver()
saver.save(sess, '/Users/pschale/pythonstuff/chess_ai_project/bigger_saved_CNN')

In [None]:
# Scratch check: view the first recorded position as an 8x8x8 array.
# NOTE(review): this only succeeds if each recorded position holds 512 values.
np.reshape(board_positions_ar[0,:,:], (8,8,8))

In [None]:
def un_onehot(board_ar):
    """Turn a one-hot encoded board back into a ``cg.game_board``.

    Channels are read as follows: channel 0 marks squares that get an
    upper-case letter and channel 1 squares that get a lower-case letter
    (presumably White and Black respectively); channels 2 through 7 mark
    king, queen, rook, bishop, knight and pawn. Squares matching none of
    the combinations stay as a single space.

    Returns a fresh ``cg.game_board()`` whose ``board`` attribute has been
    replaced by the decoded 8x8 array of one-character strings.
    """
    squares = np.full((8, 8), " ", dtype='str')
    piece_letters = "KQRBNP"

    # Upper-case pass first, then lower-case, each walking channels 2..7 in order.
    for colour_channel, to_case in ((0, str.upper), (1, str.lower)):
        is_colour = board_ar[:, :, colour_channel] == 1
        for offset, letter in enumerate(piece_letters):
            is_piece = board_ar[:, :, offset + 2] == 1
            squares[np.logical_and(is_piece, is_colour)] = to_case(letter)

    decoded = cg.game_board()
    decoded.board = squares
    return decoded

In [None]:
# Scratch check: decode the last recorded position and print its side to move,
# the result of check_check('W'), the board itself, and its CSV representation.
f = un_onehot(board_positions_ar[-1,:,:])
print(f.white_tomove)
print(f.check_check('W'))
print(f)
print(f.to_csv_format())
#f.find_all_next_board_positions()[0].find_threats_from_piece('K', 'W')
#f.find_all_next_board_positions()[0].find_threats_from_piece('K', 'W')

In [None]:
def evaluate_board(sess, bp):
    """Run the network on a single position and print the position and result.

    Parameters
    ----------
    sess : session whose ``run(fetch, feed_dict=...)`` evaluates ``net``.
    bp : position object exposing ``get_NN_inputs()``, which returns the
        (board input, aux input) pair for that position.

    Prints ``bp`` followed by the network output for it; returns nothing.
    """
    bp_board, bp_aux = bp.get_NN_inputs()
    # The graph only has the `board` and `aux` inputs; the `wthreats` /
    # `bthreats` placeholders this function used to feed do not exist, so
    # the old feed_dict raised NameError.
    ev = sess.run(net, feed_dict={board: [bp_board],
                                  aux: [bp_aux]})
    print(bp)
    print(ev)

In [None]:
# Load the stored test positions; later cells read a 'winner' column and pass
# the remaining columns (from index 1 on) to cg.game_board as `csvstr`.
testbs = pd.read_csv('test_board_positions.csv',index_col=None)

In [None]:
# Rebuild test position `gnum` from its CSV row, evaluate it with the network,
# and show the winner recorded for that row.
gnum = 1
# DataFrame.as_matrix() was removed in pandas 1.0; `.values` yields the same
# array and works on both old and new pandas versions.
tb = cg.game_board(gametype='saved', csvstr=testbs.values[gnum,1:])
evaluate_board(sess, tb)
testbs['winner'][gnum]

In [None]:
# Print the test position built in the previous cell.
print(tb)

In [None]:
# Evaluate the same test position again with the current network weights.
evaluate_board(sess, tb)

In [None]:
# Advance the global position `a` by one network-chosen move and print it.
# Not idempotent: every run of this cell plays another move.
a = get_next_board(a, sess)
print(a)

In [None]:
# Play one more network-chosen move from `a` and print the resulting position.
a = get_next_board(a, sess)
print(a)

In [None]:
# Play one more network-chosen move from `a` and print the resulting position.
a = get_next_board(a, sess)
print(a)

In [None]:
# Play one more network-chosen move from `a` and print the resulting position.
a = get_next_board(a, sess)
print(a)

In [None]:
# Play one more network-chosen move from `a` and print the resulting position.
a = get_next_board(a, sess)
print(a)

In [None]:
# Play one more network-chosen move from `a` and print the resulting position.
a = get_next_board(a, sess)
print(a)

In [None]:
# Play one more network-chosen move from `a` and print the resulting position.
a = get_next_board(a, sess)
print(a)

In [None]:
# plays a game and saves the game to a numpy file
# the format it's saved in probably isn't ideal for training
# to avoid getting stuck in dumb loops, this randomly picks among 5 candidate moves
# NOTE(review): np.argsort is ascending, so the 5 candidates are the LOWEST-scoring
# moves in the mover's column - confirm that this is the intended direction.

# One row per recorded position: (board input, aux input, label, move info).
all_games = np.empty((0,4))

for gamenum in range(0, 10):
    print(gamenum)
    a = cg.game_board()
    b = a.copy()
    # NOTE(review): positions are collected as 64x8 and aux vectors as length 7,
    # while the `board` / `aux` placeholders are declared as 8x8x8 and length 6 -
    # confirm which layout get_NN_inputs() produces.
    board_positions_ar = np.empty((0, 64, 8))
    aux_ar = np.empty((0,7))
    move_info = np.empty((0,2))
    score = None
    for i in range(30): # play at most 30 plies per game
        next_boards = a.find_all_next_board_positions()
        next_inputs = [ele.get_NN_inputs() for ele in next_boards]
        batch_board = np.array([ele[0] for ele in next_inputs])
        batch_aux = np.array([ele[1] for ele in next_inputs])
        evaluation = sess.run(net, feed_dict={board: batch_board, aux: batch_aux})
        # Column 0 when white is to move, column 2 otherwise.
        chosen_movenum = np.random.choice(np.argsort(evaluation[:, -2*(int(a.white_tomove) - 1)])[:5])
        a = next_boards[chosen_movenum]

        nextboard_inputs = a.get_NN_inputs()
        board_positions_ar = np.append(board_positions_ar, [nextboard_inputs[0]], axis=0)
        move_info = np.append(move_info, np.array([[gamenum, i]]), axis=0)
        aux_ar = np.append(aux_ar, [nextboard_inputs[1]], axis=0)
        game_end = a.game_over()
        # Keep the two most recent positions in `b` (latest) and `c` (one before)
        # so that later cells can inspect them.
        c = b.copy()
        b = a.copy()
        if game_end[0]:
            score = game_end[0]
            break

    if score:
        # Map the game_over() result onto a label column index.
        score = int((-2)*score + 2)
    else:
        # No decisive result: compare the last two fields of the CSV
        # representation (used here as the two sides' material).
        material = a.to_csv_format()[-2:]
        if material[0] > material[1]:
            score = 0
        elif material[1] > material[0]:
            score = 2
        else:
            score = 1

    score_onehot = np.zeros((board_positions_ar.shape[0],3))
    score_onehot[:,score] = 1

    # Pack one object-array row per recorded position. The row count comes from
    # the arrays themselves: the earlier `range(i)` reused the ply index as the
    # bound and dropped the last position of every game. Filling a preallocated
    # object array also avoids building a ragged np.array, which recent NumPy
    # versions reject.
    n_positions = board_positions_ar.shape[0]
    xp = np.empty((n_positions, 4), dtype=object)
    for row in range(n_positions):
        xp[row, 0] = board_positions_ar[row]
        xp[row, 1] = aux_ar[row]
        xp[row, 2] = score_onehot[row]
        xp[row, 3] = move_info[row]

    all_games = np.append(all_games, xp, axis=0)

#print(a)

np.save('ai_games/ai_game_3', all_games)

In [None]:
# Scratch check: result of check_check('B') on the most recent position copy `b`.
b.check_check('B')

In [None]:
# Print every position reachable from `c`. A plain loop replaces the list
# comprehension, which was used only for its side effects and made the cell
# echo a list of None values.
for ele in c.find_all_next_board_positions():
    print(ele)

In [None]:
# Scratch check: list the legal moves reported for position `c`.
c.find_all_legal_moves()

In [None]:
# starting session and loading variables
#saver = tf.train.Saver()

#sess = tf.Session()
#saver.restore(sess, "/Users/pschale/pythonstuff/chess_ai_project/test_saved_CNN")
#saver.restore(sess, "./test_saved_CNN")

In [None]:
# Reset the game log and seed it with `xp`, the rows left over from the most
# recently run game-generation cell (depends on that cell having been executed).
all_games = np.empty((0,4))
all_games = np.append(all_games, xp, axis=0)



In [None]:
# Scratch check: number of logged rows and columns.
all_games.shape

In [None]:
# Timing experiment: run a few single-batch training steps on `all_games` and
# print how long each optimizer step plus loss evaluation takes.
for epoch in range(3):  # 3 epochs
    for i in range(1):  # one batch per epoch
        # Sample 100 logged rows (with replacement); columns 0/1/2 are used as
        # board input, aux input and label.
        boards_selected = all_games[np.random.choice(range(all_games.shape[0]), 100), :]
        batch_board = np.stack(boards_selected[:, 0])
        batch_aux = np.stack(boards_selected[:, 1])
        batch_ys = np.stack(boards_selected[:, 2])
        t = time.time()
        sess.run(optimizer, feed_dict={board: batch_board, aux: batch_aux, Y: batch_ys})
        # Loss is evaluated after the update, on the same batch.
        cost = sess.run(loss, feed_dict={board: batch_board, aux: batch_aux, Y: batch_ys})
        print(time.time() - t)
        if i % 20 == 0:
            print("Epoch:", '%03d' % (epoch + 1), "Step:", '%03d' % i,
                  "Loss:", str(cost))

In [None]:
# Persist the current game log to ai_games/ai_game_1 (np.save appends ".npy").
np.save('ai_games/ai_game_1', all_games)

In [None]:
# Scratch check: shape of the game log that was just saved.
all_games.shape