In [1]:
from keras.layers import Input, Dense, Activation, Conv2D, Flatten,Concatenate, Dropout
from keras.models import Model
from keras.models import Sequential
from keras.optimizers import Adam
import copy
import numpy as np
import pandas as pd
from scipy import signal
%run defs_ttt.ipynb

Using TensorFlow backend.


We now define the model we will use.  It has 2 parts, a policy vector which gives probabilities for each move and an evaluation which is the expected value (-1,1) for player 1 in the given position.

In [2]:
np.random.seed(1)

board = Input(shape = (3,3,1),name = 'board')

F1 = Flatten()(board)
x = Dense(30, activation='relu')(F1)
x = Dropout(.2)(x)
x = Dense(30, activation='relu')(x)
x = Dropout(.2)(x)
x = Dense(30, activation='relu')(x)
x = Dropout(.2)(x)
x = Dense(9, activation='softmax')(x)

y = Dense(15, activation='relu')(F1)
y = Dropout(.2)(y)
y = Dense(15, activation='relu')(y)
y = Dropout(.2)(y)
y = Dense(15, activation='relu')(y)
y = Dropout(.2)(y)
y = Dense(1, activation='tanh')(y)

model = Model(inputs =[board],outputs = [x,y])

optimizer = Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-8)

model.compile(optimizer=optimizer, loss=['binary_crossentropy','mean_squared_error'],loss_weights=[1,10])


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Next we simulate games using the policy and evaluation from the model.  At each step we use a Monte Carlo search tree from the given position.  The search tree takes in the policy weighted by how often that move has been made for each move as well as the current score along that branch.  These are combined in a linear fashion and chooses the highest move.  It then makes that move.  If the move has already been made it repeats the above step until it reaches a leaf node.  It then evaluates the position on the leave node using the model and backs that up through the tree.  It repeats this process 50-3 x move_number times.  Then makes a move randomly with probability equal to the percentage of times each move was made.  Finally, the game is transformed using all elements of the dihedral four group (3 rotations and 4 reflection + rotations) and the model is trained.  Finally, this process is repeated multiple times with a testing against random play after each train.

In [3]:
test_scores = np.zeros((3,12))
total_games = np.zeros((1,12))
test_scores[:,0] = testing(model).flatten()
for n0 in range(11):
    b_size = 1+n0//2
    total_games[0,n0+1] = total_games[0,n0]+b_size
    for n1 in range(b_size):
        X_train1 = np.zeros((b_size*10,3,3,1),dtype = int)
        X_train2 = np.zeros((b_size*10,3,3,1),dtype = int)
        X_train3 = np.zeros((b_size*10,3,3,1),dtype = int)
        X_train4 = np.zeros((b_size*10,3,3,1),dtype = int)
        X_train5 = np.zeros((b_size*10,3,3,1),dtype = int)
        X_train6 = np.zeros((b_size*10,3,3,1),dtype = int)
        X_train7 = np.zeros((b_size*10,3,3,1),dtype = int)
        X_train8 = np.zeros((b_size*10,3,3,1),dtype = int)
        y_results = np.zeros((1,b_size*10))
        y_moves = np.zeros((b_size*10,9))
        l = 0

        for n1 in range(b_size):
            df = pd.DataFrame({'path':[[0]*9],'N': [np.zeros((1,9),dtype = int)], 'W': [np.zeros((1,9))],'player':0,'roll':[1]})

            X=np.zeros((3,3,2),dtype = 'int')

            X_game = np.zeros((10,3,3,2),dtype = int)
            X_move = np.zeros((9,10))
            result = np.zeros((1,10))


            turn = -1
            winner = 0
            while winner == 0:
                X_temp = copy.deepcopy(X)
                turn+=1
                dF = rollout2(X,model,df,num=50-3*np.sum(X))

                X_game[turn,:,:,:] = X
                dfN = dF[dF.apply(lambda row: list(row.path)==[0]*9,axis=1)].N.iloc[0]
                dfW = dF[dF.apply(lambda row: list(row.path)==[0]*9,axis=1)].W.iloc[0]
                X_move[:,turn] = dfN/np.sum(dfN)

                result[:,turn] = np.sum(dfW)/np.sum(dfN)
                pos = np.zeros((1,9),dtype='int')

                temp = np.random.choice(9, 1, p=(dfN/np.sum(dfN)).ravel())[0]
                pos[0,temp] +=1

                X[:,:,np.sum(X[:,:,0]-X[:,:,1])%2] += pos.reshape((3,3))
                winner = win_check(X,winner)
                if winner == 0:
                    pp = np.zeros((9),dtype='int')
                    pp[np.sum(X)-1] = temp+1

                    dF.path = dF.path - pp

                    df = dF[dF.apply(lambda row: row.path[np.sum(X)-1]==0,axis=1)]
                else:
                    pass

            X_train1[l:l+turn+2,:,:,:] = (X_game[0:turn+2,:,:,0]-X_game[0:turn+2,:,:,1]).reshape((turn+2,3,3,1))
            X_train2[l:l+turn+2,:,:,:] = (np.flip(X_game[0:turn+2,:,:,0]-X_game[0:turn+2,:,:,1],axis = 1)).reshape((turn+2,3,3,1))
            X_train3[l:l+turn+2,:,:,:] = (np.flip(X_game[0:turn+2,:,:,0]-X_game[0:turn+2,:,:,1],axis = 2)).reshape((turn+2,3,3,1))
            X_train4[l:l+turn+2,:,:,:] = (np.flip(np.flip(X_game[0:turn+2,:,:,0]-X_game[0:turn+2,:,:,1],axis = 2),axis=1)).reshape((turn+2,3,3,1))
            X_train5[l:l+turn+2,:,:,:] = (np.rot90(X_game[0:turn+2,:,:,0]-X_game[0:turn+2,:,:,1],axes = (1,2),k=1)).reshape((turn+2,3,3,1))
            X_train6[l:l+turn+2,:,:,:] = (np.rot90(X_game[0:turn+2,:,:,0]-X_game[0:turn+2,:,:,1],axes = (1,2),k=-1)).reshape((turn+2,3,3,1))
            X_train7[l:l+turn+2,:,:,:] = (np.rot90(np.flip(X_game[0:turn+2,:,:,0]-X_game[0:turn+2,:,:,1],axis = 2),axes=(1,2),k=1)).reshape((turn+2,3,3,1))
            X_train8[l:l+turn+2,:,:,:] = (np.rot90(np.flip(X_game[0:turn+2,:,:,0]-X_game[0:turn+2,:,:,1],axis = 2),axes=(1,2),k=-1)).reshape((turn+2,3,3,1))



            y_results[:,l:l+turn+2] = result[:,0:turn+2]
            y_moves[l:l+turn+2,:] = X_move[:,0:turn+2].T
            l = l+turn+2
    
    X_train = np.concatenate((X_train1[0:l,:].reshape((l,3,3,1)),X_train2[0:l,:].reshape((l,3,3,1)),
                              X_train3[0:l,:].reshape((l,3,3,1)),X_train4[0:l,:].reshape((l,3,3,1)),
                              X_train5[0:l,:].reshape((l,3,3,1)),X_train6[0:l,:].reshape((l,3,3,1)),
                              X_train7[0:l,:].reshape((l,3,3,1)),X_train8[0:l,:].reshape((l,3,3,1))))
    y_moves1 = y_moves[0:l,:]
    y_moves2 = np.flip(y_moves[0:l,:].reshape(l,3,3),axis=1).reshape((l,9))
    y_moves3 = np.flip(y_moves[0:l,:].reshape(l,3,3),axis=2).reshape((l,9))
    y_moves4 = np.flip(np.flip(y_moves[0:l,:].reshape(l,3,3),axis=2),axis=1).reshape((l,9))
    y_moves5 = np.rot90(y_moves[0:l,:].reshape(l,3,3),axes = (1,2),k=1).reshape((l,9))
    y_moves6 = np.rot90(y_moves[0:l,:].reshape(l,3,3),axes = (1,2),k=1).reshape((l,9))
    y_moves7 = np.rot90(np.flip(y_moves[0:l,:].reshape(l,3,3),axis=2),axes=(1,2),k=1).reshape((l,9))
    y_moves8 = np.rot90(np.flip(y_moves[0:l,:].reshape(l,3,3),axis=2),axes=(1,2),k=-1).reshape((l,9))
    Y_moves = np.concatenate((y_moves1,y_moves2,y_moves3,y_moves4,
                             y_moves5,y_moves6,y_moves7,y_moves8))
    Y_results = np.concatenate((y_results[:,0:l].T,y_results[:,0:l].T,y_results[:,0:l].T,y_results[:,0:l].T,
                               y_results[:,0:l].T,y_results[:,0:l].T,y_results[:,0:l].T,y_results[:,0:l].T))
    
    model.fit(X_train, [Y_moves, Y_results],epochs=200,verbose=0)
    print(testing(model))
    test_scores[:,n0+1] = testing(model).flatten()

Instructions for updating:
Use tf.cast instead.
[[0.587]
 [0.364]
 [0.049]]
[[0.576]
 [0.352]
 [0.072]]
[[0.682]
 [0.219]
 [0.099]]
[[0.575]
 [0.227]
 [0.198]]
[[0.461]
 [0.272]
 [0.267]]
[[0.73 ]
 [0.193]
 [0.077]]
[[0.799]
 [0.131]
 [0.07 ]]
[[0.718]
 [0.19 ]
 [0.092]]
[[0.719]
 [0.164]
 [0.117]]
[[0.692]
 [0.204]
 [0.104]]
[[0.777]
 [0.167]
 [0.056]]


In [None]:
np.savetxt("test_scores.csv", test_scores, delimiter=",")
np.savetxt("total_games.csv", total_games, delimiter=",")


In [None]:
Y = np.loadtxt("test_scores.csv",delimiter=",", dtype='str').astype(np.float)
X = np.loadtxt("total_games.csv",delimiter=",", dtype='str').astype(np.float)

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.plot(X,Y[0,:],label = 'win')
ax.plot(X,Y[1,:],label = 'lose')
ax.plot(X,Y[2,:],label = 'draw')
ax.set_xlabel('games played')
ax.set_ylabel('percentage')
ax.set_title('results against random play')
ax.legend()
plt.show()

In [4]:
model.save('ttt_model')