In [3]:
# Dependencies
import pandas as pd
import seaborn as sns
import numpy as np
import sklearn as sk
from sklearn.metrics import mean_squared_error, r2_score

In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [315]:
# Data
#raw_data = pd.read_csv("data_clean_lite.csv")

In [98]:
# Utility Functions

# Create new game state
def getNewGameState():
    """Return a fresh, empty board.

    Returns:
        A flat float array of 9 cells, all 0 (0 marks an empty cell).
    """
    empty_board = np.zeros(shape=(9,), dtype=float)
    return empty_board

# Choose Move
## player = (-1|1)
## game_state = flat array of 9 cells (0 = empty)
def getRandomMove(player, game_state):
    """Pick a uniformly random empty cell of the board.

    Args:
        player: -1 or 1. Unused here; accepted so this function has the same
            call signature as the model-backed policies.
        game_state: flat sequence of 9 cells where 0 marks an empty cell.

    Returns:
        int index (0-8) of an empty cell.

    Raises:
        ValueError: if the board has no empty cell (the previous
            probe-until-empty loop would never terminate in that case).
    """
    # Sampling directly from the empty cells keeps the choice uniform; a
    # random start followed by a linear probe favours cells that come right
    # after a run of occupied ones.
    empty_cells = np.flatnonzero(np.asarray(game_state) == 0)
    if empty_cells.size == 0:
        raise ValueError("No empty cells left on the board")
    return int(np.random.choice(empty_cells))

# T|F if move is valid
def validateMove(chosen_move, game_state):
    """Tell whether a move targets an empty cell.

    Args:
        chosen_move: index of the cell the player wants to take.
        game_state: indexable board where 0 marks an empty cell.

    Returns:
        The result of comparing the targeted cell with 0 (truthy when the
        cell is empty).
    """
    cell_value = game_state[chosen_move]
    return cell_value == 0

# Get Updated game state
def getUpdatedGameState(player, chosen_move, game_state):
    """Return a copy of the board with `player` placed on `chosen_move`.

    The input board is left untouched; no check is made that the target cell
    is empty.

    Args:
        player: mark to write into the cell (-1 or 1).
        chosen_move: index of the cell to overwrite.
        game_state: current board.

    Returns:
        A new array holding the updated board.
    """
    successor = np.array(game_state, copy=True)
    successor[chosen_move] = player
    return successor

# Get Winning Player
## -1 => player -1
## 1 => player 1
## 0 => tie
## None => game is not over
def getWinner(game_state):
    """Determine the outcome of a board.

    Args:
        game_state: flat NumPy array of 9 cells holding -1, 0 (empty) or 1.

    Returns:
        1 or -1 when that player owns a complete line, 0 when the board is
        full without a complete line (tie), None when the game is still open.
    """
    # Without a complete line the result is a tie only if no cell is empty.
    has_empty_cell = any(cell == 0 for cell in game_state)
    outcome = None if has_empty_cell else 0

    # Rows, columns, then the two diagonals (index lists, as needed for
    # NumPy fancy indexing).
    lines = [
        [0, 1, 2], [3, 4, 5], [6, 7, 8],
        [0, 3, 6], [1, 4, 7], [2, 5, 8],
        [0, 4, 8], [2, 4, 6],
    ]
    for line in lines:
        line_total = np.sum(game_state[line])
        # A line sums to +/-3 only when all three cells belong to one player.
        if line_total == 3:
            outcome = 1
        elif line_total == -3:
            outcome = -1

    return outcome
        
def prettyGameStates(game_states):
    """Reshape a batch of flat boards into 3x3 grids.

    Args:
        game_states: sequence of boards, 9 cells each.

    Returns:
        Array of shape (len(game_states), 3, 3).
    """
    board_count = len(game_states)
    return np.reshape(game_states, (board_count, 3, 3))

def decayScores(n,p=1):
    """Build one training score per state, growing toward the end of the game.

    Scores rise linearly from 0.5*p to 1*p over the sequence, and the final
    entry gets an extra 3*p on top.

    Args:
        n: sequence of states; only its length is used.
        p: multiplier applied to every score (callers pass 1 for the winner's
            states and -1 for the loser's).

    Returns:
        1-D float array of len(n) scores. Empty when `n` is empty (indexing
        the last element unconditionally would raise IndexError there).
    """
    count = len(n)
    scores = np.linspace(0.5, 1, count) * p
    if count:
        # Extra weight on the last (game-deciding) position.
        scores[-1] += 3 * p
    return scores

def mirrorStates(game_states):
    """Expand a batch of boards with all 8 symmetries of the 3x3 grid.

    The result stacks 8 blocks of len(game_states) rows each, in this order:
    identity, left-right mirror, main-diagonal reflection, clockwise quarter
    turn, half turn, up-down mirror, anti-diagonal reflection,
    counter-clockwise quarter turn.

    The anti-diagonal block used to repeat the main-diagonal reflection, so
    that symmetry was missing and the transpose was counted twice.

    Args:
        game_states: 2-D array-like of shape (num_states, 9).

    Returns:
        New array of shape (8 * num_states, 9).
    """
    flat = np.asarray(game_states)
    num_states = len(flat)
    grids = np.reshape(flat, (num_states, 3, 3))

    def _flatten(boards):
        # Back from (num_states, 3, 3) to one row of 9 cells per board.
        return np.reshape(boards, (num_states, 9))

    def _transpose(boards):
        # Swap rows and columns of every board, leaving the batch axis alone.
        return np.transpose(boards, axes=(0, 2, 1))

    mirrored_lr = np.flip(grids, axis=2)
    mirrored_ud = np.flip(grids, axis=1)
    half_turn = np.flip(mirrored_ud, axis=2)

    variants = [
        flat,                               # identity
        _flatten(mirrored_lr),              # left-right mirror
        _flatten(_transpose(grids)),        # main-diagonal reflection
        _flatten(_transpose(mirrored_ud)),  # clockwise quarter turn
        np.flip(flat, axis=1),              # half turn (reverse the 9 cells)
        _flatten(mirrored_ud),              # up-down mirror
        _flatten(_transpose(half_turn)),    # anti-diagonal reflection
        _flatten(_transpose(mirrored_lr)),  # counter-clockwise quarter turn
    ]
    return np.vstack(variants)

def augmentData(game_states, scores):
    """Augment labelled boards with their mirrored/rotated variants.

    Args:
        game_states: batch of flat boards, one row per board.
        scores: one score per board, in the same order.

    Returns:
        Tuple (augmented_states, augmented_scores).
    """
    augmented_states = mirrorStates(game_states)
    # mirrorStates stacks 8 blocks (the originals plus 7 variants) in input
    # order, so the labels are simply repeated 8 times to stay aligned.
    augmented_scores = np.tile(scores, 8)

    # Player-inversion augmentation (negated boards paired with inverted
    # scores) is currently disabled.
    return (augmented_states, augmented_scores)

In [6]:
# Play a game

def playGame(policy1=getRandomMove, policy2=getRandomMove):
    """Play one full game between two policies.

    A policy is a callable `(player, game_state) -> cell index`. Player 1
    moves first and is driven by `policy1`; player -1 is driven by `policy2`.
    Moves returned by a policy are applied without validation.

    Args:
        policy1: policy for player 1; None selects getRandomMove.
        policy2: policy for player -1; None selects getRandomMove.

    Returns:
        Tuple (game_states, winner, turn_num): the board after every move as
        an array with one row per move, the getWinner() result for the final
        board, and the zero-based index of the last move.
    """
    first_policy = getRandomMove if policy1 is None else policy1
    second_policy = getRandomMove if policy2 is None else policy2
    policy_for = {1: first_policy, -1: second_policy}

    board = getNewGameState()
    history = []
    mover = -1  # flipped before every move, so player 1 opens the game
    moves_played = 0
    outcome = None
    while outcome is None and moves_played < 10:
        mover = mover * -1
        cell = policy_for[mover](mover, board)
        board = getUpdatedGameState(mover, cell, board)
        history.append(board)
        moves_played += 1
        outcome = getWinner(board)

    # Seeding with an empty (0, 9) block fixes the log's shape and dtype.
    game_states = np.vstack([np.zeros((0, 9))] + history)
    #print("Player %s won in %s moves!" % (outcome, moves_played - 1))
    return (game_states, outcome, moves_played - 1)

In [7]:
def makePolicy(model=None,verbose=False):
    """Wrap a value model into a move-selection policy.

    Args:
        model: object exposing `predict(batch_of_boards)` that returns one
            value per board; None means "no model".
        verbose: when True, print a `(move, value)` tuple for every candidate.

    Returns:
        None when `model` is None, otherwise a callable
        `policy(current_player, game_state)` that returns the empty cell whose
        resulting board gets the highest predicted value, or -1 when the board
        has no empty cell.
    """
    if model is None:
        return None

    def policy(current_player, game_state):
        # Boards are always scored from the mover's point of view: the mover's
        # marks are 1, so player -1 sees a negated board.
        board = np.asarray(game_state)
        if current_player == -1:
            board = board * -1
        available_moves = np.where(board == 0)[0]
        if available_moves.size == 0:
            return -1

        # One candidate board per available move, scored with a single
        # predict() call instead of one call per move.
        candidates = np.repeat(board[np.newaxis, :], available_moves.size, axis=0)
        candidates[np.arange(available_moves.size), available_moves] = 1
        move_vals = np.asarray(model.predict(candidates))

        if verbose:
            for next_move, move_val in zip(available_moves, move_vals):
                print((next_move, move_val))

        # argmax returns the first maximum, i.e. the lowest-index move on ties.
        flat_vals = move_vals.reshape(available_moves.size, -1)[:, 0]
        return available_moves[int(np.argmax(flat_vals))]

    return policy

In [225]:
def trainModel(game_states, scores, reg="l1", layers=(18,9,3)):
    """Fit a small dense network that maps a board to a scalar score.

    Args:
        game_states: training boards, one row per board.
        scores: target score for every board.
        reg: kernel regularizer for the hidden layers: "l1_l2", "l2", or
            anything else for L1.
        layers: unit count of each hidden layer, in order.

    Returns:
        Tuple (model, 0.5). The second element is a fixed placeholder kept
        for callers that unpack two values; no error metric is computed.
    """
    X_train = game_states
    y_train = scores

    # The `layers` parameter shadows the module-level `tensorflow.keras.layers`
    # import inside this function, so Keras layers are reached through
    # `keras.layers` instead.
    regularizer_factories = {
        "l1_l2": keras.regularizers.l1_l2,
        "l2": keras.regularizers.l2,
    }
    make_regularizer = regularizer_factories.get(reg, keras.regularizers.l1)
    # NOTE(review): for "l1_l2" only the first positional argument (l1) is
    # given below; l2 keeps the library default - TODO confirm that is intended.

    # Create neural net object
    model = keras.Sequential()
    for units in layers:
        # Densely-connected ReLU layer with its own regularizer instance.
        model.add(keras.layers.Dense(units, activation='relu',
                                     kernel_regularizer=make_regularizer(0.0001)))
    # Single-unit tanh output, bounded to [-1, 1].
    # NOTE(review): decayScores() produces targets with magnitude up to 4,
    # which a tanh output cannot reach - confirm this squashing is intended.
    model.add(keras.layers.Dense(1, activation='tanh'))

    # `learning_rate` replaces the deprecated `lr` keyword.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
                  loss='mse',
                  metrics=['mae'])

    # Train the model using the training sets
    model.fit(X_train, y_train, epochs=800, batch_size=128, verbose=False)

    return (model, 0.5)

In [119]:
def _labelGamesForTraining(first_policy, second_policy, num_games):
    """Play `num_games` games and turn every decisive one into labelled boards.

    Ties are skipped. Returns (states, scores) as arrays, both empty when no
    game produced a winner.
    """
    state_chunks = [np.zeros((0, 9))]
    score_chunks = [np.zeros(0)]
    for _ in range(num_games):
        game_states, winner, _turn_num = playGame(first_policy, second_policy)
        if winner == 0:
            #Skip ties
            continue
        #Normalize so the winner's marks are 1
        game_states = game_states * winner
        #The winner made the last move, so stepping back by two from the end
        #selects the boards right after each of the winner's moves
        winner_states = game_states[::-2][::-1]
        #Smooth reward backwards over time
        winner_scores = decayScores(winner_states, 1)
        #Boards right after the loser's moves, negated so the mover is 1 again
        loser_states = game_states[-2::-2][::-1] * -1
        loser_scores = decayScores(loser_states, -1)

        #Augment with reflections and rotations
        states, scores = augmentData(np.vstack([winner_states, loser_states]),
                                     np.append(winner_scores, loser_scores))
        state_chunks.append(states)
        score_chunks.append(scores)

    return np.vstack(state_chunks), np.concatenate(score_chunks)


def doTrials(num_trials=10, model=None):
    """Generate self-play data with `model` and train a new model on it.

    Plays `num_trials` games of the model policy (moving first) against the
    random policy, then `num_trials` games of the model policy against itself.
    With `model=None` every player is random.

    Args:
        num_trials: number of games in each of the two phases.
        model: value model passed to makePolicy(), or None.

    Returns:
        The (model, placeholder) tuple returned by trainModel().
    """
    policy = makePolicy(model)
    # None makes playGame() fall back to the random policy.
    vs_random_states, vs_random_scores = _labelGamesForTraining(policy, None, num_trials)
    self_play_states, self_play_scores = _labelGamesForTraining(policy, policy, num_trials)

    all_game_states = np.vstack([vs_random_states, self_play_states])
    all_scores = np.append(vs_random_scores, self_play_scores)

    regr, mse = trainModel(all_game_states, all_scores)
    return regr, mse

In [73]:
def _collectScoredGames(first_policy, second_policy, num_games):
    """Play `num_games` games and return (states, scores) for the decisive ones.

    Ties contribute nothing. Both results are arrays, empty when every game
    was a tie.
    """
    state_chunks = [np.zeros((0, 9))]
    score_chunks = [np.zeros(0)]
    for _ in range(num_games):
        game_states, winner, _turn_num = playGame(first_policy, second_policy)
        if winner == 0:
            #Skip ties
            continue
        #Normalize so the winner's marks are 1
        game_states = game_states * winner
        #The winner moved last: every second board counted from the end is a
        #position the winner just produced
        winner_states = game_states[::-2][::-1]
        #Smooth reward backwards over time
        winner_scores = decayScores(winner_states, 1)
        #The remaining boards follow the loser's moves; negate them so the
        #player who just moved is always marked 1
        loser_states = game_states[-2::-2][::-1] * -1
        loser_scores = decayScores(loser_states, -1)

        #Augment with reflections and rotations
        states, scores = augmentData(np.vstack([winner_states, loser_states]),
                                     np.append(winner_scores, loser_scores))
        state_chunks.append(states)
        score_chunks.append(scores)

    return np.vstack(state_chunks), np.concatenate(score_chunks)


def getGameData(num_trials=10, model1=None, model2=None):
    """Generate labelled training boards from games between two models.

    Half of the games are played with model1 moving first and half with
    model2 moving first (an odd `num_trials` therefore plays one game fewer).
    A None model plays randomly.

    Args:
        num_trials: total number of games to play.
        model1: value model for the first policy, or None.
        model2: value model for the second policy, or None.

    Returns:
        Tuple (all_game_states, all_scores) of arrays. Both are empty arrays
        when no game had a winner (the scores used to come back as a plain
        list in that case).
    """
    policy1 = makePolicy(model1)
    policy2 = makePolicy(model2)
    games_per_side = int(num_trials/2)

    first_states, first_scores = _collectScoredGames(policy1, policy2, games_per_side)
    second_states, second_scores = _collectScoredGames(policy2, policy1, games_per_side)

    all_game_states = np.vstack([first_states, second_states])
    all_scores = np.append(first_scores, second_scores)
    return (all_game_states, all_scores)

In [182]:
def evaluateModel(num_trials=100, model1=None, model2=None, verbose=False):
    """Estimate how often model1 avoids losing against model2.

    Half of the games are played with model1 moving first (as player 1) and
    half with model1 moving second (as player -1). Each game is recorded as 1
    when model1 wins or ties and 0 when it loses. A None model plays randomly.

    Args:
        num_trials: total number of games (split evenly between both seats).
        model1: model under evaluation.
        model2: opponent model.
        verbose: when True, print the list of per-game results.

    Returns:
        Mean of the per-game results.
    """
    policy1 = makePolicy(model1)
    policy2 = makePolicy(model2)
    games_per_side = int(num_trials/2)

    # model1 is player 1: a loss (-1) becomes 0, a tie (0) becomes 1, and a
    # win (1) is kept.
    outcome_as_first = {-1: 0, 0: 1}
    # model1 is player -1: values are negated after the remap, so an opponent
    # win (1) ends as 0, a tie (0) as 1, and model1's win (-1) as 1.
    outcome_as_second = {1: 0, 0: -1}

    winners = []
    for _ in range(games_per_side):
        _states, winner, _turns = playGame(policy1, policy2)
        winners.append(outcome_as_first.get(winner, winner))

    for _ in range(games_per_side):
        _states, winner, _turns = playGame(policy2, policy1)
        winners.append(outcome_as_second.get(winner, winner) * -1)

    if verbose:
        print(winners)
    return np.mean(winners)

In [87]:
# Bootstrap experiment: train on random play, then retrain on games that
# involve the first model.
#Warm up with random vs random (no models given, so both sides play randomly)
all_game_states, all_scores = getGameData(100)
# Train an early model (mse is the fixed placeholder returned by trainModel)
model1, mse = trainModel(all_game_states, all_scores)
# Fraction of 20 games against a random opponent that model1 did not lose
print(evaluateModel(20,model1=model1))
# Make more data: model1 against a random opponent, alternating who moves first
new_game_states, new_scores = getGameData(50,model1)
# Stack the new games onto the existing training set.
# NOTE: re-running this cell on its own appends the new games again.
all_game_states = np.vstack([all_game_states, new_game_states])
all_scores = np.append(all_scores, new_scores)
# Retrain from scratch on the combined data
model2, mse = trainModel(all_game_states, all_scores)
print(evaluateModel(20,model1=model2))

0.74
0.72


In [122]:
import time

def print_time(time_start,msg=""):
    """Print the seconds elapsed since `time_start` and return a new timestamp.

    Args:
        time_start: reference timestamp as returned by time.time().
        msg: label printed after the elapsed time.

    Returns:
        time.time() sampled after printing, for use as the next reference.
    """
    now = time.time()
    time_diff = now - time_start
    print(f"(L) {time_diff} {msg}")
    return time.time()

# Timed version of the bootstrap experiment, using the "l1_l2" regularizer.
# print_time() reports the seconds since the previous checkpoint and returns
# a fresh timestamp, so every step is timed on its own.
time_start = time.time()
#Warm up with random vs random (no models given, so both sides play randomly)
all_game_states, all_scores = getGameData(50)
time_start = print_time(time_start, "Play 50 games")
# Train an early model
model1, mse = trainModel(all_game_states, all_scores, reg="l1_l2")
time_start = print_time(time_start, "Train on 50 games")
# Fraction of games against a random opponent that model1 did not lose
print(evaluateModel(50,model1=model1))
time_start = print_time(time_start, "Evaluate on 50 games")
# Make more data: model1 against a random opponent
new_game_states, new_scores = getGameData(50,model1)
time_start = print_time(time_start, "Play 50 more games")
# Stack the new games onto the existing training set
all_game_states = np.vstack([all_game_states, new_game_states])
all_scores = np.append(all_scores, new_scores)
# Restart the clock so the stacking above is not counted as training time
time_start = time.time()
# Retrain from scratch on the combined data
model2, mse = trainModel(all_game_states, all_scores, reg="l1_l2")
time_start = print_time(time_start, "Train on 100 games")
print(evaluateModel(50,model1=model2))
time_start = print_time(time_start, "Evaluate on 50 games")

(L) 0.1334228515625 Play 50 games
(L) 48.307918548583984 Train on 50 games
0.82
(L) 4.404000282287598 Evaluate on 50 games
(L) 4.103925943374634 Play 50 more games
(L) 105.73544454574585 Train on 100 games
0.96
(L) 4.506910800933838 Evaluate on 50 games


In [195]:
# Third training round: add games played by model2 and retrain.
# Relies on all_game_states / all_scores / model2 left behind by earlier cells.
time_start = time.time()
# Make more data: model2 against a random opponent
new_game_states, new_scores = getGameData(50,model2)
time_start = print_time(time_start, "Play 50 more games")
# Stack the new games onto the existing training set.
# NOTE: re-running this cell appends another batch of games each time.
all_game_states = np.vstack([all_game_states, new_game_states])
all_scores = np.append(all_scores, new_scores)
# Restart the clock so the stacking above is not counted as training time
time_start = time.time()
# Retrain from scratch on the combined data
model3, mse = trainModel(all_game_states, all_scores, reg="l1_l2")
time_start = print_time(time_start, "Train on 150 games")
# Fraction of 100 games against a random opponent that model3 did not lose
print(evaluateModel(100,model1=model3))
time_start = print_time(time_start, "Evaluate on 100 games")

(L) 4.240751028060913 Play 50 more games
(L) 148.84031915664673 Train on 150 games
0.94
(L) 8.706801414489746 Evaluate on 100 games


In [226]:
# Hyper parameter testing
# Train one model per regularizer on the same accumulated data and report,
# for each, the training time and the fraction of 100 games against a random
# opponent that the model did not lose.
# After the loop `model_test` holds the last model trained (ElasticNet).
for reg_name, reg_label in (("l1", "L1"), ("l2", "L2"), ("l1_l2", "ElasticNet")):
    time_start = time.time()
    # Retrain
    model_test, mse = trainModel(all_game_states, all_scores, reg=reg_name)
    time_start = print_time(time_start, f"Train {reg_label}")
    print(evaluateModel(100,model1=model_test))
    time_start = print_time(time_start, f"Evaluate {reg_label} on 100 games")

(L) 162.75248312950134 Train L1
0.93
(L) 9.203482389450073 Evaluate L1 on 100 games
(L) 177.13553428649902 Train L2
0.9
(L) 9.132536888122559 Evaluate L2 on 100 games
(L) 163.85113143920898 Train ElasticNet
0.99
(L) 9.27957797050476 Evaluate ElasticNet on 100 games


In [274]:
# Larger evaluation of the last hyper-parameter model: fraction of 1000 games
# against a random opponent that model_test did not lose.
print(evaluateModel(1000,model_test))

0.993


In [296]:
# Set up an interactive game against the trained model. The two cells below
# are meant to be run by hand, one per move.
#Start game
my_game_state = getNewGameState()
# Create a log of game states
my_game_states = np.zeros((0,9))
#Train a model
#regr, mse = doTrials()
# verbose=True makes the policy print the predicted value of every candidate move
policy = makePolicy(model_test, verbose=True)

In [304]:
#Opponent Move
# The model plays as player -1 on the current board.
# NOTE: the policy returns -1 when no cell is empty, and indexing with -1
# below would then overwrite the last cell - do not run this on a full board.
opponent_move = policy(-1, my_game_state)
# The board is updated in place
my_game_state[opponent_move] = -1
# Append the new position to the game log
my_game_states = np.vstack([my_game_states, my_game_state])
# Show the board as a 3x3 grid
prettyGameStates(np.array([my_game_state]))

(1, array([-0.83492184], dtype=float32))
(2, array([-0.9973592], dtype=float32))


array([[[ 1., -1.,  0.],
        [-1.,  1.,  1.],
        [-1.,  1., -1.]]])

In [303]:
#My Move
# Edit my_move (cell index 0-8, row-major) before each run. The cell is not
# checked for being empty, so an occupied cell would be overwritten.
my_move = 7
# The human plays as player 1; the board is updated in place
my_game_state[my_move] = 1
# Append the new position to the game log
my_game_states = np.vstack([my_game_states, my_game_state])
# Show the board as a 3x3 grid
prettyGameStates(np.array([my_game_state]))

array([[[ 1.,  0.,  0.],
        [-1.,  1.,  1.],
        [-1.,  1., -1.]]])