In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
import sys
import numpy as np
import copy
from PIL import Image, ImageDraw
from IPython.display import clear_output
import tensorflow as tf
import random
import copy


In [2]:

board = np.zeros((6, 7))

In [3]:
board

array([[0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])

In [4]:
def board_add(board, column, color):
    # Error Check
    if len(board) != 6 or any(len(row) != 7 for row in board):
        raise ValueError("Invalid board dimensions")
    if column-1 < 0 or column-1 >= 7:
        raise IndexError("Column index out of range")
    # Color Converter
    if color == 'black':
        disk = 1
    elif color == 'red':
        disk = -1
    else:
        raise ValueError("Invalid color. Use 'black' or 'red'.")
    # Actual board update
    for i in range(5, -1, -1):
        if board[i][column-1] == 0:
            board[i][column-1] = disk
            return board
    
    return board


In [5]:
def board_remove(board, column, current_color):
    # Error Check
    if column-1 < 0 or column-1 >= 7:
        raise IndexError("Column index out of range")
    # Color Converter
    if current_color == 'black':
        disk = 1
    elif current_color == 'red':
        disk = -1
    else:
        raise ValueError("Invalid color. Use 'black' or 'red'.")
    # Check if the bottom cell contains the disk of the current player
    if board[5][column-1] != disk:
        return board

    # Remove the bottom checker and shift remaining checkers down
    for i in range(5, 0, -1): 
        board[i][column-1] = board[i-1][column-1] 
    board[0][column-1] = 0  
    return board


In [6]:
def winner_check(board):
    rows, cols = len(board), len(board[0])

    # Check horizontal locations for win
    for row in range(rows):
        for col in range(cols - 3):  # Only need to check starting points that allow for 4 in a row
            if board[row][col] != 0 and board[row][col] == board[row][col + 1] == board[row][col + 2] == board[row][col + 3]:
                return board[row][col]

    # Check vertical locations for win
    for row in range(rows - 3):  # Similar logic for vertical checks
        for col in range(cols):
            if board[row][col] != 0 and board[row][col] == board[row + 1][col] == board[row + 2][col] == board[row + 3][col]:
                return board[row][col]

    # Check positively sloped diagonals
    for row in range(rows - 3):
        for col in range(cols - 3):
            if board[row][col] != 0 and board[row][col] == board[row + 1][col + 1] == board[row + 2][col + 2] == board[row + 3][col + 3]:
                return board[row][col]

    # Check negatively sloped diagonals
    for row in range(3, rows):  # Start from row 3 to have space for a diagonal
        for col in range(cols - 3):
            if board[row][col] != 0 and board[row][col] == board[row - 1][col + 1] == board[row - 2][col + 2] == board[row - 3][col + 3]:
                return board[row][col]

    return 0  # No winner found


In [7]:
def is_game_over(board):
    # Check if any player has won
    if winner_check(board) != 0:
        return True, 1
    
    # Check for a draw by seeing if any column is not full
    if any(board[0][col] == 0 for col in range(len(board[0]))):
        return False, 0
    
    return True, 0


In [8]:
def display_board(state):
    valid_moves = 14
    
    cell_size = 75

    image_width = 7 * cell_size
    image_height = 6 * cell_size

    image = Image.new('RGB', (image_width, image_height), color='grey')
    draw = ImageDraw.Draw(image)

    for row in range(6):
        for col in range(7):
            x0 = col * cell_size + cell_size // 2
            y0 = row * cell_size + cell_size // 2
            radius = cell_size // 2 - 7
            
            outline = "black"
    

            if state[row, col] == 0:
                draw.ellipse([(x0 - radius, y0 - radius), (x0 + radius, y0 + radius)], fill='white', outline=outline, width=5)
            elif state[row, col] == -1:
                draw.ellipse([(x0 - radius, y0 - radius), (x0 + radius, y0 + radius)], fill='red', outline=outline, width=5)
            elif state[row, col] == 1:
                draw.ellipse([(x0 - radius, y0 - radius), (x0 + radius, y0 + radius)], fill='black', outline=outline, width=5)

    return image

In [9]:

# current_player = 'black'
# game_over = False

# while not game_over:
#     display(display_board(board))
    
#     # Get input from player
#     column = int(input(f"{current_player}'s turn. Enter column to add or remove: "))
#     action = input("Type 'a' to place or 'r' to take out a checker: ")

#     if action == 'a':
#         board = board_add(board, column, current_player)
#     elif action == 'r':
#         board = board_remove(board, column, current_player)
    
   
#     # Check if the game is over
#     game_over, result = is_game_over(board)
#     if result == "Win":
#         print(f"{current_player} wins!")
#     elif result == "Draw":
#         print("The game is a draw.")

#     # Switch player
#     current_player = 'red' if current_player == 'black' else 'black'
#     clear_output()


In [10]:

# Wrapper
def wrapper(output:int):
    if output <=6:
        return 'a',output+1
    else:
        return 'r', output-6

In [11]:
# def board_convert(board):
#    encoded_board = np.stack([board==0,board==1,board==-1]).astype(np.float32)
#    if len(encoded_board) !=3:
#       encoded_board = np.swapaxes(encoded_board,0,1)
#    return encoded_board



def board_convert(board):
    # Stack the arrays along the last dimension instead of the first
    encoded_board = np.stack([board == 0, board == 1, board == -1], axis=-1).astype(np.float32)
    return encoded_board


In [12]:
board_convert(board)

array([[[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]],

       [[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]],

       [[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]],

       [[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]],

       [[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]],

       [[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]]], dtype=float32)

In [13]:
import tensorflow as tf

In [14]:
inp = tf.keras.layers.Input(shape=(6,7,3))
lyr1 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation= None)(inp)
lyr2 = tf.keras.layers.BatchNormalization()(lyr1)
lyr3 = tf.keras.layers.ReLU()(lyr2)

# ResNet
lyr4 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation=None)(lyr3)
lyr5 = tf.keras.layers.BatchNormalization()(lyr4)
lyr6 = tf.keras.layers.ReLU()(lyr5)
lyr7 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation= None)(lyr6)
lyr8 = tf.keras.layers.BatchNormalization()(lyr7)
lyr9 = lyr8+lyr3
lyr10 = tf.keras.layers.ReLU()(lyr9)


# ResNet
lyr11 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation=None)(lyr10)
lyr12 = tf.keras.layers.BatchNormalization()(lyr11)
lyr13 = tf.keras.layers.ReLU()(lyr12)
lyr14 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation= None)(lyr13)
lyr15 = tf.keras.layers.BatchNormalization()(lyr14)
lyr16 = lyr10+lyr15
lyr17 = tf.keras.layers.ReLU()(lyr16)


# ResNet
lyr18 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation=None)(lyr17)
lyr19 = tf.keras.layers.BatchNormalization()(lyr18)
lyr20 = tf.keras.layers.ReLU()(lyr19)
lyr21 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation= None)(lyr20)
lyr22 = tf.keras.layers.BatchNormalization()(lyr21)
lyr23 = lyr17+lyr22
lyr24 = tf.keras.layers.ReLU()(lyr23)


# ResNet
lyr25 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation=None)(lyr24)
lyr26 = tf.keras.layers.BatchNormalization()(lyr25)
lyr27 = tf.keras.layers.ReLU()(lyr26)
lyr28 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation= None)(lyr27)
lyr29 = tf.keras.layers.BatchNormalization()(lyr28)
lyr30 = lyr29+lyr24
lyr31 = tf.keras.layers.ReLU()(lyr30)


# ResNet
lyr32 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation=None)(lyr31)
lyr33 = tf.keras.layers.BatchNormalization()(lyr32)
lyr34 = tf.keras.layers.ReLU()(lyr33)
lyr35 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', strides=1, activation= None)(lyr34)
lyr36 = tf.keras.layers.BatchNormalization()(lyr35)
lyr37 = lyr31+lyr36
lyr38 = tf.keras.layers.ReLU()(lyr37)


flatten = tf.keras.layers.Flatten()(lyr38)
lyr39 = tf.keras.layers.Dense(14, activation='softmax')(flatten)
lyr40 = tf.keras.layers.Dense(1, activation='tanh')(flatten)
model = tf.keras.models.Model(inputs = [inp], outputs = [lyr39, lyr40])



optimizer = tf.keras.optimizers.Adam()

In [15]:
model.summary()

In [16]:
model(np.expand_dims(board_convert(copy.deepcopy(board)), axis=0))[0]

<tf.Tensor: shape=(1, 14), dtype=float32, numpy=
array([[0.07040589, 0.08949312, 0.05954351, 0.08239282, 0.04240274,
        0.08173043, 0.06172519, 0.06514908, 0.08160949, 0.07192582,
        0.05386994, 0.06531814, 0.04951235, 0.12492161]], dtype=float32)>

In [17]:
#Define epsilon decision function
def epsilonDecision(epsilon):
  action_decision = random.choices(['model','random'], weights = [1 - epsilon, epsilon])[0]
  return action_decision

def getAction(model, observation, epsilon):
  #Get the action based on greedy epsilon policy
  action_decision = epsilonDecision(epsilon)
  #Reshape the observation to fit in model
  observation = np.array([observation])
  #Get predictions
  preds = model.predict(observation)[0]
  #Get the softmax activation of the logits
  # weights = tf.nn.softmax(preds).numpy()[0]
  if action_decision == 'model':
    action = np.argmax(preds)
  if action_decision == 'random':
    action = random.randint(0,13)
  return int(action), preds

In [18]:
def train_step(model, optimizer, observations, actions, rewards, batch_size):
    n_batches = (len(observations) // batch_size) + 1
    print(n_batches)
    for batch in range(n_batches):
      # print(batch)
      obs_batch = observations[batch_size * batch: min(batch_size * (batch + 1), len(observations))]
      actions_batch = actions[batch_size * batch: min(batch_size * (batch + 1), len(observations))]
      rewards_batch = rewards[batch_size * batch: min(batch_size * (batch + 1), len(observations))]
      # print(np.array(obs_batch).shape)
      if (np.array(obs_batch).shape[0] == 0) or (len(np.array(obs_batch).shape) != 4):
         print("skipping")
      else:
        with tf.GradientTape() as tape:
          #Propagate through the agent network
            logits, rwd = model(obs_batch)
            # print(logits, actions_batch, rwd, np.array(rewards_batch).reshape(-1, 1))
            # print(logits, actions_batch, logits.shape, tf.reshape(actions_batch, (-1, 1)))
            softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=actions_batch)
            # print(softmax_cross_entropy)
            # print(tf.reshape(rewards, (-1, 1)).shape)
            mse_loss = tf.keras.losses.mean_squared_error(tf.reshape(rewards_batch, (-1, 1)), rwd)
            total_loss = mse_loss + softmax_cross_entropy
            loss = tf.reduce_mean(total_loss)
            # print("loss", loss)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

In [19]:
def get_clr(player):
    if player == 1:
        return 'black'
    if player == -1:
        return 'red'

In [20]:
epsilon = 1
epsilon_rate = 0.995
for iters in range(100):
    print(f"Iter {iters}")
    frames = []
    actions = []
    rewards = []
    for games in range(100):
        clear_output()
        print(games)
        board = np.zeros((6,7))
        player = 1
        while True:
            if player == 1:
                action = np.random.randint(0,13)
                actions.append(action)
                action,col = wrapper(action)
            else:    
                action, w = getAction(model, board_convert(copy.deepcopy(board)), epsilon)
                actions.append(action)
                action, col = wrapper(action)

            if action == 'a':
                board = board_add(copy.deepcopy(board), col, get_clr(player))
            elif action == 'r':
                board = board_remove(copy.deepcopy(board), col, get_clr(player))
            
            terminated, reward = is_game_over(copy.deepcopy(board))
            reward = reward * player*(-1)
            frames.append(board_convert(copy.deepcopy(board) * player))
            rewards.append(reward)

            if terminated:
                break

            player *= -1

    print("Training")
    train_step(model, optimizer = optimizer,
                observations = np.array(frames),
                actions = np.array(actions),
                rewards = rewards,
                batch_size=32)   
    model.save('C:/Users/naray/OneDrive/Documents/UT Austin/Spring/Optimization/Project/New folder/my_model_{x}.keras'.format(x=iters)) 
    clear_output()
    epsilon = epsilon * epsilon_rate


33
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 