# Minimax with alpha beta pruning
Aims to use a pure Python implementation.  
Numba doesn't work for me when submitting.    
Looping over NumPy arrays is slower than looping over Python lists due to type conversion.    

In [None]:
from kaggle_environments import evaluate, make, utils

# The following code uses many heuristics.
Thanks to Keith Galli (https://www.youtube.com/watch?v=MMLtza3CZFM) for the reference implementation and to
https://www.youtube.com/watch?v=y7AKtWGOPAE for some Connect 4 heuristics.

# Here are some heuristics used
1. Count the number of 'useful' pieces in rows/columns/diagonals. Useful means you can still place additional pieces to win.
2. The center column is the best column, as you can connect to both sides of the board.
3. Even/odd strategy to ensure that you win in the late game.
4. Lower rows/columns/diagonals are better than those higher up the board.
5. Columns are worth less than rows and diagonals, because columns are the easiest to block.

# Value of a board
This is value_of_my_board - factor * value_of_opp_board, with each value computed using the heuristics above.
Note that factor needs to be greater than 1, as it is the opponent's turn after you place your piece.

# Algorithm
 Minimax search with alpha-beta pruning. It is not really hard; see the pseudocode on Wikipedia. Basically, the algorithm looks n steps ahead and picks the best board assuming your opponent plays optimally. Alpha-beta pruning lets you cut off branches early instead of searching them.

In [None]:
%%writefile sub.py
import copy

def value_fn(board, player):
    """Score ``board`` from the point of view of ``player``.

    The result is the player's own heuristic value minus five times the
    opponent's heuristic value, both computed by ``_value_fn``. The
    opponent is weighted more heavily than the player.

    Args:
        board: 6x7 grid given as a list of rows; cells hold 0 (empty),
            1 or 2 (player marks).
        player: mark to score for; must be 1 or 2.

    Returns:
        ``_value_fn(board, player) - 5 * _value_fn(board, opponent)``.

    Raises:
        ValueError: if ``player`` is neither 1 nor 2.
    """
    if player == 1:
        opp_player = 2
    elif player == 2:
        opp_player = 1
    else:
        # Without this branch opp_player would be unbound and the failure
        # would surface as a confusing UnboundLocalError.
        raise ValueError("player must be 1 or 2, got %r" % (player,))
    return _value_fn(board, player) - 5 * _value_fn(board, opp_player)
    
def _value_fn(board, player):
    """Heuristic value of ``board`` for ``player`` alone.

    Adds up, in this order: 101 points per own piece in column 3, the
    ``evaluate_row`` score of every horizontal window of four, the
    ``evaluate_column`` score of every vertical window of four and the
    ``evaluate_diagonal`` score of every diagonal window of four.
    The board is indexed as ``board[row][col]`` with 6 rows and 7 columns.
    """
    # Centre column (index 3): 101 points per piece owned by the player.
    centre_count = 0
    for r in range(6):
        if board[r][3] == player:
            centre_count += 1
    total = 101 * centre_count

    # Horizontal windows: four start columns on each of the six rows.
    for r in range(6):
        cells = board[r]
        for c in range(4):
            four = [cells[c], cells[c + 1], cells[c + 2], cells[c + 3]]
            total += evaluate_row(four, player, r)

    # Vertical windows: three start rows in each of the seven columns.
    for c in range(7):
        column = [board[r][c] for r in range(6)]
        for top in range(3):
            four = [column[top], column[top + 1],
                    column[top + 2], column[top + 3]]
            total += evaluate_column(four, player)

    # Diagonals running from (r, c) towards higher row and column indices.
    for r in range(3):
        for c in range(4):
            four = [board[r + k][c + k] for k in range(4)]
            total += evaluate_diagonal(four, player, r)

    # Diagonals running from (r + 3, c) towards lower row indices.
    for r in range(3):
        for c in range(4):
            four = [board[r + 3 - k][c + k] for k in range(4)]
            total += evaluate_diagonal(four, player, r)

    return total

# @nb.njit(cache=False,parallel=False)
def evaluate_diagonal(window, player, r):
    """Score one diagonal window of four cells for ``player``.

    The score is ``7 - r`` plus a pattern bonus:
    10000 for four own pieces, 100 for three own pieces and one empty
    cell, 10 for two own pieces and two empty cells, otherwise nothing.

    NOTE(review): the ``7 - r`` part is added whatever the window
    contains, so it does not depend on the pieces on the board.
    """
    own = window.count(player)
    empty = window.count(0)

    if own == 4:
        bonus = 10000
    elif own == 3 and empty == 1:
        bonus = 100
    elif own == 2 and empty == 2:
        bonus = 10
    else:
        bonus = 0

    # Smaller r gives a larger positional term.
    return (7 - r) + bonus

# @nb.njit(cache=False,parallel=False)
def evaluate_row(window, player, r):
    """Score one horizontal window of four cells on row ``r`` for ``player``.

    The score is the sum of:
      * ``7 - r`` (smaller row index scores higher),
      * 10 when player 1 is on a row with even index or player 2 is on a
        row with odd index (even/odd strategy),
      * a pattern bonus: 10000 for four own pieces, 100 for three own
        pieces and one empty cell, 10 for two own pieces and two empty
        cells.

    NOTE(review): the first two terms are added whatever the window
    contains, so they do not depend on the pieces on the board.
    """
    total = 7 - r

    # Parity bonus of the even/odd strategy.
    row_is_even = r % 2 == 0
    if (player == 1 and row_is_even) or (player == 2 and not row_is_even):
        total += 10

    own = window.count(player)
    empty = window.count(0)
    if own == 4:
        total += 10000
    elif own == 3 and empty == 1:
        total += 100
    elif own == 2 and empty == 2:
        total += 10
    return total

# @nb.njit(cache=False,parallel=False)
def evaluate_column(window, player):
    """Score one vertical window of four cells for ``player``.

    Uses the pattern bonuses 10000 (four own pieces), 100 (three own
    pieces and one empty cell) and 10 (two own pieces and two empty
    cells), then halves the result. The return value is always a float.
    """
    own = window.count(player)
    empty = window.count(0)

    if own == 4:
        raw = 10000
    elif own == 3 and empty == 1:
        raw = 100
    elif own == 2 and empty == 2:
        raw = 10
    else:
        raw = 0

    # Column patterns count for half as much.
    return 0.5 * raw

def get_valid_actions(board):
    """
    list the columns (0-6) whose cell in row 0 is still empty
    """
    top_row = board[0]
    open_columns = []
    for col in range(7):
        if top_row[col] == 0:
            open_columns.append(col)
    return open_columns

def drop_piece(board, col, mark):
    """
    Return a copy of ``board`` with ``mark`` dropped into column ``col``.

    The piece lands in the empty cell with the highest row index of that
    column (row 5 first, row 0 last). The input board is not modified.

    Args:
        board: 6-row grid given as a sequence of rows; 0 marks an empty cell.
        col: index of the column to play.
        mark: value to store in the landing cell.

    Returns:
        A new list of row lists containing the move.

    Raises:
        ValueError: if column ``col`` has no empty cell. Previously the
            cell in row 0 was silently overwritten in that case.
    """
    # A row-wise copy is sufficient for a grid of plain values and avoids
    # the overhead of copy.deepcopy, which matters inside the search.
    new_board = [list(row) for row in board]
    for row in range(5, -1, -1):
        if new_board[row][col] == 0:
            new_board[row][col] = mark
            return new_board
    raise ValueError("column %r is full" % (col,))

def check_winner(board):
    """
    Report the state of the game on a 6x7 board.

    Returns the mark found in the first line of four equal, non-zero
    cells (rows are scanned first, then columns, then both diagonals),
    -1 if there is no such line and row 0 is completely filled (draw),
    0 otherwise (game has not ended).
    """
    # Horizontal lines, row by row.
    for r in range(6):
        cells = board[r]
        for c in range(4):
            mark = cells[c]
            if (mark != 0 and mark == cells[c + 1]
                    and mark == cells[c + 2] and mark == cells[c + 3]):
                return mark

    # Vertical lines, column by column.
    for c in range(7):
        for r in range(3):
            mark = board[r][c]
            if (mark != 0 and mark == board[r + 1][c]
                    and mark == board[r + 2][c] and mark == board[r + 3][c]):
                return mark

    # Diagonals towards increasing row and column index.
    for r in range(3):
        for c in range(4):
            mark = board[r][c]
            if (mark != 0 and mark == board[r + 1][c + 1]
                    and mark == board[r + 2][c + 2]
                    and mark == board[r + 3][c + 3]):
                return mark

    # Diagonals towards decreasing row index, starting from rows 5, 4, 3.
    for r in (5, 4, 3):
        for c in range(4):
            mark = board[r][c]
            if (mark != 0 and mark == board[r - 1][c + 1]
                    and mark == board[r - 2][c + 2]
                    and mark == board[r - 3][c + 3]):
                return mark

    # No line of four: a completely filled row 0 means the game is drawn.
    top_row_full = True
    for c in range(7):
        if board[0][c] == 0:
            top_row_full = False
    if top_row_full:
        return -1

    # No winner and moves are still possible.
    return 0

def alphabeta(node, depth, alpha, beta, max_player, player, ai_player):
    """Minimax search with alpha-beta pruning.

    Args:
        node: board to search from.
        depth: remaining number of plies to look ahead.
        alpha: best score the maximising side is already assured of.
        beta: best score the minimising side is already assured of.
        max_player: True when the move at this node is made with
            ``ai_player``'s mark, False when it is made with ``player``'s.
        player: mark placed at minimising nodes (the opponent of
            ``ai_player``).
        ai_player: mark placed at maximising nodes; leaf boards are scored
            from this mark's point of view.

    Returns:
        ``(action, value)``. ``action`` is None at leaves (depth exhausted
        or game over), otherwise the chosen column; column 3 is the
        fallback when the loop selects nothing.
    """
    outcome = check_winner(node)

    # Leaf: search horizon reached or the game is decided.
    if depth == 0 or outcome != 0:
        if outcome == ai_player:
            return None, 999999999
        if outcome == player:
            return None, -999999999
        if outcome == -1:
            return None, 0
        return None, value_fn(node, ai_player)

    if max_player:
        top_score = -9999999999999
        top_move = 3
        for move in get_valid_actions(node):
            successor = drop_piece(node, move, ai_player)
            _, score = alphabeta(successor, depth - 1, alpha, beta,
                                 False, player, ai_player)
            if score > top_score:
                top_score = score
                top_move = move
            if top_score > alpha:
                alpha = top_score
            # The minimiser already has a better option elsewhere.
            if alpha >= beta:
                break
        return top_move, top_score

    low_score = 9999999999999
    low_move = 3
    for move in get_valid_actions(node):
        successor = drop_piece(node, move, player)
        _, score = alphabeta(successor, depth - 1, alpha, beta,
                             True, player, ai_player)
        if score < low_score:
            low_score = score
            low_move = move
        if low_score < beta:
            beta = low_score
        # The maximiser already has a better option elsewhere.
        if beta <= alpha:
            break
    return low_move, low_score
def reshape4(arr):
    """
    split a flat 42-element sequence into 6 rows of 7 cells, without numpy
    """
    rows = []
    for start in range(0, 42, 7):
        rows.append(arr[start:start + 7])
    return rows

def my_agent(observation, config):
    """Pick a column with a depth-5 alpha-beta search.

    Reads the flat board from ``observation["board"]`` and the agent's own
    mark from ``observation["mark"]``; ``config`` is not used. The score of
    the chosen move is stored in the module-level global ``value``.

    Returns:
        The column index returned by ``alphabeta``.
    """
    global value
    grid = reshape4(observation["board"])
    me = observation["mark"]
    # Any mark other than 1 is treated as player 2.
    opponent = 2 if me == 1 else 1
    action, value = alphabeta(grid, 5, -9999999999999, 9999999999999,
                              True, opponent, me)
    return action

In [None]:
# env = make("connectx", debug=True)
# env.play([None,my_agent],width=500, height=450)

In [None]:
# def mean_reward(rewards):
#     return np.round(rewards.count([1,-1])/len(rewards),2)

In [None]:
# mean_reward(evaluate("connectx", ["negamax", my_agent], num_episodes=10))

In [None]:
# mean_reward(evaluate("connectx", [my_agent,"negamax"], num_episodes=10))