In [None]:
from kaggle_environments import make, evaluate

# Instantiate the "connectx" environment with the debug flag enabled.
env = make("connectx", debug=True)

In [None]:
%%writefile submission.py

import random
import time
import numpy as np
from collections import defaultdict

# Board geometry and winning run length assumed by the helpers in this module.
ROWS, COLUMNS = 6, 7
INAROW = 4

# Learning parameters
lr = 0.1  # step size used when moving a value estimate toward its target
discount_factor = 0.9  # factor applied to the reward each time it is passed back one move

def board_to_grid(board, config):
    """Reshape a flat board into a 2-D grid.

    Args:
        board: flat sequence of cell values holding
            ``config.rows * config.columns`` entries; consecutive runs of
            ``config.columns`` entries become the grid rows.
        config: object exposing ``rows`` and ``columns`` attributes.

    Returns:
        A numpy array of shape ``(config.rows, config.columns)``.

    Raises:
        ValueError: if the board length does not match the configured size.
    """
    # Take the dimensions from the configuration that was passed in, so this
    # agrees with the other helpers that already read config.rows/columns.
    return np.asarray(board).reshape(config.rows, config.columns)


def grid_to_board(grid):
    """Flatten a grid array back into a one-dimensional board array."""
    flat_shape = (-1,)
    return grid.reshape(flat_shape)


def other_player(player):
    """Return the opposing player's mark: 1 when given 2, otherwise 2."""
    if player == 2:
        return 1
    return 2

# Zobrist hashing table: one random 64-bit integer for every
# (cell index, cell value) pair, with 3 value slots per cell.
HASH_TABLE = np.frombuffer(
    np.random.bytes(8 * 3 * ROWS * COLUMNS),
    dtype=np.int64,
).reshape(ROWS * COLUMNS, 3)

def hash_board(board):
    """Return the Zobrist hash of a flat board.

    Each cell selects the HASH_TABLE entry for its (index, value) pair and
    the selected entries are XOR-ed together.
    """
    cell_indices = np.arange(ROWS * COLUMNS)
    selected_keys = HASH_TABLE[cell_indices, board]
    return np.bitwise_xor.reduce(selected_keys)

# Value estimates
# Maps a board hash (as produced by hash_board) to its current value
# estimate; keys that were never written default to 0.0 via defaultdict(float).
V = defaultdict(float)

def get_value(board):
    """Look up the value estimate stored for a flat board."""
    key = hash_board(board)
    return V[key]


def set_value(board, val):
    """Store ``val`` as the value estimate for a flat board."""
    key = hash_board(board)
    V[key] = val

    
def update_estimate(board, target):
    """Move the stored estimate for ``board`` a fraction ``lr`` toward ``target``."""
    current = get_value(board)
    error = target - current
    set_value(board, current + lr * error)
    
    
# Gets grid at next step if agent drops piece in selected column
def drop_piece(grid, col, piece, config):
    """Return a copy of ``grid`` with ``piece`` dropped into column ``col``.

    The piece lands in the empty cell (value 0) with the largest row index in
    that column. The input grid is left unmodified.

    Args:
        grid: 2-D numpy array of cell values.
        col: index of the column to play.
        piece: mark to place in the landing cell.
        config: object exposing a ``rows`` attribute.

    Returns:
        A new array holding the position after the move.

    Raises:
        ValueError: if the column has no empty cell. Without this check the
            top cell of a full column would be silently overwritten.
    """
    next_grid = grid.copy()
    # Scan upward from the bottom row; the first empty cell is the landing spot.
    for row in range(config.rows - 1, -1, -1):
        if next_grid[row][col] == 0:
            next_grid[row][col] = piece
            return next_grid
    raise ValueError("column {} is full".format(col))


# The "4" at the end of the name indicates this function only works for
# games that terminate with 4 in-a-row.
def player_has_won_fast_4(grid, config):
    """Return the mark that owns a run of four equal non-zero cells, or 0.

    Lines are scanned in a fixed order: horizontal, vertical, the diagonal
    running toward increasing row and column, then the diagonal running
    toward increasing row and decreasing column. The first run found wins.

    Args:
        grid: 2-D grid indexable as ``grid[row][col]``.
        config: object exposing ``rows``, ``columns`` and ``inarow``.

    Returns:
        The cell value at the start of the first run found, or 0 if none.
    """
    assert config.inarow == 4

    n_rows = config.rows
    n_cols = config.columns

    # Horizontal runs, starting at (r, c) and extending to the right.
    for r in range(n_rows):
        for c in range(n_cols - 3):
            head = grid[r][c]
            if head != 0 and head == grid[r][c+1] == grid[r][c+2] == grid[r][c+3]:
                return head

    # Vertical runs, starting at (r, c) and extending downward.
    for c in range(n_cols):
        for r in range(n_rows - 3):
            head = grid[r][c]
            if head != 0 and head == grid[r+1][c] == grid[r+2][c] == grid[r+3][c]:
                return head

    # Diagonal runs toward increasing row and increasing column.
    for r in range(n_rows - 3):
        for c in range(n_cols - 3):
            head = grid[r][c]
            if head != 0 and head == grid[r+1][c+1] == grid[r+2][c+2] == grid[r+3][c+3]:
                return head

    # Diagonal runs toward increasing row and decreasing column.
    for r in range(n_rows - 3):
        for c in range(n_cols - 3):
            head = grid[r][c+3]
            if head != 0 and head == grid[r+1][c+2] == grid[r+2][c+1] == grid[r+3][c]:
                return head

    return 0


def behavior_lookahead_1(grid, piece, config):
    """Choose a column for ``piece`` using a one-move lookahead.

    Preference order: a move that produces a win for ``piece``, then a move
    that occupies a cell where the opponent would win, then a random legal
    move.

    Returns:
        The chosen column index, or None when the top row has no empty cell.
    """
    candidates = [c for c in range(config.columns) if grid[0][c] == 0]
    if not candidates:
        return None

    # First pass plays our own piece (take a win); second pass plays the
    # opponent's piece in each column (take the cell they would win with).
    for mover in (piece, other_player(piece)):
        for col in candidates:
            outcome = player_has_won_fast_4(drop_piece(grid, col, mover, config), config)
            if outcome != 0:
                return col

    # Neither side has a decisive move: pick any legal column.
    return random.choice(candidates)


# Simulate two lookahead_1 players from the given grid position.
def simulate(move, player, grid, obs, config):
    """Play ``move`` for ``player`` and roll the game out to its end.

    After the initial move both sides pick moves with behavior_lookahead_1.
    The reward is 1.0 if the winner equals ``obs.mark``, -1.0 for any other
    winner and 0.0 when no move is left; it is multiplied by
    ``discount_factor`` once per move it is passed back through. The value
    estimate of every position reached is updated toward the reward seen
    from that position.

    Returns:
        The reward for the position reached by playing ``move``.
    """
    next_grid = drop_piece(grid, move, player, config)
    winner = player_has_won_fast_4(next_grid, config)

    if winner == obs.mark:
        # The agent's own mark completed four in a row.
        reward = 1.0
    elif winner != 0:
        # The other mark completed four in a row.
        reward = -1.0
    else:
        # Nobody has won yet: the other side replies and the rollout goes on.
        opponent = other_player(player)
        reply = behavior_lookahead_1(next_grid, opponent, config)
        reward = (
            0.0
            if reply is None
            else discount_factor * simulate(reply, opponent, next_grid, obs, config)
        )

    update_estimate(grid_to_board(next_grid), reward)
    return reward
    
    
# Module-level log: agent_monte_carlo appends one counter value per simulated
# episode. Nothing in the visible code clears it, so it keeps growing across calls.
episodes = []
    
def agent_monte_carlo(obs, config):
    """Pick a column by running random playouts until the time budget is spent.

    Simulations start from a uniformly random legal first move and continue
    until ``config.actTimeout - 0.5`` seconds have elapsed. The move whose
    resulting position has the highest stored value estimate is returned;
    ties go to the highest column index.

    Args:
        obs: observation exposing ``board`` (flat cell values) and ``mark``.
        config: object exposing ``columns``, ``actTimeout`` and the
            attributes read by the helper functions.

    Returns:
        The chosen column index. When the top row has no empty cell there is
        no legal move and column 0 is returned.
    """
    # Use the monotonic clock so a system clock adjustment cannot stretch or
    # cut short the search budget.
    deadline = time.monotonic() + config.actTimeout - 0.5

    grid = board_to_grid(obs.board, config)
    valid_moves = [col for col in range(config.columns) if grid[0][col] == 0]
    if not valid_moves:
        # Nothing to simulate; return the default column instead of failing
        # inside random.choice on an empty list.
        return 0

    k = 0
    while time.monotonic() < deadline:
        simulate(random.choice(valid_moves), obs.mark, grid, obs, config)
        episodes.append(k)
        k += 1

    # Start from a legal move and -inf so the result is always a playable
    # column, whatever the stored estimates are.
    best_move = valid_moves[0]
    best_val = float("-inf")
    for move in valid_moves:
        val = get_value(grid_to_board(drop_piece(grid, move, obs.mark, config)))
        if val >= best_val:  # ">=" keeps the later column on ties
            best_val = val
            best_move = move

    return best_move

In [None]:
# # Two agents play one game round
# env.run([agent_monte_carlo, agent_monte_carlo]);
# # Show the game
# env.render(mode="ipython")

In [None]:
# env.play([None, agent_monte_carlo])

In [None]:
%run submission.py

In [None]:
# Validate submission file

import sys
from kaggle_environments import utils

# Keep a reference to the current stdout so it can be put back after the
# submission is loaded.
# NOTE(review): presumably loading the submission can replace sys.stdout - confirm.
out = sys.stdout
submission = utils.read_file("/kaggle/working/submission.py")
# Use the last callable found in the submission source as the agent.
agent = utils.get_last_callable(submission)
sys.stdout = out

# Play one game of the loaded agent against the built-in "random" agent.
env = make("connectx", debug=True)
env.run([agent, "random"])
# The run counts as successful only if both agents finished with status "DONE".
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")