# Setup

In [None]:
import time
from datetime import datetime

#measure notebook running time
start_time = time.time()

In [None]:
import numpy as np 
import pandas as pd 
import random
import matplotlib.pyplot as plt
%matplotlib inline

from kaggle_environments import make, evaluate
env = make("connectx", debug=True)

## Downgrading TF
I am preparing to start with RL; this is just a draft.

In [None]:
%%time
!pip install 'tensorflow==1.15.0'
import tensorflow as tf

In [None]:
tf.__version__

In [None]:
%%time
!apt-get update
!apt-get install -y cmake libopenmpi-dev python3-dev zlib1g-dev
!pip install "stable-baselines[mpi]==2.9.0"

In [None]:
import os
from stable_baselines.bench import Monitor 
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO1, A2C, ACER, ACKTR, TRPO
from stable_baselines.a2c.utils import conv, linear, conv_to_fc
from stable_baselines.common.policies import CnnPolicy

In [None]:
from gym import spaces

class ConnectFourGym:
    """Gym-style wrapper around the Kaggle ConnectX trainer.

    The learning agent takes the first seat (``None`` in the agent list) and
    ``agent2`` is handed to ``train`` as its opponent. Observations are the
    flat board reshaped to ``(rows, columns, 1)``.
    """

    def __init__(self, agent2="random"):
        kaggle_env = make("connectx", debug=True)
        self.env = kaggle_env.train([None, agent2])
        self.rows = kaggle_env.configuration.rows
        self.columns = kaggle_env.configuration.columns
        # Learn about spaces here: http://gym.openai.com/docs/#spaces
        self.action_space = spaces.Discrete(self.columns)
        self.observation_space = spaces.Box(
            low=0, high=2, shape=(self.rows, self.columns, 1), dtype=int
        )
        # Tuple corresponding to the min and max possible rewards
        self.reward_range = (-10, 1)
        # StableBaselines throws error if these are not defined
        self.spec = None
        self.metadata = None

    def _observation(self):
        """Return the stored board as a ``(rows, columns, 1)`` array."""
        return np.array(self.obs['board']).reshape(self.rows, self.columns, 1)

    def reset(self):
        """Start a new episode and return the initial board observation."""
        self.obs = self.env.reset()
        return self._observation()

    def change_reward(self, old_reward, done):
        """Map the trainer's reward onto the shaping used for learning.

        Returns 1 when ``old_reward`` is 1, -1 when the episode ended with
        any other reward, and ``1 / (rows * columns)`` for a move that keeps
        the game going.
        """
        if old_reward == 1:  # The agent won the game
            return 1
        if done:  # The opponent won the game
            return -1
        # Small per-move bonus (1/42 on the default 6x7 board)
        return 1/(self.rows*self.columns)

    def step(self, action):
        """Play ``action`` and return ``(observation, reward, done, info)``.

        Choosing a column whose top cell is occupied ends the episode at once
        with a reward of -10, without forwarding the move to the trainer.
        """
        column = int(action)
        if self.obs['board'][column] != 0:
            # Invalid move: end the game and penalize the agent
            return self._observation(), -10, True, {}
        self.obs, raw_reward, done, info = self.env.step(column)
        reward = self.change_reward(raw_reward, done)
        return self._observation(), reward, done, info

In [None]:
def get_win_percentages(agent1, agent2, n_rounds=100):
    """Play ``n_rounds`` ConnectX games and print a summary for both agents.

    Half of the rounds (rounded down) are played with ``agent1`` in the first
    seat; the rest are played with the seats swapped, and those results are
    flipped back so every outcome reads ``[agent1_result, agent2_result]``.
    """
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    first_leg = n_rounds // 2
    second_leg = n_rounds - first_leg
    outcomes = evaluate("connectx", [agent1, agent2], config, [], first_leg)
    swapped_seats = evaluate("connectx", [agent2, agent1], config, [], second_leg)
    # Re-orient the swapped games so index 0 is always agent1.
    outcomes += [[mine, theirs] for [theirs, mine] in swapped_seats]
    total = len(outcomes)
    print("Agent 1 Win Percentage:", np.round(outcomes.count([1,-1])/total, 2))
    print("Agent 2 Win Percentage:", np.round(outcomes.count([-1,1])/total, 2))
    print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))

---

# Agents

## Random

In [None]:
def agent_random(obs, config):
    """Return a random column among those whose top cell is still empty."""
    import random
    open_columns = list(filter(lambda c: obs.board[c] == 0, range(config.columns)))
    return random.choice(open_columns)

## Win & block Check (Simple)

In [None]:
def agent_simple(obs, config):
    """Win if possible, otherwise block, otherwise move at random.

    A move is "winning" for a piece when dropping that piece into the column
    produces ``config.inarow`` equal pieces in some horizontal, vertical or
    diagonal window. The agent first looks for such a move for itself, then
    for the opponent (to block it), and finally falls back to any open column.
    """
    import random
    import numpy as np

    def drop_piece(grid, col, piece, config):
        # Copy the board and put `piece` on the lowest empty cell of `col`.
        board = grid.copy()
        for r in range(config.rows - 1, -1, -1):
            if board[r][col] == 0:
                break
        board[r][col] = piece
        return board

    def lines_of_length(board, length):
        # Yield every window of `length` cells in the four scan directions.
        rows, cols = config.rows, config.columns
        # horizontal
        for r in range(rows):
            for c in range(cols - (length - 1)):
                yield list(board[r, c:c + length])
        # vertical
        for r in range(rows - (length - 1)):
            for c in range(cols):
                yield list(board[r:r + length, c])
        # positive diagonal
        for r in range(rows - (length - 1)):
            for c in range(cols - (length - 1)):
                yield [board[r + k][c + k] for k in range(length)]
        # negative diagonal
        for r in range(length - 1, rows):
            for c in range(cols - (length - 1)):
                yield [board[r - k][c + k] for k in range(length)]

    def check_winning_move(obs, config, col, piece):
        start = np.asarray(obs.board).reshape(config.rows, config.columns)
        after = drop_piece(start, col, piece, config)
        return any(
            line.count(piece) == config.inarow
            for line in lines_of_length(after, config.inarow)
        )

    valid_moves = [c for c in range(config.columns) if obs.board[c] == 0]

    # 1) take an immediate win
    winning = [m for m in valid_moves if check_winning_move(obs, config, m, obs.mark)]
    if winning:
        return random.choice(winning)

    # 2) otherwise deny the opponent's immediate win
    opponent = obs.mark % 2 + 1
    blocking = [m for m in valid_moves if check_winning_move(obs, config, m, opponent)]
    if blocking:
        return random.choice(blocking)

    # 3) nothing urgent: any open column
    return random.choice(valid_moves)

## 1-step lookahead

In [None]:
def one_step_lookahead(obs, config):
    """Score every open column one move ahead and play one of the best.

    Each candidate board is scored from counts of 4-cell windows holding
    two, three or four of the mover's pieces (rest empty), minus penalties
    for windows holding two or three opponent pieces. Ties are broken at
    random.
    """
    import random
    import numpy as np

    def drop_piece(grid, col, mark, config):
        # Copy the board and put `mark` on the lowest empty cell of `col`.
        board = grid.copy()
        for r in range(config.rows - 1, -1, -1):
            if board[r][col] == 0:
                break
        board[r][col] = mark
        return board

    def all_windows(grid, config):
        # Yield every `inarow`-long window in the four scan directions.
        span = config.inarow
        # horizontal
        for r in range(config.rows):
            for c in range(config.columns - (span - 1)):
                yield list(grid[r, c:c + span])
        # vertical
        for r in range(config.rows - (span - 1)):
            for c in range(config.columns):
                yield list(grid[r:r + span, c])
        # positive diagonal
        for r in range(config.rows - (span - 1)):
            for c in range(config.columns - (span - 1)):
                yield [grid[r + k][c + k] for k in range(span)]
        # negative diagonal
        for r in range(span - 1, config.rows):
            for c in range(config.columns - (span - 1)):
                yield [grid[r - k][c + k] for k in range(span)]

    def check_window(window, num_discs, piece, config):
        # Exactly `num_discs` of `piece`, every other cell empty.
        if window.count(piece) != num_discs:
            return False
        return window.count(0) == config.inarow - num_discs

    def count_windows(grid, num_discs, piece, config):
        return sum(
            1 for window in all_windows(grid, config)
            if check_window(window, num_discs, piece, config)
        )

    def get_heuristic(grid, mark, config):
        A = 1e6
        B = 1
        C = 0.5
        D = -0.5
        E = -1e2
        rival = mark % 2 + 1
        num_twos = count_windows(grid, 2, mark, config)
        num_threes = count_windows(grid, 3, mark, config)
        num_fours = count_windows(grid, 4, mark, config)
        num_twos_opp = count_windows(grid, 2, rival, config)
        num_threes_opp = count_windows(grid, 3, rival, config)
        return A*num_fours + B*num_threes + C*num_twos + D*num_twos_opp + E*num_threes_opp

    def score_move(grid, col, mark, config):
        return get_heuristic(drop_piece(grid, col, mark, config), mark, config)

    valid_moves = [c for c in range(config.columns) if obs.board[c] == 0]
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    scores = {}
    for move in valid_moves:
        scores[move] = score_move(grid, move, obs.mark, config)
    top_columns = []
    if scores:
        top_score = max(scores.values())
        top_columns = [move for move, value in scores.items() if value == top_score]
    return random.choice(top_columns)

---

# N-step lookahead

In [None]:
def N_step_lookahead(obs, config):
    """Choose a column with depth-limited minimax and alpha-beta pruning.

    Every open column is scored by dropping the mover's piece and searching
    ``N_STEPS - 1`` further plies. Leaves and terminal boards are scored by
    ``get_heuristic``. Among equally scored columns the one closest to the
    centre of the board is returned.

    Uses ``np.inf`` for the initial search bounds; the ``np.Inf`` alias was
    removed in NumPy 2.0.
    """
    import numpy as np
    N_STEPS = 3

    ########################################################

    def drop_piece(grid, col, mark, config):
        # Copy the board and put `mark` on the lowest empty cell of `col`.
        next_grid = grid.copy()
        for row in range(config.rows-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    def iter_windows(grid, config):
        # Yield every `inarow`-long window; shared by the heuristic and the
        # terminal test so both scan exactly the same cells.
        n = config.inarow
        # horizontal
        for row in range(config.rows):
            for col in range(config.columns - (n - 1)):
                yield list(grid[row, col:col + n])
        # vertical
        for row in range(config.rows - (n - 1)):
            for col in range(config.columns):
                yield list(grid[row:row + n, col])
        # positive diagonal
        for row in range(config.rows - (n - 1)):
            for col in range(config.columns - (n - 1)):
                yield list(grid[range(row, row + n), range(col, col + n)])
        # negative diagonal
        for row in range(n - 1, config.rows):
            for col in range(config.columns - (n - 1)):
                yield list(grid[range(row, row - n, -1), range(col, col + n)])

    def check_window(window, num_discs, piece, config):
        # Exactly `num_discs` of `piece`, every other cell empty.
        return (window.count(piece) == num_discs and window.count(0) == config.inarow - num_discs)

    def count_windows(grid, num_discs, piece, config):
        return sum(
            1 for window in iter_windows(grid, config)
            if check_window(window, num_discs, piece, config)
        )

    def get_heuristic(grid, mark, config):
        # Symmetric weights: own fours/threes/twos are rewarded, the
        # opponent's are penalised by the same magnitudes.
        A = 1e6
        B = 1e2
        C = 1
        D = -1
        E = -1e2
        F = -1e6
        opp = mark % 2 + 1
        num_twos = count_windows(grid, 2, mark, config)
        num_threes = count_windows(grid, 3, mark, config)
        num_fours = count_windows(grid, 4, mark, config)
        num_twos_opp = count_windows(grid, 2, opp, config)
        num_threes_opp = count_windows(grid, 3, opp, config)
        num_fours_opp = count_windows(grid, 4, opp, config)
        score = A*num_fours + B*num_threes + C*num_twos + D*num_twos_opp + E*num_threes_opp + F*num_fours_opp
        return score

    def is_terminal_window(window, config):
        return window.count(1) == config.inarow or window.count(2) == config.inarow

    def is_terminal_node(grid, config):
        # Draw: no empty cell left in the top row.
        if list(grid[0, :]).count(0) == 0:
            return True
        # Win: some window is filled entirely by one player.
        return any(is_terminal_window(window, config) for window in iter_windows(grid, config))

    ########################################################

    # Minimax, ab pruning
    def minimax(node, depth, maximizingPlayer, mark, config, alpha, beta):
        if depth == 0 or is_terminal_node(node, config):
            return get_heuristic(node, mark, config)

        valid_moves = [c for c in range(config.columns) if node[0][c] == 0]

        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark, config)
                value = max(value, minimax(child, depth-1, False, mark, config, alpha, beta))
                if value >= beta:
                    break
                alpha = max(alpha, value)
            return value

        value = np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1, config)
            value = min(value, minimax(child, depth-1, True, mark, config, alpha, beta))
            if value <= alpha:
                break
            beta = min(beta, value)
        return value

    def score_move(grid, col, mark, config, nsteps):
        next_grid = drop_piece(grid, col, mark, config)
        # The opponent moves next, so the first searched ply is minimizing.
        return minimax(next_grid, nsteps-1, False, mark, config, -np.inf, np.inf)

    def innermost(arr):
        # First entry of `arr` with the smallest distance to the centre column.
        mid = (config.columns - 1) / 2
        distance = [-abs(c-mid) for c in arr]
        return arr[np.argmax(distance)]

    ########################################################

    # Centre-out column order, e.g. [3, 2, 4, 1, 5, 0, 6] for 7 columns.
    order = [config.columns//2 - i//2 - 1 if i%2 else config.columns//2 + i//2 for i in range(config.columns)]
    valid_moves = [c for c in order if obs.board[c] == 0]
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    scores = {col: score_move(grid, col, obs.mark, config, N_STEPS) for col in valid_moves}
    best_score = max(scores.values())
    max_cols = [col for col, value in scores.items() if value == best_score]
    return innermost(max_cols)

---

# N-step lookahead (fast)
The N-step lookahead from the tutorial is pretty slow; it is too expensive to look more than 3 steps ahead.<br>
It is possible to make it faster by moving some badly positioned lines, and together with memoization of patterns it gets more than 10x faster.<br>

In [None]:
def N_step_lookahead_fast_neutral(obs, config):
    """Minimax agent (3 plies) that extracts board windows once per node.

    Same search as the plain N-step lookahead, but every node's candidate
    windows ("patterns") are collected once and reused by both the terminal
    test and the heuristic. Own and opponent windows carry equal weights,
    hence "neutral".

    Fixes relative to the previous version:
    * negative diagonals are now collected for every starting row; before,
      the loop was nested wrongly and only the bottom-row anti-diagonal was
      recorded (three times), so wins on the others went unnoticed;
    * ``np.inf`` replaces ``np.Inf``, which was removed in NumPy 2.0.
    """
    import numpy as np
    N_STEPS = 3

    ########################################################

    def board_diagonals(grid, config):
        # All `inarow`-long diagonal windows, in both directions.
        steps = range(config.inarow)
        diags = []
        for col in range(config.columns - (config.inarow - 1)):
            # positive diagonal: row and column grow together
            for row in range(config.rows - (config.inarow - 1)):
                diags.append([grid[row+i][col+i] for i in steps])
            # negative diagonal: row shrinks while the column grows
            for row in range(config.inarow - 1, config.rows):
                diags.append([grid[row-i][col+i] for i in steps])
        return diags

    def board_horizontals(grid, config):
        # Row-wise windows of `grid`; called with grid.T to get the verticals.
        return [
            list(grid[row, col:col + config.inarow])
            for row in range(grid.shape[0])
            for col in range(grid.shape[1] - (config.inarow - 1))
        ]

    def board_to_patterns(grid, config):
        pats = board_diagonals(grid, config)
        pats.extend(board_horizontals(grid, config))
        pats.extend(board_horizontals(grid.T, config))
        # Windows with more than two empty cells never match any count below.
        return [window for window in pats if window.count(0) <= 2]

    def drop_piece(grid, col, mark, config):
        # Copy the board and put `mark` on the lowest empty cell of `col`.
        next_grid = grid.copy()
        for row in range(config.rows-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    def count_windows_in_pattern(patterns, num, piece):
        # Windows holding exactly `num` of `piece` with every other cell empty.
        return sum(
            1 for window in patterns
            if window.count(piece) == num and window.count(0) == (config.inarow - num)
        )

    def get_heuristic(mark, patterns):
        weights = [1, 1e2, 1e6]
        weights_opp = [1, 1e2, 1e6]
        opp = (mark % 2) + 1
        score = 0
        for n in range(3):
            score += count_windows_in_pattern(patterns, n + 2, mark) * weights[n]
            score -= count_windows_in_pattern(patterns, n + 2, opp) * weights_opp[n]
        return score

    def is_terminal_node(grid, config, patterns):
        # draw: no empty cell left in the top row
        if list(grid[0, :]).count(0) == 0:
            return True
        # win: a window completely filled by either player
        return (count_windows_in_pattern(patterns, config.inarow, 1) > 0
                or count_windows_in_pattern(patterns, config.inarow, 2) > 0)

    ########################################################

    # Minimax, ab pruning
    def minimax(node, depth, maximizingPlayer, mark, config, alpha, beta, patterns):
        if depth == 0 or is_terminal_node(node, config, patterns):
            return get_heuristic(mark, patterns)

        valid_moves = [c for c in range(config.columns) if node[0][c] == 0]

        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark, config)
                value = max(value, minimax(child, depth-1, False, mark, config, alpha, beta,
                                           board_to_patterns(child, config)))
                if value >= beta:
                    break
                alpha = max(alpha, value)
            return value

        value = np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1, config)
            value = min(value, minimax(child, depth-1, True, mark, config, alpha, beta,
                                       board_to_patterns(child, config)))
            if value <= alpha:
                break
            beta = min(beta, value)
        return value

    def score_move(grid, col, mark, config, nsteps):
        next_grid = drop_piece(grid, col, mark, config)
        # The opponent moves next, so the first searched ply is minimizing.
        return minimax(next_grid, nsteps-1, False, mark, config, -np.inf, np.inf,
                       board_to_patterns(next_grid, config))

    def innermost(arr):
        # First entry of `arr` with the smallest distance to the centre column.
        mid = (config.columns - 1) / 2
        distance = [-abs(c-mid) for c in arr]
        return arr[np.argmax(distance)]

    ########################################################

    # Centre-out column order, e.g. [3, 2, 4, 1, 5, 0, 6] for 7 columns.
    order = [config.columns//2 - i//2 - 1 if i%2 else config.columns//2 + i//2 for i in range(config.columns)]
    valid_moves = [c for c in order if obs.board[c] == 0]
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    scores = {col: score_move(grid, col, obs.mark, config, N_STEPS) for col in valid_moves}
    best_score = max(scores.values())
    max_cols = [col for col, value in scores.items() if value == best_score]
    return innermost(max_cols)

In [None]:
def N_step_lookahead_fast(obs, config):
    """Minimax agent (5 plies) that extracts board windows once per node.

    Same search as the plain N-step lookahead, but every node's candidate
    windows ("patterns") are collected once and reused by both the terminal
    test and the heuristic. Opponent twos and threes weigh slightly more
    than the mover's own, which biases the agent towards defence.

    Fixes relative to the previous version:
    * negative diagonals are now collected for every starting row; before,
      the loop was nested wrongly and only the bottom-row anti-diagonal was
      recorded (three times), so wins on the others went unnoticed;
    * ``np.inf`` replaces ``np.Inf``, which was removed in NumPy 2.0.
    """
    import numpy as np
    N_STEPS = 5

    ########################################################

    def board_diagonals(grid, config):
        # All `inarow`-long diagonal windows, in both directions.
        steps = range(config.inarow)
        diags = []
        for col in range(config.columns - (config.inarow - 1)):
            # positive diagonal: row and column grow together
            for row in range(config.rows - (config.inarow - 1)):
                diags.append([grid[row+i][col+i] for i in steps])
            # negative diagonal: row shrinks while the column grows
            for row in range(config.inarow - 1, config.rows):
                diags.append([grid[row-i][col+i] for i in steps])
        return diags

    def board_horizontals(grid, config):
        # Row-wise windows of `grid`; called with grid.T to get the verticals.
        return [
            list(grid[row, col:col + config.inarow])
            for row in range(grid.shape[0])
            for col in range(grid.shape[1] - (config.inarow - 1))
        ]

    def board_to_patterns(grid, config):
        pats = board_diagonals(grid, config)
        pats.extend(board_horizontals(grid, config))
        pats.extend(board_horizontals(grid.T, config))
        # Windows with more than two empty cells never match any count below.
        return [window for window in pats if window.count(0) <= 2]

    def drop_piece(grid, col, mark, config):
        # Copy the board and put `mark` on the lowest empty cell of `col`.
        next_grid = grid.copy()
        for row in range(config.rows-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    def count_windows_in_pattern(patterns, num, piece):
        # Windows holding exactly `num` of `piece` with every other cell empty.
        return sum(
            1 for window in patterns
            if window.count(piece) == num and window.count(0) == (config.inarow - num)
        )

    def get_heuristic(mark, patterns):
        weights = [1, 1e2, 1e6]
        weights_opp = [1.1, 1.1e2, 1e6]
        opp = (mark % 2) + 1
        score = 0
        for n in range(3):
            score += count_windows_in_pattern(patterns, n + 2, mark) * weights[n]
            score -= count_windows_in_pattern(patterns, n + 2, opp) * weights_opp[n]
        return score

    def is_terminal_node(grid, config, patterns):
        # draw: no empty cell left in the top row
        if list(grid[0, :]).count(0) == 0:
            return True
        # win: a window completely filled by either player
        return (count_windows_in_pattern(patterns, config.inarow, 1) > 0
                or count_windows_in_pattern(patterns, config.inarow, 2) > 0)

    ########################################################

    # Minimax, ab pruning
    def minimax(node, depth, maximizingPlayer, mark, config, alpha, beta, patterns):
        if depth == 0 or is_terminal_node(node, config, patterns):
            return get_heuristic(mark, patterns)

        valid_moves = [c for c in range(config.columns) if node[0][c] == 0]

        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark, config)
                value = max(value, minimax(child, depth-1, False, mark, config, alpha, beta,
                                           board_to_patterns(child, config)))
                if value >= beta:
                    break
                alpha = max(alpha, value)
            return value

        value = np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1, config)
            value = min(value, minimax(child, depth-1, True, mark, config, alpha, beta,
                                       board_to_patterns(child, config)))
            if value <= alpha:
                break
            beta = min(beta, value)
        return value

    def score_move(grid, col, mark, config, nsteps):
        next_grid = drop_piece(grid, col, mark, config)
        # The opponent moves next, so the first searched ply is minimizing.
        return minimax(next_grid, nsteps-1, False, mark, config, -np.inf, np.inf,
                       board_to_patterns(next_grid, config))

    def innermost(arr):
        # First entry of `arr` with the smallest distance to the centre column.
        mid = (config.columns - 1) / 2
        distance = [-abs(c-mid) for c in arr]
        return arr[np.argmax(distance)]

    ########################################################

    # Centre-out column order, e.g. [3, 2, 4, 1, 5, 0, 6] for 7 columns.
    order = [config.columns//2 - i//2 - 1 if i%2 else config.columns//2 + i//2 for i in range(config.columns)]
    valid_moves = [c for c in order if obs.board[c] == 0]
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    scores = {col: score_move(grid, col, obs.mark, config, N_STEPS) for col in valid_moves}
    best_score = max(scores.values())
    max_cols = [col for col, value in scores.items() if value == best_score]
    return innermost(max_cols)

---

# Deep Reinforcement Learning

In [None]:
# Create ConnectFour environment
env = ConnectFourGym(agent2="random")

In [None]:
# import os
# from stable_baselines.bench import Monitor 
# from stable_baselines.common.vec_env import DummyVecEnv

# Create directory for logging training information
# (the plotting cell further down reads "monitor.csv" from this directory)
log_dir = "ppo/"
os.makedirs(log_dir, exist_ok=True)

# Logging progress: wrap the ConnectFourGym instance in a Monitor that logs into log_dir
monitor_env = Monitor(env, log_dir, allow_early_resets=True)

# Create a vectorized environment holding that single monitored environment
vec_env = DummyVecEnv([lambda: monitor_env])

In [None]:
# Neural network for predicting action values
def modified_cnn(scaled_images, **kwargs):
    """Feature extractor: two 3x3 stride-1 conv layers, then a 512-unit dense layer.

    Every layer is followed by ReLU. ``kwargs`` are forwarded to the two
    convolution layers only.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)
    features = conv(scaled_images, 'c1', n_filters=32, filter_size=3, stride=1,
                    init_scale=gain, **kwargs)
    features = conv(relu(features), 'c2', n_filters=64, filter_size=3, stride=1,
                    init_scale=gain, **kwargs)
    flattened = conv_to_fc(relu(features))
    dense = linear(flattened, 'fc1', n_hidden=512, init_scale=gain)
    return relu(dense)

class CustomCnnPolicy(CnnPolicy):
    """CnnPolicy variant that always uses ``modified_cnn`` as its feature extractor."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, cnn_extractor=modified_cnn, **kwargs)
        
# Initialize agent
model = PPO1(CustomCnnPolicy, vec_env, verbose=0)

In [None]:
%%time
# Train agent
model.learn(total_timesteps=100000)
#model.save("C4model.gz")

In [None]:
#model = model.load("C4model.gz")

In [None]:
# Plot the rolling mean (window of 1000 rows) of the 'r' column from the monitor log
with open(os.path.join(log_dir, "monitor.csv"), 'rt') as fh:    
    # The first line must be a '#'-prefixed header; it is consumed here so
    # that pandas starts parsing at the CSV column names.
    firstline = fh.readline()
    assert firstline[0] == '#'
    df = pd.read_csv(fh, index_col=None)['r']
# NOTE(review): with fewer than 1000 logged rows the rolling mean is presumably
# all NaN and the plot stays empty - confirm against the pandas rolling defaults.
df.rolling(window=1000).mean().plot()
plt.show()


In [None]:
def agent_RL(obs, config):
    """Play the column predicted by the trained model, falling back to random.

    The board is reshaped to ``(config.rows, config.columns, 1)`` before it
    is passed to ``model.predict``, instead of the previously hard-coded
    ``(6, 7, 1)``. If the predicted column is already full, a random open
    column is returned instead.

    NOTE: this function relies on the notebook-level names ``model``, ``np``
    and ``random``; it is not self-contained and cannot be written to a
    submission file as is.
    """
    board = obs['board']
    # Use the trained model to select a column
    col, _ = model.predict(np.array(board).reshape(config.rows, config.columns, 1))
    col = int(col)
    # A column is playable while its top cell is empty
    if board[col] == 0:
        return col
    # Predicted column is full: pick any open column at random
    return random.choice([c for c in range(config.columns) if board[c] == 0])

---

# Simulation & check

In [None]:
env = make("connectx", debug=True)

In [None]:
env.run([agent_RL, "random"])
env.render(mode="ipython")

In [None]:
env.run([agent_RL, agent_simple])
env.render(mode="ipython")

In [None]:
%%time
print("best so far:", 0.15)
get_win_percentages(agent1=agent_simple, agent2=agent_RL)

In [None]:
# env.run([agent_simple, N_step_lookahead_fast])
# env.render(mode="ipython")

In [None]:
# env.run([one_step_lookahead, N_step_lookahead_fast])
# env.render(mode="ipython")

In [None]:
# env.run([N_step_lookahead_fast, N_step_lookahead_fast])
# env.render(mode="ipython")

In [None]:
# env.run([N_step_lookahead_fast_neutral, N_step_lookahead_fast])
# env.render(mode="ipython")

In [None]:
# %%time
# print("best so far (3 steps ahead):", 0.98)
# get_win_percentages(agent1=agent_simple, agent2=N_step_lookahead_fast)

In [None]:
# %%time
# print("best so far:", 0.85, "5-steps ahead")
# print("best so far:", 0.72, "4-steps ahead")
# print("best so far:", 0.83, "3-steps ahead")
# get_win_percentages(agent1=one_step_lookahead, agent2=N_step_lookahead_fast)

In [None]:
# %%time
# print("best so far:", 1.0, "3-steps ahead")
# get_win_percentages(agent1=N_step_lookahead_fast_neutral, agent2=N_step_lookahead_fast)

---

# Submission

In [None]:
import inspect
import os

def write_agent_to_file(function, file):
    """Append the source code of ``function`` to ``file`` and report it.

    The file is created when it does not exist yet; existing content is
    kept, so repeated calls accumulate definitions in the same file.
    """
    # Append mode creates a missing file, which covers both original cases.
    with open(file, "a") as handle:
        source = inspect.getsource(function)
        handle.write(source)
        print(function, "written to", file)

write_agent_to_file(N_step_lookahead_fast, "submission.py")

## Validate Submission

In [None]:
import sys
from kaggle_environments import utils

# Remember the current stdout and restore it after the submission file is read
# (presumably a guard against stdout being replaced along the way - TODO confirm).
out = sys.stdout
# NOTE(review): `submission` is read but never used below. The agent is taken
# straight from the notebook function, so this cell does not exercise the
# contents of submission.py itself.
submission = utils.read_file("/kaggle/working/submission.py")
# agent = utils.get_last_callable(submission)
agent = N_step_lookahead_fast
sys.stdout = out

# Self-play one episode and report success when both seats finish with status "DONE".
env = make("connectx", debug=True)
env.run([agent, agent])
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")

In [None]:
# Report the wall-clock time elapsed since `start_time` was taken in the first cell.
end_time = time.time()
print("Notebook run time: {:.1f} seconds. Finished at {}".format(end_time - start_time, datetime.now()) )