In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk /kaggle/input and print the full path of every file found.
# The directory-name list yielded by os.walk is not needed, hence the `_`.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # os.walk yields bare file names; join with the containing directory
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install neat-python

In [None]:
import multiprocessing
import os
import pickle

import neat
import numpy as np

from kaggle_environments import make

num_games = 15  # games played per genome; eval_genome loops over range(num_games)

#sigmoid seems to be performing better and running faster

#reward is -1, 0, 1 for l, d, w

#figure out if you win with 4 or random loses
#https://www.kaggle.com/matthewyu01/connect-4-agent

# Fitness function: build the genome's feed-forward network and score it over num_games ConnectX games.
def eval_genome(genome, config):
    """Score one genome by playing ``num_games`` ConnectX games against "random".

    A feed-forward network is built from the genome. On every turn the flat
    board observation is fed to the network and the index of the largest
    output is played as the action.

    Fitness contributed by each game, based on the reward of the final step:

    * reward ``1`` and ``check_win`` reports a four-in-a-row for piece 1 on
      the final board: ``+1.5``
    * reward ``1`` without such a four-in-a-row: ``+1``
    * any other non-``None`` reward: the reward itself is added
    * reward ``None``: the game contributes ``0``

    Uses the module-level ``num_games`` and ``check_win``; both must be
    defined before this function is called.

    :param genome: NEAT genome to evaluate.
    :param config: NEAT configuration used to build the network.
    :return: Sum of the per-game fitness values.
    """
    net = neat.nn.FeedForwardNetwork.create(genome, config)

    fitnesses = []
    for _ in range(num_games):
        env = make("connectx", debug=True)
        # The None slot is the seat driven by this loop; the other seat is "random".
        trainer = env.train([None, "random"])

        observation = trainer.reset()['board']
        fitness = 0.0
        done = False
        # The board is deliberately not rendered here: env.render() on every
        # move only slows evaluation down and floods the cell output.
        while not done:
            action = int(np.argmax(net.activate(observation)))
            step_observation, reward, done, _ = trainer.step(action)
            observation = step_observation['board']

        # Only the reward of the last step is scored (the loop runs at least
        # once, so `reward` is always bound here).
        if reward is not None:
            if reward == 1:
                # Reward a real four-in-a-row more than a win without one.
                grid = np.asarray(observation).reshape(6, 7)
                fitness += 1.5 if check_win(grid) == 1 else 1
            else:
                fitness += reward
        fitnesses.append(fitness)

    return np.sum(fitnesses)  # was np.mean


def eval_genomes(genomes, config):
    """Assign ``genome.fitness`` for every genome, one after another.

    :param genomes: Iterable of ``(genome_id, genome)`` pairs.
    :param config: NEAT configuration, forwarded to ``eval_genome``.
    """
    # eval_genome(genome, config) builds the network itself, so building it
    # here as well would only duplicate that work.
    for _genome_id, genome in genomes:
        genome.fitness = eval_genome(genome, config)


def run():
    """Evolve a ConnectX player against the random agent and pickle the winner.

    Loads the NEAT configuration from ``../input/connect-4-neat-config/config``,
    runs the population for at most 25 generations using ``eval_genomes`` and
    writes the best genome to ``/kaggle/working/winner_random``.
    """
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         '../input/connect-4-neat-config/config')

    pop = neat.Population(config)
    stats = neat.StatisticsReporter()
    pop.add_reporter(stats)
    pop.add_reporter(neat.StdOutReporter(True))

    # Genomes are evaluated serially through eval_genomes. The worker pool
    # that used to be created here was never handed to pop.run and only kept
    # idle processes alive for the whole run, so it is no longer created.
    winner = pop.run(eval_genomes, 25)

    # Save the winner.
    with open('/kaggle/working/winner_random', 'wb') as f:
        pickle.dump(winner, f)

    print(winner)


# Start the training run when this cell is executed.
# NOTE(review): eval_genome calls check_win, which is defined in a later cell --
# on a fresh kernel that cell has to be executed before this one.
if __name__ == '__main__':
    run()

In [None]:
import shutil

In [None]:
# NOTE(review): no cell shown here writes a file named 'winner' -- the run() cells
# save to '/kaggle/working/winner_random' and '/kaggle/working/winner_negamax2' --
# so confirm that the source file exists before relying on this move.
shutil.move('winner','/kaggle/working/winner')

In [None]:
def check_window(window, num_discs, piece):
    """Tell whether a 4-cell window holds exactly ``num_discs`` of ``piece`` and is otherwise empty.

    :param window: List of four cell values (0 marks an empty cell).
    :param num_discs: Required number of cells equal to ``piece``.
    :param piece: Player marker to count.
    :return: True if the counts match, False otherwise.
    """
    if window.count(piece) != num_discs:
        return False
    empty_cells_required = 4 - num_discs
    return window.count(0) == empty_cells_required
    
def check_win(grid, inarow=4):
    """Return the piece (1 or 2) that owns a complete line on the board, else 0.

    Lines are searched horizontally, vertically, along the positive diagonal
    and along the negative diagonal, in that order; the first complete line
    found decides the result.

    The board dimensions are read from ``grid`` instead of being fixed at
    6x7, and the line length is configurable, so non-default ConnectX boards
    are handled as well. For a 6x7 board with ``inarow=4`` the result is the
    same as before.

    :param grid: 2-D board (NumPy array or nested sequence); 0 marks an empty
        cell, 1 and 2 mark the two players' discs.
    :param int inarow: Number of equal discs in a line that counts as a win.
    :return: 1 or 2 for the player owning a complete line, 0 if there is none.
    :raises ValueError: If ``grid`` is not 2-dimensional or ``inarow`` < 1.
    """
    board = np.asarray(grid)
    if board.ndim != 2:
        raise ValueError("grid must be 2-dimensional")
    if inarow < 1:
        raise ValueError("inarow must be at least 1")

    n_rows, n_cols = board.shape
    span = inarow - 1  # offset from the first to the last cell of a window

    # (row step, column step) per direction, in search order:
    # horizontal, vertical, positive diagonal, negative diagonal.
    directions = ((0, 1), (1, 0), (1, 1), (-1, 1))

    for d_row, d_col in directions:
        for row in range(n_rows):
            # Skip start rows whose window would leave the board vertically.
            if not 0 <= row + span * d_row < n_rows:
                continue
            # Windows that move sideways need `span` free columns to the right.
            for col in range(n_cols - span * d_col):
                first = board[row, col]
                if first not in (1, 2):
                    continue
                if all(board[row + k * d_row, col + k * d_col] == first
                       for k in range(1, inarow)):
                    return int(first)
    return 0



# NEGAMAX TRAINING

In [None]:
import multiprocessing
import os
import pickle

import neat
import numpy as np

from kaggle_environments import make

num_games = 20  # games played per network; eval_genome loops over range(num_games)

#sigmoid seems to be performing better and running faster

#reward is -1, 0, 1 for l, d, w

#figure out if you win with 4 or random loses
#https://www.kaggle.com/matthewyu01/connect-4-agent

# Fitness function: score an already-built network over num_games ConnectX games against negamax.
def eval_genome(net):
    """Score a network by playing ``num_games`` ConnectX games against "negamax".

    On every turn the flat board observation is fed to ``net`` and the index
    of the largest output is played as the action.

    Fitness contributed by each game, based on the reward of the final step:

    * reward ``-1`` while ``check_win`` finds no four-in-a-row on the final
      board (a loss caused by an invalid move): ``-2``
    * any other non-``None`` reward: the reward itself is added
    * reward ``None``: the game contributes ``0``

    Uses the module-level ``num_games`` and ``check_win``; both must be
    defined before this function is called.

    :param net: Object exposing ``activate(observation)`` that returns one
        score per action.
    :return: Sum of the per-game fitness values.
    """
    fitnesses = []
    for _ in range(num_games):
        env = make("connectx", debug=True)
        # The None slot is the seat driven by this loop; the other seat is "negamax".
        trainer = env.train([None, "negamax"])

        observation = trainer.reset()['board']
        fitness = 0.0
        done = False
        # The board is deliberately not rendered here: env.render() on every
        # move only slows evaluation down and floods the cell output.
        while not done:
            action = int(np.argmax(net.activate(observation)))
            step_observation, reward, done, _ = trainer.step(action)
            observation = step_observation['board']

        # Only the reward of the last step is scored (the loop runs at least
        # once, so `reward` is always bound here).
        if reward is not None:
            lost = reward == -1
            if lost and check_win(np.asarray(observation).reshape(6, 7)) == 0:
                fitness -= 2  # punishes losing by placing an invalid move
            else:
                fitness += reward
        fitnesses.append(fitness)

    return np.sum(fitnesses)  # was np.mean


def eval_genomes(genomes, config):
    """Build a network for every genome and store its score in ``genome.fitness``.

    :param genomes: Iterable of ``(genome_id, genome)`` pairs.
    :param config: NEAT configuration used to build each network.
    """
    for _genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        genome.fitness = eval_genome(net)


def run(checkpoint_path='/kaggle/working/neat-checkpoint-3', generations=25):
    """Continue (or start) NEAT training against negamax and pickle the winner.

    If ``checkpoint_path`` exists the population is restored from it;
    otherwise a fresh population is created from the config file
    ``../input/connect-4-neat-config/config``, so the cell also works in a
    session that has no checkpoint yet. The best genome is written to
    ``/kaggle/working/winner_negamax2``.

    :param str checkpoint_path: Checkpoint file to resume from.
    :param int generations: Maximum number of generations to run.
    """
    if os.path.exists(checkpoint_path):
        # Checkpoints are pickles: only restore files written by this notebook.
        pop = neat.Checkpointer.restore_checkpoint(checkpoint_path)
    else:
        print("Checkpoint {0} not found, starting a new population".format(checkpoint_path))
        config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                             neat.DefaultSpeciesSet, neat.DefaultStagnation,
                             '../input/connect-4-neat-config/config')
        pop = neat.Population(config)

    stats = neat.StatisticsReporter()
    pop.add_reporter(stats)
    pop.add_reporter(neat.StdOutReporter(True))
    # Arguments are (generation_interval, time_interval_seconds).
    pop.add_reporter(neat.Checkpointer(2, 1000))

    winner = pop.run(eval_genomes, generations)

    # Save the winner.
    with open('/kaggle/working/winner_negamax2', 'wb') as f:
        pickle.dump(winner, f)

    print(winner)


# Start the negamax training run when this cell is executed.
if __name__ == '__main__':
    run()

Reference: source of the neat-python `checkpoint` module: https://neat-python.readthedocs.io/en/latest/_modules/checkpoint.html

In [None]:

import gzip
import random
import time

try:
    import cPickle as pickle  # pylint: disable=import-error
except ImportError:
    import pickle  # pylint: disable=import-error

from neat.population import Population
from neat.reporting import BaseReporter


class Checkpointer(BaseReporter):
    """
    Reporter that periodically pickles the simulation state (generation,
    config, population, species set and the state of ``random``) so that a
    run can be resumed later.
    """

    def __init__(self, generation_interval=2, time_interval_seconds=300,
                 filename_prefix='/kaggle/working/neat-checkpoint-'):
        """
        Configure how often a checkpoint is written. A checkpoint is saved at
        the end of a generation as soon as either limit has been reached.

        :param generation_interval: Maximum number of generations between checkpoints; None disables this limit
        :type generation_interval: int or None
        :param time_interval_seconds: Maximum number of seconds between checkpoints; None disables this limit
        :type time_interval_seconds: float or None
        :param str filename_prefix: Start of the checkpoint filename (the generation number is appended)
        """
        self.filename_prefix = filename_prefix
        self.generation_interval = generation_interval
        self.time_interval_seconds = time_interval_seconds

        # Bookkeeping for deciding when the next checkpoint is due.
        self.last_time_checkpoint = time.time()
        self.last_generation_checkpoint = -1
        self.current_generation = None

    def start_generation(self, generation):
        """Remember which generation is currently being evaluated."""
        self.current_generation = generation

    def end_generation(self, config, population, species_set):
        """Write a checkpoint if the time limit or the generation limit was reached."""
        due = False

        # The time limit is looked at first ...
        if self.time_interval_seconds is not None:
            elapsed = time.time() - self.last_time_checkpoint
            due = elapsed >= self.time_interval_seconds

        # ... and the generation limit only if the time limit did not trigger.
        if not due and self.generation_interval is not None:
            generations_passed = self.current_generation - self.last_generation_checkpoint
            due = generations_passed >= self.generation_interval

        if not due:
            return

        self.save_checkpoint(config, population, species_set, self.current_generation)
        self.last_generation_checkpoint = self.current_generation
        self.last_time_checkpoint = time.time()

    def save_checkpoint(self, config, population, species_set, generation):
        """Pickle the current simulation state into a gzip file named prefix + generation."""
        filename = '{0}{1}'.format(self.filename_prefix, generation)
        print("Saving checkpoint to {0}".format(filename))

        state = (generation, config, population, species_set, random.getstate())
        with gzip.open(filename, 'w', compresslevel=5) as f:
            pickle.dump(state, f, protocol=pickle.HIGHEST_PROTOCOL)

    @staticmethod
    def restore_checkpoint(filename):
        """Rebuild a Population from a checkpoint file and restore the ``random`` state."""
        # The file is unpickled as-is, so it must come from a trusted source.
        with gzip.open(filename) as f:
            generation, config, population, species_set, rndstate = pickle.load(f)

        random.setstate(rndstate)
        return Population(config, (population, species_set, generation))