In [1]:
import os
import math
from statistics import mean

import neat
import visualize
import pandas as pd
import numpy as np
import json
from threading import Thread

In [2]:
def eval_one_genome(genome_id, genome, config, x, y):
    """Score one genome on a single puzzle and store the result in ``genome.fitness``.

    The board is fed to the genome's feed-forward network repeatedly. On each
    pass the highest-ranked output that targets a still-empty cell is taken as
    the network's guess: output index ``i`` addresses cell ``i // 9`` and
    digit ``i % 9 + 1``. The guess is compared with the solution, and the cell
    is then filled with the *true* solution value (whether or not the guess
    was right) before the next pass.

    Args:
        genome_id: Identifier of the genome (unused, kept for the caller's
            positional signature).
        genome: Genome to evaluate; its ``fitness`` attribute is overwritten.
        config: NEAT configuration used to build the network.
        x: Puzzle as a flat sequence; empty cells are encoded as ``-0.5``.
            The sequence is copied, never mutated.
        y: Solution as a flat sequence aligned with ``x``; a digit ``d`` is
            expected to be encoded as ``d / 9 - 0.5``.

    Returns:
        None. ``genome.fitness`` is set to the fraction of initially empty
        cells that were guessed correctly, or ``0.0`` when the puzzle has no
        empty cells.
    """
    empty = -0.5
    board = list(x)
    net = neat.nn.FeedForwardNetwork.create(genome, config)

    cells_to_solve = board.count(empty)
    if cells_to_solve == 0:
        # Nothing to guess: avoid dividing by zero and still assign a fitness.
        genome.fitness = 0.0
        return

    num_correct = 0
    while empty in board:
        output = net.activate(board)
        # Output indices ordered from most to least confident.
        ranked = np.flip(np.argsort(output))

        # Most confident prediction that targets a cell which is still empty.
        choice = next(
            (int(idx) for idx in ranked if board[int(idx) // 9] == empty),
            None,
        )
        if choice is None:
            # No output addresses any remaining empty cell (network has too
            # few outputs for this board); stop instead of looping forever.
            # The unreachable cells simply count as wrong.
            break

        board_idx, digit_idx = divmod(choice, 9)
        number = digit_idx + 1
        board[board_idx] = y[board_idx]
        # Tolerant comparison: encoded digits are 1/9 apart, so a tiny
        # tolerance cannot confuse two digits but survives rounding noise.
        num_correct += math.isclose(number / 9 - 0.5, y[board_idx], abs_tol=1e-9)

    genome.fitness = num_correct / cells_to_solve


# def eval_genomes(genomes, config, data, sample_n):
def eval_genomes(genomes, config):
    """Fitness callback for NEAT: score every genome on one random puzzle.

    A single row is drawn from the module-level ``train`` frame, so all
    genomes in the call are scored against the same quiz/solution pair.
    Each genome is evaluated by ``eval_one_genome`` in its own thread, and
    the function returns only after every thread has finished.

    NOTE: ``Thread.join`` does not re-raise exceptions from the worker, so a
    genome whose evaluation fails keeps whatever fitness it had before.

    Args:
        genomes: Iterable of ``(genome_id, genome)`` pairs.
        config: NEAT configuration forwarded to ``eval_one_genome``.
    """
    puzzle = train.sample(n=1)
    x, y = puzzle.quizzes.array[0], puzzle.solutions.array[0]

    workers = [
        Thread(target=eval_one_genome, args=[genome_id, genome, config, x, y])
        for genome_id, genome in genomes
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
        

def run(config_file, checkpoint=None, generations=300, n_examples=5):
    """Train a NEAT population with ``eval_genomes`` and report the winner.

    Args:
        config_file: Path to the NEAT configuration file.
        checkpoint: Optional path to a checkpoint file. When given, the
            population is restored from it instead of being created fresh.
            neat-python checkpoints are pickles, so only restore files you
            trust.
        generations: Maximum number of generations to run.
        n_examples: How many training puzzles to run through the winning
            network and print afterwards; ``None`` prints all of them.

    Returns:
        None. Progress, the winning genome and sample activations are printed;
        the network and statistics plots are produced through ``visualize``.
    """
    print("Configuring Settings")
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    if checkpoint is None:
        # No checkpoint specified, create a new population
        print("Creating Population")
        p = neat.Population(config)
    else:
        # Restore a checkpoint
        print("Restoring Population from Checkpoint")
        p = neat.Checkpointer.restore_checkpoint(checkpoint)

    # Reporters are attached in both branches so that `stats` exists for the
    # plots below and a resumed run keeps logging and checkpointing.
    os.makedirs("checkpoints", exist_ok=True)
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    p.add_reporter(neat.Checkpointer(generation_interval=1, time_interval_seconds=500, filename_prefix="checkpoints/neat-checkpoint-"))

    print("Training Network")
    winner = p.run(eval_genomes, n=generations)
    print("Finished Training")

    # TODO: Run p on test data and get the winner that way

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))

    # TODO: Rewrite this so that it actually fits the new model
    # Show raw output of the most fit genome on a few training puzzles.
    print('\nOutput:')
    winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
    for xi, yi in zip(train_x[:n_examples], train_y[:n_examples]):
        output = winner_net.activate(xi)
        print("input {!r}, expected output {!r}, got {!r}".format(xi, yi, output))

    # No custom labels: nodes are drawn with their default names.
    visualize.draw_net(config, winner, True, node_names={})
    visualize.plot_stats(stats, ylog=False, view=True)
    visualize.plot_species(stats, view=True)

In [3]:
# Load the cleaned puzzles; every cell of the CSV holds a JSON string, so
# each column is decoded element by element with json.loads.
data = pd.read_csv("sudoku_cleaned.csv").apply(lambda column: column.apply(json.loads))

In [4]:
# Split the shuffled data into disjoint train and test sets.
FRAC_TRAIN = 0.5
# Fixed seed so the same rows stay held out after a kernel restart
# (e.g. when resuming training from a checkpoint).
SPLIT_SEED = 42

data = data.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)
# Both sets are cut at the same boundary, so they never overlap and together
# cover every row, whatever FRAC_TRAIN is set to.
n_train = math.floor(data.shape[0] * FRAC_TRAIN)

train = data.iloc[:n_train]
train_x = train["quizzes"]
train_y = train["solutions"]

test = data.iloc[n_train:]
test_x = test["quizzes"]
test_y = test["solutions"]

In [5]:
# Path to the NEAT configuration file. It is given relative to the current
# working directory, so the notebook must be started from the folder that
# contains 'properties.config'.
# The __file__-based lookup below is disabled and kept for reference only:
# local_dir = os.path.dirname(__file__)
# config_path = os.path.join(local_dir, 'properties.config')
config_path = 'properties.config'
run(config_path)

Configuring Settings
Creating Population
Training Network

 ****** Running generation 0 ****** 

Population's average fitness: 0.10087 stdev: 0.03955
Best fitness: 0.21739 - size: (729, 59049) - species 1 - id 29
Average adjusted fitness: 0.079
Mean genetic distance 1.166, standard deviation 0.112
Population of 50 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    0    50      0.2    0.079     0
Total extinctions: 0
Generation time: 199.856 sec
Saving checkpoint to checkpoints/neat-checkpoint-0

 ****** Running generation 1 ****** 

Population's average fitness: 0.11280 stdev: 0.04396
Best fitness: 0.24000 - size: (729, 58748) - species 1 - id 72
Average adjusted fitness: 0.093
Mean genetic distance 1.228, standard deviation 0.069
Population of 50 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    1    50      0.2    0.093     0
Total extinctions: 0
Generation time: 197.579 sec (198.717 average)
Saving checkpoint to checkpoints/neat-checkpo

KeyboardInterrupt: 