# Game of Life - Random Forest

Here we train Random Forest classifiers to predict the starting board in the Reverse Game of Life.

TODO: use LightGBM with custom loss function
- https://lightgbm.readthedocs.io/en/latest/
- https://towardsdatascience.com/custom-loss-functions-for-gradient-boosting-f79c1b40466d

In [None]:
# Source: https://github.com/JamesMcGuigan/ai-games/blob/master/puzzles/game_of_life/utils/util.py
from typing import Dict

import numpy as np
import pandas as pd
from fastcache import clru_cache


@clru_cache(None)
def csv_column_names(key='start'):
    """Return the 625 CSV column labels ``{key}_0`` ... ``{key}_624`` of one 25x25 board.

    The result is memoised by ``clru_cache``; repeated calls with the same `key`
    are expected to hand back the same list object, so treat it as read-only.
    """
    cell_count = 25 * 25
    return [ f'{key}_{cell}' for cell in range(cell_count) ]


def csv_to_delta(df, idx):
    """Return the ``delta`` value of the row labelled `idx`, converted to a plain ``int``."""
    row = df.loc[idx]
    return int(row['delta'])

def csv_to_delta_list(df):
    """Return the whole ``delta`` column of `df` through its ``.values`` attribute."""
    delta_column = df['delta']
    return delta_column.values


def csv_to_numpy(df, idx, key='start') -> np.ndarray:
    """Extract the 25x25 board stored in row `idx` of `df`.

    Parameters
    ----------
    df  : DataFrame holding the ``{key}_0`` ... ``{key}_624`` columns.
    idx : index label of the wanted row (looked up with ``df.loc``).
    key : column prefix, e.g. ``'start'`` or ``'stop'``.

    Returns
    -------
    np.ndarray of shape (25, 25) and dtype int8. If the row or the columns
    cannot be read, an all-zero board is returned instead (best-effort).
    """
    try:
        columns = csv_column_names(key)
        board   = df.loc[idx][columns].values
    except Exception:
        # Deliberate best-effort fallback (e.g. a frame without `key` columns).
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        board = np.zeros((25, 25), dtype=np.int8)
    board = board.reshape((25,25))
    return board.astype(np.int8)


def csv_to_numpy_list(df, key='start') -> np.ndarray:
    """Extract every board of `df` as one stacked array.

    Parameters
    ----------
    df  : DataFrame holding the ``{key}_0`` ... ``{key}_624`` columns.
    key : column prefix, e.g. ``'start'`` or ``'stop'``.

    Returns
    -------
    np.ndarray of shape (n_rows, 25, 25) and dtype int8. If the columns cannot
    be read, an empty (0, 25, 25) array is returned instead (best-effort).
    """
    try:
        columns = csv_column_names(key)
        output  = df[columns].values.reshape(-1,25,25)
    except Exception:
        # Deliberate best-effort fallback (e.g. test data without `start` columns).
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        output  = np.zeros((0,25,25), dtype=np.int8)
    return output.astype(np.int8)


# noinspection PyTypeChecker,PyUnresolvedReferences
def numpy_to_dict(board: np.ndarray, key='start') -> Dict:
    """Flatten a 2D board into a ``{"<key>_<n>": value}`` dict.

    Cells are numbered in flattened (row-major) order starting at 0.
    `key` must be ``'start'`` or ``'stop'`` and `board` must be two-dimensional;
    both conditions are enforced with ``assert``.
    """
    assert len(board.shape) == 2  # we want 2D solutions_3d[0] not 3D solutions_3d
    assert key in { 'start', 'stop' }

    cells = np.array(board).flatten().tolist()
    return { f"{key}_{position}": value for position, value in enumerate(cells) }


def numpy_to_series(board: np.ndarray, key='start') -> pd.Series:
    """Return `numpy_to_dict(board, key)` wrapped in a ``pd.Series`` keyed by the cell labels."""
    return pd.Series(numpy_to_dict(board, key))


# Source: https://stackoverflow.com/questions/8290397/how-to-split-an-iterable-in-constant-size-chunks
def batch(iterable, n=1):
    """Yield consecutive slices of `iterable`, each holding at most `n` items.

    `iterable` must support ``len()`` and slicing; the final slice may be shorter.
    """
    total = len(iterable)
    for start in range(0, total, n):
        stop = min(start + n, total)
        yield iterable[start:stop]

In [None]:
# Source: https://github.com/JamesMcGuigan/ai-games/blob/master/puzzles/game_of_life/utils/game.py

# Functions for implementing Game of Life Forward Play
from typing import List
from typing import Union

import numpy as np
import scipy.sparse
from joblib import delayed
from joblib import Parallel
from numba import njit


# Source: https://www.kaggle.com/ianmoone0617/reversing-conways-game-of-life-tutorial
def life_step_numpy(X: np.ndarray):
    """One Game of Life generation on a wrap-around board, using ``np.roll`` shifts.

    The eight shifted copies of `X` are summed to get each cell's neighbour count;
    a cell is set when it has exactly 3 neighbours, or is set in `X` and has exactly 2.
    """
    offsets = [ (i, j)
                for i in (-1, 0, 1) for j in (-1, 0, 1)
                if (i, j) != (0, 0) ]
    nbrs_count = sum(np.roll(X, (i, j), axis=(0, 1)) for (i, j) in offsets)
    born      = (nbrs_count == 3)
    surviving = X & (nbrs_count == 2)
    return born | surviving


# Source: https://www.kaggle.com/ianmoone0617/reversing-conways-game-of-life-tutorial
def life_step_scipy(X: np.ndarray):
    """One Game of Life generation on a wrap-around board, using a 3x3 convolution.

    The convolution with an all-ones kernel counts each cell plus its neighbours,
    so `X` is subtracted again to leave the neighbour count only.
    """
    from scipy.signal import convolve2d
    kernel     = np.ones((3, 3))
    window_sum = convolve2d(X, kernel, mode='same', boundary='wrap')
    nbrs_count = window_sum - X
    born       = (nbrs_count == 3)
    surviving  = X & (nbrs_count == 2)
    return born | surviving



# NOTE: @njit doesn't like np.roll(axis=) so reimplement explicitly
@njit
def life_neighbours_xy(board: np.ndarray, x, y, max_value=3):
    """Sum the eight cells surrounding (x, y), wrapping around the board edges.

    Counting stops as soon as the running total exceeds `max_value`, and that
    partial total is returned; for a 0/1 board this is ``max_value + 1``.
    """
    rows  = board.shape[0]
    cols  = board.shape[1]
    total = 0
    for dx in range(-1, 2):
        for dy in range(-1, 2):
            if dx == 0 and dy == 0:
                continue                # the cell itself is not a neighbour
            total += board[(x + dx) % rows, (y + dy) % cols]
            if total > max_value:       # early exit: already overpopulated
                return total
    return total


@njit
def life_neighbours(board: np.ndarray, max_value=3):
    """Return an int8 array of `board`'s shape holding `life_neighbours_xy` for every cell.

    `max_value` is forwarded unchanged, so counts are capped the same way as in
    `life_neighbours_xy`.
    """
    rows   = board.shape[0]
    cols   = board.shape[1]
    counts = np.zeros(board.shape, dtype=np.int8)
    for row in range(rows):
        for col in range(cols):
            counts[row, col] = life_neighbours_xy(board, row, col, max_value)
    return counts


@njit
def life_step_njit(board: np.ndarray) -> np.ndarray:
    """One Game of Life generation on a wrap-around board, as explicit numba-compiled loops.

    Returns a new int8 array of the same shape; `board` itself is not modified.
    """
    rows = board.shape[0]
    cols = board.shape[1]
    next_board = np.zeros(board.shape, dtype=np.int8)
    for x in range(rows):
        for y in range(cols):
            count = life_neighbours_xy(board, x, y, max_value=3)
            state = board[x, y]
            if state == 0:
                if count == 3:                  # birth
                    next_board[x, y] = 1
            elif state == 1:
                if count == 2 or count == 3:    # survival
                    next_board[x, y] = 1
    return next_board

life_step = life_step_njit  # create global alias


def life_steps(boards: Union[List[np.ndarray],np.ndarray]) -> List[np.ndarray]:
    """Apply `life_step` to every board in `boards` through joblib workers.

    NOTE: the annotation says ``List``, but the results are wrapped in
    ``np.array`` before being returned.
    """
    jobs    = ( delayed(life_step)(board) for board in boards )
    results = Parallel(n_jobs=-1)(jobs)
    return np.array(results)


@njit
def life_step_delta(board: np.ndarray, delta):
    """Play `board` forward by `delta` generations and return the final board."""
    for _ in range(delta):
        board = life_step(board)
    return board


def life_steps_delta(boards: np.ndarray, deltas: np.ndarray) -> np.ndarray:
    """Run `life_step_delta` on each (board, delta) pair through joblib workers.

    `boards` and `deltas` are paired with ``zip``; the per-board results are
    stacked into a single array.
    """
    jobs    = ( delayed(life_step_delta)(board, delta) for (board, delta) in zip(boards, deltas) )
    results = Parallel(n_jobs=-1)(jobs)
    return np.array(results)


def life_step_3d(board: np.ndarray, delta):
    """Return the full history of `board` over `delta` generations.

    Parameters
    ----------
    board : 2D starting board.
    delta : number of forward generations to play.

    Returns
    -------
    int8 array of shape (delta + 1, *board.shape); entry 0 is the input board
    and entry ``t`` is the board after ``t`` calls to `life_step`.
    """
    # Collect frames in a list and convert once: np.append() inside the loop
    # would re-copy the whole stack on every generation.
    frames = [ board ]
    for _ in range(delta):
        board = life_step(board)
        frames.append(board)
    return np.array(frames, dtype=np.int8)


# RULES: https://www.kaggle.com/c/conway-s-reverse-game-of-life/data
def generate_random_board(shape=(25,25)):
    """Generate one non-empty random board following the competition's recipe.

    A random-density board is drawn, then advanced by 5 warmup generations.
    If the board becomes empty at any warmup step, generation restarts from scratch.
    """
    # An initial board was chosen by filling the board with a random density between 1% full (mostly zeros) and 99% full (mostly ones).
    # DOCS: https://cmdlinetips.com/2019/02/how-to-create-random-sparse-matrix-of-specific-density/
    fill_ratio = np.random.random() * 0.98 + 0.01
    sparse     = scipy.sparse.random(*shape, density=fill_ratio, data_rvs=np.ones)
    board      = sparse.toarray().astype(np.int8)

    # The starting board's state was recorded after the 5 "warmup steps". These are the values in the start variables.
    warmup_steps = 5
    for _ in range(warmup_steps):
        board = life_step(board)
        if not board.any():
            # exclude empty boards and try again
            return generate_random_board(shape)
    return board

def generate_random_boards(count: int, shape=(25,25)) -> np.ndarray:
    """Generate `count` boards with `generate_random_board` through joblib workers.

    Returns an int8 array reshaped to ``(-1, *shape)``.
    """
    jobs   = ( delayed(generate_random_board)(shape) for _ in range(count) )
    boards = Parallel(n_jobs=-1)(jobs)
    stacked = np.array(boards, dtype=np.int8)
    return stacked.reshape((-1, *shape))


In [None]:
import os
import sys
import pandas as pd
import numpy as np
# from xgboost.sklearn import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from scipy import signal

In [None]:
# Load the competition CSVs: `id` becomes the index and every remaining column is cast to int8
# (presumably to keep the memory footprint small -- the boards are 0/1 cells plus a small delta).
train_df      = pd.read_csv('/kaggle/input/conways-reverse-game-of-life-2020/train.csv',             index_col='id').astype(np.int8)
test_df       = pd.read_csv('/kaggle/input/conways-reverse-game-of-life-2020/test.csv',              index_col='id').astype(np.int8)
# submission_df = pd.read_csv('/kaggle/input/conways-reverse-game-of-life-2020/sample_submission.csv', index_col='id').astype(np.int8)

In [None]:
# DOCS: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
# DOCS: https://mljar.com/blog/random-forest-memory/
# DOCS: https://medium.com/swlh/post-pruning-decision-trees-using-python-b5d4bcda8e23

# Two forests with identical settings except max_depth:
#   classifier_1 is fitted on the preprocessed stop boards,
#   classifier_2 is fitted on those features plus classifier_1's predictions (second stage).
# NOTE(review): with warm_start=True, calling fit() again without raising n_estimators is
# documented by scikit-learn to add no new trees -- re-create the classifier before re-running
# a training cell if a fresh fit is wanted. TODO confirm this is the intended behaviour.
classifier_1 = RandomForestClassifier(
    max_depth=25,     # shallower trees: max_depth affects memory usage
    ccp_alpha=0.005,  # cost-complexity pruning strength (prune tree)
    warm_start=True,
    n_jobs=-1,
    verbose=0,
)

classifier_2 = RandomForestClassifier(
    max_depth=100,     # deeper trees allowed for the second stage; max_depth affects memory usage
    ccp_alpha=0.005,  # cost-complexity pruning strength (prune tree)
    warm_start=True,
    n_jobs=-1,
    verbose=0,
)


### https://www.kaggle.com/c/santander-product-recommendation/discussion/25506
### You can also use XGBClassifier sklearn-valid API and put it as a classifier in OneVsRestClassifier, according to:
### http://scikit-learn.org/stable/modules/multiclass.html
# classifier = XGBClassifier(
#     learning_rate=0.1,
#     n_estimators=1000,
#     max_depth=5,
#     min_child_weight=1,
#     gamma=0,
#     subsample=0.8,
#     colsample_bytree=0.8,
#     objective='multi:softprob',
#     nthread=4,
#     num_class=625,
#     seed=27
# )    

In [None]:
def preprocess_X(X, deltas):
    """Build the classifier feature matrix for a batch of boards.

    Parameters
    ----------
    X      : batch of 2D boards, indexable along the first axis (e.g. shape (n, 25, 25)).
    deltas : one delta per board (array, list or Series), or a single scalar
             (Python or NumPy number) that is applied to every board.

    Returns
    -------
    2D array with one row per board: the flattened board, followed by the
    flattened wrap-around neighbour counts, followed by that board's delta.
    The board and neighbour features are cast to int8 before the delta column
    is appended; the final dtype is whatever ``np.concatenate`` promotes to.
    """
    n_boards = len(X)

    # Broadcast any scalar delta (not just Python `int`) to one value per board.
    if np.ndim(deltas) == 0:
        deltas = np.ones((n_boards, 1)) * deltas
    deltas = np.array(deltas).reshape(-1, 1)

    # 3x3 kernel with a zero centre: the convolution counts the eight neighbours only.
    count_neighbours = np.array([[1,1,1],[1,0,1],[1,1,1]], dtype=np.int8)
    features = np.array([
        [
            board,
            signal.convolve2d(board, count_neighbours, boundary='wrap', mode='same')
        ]
        for board in X
    ], dtype=np.int8)

    # Explicit column count so that an empty batch reshapes cleanly (-1 is ambiguous for size 0).
    cells_per_board = int(np.prod(np.shape(X)[1:]))
    features = features.reshape(n_boards, 2 * cells_per_board)
    return np.concatenate([ features, deltas ], axis=1)

def preprocess_Y(Y):
    """Flatten each target board into one row: (n, ...) -> (n, cells)."""
    n_boards = Y.shape[0]
    return Y.reshape(n_boards, -1)

In [None]:
def tree_accuracy( Y_actual, Y_pred ):
    """Mean per-board fraction of matching cells between two batches of 625-cell boards."""
    actual_rows  = Y_actual.reshape(-1,625)
    pred_rows    = Y_pred.reshape(-1,625)
    matches      = np.count_nonzero( actual_rows == pred_rows, axis=1 )
    per_board    = matches / 625
    return np.mean( per_board )

# Train

TODO: generate random board data

In [None]:
# When KAGGLE_KERNEL_RUN_TYPE is 'Interactive', keep only the first 100 rows of each frame
# so the cells below run quickly; any other value (default 'Localhost') uses the full data.
if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', 'Localhost') == 'Interactive':
    train_df = train_df[:100]
    test_df  = test_df[:100]

In [None]:
# Reverse problem: the model input (X) is the `stop` board, the target (Y) is the `start` board.
train_deltas = train_df['delta'].to_numpy()
train_X_raw  = csv_to_numpy_list( train_df, key='stop'  )
train_Y_raw  = csv_to_numpy_list( train_df, key='start' )

In [None]:
%%time
# Stage 1: fit classifier_1 on (stop board, neighbour counts, delta) -> flattened start board.
train_X = preprocess_X(train_X_raw, train_deltas)
train_Y = preprocess_Y(train_Y_raw)
print(f'train_X.shape: {train_X.shape}, train_Y.shape: {train_Y.shape}')

classifier_1.fit(train_X, train_Y)

# Predict start boards for the training rows themselves, then play each prediction
# forward by its delta to obtain the stop board it would actually produce.
train_Y_pred = classifier_1.predict(train_X).reshape(-1,25,25)
train_X_pred = life_steps_delta(train_Y_pred, train_deltas)

# Y accuracy: predicted vs true start cells. X accuracy: forward-played vs given stop cells.
# NOTE(review): both are computed on the rows the model was fitted on, i.e. training scores.
print('train_Y_accuracy: ', tree_accuracy(train_Y_raw, train_Y_pred))
print('train_X_accuracy: ', tree_accuracy(train_X_raw, train_X_pred))

In [None]:
%%time
# Stage 2: extend the stage-1 features with classifier_1's predicted start board and the
# stop board obtained by playing that prediction forward, then fit classifier_2 on the result.
# NOTE(review): the appended predictions are in-sample outputs of classifier_1 -- confirm
# that this is intended, since the test-time predictions are out-of-sample.
train_X2 = np.concatenate([
    train_X,
    train_Y_pred.reshape(-1,625),
    train_X_pred.reshape(-1,625),
    # (train_X_raw != train_X_pred).reshape(-1,625),
    # preprocess_X(train_X_pred, train_df['delta']),    
    # preprocess_X(train_Y_pred, train_df['delta']),    
], axis=1)
classifier_2.fit(train_X2, train_Y)
# Same two training-set scores as in stage 1, now for classifier_2.
train_Y2_pred = classifier_2.predict(train_X2).reshape(-1,25,25)
train_X2_pred = life_steps_delta(train_Y2_pred, train_deltas)
print('train_Y2_accuracy: ', tree_accuracy(train_Y_raw, train_Y2_pred))
print('train_X2_accuracy: ', tree_accuracy(train_X_raw, train_X2_pred))

# Submission

In [None]:
# Stage 1 on the test set: same preprocessing as for training, applied to the `stop` boards.
test_deltas = test_df['delta'].to_numpy()
test_X_raw  = csv_to_numpy_list( test_df, key='stop'  )
test_X      = preprocess_X(test_X_raw, test_deltas)

# Predict start boards, then play them forward by delta to get the stop boards they produce.
test_Y_pred = classifier_1.predict(test_X).reshape(-1,25,25)
test_X_pred = life_steps_delta(test_Y_pred, test_deltas).reshape(-1,25,25)

# Only the forward-played stop board is scored here: it is compared with the given stop board.
print('test_X_accuracy: ', tree_accuracy(test_X_raw, test_X_pred))

In [None]:
# Stage 2 on the test set: build the same extended feature layout that classifier_2 was
# fitted on (stage-1 features + predicted start board + forward-played stop board).
test_X2 = np.concatenate([
    test_X,
    test_Y_pred.reshape(-1,625),
    test_X_pred.reshape(-1,625),
    # (test_X_raw != test_X_pred).reshape(-1,625),
    # preprocess_X(test_X_pred, test_df['delta']),
    # preprocess_X(test_Y_pred, test_df['delta']),
], axis=1)
test_Y2_pred = classifier_2.predict(test_X2).reshape(-1,25,25)
# Use the `test_deltas` ndarray, as every other life_steps_delta() call does,
# rather than the raw `test_df['delta']` Series.
test_X2_pred = life_steps_delta(test_Y2_pred, test_deltas).reshape(-1,25,25)

# Scored by comparing the forward-played stop boards with the given stop boards.
print('test_X2_accuracy: ', tree_accuracy(test_X_raw, test_X2_pred))

In [None]:
# Build the submission in one vectorised step instead of creating one pd.Series per board:
# each predicted board is flattened row-major into the 625 `start_<n>` columns, which is the
# same labelling and cell order that numpy_to_series() produces, and rows keep test_df's `id` index.
submission_df = pd.DataFrame(
    test_Y2_pred.reshape(-1, 625),
    index=test_df.index,
    columns=csv_column_names('start'),
)
submission_df.to_csv('submission.csv')
submission_df

# Further Reading

I have written an interactive playable demo of the forward version of this game in React Javascript:
- https://life.jamesmcguigan.com/


This notebook is part of a series exploring Neural Network implementations of the Game of Life Forward Problem
- [Pytorch Game of Life - First Attempt](https://www.kaggle.com/jamesmcguigan/pytorch-game-of-life-first-attempt)
- [Pytorch Game of Life - Hardcoding Network Weights](https://www.kaggle.com/jamesmcguigan/pytorch-game-of-life-hardcoding-network-weights)
- [Its Easy for Neural Networks To Learn Game of Life](https://www.kaggle.com/jamesmcguigan/its-easy-for-neural-networks-to-learn-game-of-life)

This is preliminary research towards the harder Reverse Game of Life problem, for which I have already designed a novel Ouroboros loss function: 
- [OuroborosLife - Function Reversal GAN](https://www.kaggle.com/jamesmcguigan/ouroboroslife-function-reversal-gan)


I also have an extended series of Notebooks exploring different approaches to the Reverse Game of Life problem

My first attempt was to use the Z3 Constraint Satisfaction SAT solver. This gets 100% accuracy on most boards, but there are a few which it cannot solve. This approach can be slow for boards with large cell counts and large deltas. I managed to figure out how to get cluster compute working inside Kaggle Notebooks, but this solution is estimated to require 10,000+ hours of CPU time to complete.    
- [Game of Life - Z3 Constraint Satisfaction](https://www.kaggle.com/jamesmcguigan/game-of-life-z3-constraint-satisfaction)

The second approach was to create a Geometrically Invariant Hash function using Summable Primes, then use forward play and a dictionary lookup table to create a database of known states. For known input/output states at a given delta, the problem is reduced to simply solving the geometric transform between inputs and applying the same function to the outputs. The Hashmap Solver was able to solve about 10% of the test dataset.
- [Summable Primes](https://www.kaggle.com/jamesmcguigan/summable-primes)
- [Geometric Invariant Hash Functions](https://www.kaggle.com/jamesmcguigan/geometric-invariant-hash-functions)
- [Game of Life - Repeating Patterns](https://www.kaggle.com/jamesmcguigan/game-of-life-repeating-patterns)
- [Game of Life - Hashmap Solver](https://www.kaggle.com/jamesmcguigan/game-of-life-hashmap-solver)
- [Game of Life - Image Segmentation Solver](https://www.kaggle.com/jamesmcguigan/game-of-life-image-segmentation-solver)