In [1]:
import numpy as np
import pandas as pd
NROW, NCOL = 20, 20

In [2]:
np.random.seed(42)
m = np.random.randint(0, 2, size=NROW * NCOL).reshape(NROW, NCOL)

In [3]:
def empty_frame():
    return np.zeros((NROW, NCOL), dtype=np.int64)

def generate_frame(seed=None):
    if seed is not None:
        np.random.seed(seed)
    return np.random.randint(0, 2, size=NROW * NCOL).reshape(NROW, NCOL)

In [4]:
M = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1))

def sum_neighbours(frame, i, j):
    neighs_alive = 0
    for m in M:
        row_idx = m[0] + i
        col_idx = m[1] + j
        if 0 <= row_idx < NROW and 0 <= col_idx < NCOL:
            if frame[row_idx, col_idx]:
                neighs_alive += 1
    return neighs_alive

In [5]:
def evolve_frame(frame, n=5):
    """ Using frame as initial state, evolve system for <n> steps using GoL rules """
    cur_frame = frame
    for timestep in range(n):
        new_frame = np.zeros((NROW, NCOL), dtype=int)
        for i in range(NROW):
            for j in range(NCOL):
                alive_near = sum_neighbours(cur_frame, i, j)
                if cur_frame[i, j] and alive_near == 2:
                    new_frame[i, j] = 1
                elif alive_near == 3:
                    new_frame[i, j] = 1
        cur_frame = new_frame
    return cur_frame

In [80]:
def generate_sample(delta=1, skip_first=6, ravel=True):
    frame = generate_frame()
    start_frame = evolve_frame(frame, skip_first)
    end_frame = evolve_frame(start_frame, delta)
    return (end_frame, start_frame) if not ravel else (end_frame.ravel(), start_frame.ravel())

In [81]:
def generate_samples(n=32):
    X = np.zeros((n, NROW * NCOL))
    Y = np.zeros((n, NROW * NCOL))
    for i in range(n):
        x, y = generate_sample()
        X[i, :] = x
        Y[i, :] = y
    return X, Y

In [82]:
X, Y = generate_samples(10000)

In [47]:
import sklearn
from sklearn.ensemble import RandomForestClassifier

In [85]:
rf = RandomForestClassifier(n_estimators=300, verbose=1, class_weight="balanced_subsample")

In [86]:
rf.fit(X, Y)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 300 out of 300 | elapsed:  7.5min finished


RandomForestClassifier(bootstrap=True, class_weight='balanced_subsample',
            criterion='gini', max_depth=None, max_features='auto',
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=300, n_jobs=None, oob_score=False,
            random_state=None, verbose=1, warm_start=False)

In [14]:
train_df = pd.read_csv("resources/train.csv", index_col=0)
test_df = pd.read_csv("resources/test.csv", index_col=0)

In [15]:
fst_delta = train_df[train_df.delta == 1]
test_X, test_Y = fst_delta.iloc[:, 1:401], fst_delta.iloc[:, 401:]

In [87]:
Y_pred = rf.predict(test_X)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 300 out of 300 | elapsed:  1.1min finished


In [51]:
from sklearn.metrics import mean_absolute_error

In [95]:
mean_absolute_error(test_Y, Y_pred)

0.14085728744939272

In [96]:
X_submit = test_df.iloc[:, 1:]

In [68]:
np.set_printoptions(threshold=np.nan)

In [None]:
submit_df = pd.DataFrame(Y_submit)
submit_df.columns = ['start.'+str(i) for i in range(1, 401)]
submit_df.index += 1
submit_df.index.name = 'id'
submit_df.head(n=1).astype(int).values

In [60]:
submit_df.to_csv("rf_all_map.csv")