In [21]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Print the contents of the input directory so the CSV file names read by the
# loading cell (train.csv / test.csv) can be checked by eye.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

['test.csv', 'train.csv', 'sampleSubmission.csv']


In [None]:
# Load the competition CSVs; the first column of each file becomes the index,
# so `.values` on either frame starts at the column after it.
train_df, test_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("../input/train.csv", "../input/test.csv")
)

In [28]:
# Relative (row, col) offsets of the eight cells surrounding a cell.
M = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1))

def calc_neighs(field, i, j):
    """Count the truthy cells among the eight neighbours of ``field[i][j]``.

    Offsets that fall outside the grid are ignored (no wrap-around).

    Parameters
    ----------
    field : 2-D grid indexable as ``field[row][col]`` (nested lists or array).
        Rows and columns are bounded independently, so the grid does not
        have to be square.
    i, j : int
        Row and column index of the cell whose neighbours are counted.

    Returns
    -------
    int
        Number of in-bounds neighbouring cells whose value is truthy.
    """
    n_rows = len(field)
    # Column bound comes from the first row instead of reusing the row count,
    # which would be wrong for rectangular grids.
    n_cols = len(field[0]) if n_rows else 0
    neighs = 0
    for d_row, d_col in M:
        row_idx = i + d_row
        col_idx = j + d_col
        if 0 <= row_idx < n_rows and 0 <= col_idx < n_cols and field[row_idx][col_idx]:
            neighs += 1
    return neighs

def make_move(field, moves=1):
    """Advance a Game of Life grid by ``moves`` generations.

    A cell is alive in the next generation when it has exactly three live
    neighbours, or when it is currently alive and has exactly two. Cells
    outside the grid count as dead (no wrap-around).

    Parameters
    ----------
    field : 2-D array-like
        Current grid; any non-zero value is treated as a live cell. The grid
        may be rectangular.
    moves : int, default 1
        Number of generations to simulate.

    Returns
    -------
    The grid after ``moves`` generations as an integer array of 0/1 with the
    same shape as ``field``. When ``moves`` is 0 the input object itself is
    returned unchanged.

    Raises
    ------
    ValueError
        If ``field`` is not two-dimensional (only checked when a generation
        is actually simulated).
    """
    cur_field = field
    for _ in range(moves):
        alive = np.asarray(cur_field) != 0
        if alive.ndim != 2:
            raise ValueError("field must be a 2-D grid")
        n_rows, n_cols = alive.shape

        # Surround the grid with one ring of dead cells so every shifted
        # slice below stays in bounds.
        padded = np.pad(alive.astype(int), 1, mode="constant")

        # Neighbour counts for all cells at once: add the eight shifted
        # copies of the grid instead of visiting each cell in Python.
        neighs = np.zeros((n_rows, n_cols), dtype=int)
        for d_row in (0, 1, 2):
            for d_col in (0, 1, 2):
                if d_row == 1 and d_col == 1:
                    continue  # the cell itself is not its own neighbour
                neighs += padded[d_row:d_row + n_rows, d_col:d_col + n_cols]

        cur_field = ((neighs == 3) | (alive & (neighs == 2))).astype(int)
    return cur_field

def generate_field(delta):
    """Create a random 20x20 starting grid.

    Cells are drawn uniformly from {0, 1} and the grid is then advanced five
    generations with ``make_move`` before being returned.

    ``delta`` is accepted but not used by this function.
    """
    random_cells = np.random.randint(0, 2, size=(20, 20))
    return make_move(random_cells, moves=5)

def generate_data_row(delta):
    """Build one flat sample: ``[delta, start cells..., end cells...]``.

    The start grid comes from ``generate_field``; the end grid is that start
    grid advanced ``delta`` generations with ``make_move``. Both grids are
    flattened row by row and concatenated after ``delta`` into a 1-D array.
    """
    initial = generate_field(delta)
    evolved = make_move(initial, delta)
    parts = (
        np.array(delta).reshape(1, -1),
        initial.reshape(1, -1),
        evolved.reshape(1, -1),
    )
    return np.concatenate(parts, axis=1).ravel()

In [None]:
# function which extracts windows around every cell for future prediction
# and returns (X, y) for this field
def train_row_to_windowed_data(row):
    """Turn one training row into per-cell (window, label) samples.

    The row layout is ``[delta, start cells..., end cells...]`` where both
    grids are square and flattened row by row. The grid side is derived from
    the row length, so the 801-element rows of a 20x20 board work as well as
    rows for other board sizes.

    For every cell, the feature vector is the flattened
    ``(2*delta+1) x (2*delta+1)`` window of the *end* grid centred on that
    cell; positions outside the board are filled with -1. The label is the
    value of the same cell in the *start* grid.

    Parameters
    ----------
    row : 1-D array-like
        ``[delta, start cells..., end cells...]``.

    Returns
    -------
    (X, y) : tuple of ndarray
        ``X`` has shape ``(side*side, (2*delta+1)**2)`` and ``y`` has shape
        ``(side*side, 1)``, both in row-major cell order.

    Raises
    ------
    ValueError
        If the row length is not ``1 + 2 * side**2`` for an integer side.
    """
    row = np.asarray(row)
    # Plain int so padding and slicing work even if the row has a float dtype.
    delta = int(row[0])

    cells = (row.size - 1) // 2
    side = int(round(cells ** 0.5))
    if row.size != 2 * side * side + 1:
        raise ValueError(
            "row must hold delta followed by two square grids, got length %d" % row.size
        )

    start_field = row[1:1 + cells].reshape(side, side)
    end_field = row[1 + cells:].reshape(side, side)

    # Pad by delta so every cell, including border cells, has a full window.
    padded = np.pad(end_field, delta, mode="constant", constant_values=-1)
    width = 2 * delta + 1
    windows = [
        padded[i:i + width, j:j + width].ravel()
        for i in range(side)
        for j in range(side)
    ]
    # Copy so the labels do not alias the caller's row.
    labels = start_field.reshape(-1, 1).copy()
    return (np.array(windows), labels)

In [None]:
# applies prev function to every row
def extract_features_from_raw_data(raw_data):
    """Stack the windowed samples of every row in ``raw_data``.

    Each row of the 2-D array ``raw_data`` is passed to
    ``train_row_to_windowed_data``; the per-row feature matrices and label
    columns are then stacked vertically, in row order.

    Returns a ``(X, y)`` tuple of stacked features and stacked labels.
    """
    per_field = [
        train_row_to_windowed_data(raw_data[row_idx, :])
        for row_idx in range(raw_data.shape[0])
    ]
    X = [features for features, _ in per_field]
    y = [labels for _, labels in per_field]
    return np.vstack(X), np.vstack(y)

In [None]:
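# training, takes about an hour
# NOTE: the prediction cell below reads `models_by_delta`, which is only built
# by this (currently commented-out) code; run it first on a fresh kernel.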

# models_by_delta = dict()

# for d in range(1, 6):
#     raw_data = train_df[train_df["delta"] == d].values
#     X, y = extract_features_from_raw_data(raw_data)
    
#     rf = RandomForestClassifier(n_jobs=1)
#     rf.fit(X, y)
    
#     models_by_delta[d] = rf

In [None]:
# predict all

def predict_field(end_field, delta, model):
    """Predict a value for every cell of ``end_field`` with ``model``.

    For each cell (row-major order) the flattened
    ``(2*delta+1) x (2*delta+1)`` window of ``end_field`` centred on it is
    extracted, with positions outside the grid filled with -1. All windows
    are passed to ``model.predict`` in one call and its result is returned
    as is.
    """
    size = len(end_field)
    span = 2 * delta + 1
    bordered = np.pad(end_field, delta, mode="constant", constant_values=-1)
    windows = np.array([
        bordered[r:r + span, c:c + span].ravel()
        for r in range(size)
        for c in range(size)
    ])
    return model.predict(windows)

# Predict every test board. Each row is [delta, 400 end-state cells]; the
# model is chosen by the row's delta. Progress is printed every 1000 rows.
res = []
for row_idx, row in enumerate(test_df.values):
    if row_idx % 1000 == 0:
        print(row_idx)
    delta, field = row[0], row[1:].reshape(20, 20)
    pred = predict_field(field, delta, models_by_delta[delta])
    res.append(pred)

In [None]:
# save
# output = pd.DataFrame(np.hstack((np.arange(1, 50001).reshape((-1, 1)), np.array(res))), columns=(["id"] + list(train_df.columns)[1:401]))
# output.to_csv("./submission.csv", index=False)