In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import itertools
import numpy as np
import pandas as pd
import torch.optim as optim
from torch.autograd import Variable
import torch
from tqdm import tqdm
import sys

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Life Simulators

In [None]:
def life_step(X):
    """Advance a Game of Life board by one generation on a wrap-around grid.

    Neighbours are counted by summing the eight shifted copies of ``X``
    produced with ``np.roll``, so cells on one edge are adjacent to the cells
    on the opposite edge.

    Args:
        X: 2-D integer or boolean array; non-zero means alive.

    Returns:
        Array of the same shape: a cell is alive when it has exactly three
        live neighbours, or is currently alive with exactly two.
    """
    offsets = [(i, j) for i in (-1, 0, 1) for j in (-1, 0, 1) if (i, j) != (0, 0)]
    nbrs_count = sum(np.roll(np.roll(X, i, 0), j, 1) for i, j in offsets)

    has_three = nbrs_count == 3
    alive_with_two = X & (nbrs_count == 2)
    return has_three | alive_with_two

In [None]:
def actual_forward(x):
    """Apply one exact Game of Life step to a batch of boards with fixed convolutions.

    The rule is encoded in three hand-set layers:

    1. A 3x3 convolution over the circularly padded board produces two
       channels: ``neighbours + 0.1 * centre - 3`` and ``neighbours + centre - 2``.
    2. A 1x1 convolution computes ``-10 * relu(ch0) + relu(ch1)``, which after
       ReLU is positive only for cells that are alive in the next generation.
    3. A steep sigmoid (``sigmoid(40 * v - 20)``) squashes that to ~0 or ~1.

    The fixed weights are created on the same device as ``x``, so the function
    works for CPU and GPU inputs alike.

    Args:
        x: tensor of boards with a single channel; the callers in this
           notebook pass shape ``(N, 1, 25, 25)`` with 0/1 entries.

    Returns:
        Float tensor with the same shape as ``x``; values close to 1 mark live
        cells of the next generation, values close to 0 mark dead cells.
    """
    torch.backends.cudnn.deterministic = True

    x = x.float()
    # Build the constants where the input lives instead of wherever CUDA happens to exist.
    device = x.device

    # Weights for layer 1
    weight1 = torch.tensor([[[1, 1, 1], [1, 0.1, 1], [1, 1, 1]],
                            [[1, 1, 1], [1, 1, 1], [1, 1, 1]]],
                           device=device).view(2, 1, 3, 3).float()
    b1 = torch.tensor([-3, -2], device=device).float()
    # Weights for layer 2
    weight2 = torch.tensor([-10, 1], device=device).view(1, 2, 1, 1).float()
    # Weights for layer 3
    s = 20
    weight3 = torch.tensor([2*s], device=device).view(1, 1, 1, 1).float()
    b3 = torch.tensor([-s], device=device).float()

    # Circular padding makes the 3x3 neighbourhood wrap around the board edges.
    x = F.pad(x, (1, 1, 1, 1), mode='circular')
    x = F.relu(F.conv2d(x, weight1, b1))
    x = F.relu(F.conv2d(x, weight2))
    x = torch.sigmoid(F.conv2d(x, weight3, b3))
    return x

# Data Generator

In [None]:
def generate_inf_cases(train, seed, board_size=25, min_dens=0.01, max_dens=0.99, warm_up=5, min_delta=1, max_delta=5, dtype=int, return_one_but_last=False):
    """Endlessly generate random Game of Life cases.

    For every case a random board is drawn with a live-cell probability
    sampled uniformly from ``[min_dens, max_dens)``, advanced ``warm_up``
    generations to obtain the start board, and then advanced a further
    ``delta`` generations (``delta`` drawn from ``min_delta..max_delta``
    inclusive) to obtain the stop board. Cases whose stop board is completely
    empty are discarded.

    Args:
        train: when true (and ``return_one_but_last`` is false) the start
            board is included in the yielded tuple.
        seed: seed for the private ``np.random.RandomState``.
        board_size: side length of the square board.
        min_dens, max_dens: bounds for the sampled live-cell probability.
        warm_up: generations applied to the random board before it is used
            as the start board.
        min_delta, max_delta: inclusive bounds for the number of generations
            between start and stop.
        dtype: dtype of the all-zero reference board used for the empty
            check. The default is the builtin ``int``; the ``np.int`` alias it
            used to be was removed in NumPy 1.24.
        return_one_but_last: when true, yield the board one generation before
            ``stop`` instead of the start board.

    Yields:
        ``(delta, one_but_last, stop)`` if ``return_one_but_last``,
        else ``(delta, start, stop)`` if ``train``, else ``(delta, stop)``.
    """
    rs = np.random.RandomState(seed)
    zer = np.zeros(shape=(board_size, board_size), dtype=dtype)
    while True:
        density = rs.uniform(min_dens, max_dens)
        start = rs.choice([1, 0], size=(board_size, board_size), p=[density, 1.0-density])
        for _ in range(warm_up):
            start = life_step(start)

        # randint's upper bound is exclusive, hence the +1.
        delta = rs.randint(min_delta, max_delta+1)
        stop = start.copy()
        one_but_last = None
        for _ in range(delta):
            one_but_last = stop
            stop = life_step(stop)

        # Skip cases that died out completely.
        if not (stop == zer).all():
            if return_one_but_last:
                yield delta, one_but_last, stop
            elif train:
                yield delta, start, stop
            else:
                yield delta, stop

In [None]:
def generate_n_cases(train, set_size, seed, **kwargs):
    """Return a lazy iterator over the first ``set_size`` generated cases.

    ``train``, ``seed`` and any extra keyword arguments are forwarded to
    ``generate_inf_cases``.
    """
    endless_cases = generate_inf_cases(train, seed, **kwargs)
    return itertools.islice(endless_cases, set_size)

In [None]:
def generate_train_set(set_size, seed, **kwargs):
    """Return an iterator over ``set_size`` cases generated with ``train=True``.

    Extra keyword arguments are passed through to ``generate_n_cases``.
    """
    cases = generate_n_cases(True, set_size, seed, **kwargs)
    return cases

In [None]:
# When True, the data cells below generate the training/validation boards and save them to disk;
# when False, they load previously saved .npy files instead.
REGENERATE_DATA = True

In [None]:
# Generate or load training data
N = 128000

if REGENERATE_DATA:
    # One-generation cases only (min_delta == max_delta == 1), seeded for reproducibility.
    train_data_gen = generate_train_set(N, 41, min_delta=1, max_delta=1)
    deltas, start_boards, stop_boards = map(np.array, zip(*train_data_gen))
    # Save training data (np.save appends the ".npy" extension)
    np.save('/kaggle/working/training_start_boards', start_boards)
    np.save('/kaggle/working/training_stop_boards', stop_boards)
else:
    # Load from the same location the branch above writes to.
    start_boards = np.load('/kaggle/working/training_start_boards.npy')
    stop_boards = np.load('/kaggle/working/training_stop_boards.npy')

In [None]:
# Generate or load validation data
N_valid = 12800

if not REGENERATE_DATA:
    # Reuse the boards written by an earlier run of this cell.
    valid_start_boards = np.load('/kaggle/working/valid_start_boards.npy')
    valid_stop_boards = np.load('/kaggle/working/valid_stop_boards.npy')
else:
    # One-generation cases drawn with a seed different from the training set's.
    valid_data_gen = generate_train_set(N_valid, 888, min_delta=1, max_delta=1)
    deltas, valid_start_boards, valid_stop_boards = (
        np.array(field) for field in zip(*list(valid_data_gen))
    )
    # Save validation data
    np.save('/kaggle/working/valid_start_boards', valid_start_boards)
    np.save('/kaggle/working/valid_stop_boards', valid_stop_boards)

# Model Trainer

In [None]:
def train(model, X, y, X_valid, y_valid,
          optim, criterion, output_path, num_epochs=10, batch_size=128):
    """Train ``model`` on mini-batches and checkpoint the best validation score.

    Every 50 optimizer steps the whole validation set is scored; whenever the
    validation error rate improves on the best seen so far, the model's
    ``state_dict`` is saved to ``output_path``.

    The "MAE" reported here is the fraction of cells whose thresholded
    (``> 0.5``) prediction differs from the target. For models that expose a
    ``reverse_net`` attribute the metric is computed by thresholding the
    reverse net's output, advancing it with ``actual_forward`` and comparing
    that board with the target.

    Args:
        model: ``nn.Module`` to train. It is moved to the GPU when one is
            available, otherwise it stays on the CPU.
        X, y: training inputs and targets, indexed along dimension 0.
        X_valid, y_valid: validation inputs and targets.
        optim: optimizer factory, called as ``optim(model.parameters())``.
        criterion: loss callable, called as ``criterion(outputs, target)``.
        output_path: file the best ``state_dict`` is written to.
        num_epochs: number of passes over ``X``.
        batch_size: mini-batch size used for training and validation.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Move the model first so the optimizer is built over the final parameters.
    model.to(device)
    optimizer = optim(model.parameters())

    # Best validation MAE
    best_valid_mae = 1.0

    n_train = X.size()[0]
    n_valid = X_valid.size()[0]

    # Train
    n_iter = 0
    for epoch in range(num_epochs):
        permutation = torch.randperm(n_train)
        pbar = tqdm(range(0, n_train, batch_size))
        for i in pbar:
            n_iter += 1
            indices = permutation[i:i+batch_size]
            batch = X[indices].to(device)
            target = y[indices].to(device)

            optimizer.zero_grad()
            outputs = model(batch)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            # Calculate MAE (metric only, so no autograd graph is needed)
            with torch.no_grad():
                if hasattr(model, "reverse_net"):
                    pred_start_boards = model.reverse_net(batch)
                    outputs = actual_forward((pred_start_boards > 0.5).int())
                output_boards = (outputs > 0.5).int()
                # Normalise by the real number of cells so a short final batch is not under-reported.
                mae = torch.sum(output_boards != target).item() / target.numel()
            pbar.set_description("[{:d}, {:5d}] loss: {:.6f} | train MAE {:.6f} | best MAE: {:.6f}".format(epoch + 1, i + 1, loss.item(), mae, best_valid_mae))

            # Score the validation set every 50 batches
            if n_iter % 50 == 0:
                was_training = model.training
                model.eval()
                with torch.no_grad():
                    valid_errors = 0
                    for j in range(0, n_valid, batch_size):
                        valid_batch = X_valid[j:j+batch_size].to(device)
                        valid_target = y_valid[j:j+batch_size].to(device)
                        if hasattr(model, "reverse_net"):
                            valid_start_boards = model.reverse_net(valid_batch)
                            valid_outputs = actual_forward((valid_start_boards > 0.5).int())
                        else:
                            valid_outputs = model(valid_batch)
                        valid_boards = (valid_outputs > 0.5).int()
                        valid_errors += torch.sum(valid_boards != valid_target).item()
                    valid_mae = valid_errors / y_valid.numel()
                # Restore the mode the model was in so training does not continue in eval mode.
                model.train(was_training)

                if valid_mae < best_valid_mae:
                    best_valid_mae = valid_mae
                    # Save model if we have the latest best MAE
                    torch.save(model.state_dict(), output_path)
    print("The best validation MAE: {}".format(best_valid_mae))

# Train Relaxed Forward Model

In [None]:
# Modify starting boards 
def relax_boards(boards):
    """Blur binary boards into continuous values that stay on the same side of 0.5.

    Uniform noise in ``[0, 0.5)`` is drawn with the same shape as ``boards``
    and the result is ``|noise - boards|``: zeros map into ``[0, 0.5)`` and
    ones into ``(0.5, 1]``.

    Note: the global NumPy RNG is re-seeded with 41 on every call, so the
    noise pattern is the same for each call with a given shape.
    """
    np.random.seed(41)
    noise = np.random.rand(*boards.shape) / 2
    return np.abs(noise - boards)

In [None]:
class RelaxedForwardNet(nn.Module):
    """Small convolutional network mapping a one-channel board to per-cell values in (0, 1).

    A 1x1 convolution with ReLU is followed by three 3x3 convolutions with
    PReLU activations and a final 3x3 convolution with a sigmoid. Every 3x3
    convolution uses circular padding of one cell, so the spatial size is
    preserved and the board edges wrap around.
    """

    def __init__(self):
        super().__init__()
        # Shared settings for every 3x3 layer.
        wrap3x3 = dict(kernel_size=3, padding=1, padding_mode='circular')

        # Attribute names and creation order are kept stable: they define the state_dict keys.
        self.conv0 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=1)
        self.activ0 = nn.ReLU()
        self.conv1 = nn.Conv2d(8, 16, **wrap3x3)
        self.activ1 = nn.PReLU()
        self.conv2 = nn.Conv2d(16, 8, **wrap3x3)
        self.activ2 = nn.PReLU()
        self.conv3 = nn.Conv2d(8, 4, **wrap3x3)
        self.activ3 = nn.PReLU()
        self.conv4 = nn.Conv2d(4, 1, **wrap3x3)

    def forward(self, x):
        hidden_stages = (
            (self.conv0, self.activ0),
            (self.conv1, self.activ1),
            (self.conv2, self.activ2),
            (self.conv3, self.activ3),
        )
        for conv, activation in hidden_stages:
            x = activation(conv(x))
        return torch.sigmoid(self.conv4(x))

In [None]:
# Instantiate the forward network and the binary cross-entropy loss it is trained with.
relaxed_forward_net = RelaxedForwardNet()
criterion = nn.BCELoss()

In [None]:
# Training data
relaxed_start_boards = relax_boards(start_boards)
X_relaxed = Variable(torch.tensor(relaxed_start_boards).view(N, 1, 25, 25).float(), requires_grad=True)
y = Variable(torch.tensor(stop_boards).view(N, 1, 25, 25).float())

In [None]:
# Validation data
relaxed_valid_start_boards = relax_boards(valid_start_boards)
X_valid_relaxed = Variable(torch.tensor(relaxed_valid_start_boards).view(N_valid, 1, 25, 25).float())
y_valid = Variable(torch.tensor(valid_stop_boards).view(N_valid, 1, 25, 25).float())

In [None]:
# File the best relaxed-forward state_dict is saved to during training (and later reloaded from).
relaxed_forward_model_path = "/kaggle/working/relaxed_forward.pkl"

In [None]:
# Fit the forward network on relaxed start boards -> stop boards with Adam
# (batch size 512, 25 epochs); the best checkpoint goes to relaxed_forward_model_path.
train(relaxed_forward_net, X_relaxed, y, X_valid_relaxed, y_valid, optim.Adam, criterion, relaxed_forward_model_path, batch_size=512, num_epochs=25)

# Train reverse-forward model

In [None]:
class ReverseNet(nn.Module):
    """Convolutional network that maps a one-channel board to per-cell values in (0, 1).

    Layer stack: 1x1 conv + ReLU, then three 3x3 convs with PReLU, then a
    3x3 conv + sigmoid. The 3x3 convolutions pad circularly by one cell, so
    the output has the same spatial size as the input.
    """

    def __init__(self):
        super().__init__()
        # Keyword arguments common to the 3x3 layers.
        circular_3x3 = {'kernel_size': 3, 'padding': 1, 'padding_mode': 'circular'}

        # Names and creation order are unchanged so state_dict keys stay the same.
        self.conv0 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=1)
        self.activ0 = nn.ReLU()
        self.conv1 = nn.Conv2d(8, 16, **circular_3x3)
        self.activ1 = nn.PReLU()
        self.conv2 = nn.Conv2d(16, 8, **circular_3x3)
        self.activ2 = nn.PReLU()
        self.conv3 = nn.Conv2d(8, 4, **circular_3x3)
        self.activ3 = nn.PReLU()
        self.conv4 = nn.Conv2d(4, 1, **circular_3x3)

    def forward(self, x):
        features = x
        for conv, activation in (
            (self.conv0, self.activ0),
            (self.conv1, self.activ1),
            (self.conv2, self.activ2),
            (self.conv3, self.activ3),
        ):
            features = activation(conv(features))
        logits = self.conv4(features)
        return torch.sigmoid(logits)

In [None]:
class ReverseForwardNet(nn.Module):
    """Chain a trainable reverse network with a frozen, pre-trained forward network.

    ``forward(x)`` returns ``forward_net(reverse_net(x))``. Only the reverse
    network's parameters require gradients; the forward network's weights are
    loaded from a checkpoint and frozen.

    Args:
        ForwardNet: zero-argument callable (class) that builds the forward network.
        forward_wt_path: path of a ``state_dict`` checkpoint for the forward network.
        ReverseNet: zero-argument callable (class) that builds the reverse network.
    """

    def __init__(self, ForwardNet, forward_wt_path, ReverseNet):
        super(ReverseForwardNet, self).__init__()
        self.reverse_net = ReverseNet()
        # freeze the weights of the forward net
        self.forward_net = ForwardNet()
        # map_location="cpu" lets a checkpoint saved from a GPU model load on a CPU-only
        # runtime; load_state_dict copies the values into the freshly built module.
        forward_state = torch.load(forward_wt_path, map_location="cpu")
        self.forward_net.load_state_dict(forward_state)
        for param in self.forward_net.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.reverse_net(x)
        x = self.forward_net(x)
        return x

In [None]:
# Combined model: a fresh ReverseNet feeding a RelaxedForwardNet whose weights are loaded
# from the checkpoint at relaxed_forward_model_path and frozen.
rf_net = ReverseForwardNet(RelaxedForwardNet, relaxed_forward_model_path, ReverseNet)
print(rf_net)

In [None]:
# Binary cross-entropy loss for the reverse-forward model (same loss as used for the forward net).
criterion = nn.BCELoss()

In [None]:
# Training data
X_rf = Variable(torch.tensor(stop_boards).view(N, 1, 25, 25).float(), requires_grad=True)
y_rf = y

In [None]:
# Validation data
X_valid_rf = y_valid
y_valid_rf = y_valid

In [None]:
# File the best reverse-forward state_dict is saved to during training.
rf_model_path = "/kaggle/working/reverse_forward.pkl"

In [None]:
# Train the combined model with Adam (batch size 256, 10 epochs); the best checkpoint is
# written to rf_model_path.
train(rf_net, X_rf, y_rf, X_valid_rf, y_valid_rf, optim.Adam, criterion, rf_model_path, batch_size=256, num_epochs=10)

# Make prediction and submission

In [None]:
# Competition test set (input) and the CSV the predictions are written to (output).
test_data_path = '/kaggle/input/conways-reverse-game-of-life-2020/test.csv'
submission_path = '/kaggle/working/submission.csv'

In [None]:
def get_forward_mae(model, weight_path, input_boards, output_boards, n):
    """Load a checkpoint into ``model`` and score it on the first ``n`` boards.

    The score is the fraction of cells whose thresholded (``> 0.5``)
    prediction differs from ``output_boards``. For a model exposing a
    ``reverse_net`` attribute, the reverse net's thresholded output is
    advanced with ``actual_forward`` and that board is compared instead.

    Runs on the GPU when one is available and on the CPU otherwise.

    Args:
        model: ``nn.Module`` whose ``state_dict`` matches the checkpoint.
            It is moved to the chosen device and left in eval mode.
        weight_path: path of the ``state_dict`` checkpoint to load.
        input_boards: array-like of boards fed to the model; the first ``n``
            entries must reshape to ``(n, 1, 25, 25)``.
        output_boards: array-like of expected boards, same layout.
        n: number of leading boards to evaluate.

    Returns:
        Zero-dimensional float tensor with the error rate in ``[0, 1]``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Release CUDA memory
    torch.cuda.empty_cache()
    # Load model (map_location keeps GPU-saved checkpoints loadable on a CPU-only runtime)
    model.load_state_dict(torch.load(weight_path, map_location=device))
    model.to(device)
    # Convert boards to tensor
    input_boards_tensor = torch.tensor(input_boards[:n]).view(n, 1, 25, 25).float().to(device)
    output_boards_tensor = torch.tensor(output_boards[:n]).view(n, 1, 25, 25)
    model.eval()
    with torch.no_grad():
        # Make prediction
        if hasattr(model, "reverse_net"):
            predicted_start_board = model.reverse_net(input_boards_tensor)
            predicted_output_board = (actual_forward((predicted_start_board > 0.5).int()) > 0.5).int()
        else:
            predicted_output_board = (model(input_boards_tensor) > 0.5).int()
        error = torch.sum(predicted_output_board.cpu() != output_boards_tensor).float()
    return error / (n * 25 * 25)

In [None]:
def predict(df, model):
    """Predict start boards for every row of a test DataFrame.

    For each row the 25x25 stop board is passed through ``model`` once, and
    then ``delta - 1`` more times, thresholding at 0.5 between applications.
    The final output is thresholded to 0/1.

    Args:
        df: DataFrame with an ``id`` column, a ``delta`` column and 625
            columns whose names start with ``stop``.
        model: ``nn.Module`` mapping a ``(1, 1, 25, 25)`` float tensor to a
            tensor of the same shape. It is put in eval mode; inputs are
            placed on the device holding the model's parameters.

    Returns:
        DataFrame with ``id`` followed by ``start_0`` .. ``start_624``.
    """
    n = df.shape[0]
    cols = [col for col in df.columns if col.startswith('stop')]
    stop_boards = df[cols].to_numpy().reshape(n, 25, 25)

    # Follow the model's device instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    stop_boards_tensor = torch.tensor(stop_boards).float().to(device)
    deltas = df.delta.to_list()

    predictions = np.zeros((n, 25, 25), dtype=int)
    model.eval()
    with torch.no_grad():
        for i in tqdm(range(n)):
            prediction = model(stop_boards_tensor[i].view(1, 1, 25, 25))
            # One application per generation; re-binarise between steps.
            for _ in range(deltas[i] - 1):
                prediction = model((prediction > 0.5).float())
            predictions[i] = (prediction > 0.5).view(25, 25).int().cpu().numpy()

    # Build all 625 columns in one go; inserting them one at a time fragments the frame.
    start_cols = ['start_{}'.format(i) for i in range(625)]
    starts = pd.DataFrame(predictions.reshape(n, 625), columns=start_cols, index=df.index)
    return pd.concat([df[['id']], starts], axis=1)

In [None]:
# Load the test set and reshape its 'stop*' columns into (rows, 25, 25) boards.
# NOTE: this rebinds `stop_boards`, which earlier cells use for the training stop boards;
# re-running those cells after this one would pick up the test boards instead.
df = pd.read_csv(test_data_path, sep=',')
cols = [col for col in df.columns if col.startswith('stop')]
stop_boards = df[cols].to_numpy().reshape(df.shape[0], 25, 25)

In [None]:
# print("MAE on test set: {:.8f}.".format(get_forward_mae(rf_net, rf_model_path, stop_boards, stop_boards, df.shape[0])))

In [None]:
# Predict start boards with the reverse net alone (the forward net is not used here)
# and write them to the submission CSV without the index column.
predictions = predict(df, rf_net.reverse_net)
predictions.to_csv(submission_path, index=False)