In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dense
from tensorflow.keras.models import Sequential

# Model

In [5]:
def oneIterCNN(info_ch=64, ch=128):
    ''' 
    creates the core iterated CNN block for one delta
    '''
    loss = tf.losses.BinaryCrossentropy(from_logits=True)
    model = Sequential()
    model.add(Conv2D(filters=ch, activation='relu', kernel_size=5, padding='same', input_shape=(25, 25, info_ch)))
    model.add(Conv2D(filters=ch, activation='relu', kernel_size=3, padding='same', input_shape=(25, 25, ch)))
    model.add(Conv2D(filters=info_ch, activation='relu', kernel_size=3, padding='same', input_shape=(25, 25, ch)))
    model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])
    return model

In [6]:
oneIterCNN().summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 25, 25, 128)       204928    
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 25, 25, 128)       147584    
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 25, 25, 64)        73792     
Total params: 426,304
Trainable params: 426,304
Non-trainable params: 0
_________________________________________________________________


In [7]:
class OneIterCNN(tf.keras.Model):
    def __init__(self, info_ch, ch):
        super(OneIterCNN, self).__init__()
        self.conv1 = Conv2D(filters=ch, activation='relu', kernel_size=5, padding='same', input_shape=(25, 25, info_ch))
        self.conv2 = Conv2D(filters=ch, activation='relu', kernel_size=3, padding='same', input_shape=(25, 25, ch))
        self.conv3 = Conv2D(filters=info_ch, activation='relu', kernel_size=3, padding='same', input_shape=(25, 25, ch))
        
    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.conv2(x)
        x = self.conv3(x)
        return x        

In [71]:
class IterCNN(tf.keras.Model):
    def __init__(self, info_ch=64, ch=128):
        super(IterCNN, self).__init__()
        self.encoder = Conv2D(filters=64, activation = 'relu', kernel_size=7, padding='same', input_shape=(25, 25, 1))
        self.one_iter = OneIterCNN(info_ch, ch)
        self.decoder = Conv2D(filters=1, activation='relu', kernel_size=3, padding='same', input_shape=(25, 25, 64))
        
    def call(self, inputs):
        stop, delta = inputs
        x = self.encoder(stop-0.5)
        for i in range(delta):
            y = self.one_iter(x)
            # allows for different delta in the same batch
            mask = (delta > i).reshape(-1,1,1,1)
            x = x*(~mask).float() + y*mask.float()
        x = self.decoder(x)
        return x

In [72]:
loss = tf.losses.BinaryCrossentropy(from_logits=True)

model = IterCNN()

In [73]:
model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])

# Load Data

In [18]:
# import data
df = pd.read_csv('../data/train.csv', index_col='id')
train, val = train_test_split(df, test_size=0.2, shuffle=True, random_state=42, stratify=df['delta'])

In [29]:
train

Unnamed: 0_level_0,delta,start_0,start_1,start_2,start_3,start_4,start_5,start_6,start_7,start_8,...,stop_615,stop_616,stop_617,stop_618,stop_619,stop_620,stop_621,stop_622,stop_623,stop_624
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20555,5,0,1,1,0,0,0,0,0,0,...,1,1,1,0,0,0,1,0,0,0
21476,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21220,4,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
38159,5,0,1,1,1,1,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
30246,3,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38726,1,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,0,0,0,0,0
47315,2,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,1,0,1
42986,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,1,0,0
34080,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Process Data

In [55]:
def process_data(df: pd.DataFrame) -> (np.array, np.array, np.array):
    '''
    takes a dataframe of the same format as the csv loaded dataframe, extracts those boards, and 
    reshapes them to be the appropriate shape for CNN. 
    
    inputs: df is a pandas DataFrame with train data. 
    outputs: multi-dimensional np.arrays, train are the train stop boards, y_train are the train start boards, delta 
             is the number of steps required
    '''
    len_data = df.shape[0]
    delta = df.loc[:, 'delta'].values.astype('int32')
    stop_boards = df.loc[:, ['stop_{}'.format(i) for i in range(625)]].values.astype('float32').reshape(len_data, 25, 25, 1)
    start_boards = df.loc[:, ['start_{}'.format(i) for i in range(625)]].values.astype('float32').reshape(len_data, 25, 25, 1)
    return stop_boards, start_boards, delta

In [60]:
stop_train, start_train, delta_train = process_data(train)
stop_val, start_val, delta_val = process_data(val)

In [61]:
print(stop_train.shape)
print(start_train.shape)
print(delta_train.shape)
print(stop_val.shape)
print(start_val.shape)
print(delta_train.shape)

(40000, 25, 25, 1)
(40000, 25, 25, 1)
(40000,)
(10000, 25, 25, 1)
(10000, 25, 25, 1)
(40000,)


In [74]:
model.fit(x=(stop_train, delta_train), y=start_train, epochs=3, verbose=1)

Epoch 1/3


ValueError: in user code:

    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    <ipython-input-63-bf24d7358da9>:11 call  *
        for i in range(delta):
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/autograph/operators/py_builtins.py:352 range_  **
        return _tf_range(start_or_stop, stop, step)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/autograph/operators/py_builtins.py:370 _tf_range
        return math_ops.range(start_or_stop)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py:1812 range
        return gen_math_ops._range(start, limit, delta, name=name)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/gen_math_ops.py:7314 _range
        "Range", start=start, limit=limit, delta=delta, name=name)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:744 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py:593 _create_op_internal
        compute_device)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:3485 _create_op_internal
        op_def=op_def)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1975 __init__
        control_input_ops, op_def)
    /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: Shape must be rank 0 but is rank 2
    	 for 'limit' for '{{node iter_cnn_12/range}} = Range[Tidx=DT_INT32](iter_cnn_12/range/start, iter_cnn_12/Maximum, iter_cnn_12/range/delta)' with input shapes: [], [32,1], [].


In [43]:
start_train.dtype

dtype('int64')