<a href="https://colab.research.google.com/github/omerhac/arc_challenge/blob/master/arc_modulerized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import numpy as np
import tensorflow as tf
import json
from google.cloud import storage
from matplotlib import pyplot as plt
from matplotlib import colors
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Conv2D, Lambda, Dense, Flatten, MaxPool2D, Input, BatchNormalization, Conv2DTranspose, UpSampling2D, Reshape
from sklearn.preprocessing import OneHotEncoder
import math

# tf.data autotune sentinel; not referenced by any of the cells visible in this notebook
AUTO = tf.data.experimental.AUTOTUNE

In [2]:
# get repository from github (the following cells cd into it and import its preprocess notebook)
!git clone https://github.com/omerhac/arc_challenge.git

Cloning into 'arc_challenge'...
remote: Enumerating objects: 168, done.[K
remote: Counting objects: 100% (168/168), done.[K
remote: Compressing objects: 100% (165/165), done.[K
remote: Total 168 (delta 73), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (168/168), 10.52 MiB | 4.96 MiB/s, done.
Resolving deltas: 100% (73/73), done.


In [3]:
# navigate to directory so the repository's notebooks can be imported by name
%cd arc_challenge

/content/arc_challenge


In [4]:
# load repository dependencies
!pip install import_ipynb
import import_ipynb
import preprocess
from importlib import reload

reload(preprocess)

Collecting import_ipynb
  Downloading https://files.pythonhosted.org/packages/63/35/495e0021bfdcc924c7cdec4e9fbb87c88dd03b9b9b22419444dc370c8a45/import-ipynb-0.1.3.tar.gz
Building wheels for collected packages: import-ipynb
  Building wheel for import-ipynb (setup.py) ... [?25l[?25hdone
  Created wheel for import-ipynb: filename=import_ipynb-0.1.3-cp36-none-any.whl size=2976 sha256=7d200de08ce5364952443e3a935bc3bed269229e8f4a429b0d7b579594ba6e65
  Stored in directory: /root/.cache/pip/wheels/b4/7b/e9/a3a6e496115dffdb4e3085d0ae39ffe8a814eacc44bbf494b5
Successfully built import-ipynb
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.3
importing Jupyter notebook from preprocess.ipynb
importing Jupyter notebook from preprocess.ipynb


<module 'preprocess' from 'preprocess.ipynb'>

In [5]:
## constants ##
BOARD_SIZE = (16,16) # board upperbound size
SN_BATCH_SIZE = 64 # batch size used when fitting the siamese networks model
DECODER_BATCH_SIZE = 8 # batch size used when fitting the decoder
DENSE_REP_SIZE = 64 # dense vector representation size


# Load data


In [6]:
%%time
# load the three task collections through the repository's preprocess notebook
training_tasks, eval_tasks, test_tasks = preprocess.load_data_from_jsons()

CPU times: user 232 ms, sys: 15.1 ms, total: 247 ms
Wall time: 250 ms


In [7]:
def get_siamese_dataset(training_tasks, eval_tasks, test_tasks, get_lists=False):
  """
  Creates a dataset for the siamese networks.

  Args:
  .._tasks: list of tasks.
  get_lists: bool, whether to get just the lists of pairs or the processed dataset.

  Returns:
  (pair_list, label_list) when get_lists is True, otherwise the (x, y) pair
  produced by preprocess.get_dataset_from_lists(pair_list, label_list).
  """

  # extract all_boards
  all_boards = preprocess.get_all_boards(training_tasks, eval_tasks, test_tasks)

  # binarize all boards
  all_boards_binary = [preprocess.get_binary_board(board) for board in all_boards]

  # normalize boards --> not sure if necessary
  #all_boards_binary = normalize_boards(all_boards_binary)

  # create a list of all boards augmentation data
  pair_list = []
  label_list = []

  # iterate over all boards
  for i, board in enumerate(all_boards_binary):
    # augment example. use only boards from here onward
    # NOTE(review): the anchor is the binarized board while the candidates are taken
    # from the raw all_boards -- confirm this mix is intended
    board_pairs, board_labels = preprocess.get_all_pairs_reshaped(board, all_boards[i:])
    pair_list += board_pairs
    label_list += board_labels

  if get_lists:
    # the caller only wants the raw lists, so don't spend time building the dataset
    return pair_list, label_list

  ## create dataset
  x, y = preprocess.get_dataset_from_lists(pair_list, label_list)

  return x, y

In [8]:
### just for this notebook
# x, y are the processed siamese dataset returned by preprocess.get_dataset_from_lists
x, y = get_siamese_dataset(training_tasks, eval_tasks, test_tasks)

# Basic residual blocks


In [10]:
def residual_encoder_block(filter_num, kernel_size, bn_moment):
  """
  Builds a residual convolutional block in functional style.

  The returned callable applies three (same-padded relu Conv2D --> BatchNormalization)
  stages to its input and returns the sum of the first and third stage outputs.
  New layers are created every time the callable is applied.

  Args:
  filter_num - number of filters of every convolution in the block
  kernel_size - kernel size of every convolution in the block
  bn_moment - momentum of every batch normalization in the block
  """

  def conv_bn(tensor):
    # one convolution followed by its batch normalization
    conv = Conv2D(filter_num, kernel_size=kernel_size, activation='relu', padding='same')(tensor)
    return BatchNormalization(momentum=bn_moment)(conv)

  def apply_block(x):
    first = conv_bn(x)
    second = conv_bn(first)
    third = conv_bn(second)

    # residual connection skips the second and third stages
    return first + third

  return apply_block

def residual_decoder_block(filter_num, kernel_size, bn_moment, first=False):
  """
  A functional style residual connection deconvolutional block.

  Args:
  filter_num - number of filters of every layer in the block
  kernel_size - kernel size of every transposed convolution in the block
  bn_moment - momentum of every batch normalization in the block
  first - bool, determines if it is the first block of a network. The first block only
          projects the dense vector with a linear Dense layer and reshapes it into a
          square feature map; it has no deconvolutions and no residual connection.

  Returns:
  a callable mapping an input tensor to the output tensor of the block.
  """

  def block(x, filter_num, kernel_size=(3,3), bn_moment=0.9):

    # first layer
    if first:
      # math.sqrt returns a float, but layer sizes and reshape targets have to be ints
      dense_sqrt = int(math.sqrt(DENSE_REP_SIZE))
      # NOTE(review): the feature map side is derived from DENSE_REP_SIZE and not from BOARD_SIZE;
      # get_decoder upsamples twice by 2, so its output side is 4 * dense_sqrt -- confirm this
      # matches the board size in use
      dc1 = Dense(dense_sqrt*dense_sqrt*filter_num, activation='linear')(x) # num neurons is dependent upon the number of blocks
      dc1 = Reshape(target_shape=(dense_sqrt,dense_sqrt,filter_num))(dc1) # reshaping is dependent upon the number of blocks

      return dc1

    else:
      dc1 = Conv2DTranspose(filter_num, kernel_size=kernel_size, activation='relu', padding='same')(x)
      dc1 = BatchNormalization(momentum=bn_moment)(dc1)

    # second layer
    dc2 = Conv2DTranspose(filter_num, kernel_size=kernel_size, activation='relu', padding='same')(dc1)
    dc2 = BatchNormalization(momentum=bn_moment)(dc2)

    # third layer
    dc3 = Conv2DTranspose(filter_num, kernel_size=kernel_size, activation='relu', padding='same')(dc2)
    dc3 = BatchNormalization(momentum=bn_moment)(dc3)

    # residual connection
    res = dc1 + dc3

    return res

  return lambda x: block(x, filter_num, kernel_size, bn_moment)

# Defining encoder and decoder architecture


# Siamese networks architecture
- getting the right architecture
- went for residual networks

## Main architecture

In [None]:
def get_encoder(input_shape):
    """
    Builds the residual encoder model: board --> dense representation vector.

    Three residual blocks (16, 32 and 64 filters), each followed by 2x2 max pooling,
    then flatten, a sigmoid dense layer of DENSE_REP_SIZE units and a batch normalization.
    The returned model is not compiled.
    """

    x = Input(input_shape, name='x')

    # residual block + max pooling, once per filter count
    features = x
    for block_num, filter_num in enumerate((16, 32, 64), start=1):
        features = residual_encoder_block(filter_num, kernel_size=(3,3), bn_moment=0.9)(features)
        features = MaxPool2D(pool_size=(2,2), name='max_pool{}'.format(block_num))(features)

    # flatten and squeeze into the dense representation
    flat = Flatten()(features)
    dense_rep = Dense(DENSE_REP_SIZE, activation='sigmoid', name='dense_rep')(flat)
    normalized_rep = BatchNormalization(momentum=0.9)(dense_rep)

    return tf.keras.Model(inputs=x, outputs=normalized_rep)

def get_siamese_networks_model(input_shape):
  """
  Builds and compiles the siamese networks model. ref paper: https://www.cs.cmu.edu/~rsalakhu/papers/oneshot1.pdf

  Both inputs go through the same encoder; the element-wise absolute difference of the
  two feature vectors is fed to a 5-way softmax layer.
  """

  # the two boards to compare
  board_inputs = [Input(input_shape, name=input_name) for input_name in ('input1', 'input2')]

  # a single encoder instance, so both branches share weights
  encoder = get_encoder(input_shape)
  feature_vectors = [encoder(board) for board in board_inputs]

  # element-wise L1 distance between the feature vectors
  abs_diff = Lambda(lambda tensors: tf.abs(tensors[0] - tensors[1]))(feature_vectors)

  # class probabilities
  probs = Dense(5, activation='softmax')(abs_diff)

  siamese_net = tf.keras.Model(inputs=board_inputs, outputs=probs)
  siamese_net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])

  return siamese_net

## Training siamese networks

In [None]:
# build and compile the siamese networks model for single channel boards of BOARD_SIZE
sn = get_siamese_networks_model([*BOARD_SIZE, 1])

In [None]:
# train on the pair dataset; validation_split=0.2 holds out a fifth of it for validation
hist = sn.fit(x=x, y=y, epochs=30, batch_size=SN_BATCH_SIZE, shuffle=True, validation_split=0.2)

In [None]:
# NOTE(review): display_training_curves is neither defined nor imported in the visible cells -- confirm where it comes from before running
display_training_curves(hist.history, metric='categorical_accuracy', with_val=True)

# Board generator (decoder) architecture

In [None]:
def get_encoder_from_siamese(siamese):
  """
  Wraps the shared encoder of the siamese networks model into a standalone model.
  Maps board --> feature vector of dim DENSE_REP_SIZE
  """

  board_input = tf.keras.layers.InputLayer(input_shape=[*BOARD_SIZE, 1])
  shared_encoder = siamese.layers[2] # the encoder sub-model follows the two input layers

  return tf.keras.models.Sequential([board_input, shared_encoder])

def get_decoder():
  """
  Builds and compiles the decoder. Maps feature vector of dim DENSE_REP_SIZE --> board.

  A first (dense + reshape) block with 64 filters is followed by two
  (2x upsampling --> residual decoder block) stages with 32 and 16 filters and a
  final 1x1 sigmoid convolution producing a single channel.
  """

  rep = Input([DENSE_REP_SIZE])

  # first block turns the vector into a feature map
  features = residual_decoder_block(64, kernel_size=(3,3), bn_moment=0.9, first=True)(rep)

  # upsample and refine, once per remaining filter count
  for filter_num in (32, 16):
    features = UpSampling2D(size=(2,2))(features)
    features = residual_decoder_block(filter_num, kernel_size=(3,3), bn_moment=0.9)(features)

  # squash to a single channel of per-pixel probabilities
  board = Conv2D(1, kernel_size=(1,1), activation='sigmoid')(features)

  decoder = tf.keras.Model(inputs=rep, outputs=board)
  decoder.compile(loss=pixelwise_error_loss, optimizer='adam', metrics=[pixelwise_auc])

  return decoder


def pixelwise_error_loss(y_true, y_pred):
  """
  Custom loss function. Sum of squared pixelwise errors over the whole batch,
  divided by the number of boards in the batch.

  Args:
  y_true - true boards, batch first
  y_pred - predicted boards, same shape as y_true

  Returns:
  scalar tensor, the mean over the batch of each board's summed squared error
  """

  squared_error = tf.keras.backend.sum((y_true - y_pred) ** 2)

  # use the dynamic shape: the static y_pred.shape[0] is None whenever the batch size is unknown
  batch_size = tf.cast(tf.shape(y_pred)[0], squared_error.dtype)

  return squared_error / batch_size


def pixelwise_accuracy(y_true, y_pred, threshold=0.8):
  """
  Custom accuracy function. Pixelwise accuracy of predicting 0 / 1

  Args:
  y_true - true binary boards, batch first
  y_pred - predicted per-pixel scores, same shape as y_true
  threshold - scores greater or equal to it count as predicting 1

  Returns:
  scalar tensor, correctly predicted pixels / (BOARD_SIZE[0] * BOARD_SIZE[1] * batch size)
  """

  tp = tf.math.reduce_sum(tf.math.multiply(tf.cast((y_pred >= threshold), 'float32'), y_true)) # cast boolean to binary
  tn = tf.math.reduce_sum(tf.math.multiply(tf.cast((y_pred < threshold), 'float32'), tf.cast((y_true == 0), 'float32')))
  n_predictions = BOARD_SIZE[0] * BOARD_SIZE[1]

  # use the dynamic shape: the static y_pred.shape[0] is None whenever the batch size is unknown
  batch_size = tf.cast(tf.shape(y_pred)[0], 'float32')

  return ((tp + tn) / n_predictions) / batch_size


def pixelwise_sensitivity(y_true, y_pred, threshold=0.8):
  """
  Pixelwise sensitivity: of all the pixels that are truly 1, the fraction whose
  predicted score is at least the threshold. Defined as 1 when there are no true 1s.
  """

  predicted_ones = tf.cast((y_pred >= threshold), 'float32') # cast boolean to binary
  tp = tf.math.reduce_sum(tf.math.multiply(predicted_ones, y_true))
  p = tf.cast(tf.math.reduce_sum(y_true), 'float32')

  def true_positive_rate():
    return tp / p

  def no_positives():
    # nothing to detect, don't divide by zero
    return tf.constant(1, dtype='float32')

  return tf.cond(p > 0, true_positive_rate, no_positives)

def pixelwise_fpr(y_true, y_pred, threshold=0.8):
  """
  Pixelwise false positive rate: of all the pixels that are truly 0, the fraction whose
  predicted score is at least the threshold. Defined as 1 when there are no true 0s.
  """

  predicted_ones = tf.cast((y_pred >= threshold), 'float32') # cast boolean to binary
  true_zeros = tf.cast((y_true==0), 'float32')

  fp = tf.math.reduce_sum(tf.math.multiply(predicted_ones, true_zeros))
  n = tf.cast(tf.math.reduce_sum(true_zeros), 'float32')

  def false_positive_rate():
    return fp / n

  def no_negatives():
    # no negatives at all, don't divide by zero
    return tf.constant(1, dtype='float32')

  return tf.cond(n > 0, false_positive_rate, no_negatives)


def pixelwise_auc(y_true, y_pred, num_thresholds=10):
  """
  Pixelwise auc score: area under the curve of pixelwise sensitivity against pixelwise
  false positive rate, approximated with the trapezoid rule.

  Args:
  y_true - true binary boards
  y_pred - predicted per-pixel scores
  num_thresholds - number of evenly spaced thresholds between 1 and 0 used as curve points,
                   has to be at least 2

  Returns:
  scalar, the approximated area under the curve
  """

  sensitivity = []
  fpr = []

  # compute graph points, going from the strictest threshold to the most permissive one
  for threshold in np.linspace(1, 0, num=num_thresholds):
    sensitivity.append(pixelwise_sensitivity(y_true, y_pred, threshold=threshold))
    fpr.append(pixelwise_fpr(y_true, y_pred, threshold=threshold))

  # compute trapeze area between every two consecutive points
  trapeze = []
  for i in range(len(sensitivity) - 1):
    # the height is the mean of both ends; taking only the left end would under-estimate the area
    mean_height = (sensitivity[i] + sensitivity[i+1]) / 2
    area = tf.multiply(mean_height, fpr[i+1] - fpr[i])
    trapeze.append(area)

  auc_score = tf.add_n(trapeze)

  return auc_score

In [None]:
# build and compile a fresh (untrained) decoder
dec = get_decoder()

## Decoder dataset


In [None]:
def get_decoder_dataset(siamese, encoder_dataset):
  """
  Builds the decoder training data by encoding boards with the siamese encoder.

  Args:
  siamese - siamese model to extract the encoder from
  encoder_dataset - boards to encode into feature vectors

  Returns:
  x - feature vectors of the boards, as predicted by the encoder
  y - the boards themselves (the very same object that was passed in)
  """

  board_encoder = get_encoder_from_siamese(siamese)
  feature_vectors = board_encoder.predict(encoder_dataset)

  # the decoder learns to recreate each board from its feature vector
  return feature_vectors, encoder_dataset

# create encoder dataset from the old pair and label lists
pair_list, label_list = get_siamese_dataset(training_tasks, eval_tasks, test_tasks, get_lists=True)
# get_dataset_from_lists lives in the preprocess module, which is imported by name only
encoder_dataset = preprocess.get_dataset_from_lists(pair_list, label_list, for_encoder=True)
# create decoder dataset from encoder
decoder_x, decoder_y = get_decoder_dataset(sn, encoder_dataset)

In [None]:
# fit the decoder to recreate each board from its encoder feature vector
hist_decoder = dec.fit(x=decoder_x, y=decoder_y, batch_size=DECODER_BATCH_SIZE, shuffle=True, epochs=10, validation_split=0.2)

# Check regular encoder-decoder architecture
## For checking some hyperparameters and deciding which decoder architecture to use

## Some takeaways:
- adding two sets of randomly padded data helps a lot. more doesn't help
- small decoder batch size (4-16) really helps
- adding more neurons to the compressed representation (512 --> 1024) really helps
- removing the last layer's batch normalization really helps
 - removing bn before the compressed representation isn't good
- penalizing mistaking 1's for 0's more doesn't help at all
- changing the last layer activation to linear and clipping to [0,1] helps to some extent
- changing middle to relu - really bad!!!
- adding another conv-deconv layer at the beginning helps a little
- adding another conv-deconv layer at the end doesn't help at all
- residual layers - really cool! used the architecture from this paper --> https://github.com/omerhac/arc_challenge/blob/master/deep%20residual%20conv-deconv%20network.pdf
- more types of data augmentation....


In [None]:
def get_encoder_decoder(input_shape):
  """
  Create an encoder decoder "normal" architecture, with residual connections

  Args:
  input_shape - shape of a single input board, e.g. [*BOARD_SIZE, 1]

  Returns:
  compiled model mapping board --> dense representation --> predicted board
  """
  # the model input has to agree with the encoder, which is built for input_shape
  inp = tf.keras.layers.Input(input_shape)

  # encoder
  encoder = get_encoder(input_shape)

  # dense representation
  dense_rep = encoder(inp)

  # decoder
  decoder = get_decoder()
  pred_board = decoder(dense_rep)

  model = tf.keras.Model(inp, pred_board)

  # compile
  model.compile(loss=pixelwise_error_loss, optimizer='adam', metrics=[pixelwise_auc])
  return model

def get_encoder_from_autoencoder(auto_encoder):
  """
  Get the encoder part of the autoencoder: its first two layers, wrapped in a Sequential model.
  """

  encoder_part = list(auto_encoder.layers[:2])

  return tf.keras.models.Sequential(encoder_part)

def get_decoder_from_autoencoder(auto_encoder):
  """
  Get the decoder part of the autoencoder: its third layer, fed by a new input of dim DENSE_REP_SIZE.
  """

  # cut some corners here.. should inherit input shape from autoencoder
  rep_input = tf.keras.layers.Input(shape=[DENSE_REP_SIZE])
  decoder_part = auto_encoder.layers[2]

  return tf.keras.models.Sequential([rep_input, decoder_part])

In [None]:
# build and compile the plain encoder-decoder for single channel boards of BOARD_SIZE
ed = get_encoder_decoder([*BOARD_SIZE, 1])

In [None]:
# train as an autoencoder: the boards are both the input and the target
ed_hist = ed.fit(x=decoder_y, y=decoder_y, batch_size=8, shuffle=True, epochs=20, validation_split=0.2) # get image --> predict image

In [None]:
## check predictions

In [None]:
boards = decoder_y[-10:] # the last ten boards of the decoder targets

# predict
predictions = ed.predict(boards)

# reshape
# NOTE(review): plotting_shape_board and plot_decoder_boards are neither defined nor imported in the visible cells -- confirm where they come from
boards = [plotting_shape_board(board) for board in boards]
predictions = [plotting_shape_board(board) for board in predictions]
pairs = zip(boards, predictions) # (original board, recreated board) pairs

plot_decoder_boards(list(pairs))


# Interleaved training (decoder/encoder/decoder..)

In [None]:
def copy_decoder_to_siamese(siamese, decoder):
  """
  Copies the kernels of the decoder into the siamese networks model's encoder.

  The decoder layers are walked from last to first; the layer at reversed position i
  writes into encoder layer i. Only layers holding one or two weight arrays take part
  (which skips weightless layers and batch normalization). The kernel is transposed
  and the bias of the receiving layer is reset to zeros.
  """

  decoder_layers = decoder.layers # get decoder layers
  siamese_layers = siamese.layers[2].layers # siamese encoder layers

  for position, source in enumerate(reversed(decoder_layers)):
    n_weights = len(source.weights)
    if n_weights == 0 or n_weights > 2: # weightless layer or BN layer
      continue

    # a small leading kernel dimension marks a deconv layer, a large one a dense layer
    is_deconv = source.weights[0].shape[0] < 100

    kernel = source.get_weights()[0] # get weights from decoder
    target = siamese_layers[position]
    zero_bias = np.zeros(target.get_weights()[1].shape) # init new bias

    if is_deconv:
      kernel = kernel.transpose([1,0,2,3]) # swap the two spatial axes
    else:
      kernel = kernel.transpose() # swap inputs and outputs

    target.set_weights([kernel, zero_bias]) # set siamese weights


def copy_siamese_to_decoder(siamese, decoder):
  """
  Copies the kernels of the siamese networks model's encoder into the decoder.

  The decoder layers are walked from last to first; the layer at reversed position i
  reads from encoder layer i. Only decoder layers holding one or two weight arrays take
  part (which skips weightless layers and batch normalization). The kernel is transposed
  and the bias of the receiving decoder layer is reset to zeros.
  """

  decoder_layers = decoder.layers # get decoder layers
  siamese_layers = siamese.layers[2].layers # siamese encoder layers

  for position, target in enumerate(reversed(decoder_layers)):
    n_weights = len(target.weights)
    if n_weights == 0 or n_weights > 2: # weightless layer or BN layer
      continue

    # a small leading kernel dimension marks a deconv layer, a large one a dense layer
    is_deconv = target.weights[0].shape[0] < 100

    kernel = siamese_layers[position].get_weights()[0] # get weights from siamese encoder
    zero_bias = np.zeros(target.get_weights()[1].shape) # init new bias

    if is_deconv:
      kernel = kernel.transpose([1,0,2,3]) # swap the two spatial axes
    else:
      kernel = kernel.transpose() # swap inputs and outputs

    target.set_weights([kernel, zero_bias]) # set decoder weights


In [None]:
# create new models with same weights
EPOCHS = 30
sn = get_siamese_networks_model([*BOARD_SIZE, 1])
# get_decoder takes no arguments; the encoder weights are copied into the decoder
# by copy_siamese_to_decoder inside the training loop
decoder = get_decoder()

In [None]:
from tqdm import tqdm
SN_BATCH_SIZE = 64
DEC_BATCH_SIZE = 4
DATA_SPLIT = 5
ROUNDS_PER_EPOCH = 40 # siamese/decoder alternations inside every epoch
SN_STEPS_PER_EPOCH = len(x) // SN_BATCH_SIZE
DEC_STEPS_PER_EPOCH = len(decoder_x) // DEC_BATCH_SIZE
# NOTE(review): the two *_STEPS_PER_EPOCH values are never used, the fit calls below hard-code
# 780 and 24000 instead -- confirm which ones are intended

# even fancier training loop
for epoch in range(EPOCHS):
  print("training... epoch num: {}".format(epoch))

  # separate loop variable, so the round counter doesn't shadow the epoch counter
  for training_round in range(ROUNDS_PER_EPOCH):
    # train siamese
    _ = sn.fit(x=x, y=y, epochs=1, batch_size=SN_BATCH_SIZE, shuffle=True, steps_per_epoch=(780//DATA_SPLIT))

    # copy weights
    copy_siamese_to_decoder(sn, decoder)

    # get new decoder dataset
    decoder_x, decoder_y = get_decoder_dataset(sn, encoder_dataset) # encoder dataset is the same as before

    # train decoder
    _ = decoder.fit(x=decoder_x, y=decoder_y, epochs=1, batch_size=DEC_BATCH_SIZE, shuffle=True, steps_per_epoch=(24000//DATA_SPLIT)) #### WOW!!! use smaller batch size, WOHOOO!!

    # copy weights
    copy_decoder_to_siamese(sn, decoder)

# Predicting from difference in boards

In [None]:
# NOTE(review): neither get_task_boards nor pad is defined or imported in the visible cells -- presumably they belong to preprocess; confirm before running
task_training_input, task_training_output, task_test_input, task_test_output = get_task_boards(training_tasks[0], pad=pad, divide_sets=True)

In [None]:
EXAMPLE = 99
# toy dataset (x is the dataset for the siamese networks)
# the anchor comes from the first side of the pairs, the other three boards from the second side
anchor = x[0][EXAMPLE]
rotate_once_anchor = x[1][EXAMPLE + 1]
rotate_twice_anchor = x[1][EXAMPLE + 2]
rotate_three_anchor = x[1][EXAMPLE + 3]

toy_boards = [anchor, rotate_once_anchor, rotate_twice_anchor, rotate_three_anchor]
toy = np.stack(toy_boards)

# plot every board on its own axis, titled by its position in the toy dataset
fig, axs = plt.subplots(1,4)
for position, toy_board in enumerate(toy_boards):
  plot_board(plotting_shape_board(toy_board), axs[position], title=str(position))

## define predictor

In [None]:
def get_predictor(decoder):
  """
  Builds and compiles a predictor: takes a board vector and a rules vector, adds them
  up and decodes the sum into the predicted output board.

  Args:
  decoder --> trained decoder
  """

  # both inputs live in the dense representation space
  board_vec = Input([DENSE_REP_SIZE])
  rules_vec = Input([DENSE_REP_SIZE])

  # applying the rules is a plain vector addition
  prediction = decoder(board_vec + rules_vec)

  predictor = tf.keras.Model(inputs=[board_vec, rules_vec], outputs=prediction)
  predictor.compile(loss=pixelwise_error_loss, optimizer='adam', metrics=[pixelwise_auc])

  return predictor

In [None]:
## encode all toy dataset
encoder = get_encoder_from_autoencoder(ed)
encoded_boards = encoder.predict(toy)

# get difference between every two consecutive toy boards, in representation space
diff = encoded_boards[1:] - encoded_boards[:-1]

#diff = np.stack([diff[2], diff[0], diff[1]])

# predict difference: every board vector plus its difference vector is decoded by the predictor
predictor = get_predictor(get_decoder_from_autoencoder(ed))
predictions = predictor.predict([encoded_boards[:-1], diff])

In [None]:
# one axis per predicted board
fig,axs = plt.subplots(1,3)
for position in range(3):
  plot_board(plotting_shape_board(predictions[position]), axs[position])

In [None]:
# decode the encoded toy boards with the standalone decoder and plot one board per axis
p = dec.predict(encoded_boards)
fig,axs = plt.subplots(1,4)
for position in range(4):
  plot_board(plotting_shape_board(p[position]), axs[position])

In [None]:
# permutate: pair each of the first two encoded boards with every difference vector
diffs = np.stack([diff[0], diff[1], diff[2],diff[0], diff[1], diff[2]])
eb = np.stack([encoded_boards[0], encoded_boards[0], encoded_boards[0], encoded_boards[1], encoded_boards[1], encoded_boards[1]])
# the stacked toy dataset is named `toy`; `toys` was never defined
t = np.stack([toy[1], toy[1], toy[1], toy[2], toy[2], toy[2]])