<a href="https://colab.research.google.com/github/omerhac/arc_challenge/blob/master/arc_modulerized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies


In [1]:
import numpy as np
import tensorflow as tf
import json
from google.cloud import storage
from matplotlib import pyplot as plt
from matplotlib import colors
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Conv2D, Lambda, Dense, Flatten, MaxPool2D, Input, BatchNormalization, Conv2DTranspose, UpSampling2D, Reshape
from sklearn.preprocessing import OneHotEncoder
import math

AUTO = tf.data.experimental.AUTOTUNE

In [None]:
# get repository from github
!git clone https://github.com/omerhac/arc_challenge.git

Cloning into 'arc_challenge'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K


In [None]:
# navigate to directory
%cd arc_challenge

In [None]:
# load repository dependencies
!pip install import_ipynb
import import_ipynb
import preprocess
from importlib import reload
from metrics import * # metrics module

In [None]:
## constants ##
# Module-level configuration shared by the model builders and training cells below.
BOARD_SIZE = (16,16) # board upperbound size; used as [*BOARD_SIZE, 1] for the model input shape
SN_BATCH_SIZE = 64 # batch size passed to the siamese model's fit() in the training loop
DECODER_BATCH_SIZE = 8 # NOTE(review): not referenced by the visible cells (they use a literal 8 or DEC_BATCH_SIZE) - confirm it is still needed
DENSE_REP_SIZE = 64 # dense vector representation size (encoder output width / decoder input width)


# Load data


In [None]:
%%time
# Load the three task collections through the preprocess module; %%time reports how long the cell takes.
training_tasks, eval_tasks, test_tasks = preprocess.load_data_from_jsons()

In [None]:
def get_siamese_dataset(training_tasks, eval_tasks, test_tasks, get_lists=False):
  """
  Creates a dataset for the siamese networks.

  Every board is binarized and paired (through preprocess.get_all_pairs_reshaped)
  with the boards from its own position onward, so each pair is produced once.

  Args:
  .._tasks: list of tasks.
  get_lists: bool, whether to get just the lists of pairs or the processed dataset.

  Returns:
  (pair_list, label_list) when get_lists is True, otherwise the (x, y) pair
  produced by preprocess.get_dataset_from_lists.
  """

  # extract all boards and their binarized counterparts
  all_boards = preprocess.get_all_boards(training_tasks, eval_tasks, test_tasks)
  all_boards_binary = [preprocess.get_binary_board(board) for board in all_boards]

  # boards are intentionally left un-normalized (author was unsure whether normalization is needed)

  # accumulate the augmentation pairs and labels of every board
  pair_list = []
  label_list = []

  for i, board in enumerate(all_boards_binary):
    # use only boards from here onward so a pair is not generated twice
    # NOTE(review): the anchor is a binarized board while the candidates come from the
    # raw all_boards list - confirm get_all_pairs_reshaped expects that mix
    board_pairs, board_labels = preprocess.get_all_pairs_reshaped(board, all_boards[i:])
    pair_list.extend(board_pairs)
    label_list.extend(board_labels)

  # callers that only want the raw lists should not pay for building the dataset
  if get_lists:
    return pair_list, label_list

  x, y = preprocess.get_dataset_from_lists(pair_list, label_list)
  return x, y

In [None]:
### just for this notebook
# Build the siamese dataset once; x and y are reused by the training loop and the toy example further down.
x, y = get_siamese_dataset(training_tasks, eval_tasks, test_tasks)

# Basic residual blocks


In [None]:
def residual_encoder_block(filter_num, kernel_size, bn_moment):
  """
  Build a residual convolutional block in functional style.

  Args:
  filter_num: number of filters of every convolution in the block.
  kernel_size: kernel size of every convolution in the block.
  bn_moment: momentum of the batch normalization layers.

  Returns:
  A callable mapping an input tensor to the block output: three
  conv + batch-norm stages where the output of the first stage is added
  to the output of the third.
  """

  def conv_bn(tensor):
    # 'same' padding keeps the spatial size so the residual sum is well defined
    conv = Conv2D(filter_num, kernel_size=kernel_size, activation='relu', padding='same')(tensor)
    return BatchNormalization(momentum=bn_moment)(conv)

  def apply(x):
    first = conv_bn(x)
    second = conv_bn(first)
    third = conv_bn(second)

    # skip connection from the first stage to the last one
    return first + third

  return apply

def residual_decoder_block(filter_num, kernel_size, bn_moment):
  """
  Build a residual deconvolutional block in functional style.

  Args:
  filter_num: number of filters of every transposed convolution in the block.
  kernel_size: kernel size of every transposed convolution in the block.
  bn_moment: momentum of the batch normalization layers.

  Returns:
  A callable mapping an input tensor to the block output: three
  transposed-conv + batch-norm stages where the output of the first stage
  is added to the output of the third.
  """

  def deconv_bn(tensor):
    # 'same' padding keeps the spatial size so the residual sum is well defined
    deconv = Conv2DTranspose(filter_num, kernel_size=kernel_size, activation='relu', padding='same')(tensor)
    return BatchNormalization(momentum=bn_moment)(deconv)

  def apply(x):
    first = deconv_bn(x)
    second = deconv_bn(first)
    third = deconv_bn(second)

    # skip connection from the first stage to the last one
    return first + third

  return apply

def reshape_dense(filter_num, shape):
  """
  Build a callable that projects a dense vector and reshapes it into a
  (shape[0], shape[1], filter_num) feature map for the first decoder layer.

  Args:
  filter_num: channel count of the resulting feature map.
  shape: indexable with the two spatial sizes of the resulting feature map.
  """

  def apply(x):
    rows, cols = shape[0], shape[1]

    # linear projection to exactly as many units as the feature map has entries
    projected = Dense(rows * cols * filter_num, activation='linear')(x)

    return Reshape(target_shape=(rows, cols, filter_num))(projected)

  return apply
  

# Defining encoder and decoder architecture


In [None]:
def get_encoder(input_shape, b1_filters, b2_filters, b3_filters):
    """
    Build an encoder with residual connections.

    The input goes through three stages; each stage is a residual block
    followed by a 3x3 'valid' convolution (named 'deres1'..'deres3').
    The result is flattened and mapped to a DENSE_REP_SIZE vector by a
    sigmoid dense layer followed by batch normalization.

    args:
    input_shape: shape of a single input board, passed to the Input layer.
    b-_filters: num of filters of the - block

    returns:
    An uncompiled tf.keras.Model mapping a board to its dense representation.
    """

    x = Input(input_shape, name='x')

    # (filters, name of the size-reducing convolution) for each stage, in order
    stages = (
        (b1_filters, 'deres1'),
        (b2_filters, 'deres2'),
        (b3_filters, 'deres3'),
    )

    features = x
    for filters, reducer_name in stages:
        features = residual_encoder_block(filters, kernel_size=(3,3), bn_moment=0.9)(features)
        features = Conv2D(filters, kernel_size=(3,3), activation='relu', padding='valid', name=reducer_name)(features)

    # dense representation head
    flattened = Flatten()(features)
    dense_rep = Dense(DENSE_REP_SIZE, activation='sigmoid', name='dense_rep')(flattened)
    normalized_rep = BatchNormalization(momentum=0.9)(dense_rep)

    return tf.keras.Model(inputs=x, outputs=normalized_rep)


def get_decoder(b1_filters, b2_filters, b3_filters):
  """
  Creates a decoder. Maps feature vector of dim DENSE_REP_SIZE --> board.
  Same architecture from encoder is preserved (in reverse order).

  The dense vector is projected and reshaped into a small feature map, then
  passed through three stages; each stage is a 3x3 'valid' transposed
  convolution followed by a residual block of the same width. A final 1x1
  sigmoid convolution produces the single-channel board.

  args:
  b-_filters: num of filters of the - block (the residual block of a stage
  uses the same width as the stage's transposed convolution)

  returns:
  A tf.keras.Model compiled with pixelwise_error_loss / adam / pixelwise_auc.
  """

  # build model
  inp = Input([DENSE_REP_SIZE])

  # Each of the three 'valid' 3x3 transposed convolutions grows both spatial
  # dimensions by 2, so start 6 cells below the board size to end exactly on
  # BOARD_SIZE ((10, 10) for a (16, 16) board).
  start_shape = (BOARD_SIZE[0] - 6, BOARD_SIZE[1] - 6)
  reshaped_dense = reshape_dense(b1_filters, start_shape)(inp)

  # first block
  up_res1 = Conv2DTranspose(b1_filters, kernel_size=(3,3), padding='valid', activation='relu')(reshaped_dense)
  block1 = residual_decoder_block(b1_filters, kernel_size=(3,3), bn_moment=0.9)(up_res1)

  # second block
  up_res2 = Conv2DTranspose(b2_filters, kernel_size=(3,3), padding='valid', activation='relu')(block1)
  block2 = residual_decoder_block(b2_filters, kernel_size=(3,3), bn_moment=0.9)(up_res2)

  # third block
  up_res3 = Conv2DTranspose(b3_filters, kernel_size=(3,3), padding='valid', activation='relu')(block2)
  block3 = residual_decoder_block(b3_filters, kernel_size=(3,3), bn_moment=0.9)(up_res3)

  # collapse the channels into a single per-cell probability
  output = Conv2D(1, kernel_size=(1,1), activation='sigmoid')(block3)

  model = tf.keras.Model(inputs=inp, outputs=output)

  # compile
  model.compile(loss=pixelwise_error_loss, optimizer='adam', metrics=[pixelwise_auc])

  return model

# Define encoder and decoder


In [None]:
# Stand-alone decoder and encoder instances; the decoder's filter counts (3,2,1) mirror the encoder's (1,2,3) in reverse.
dec = get_decoder(3,2,1)
enc = get_encoder([*BOARD_SIZE, 1], 1, 2, 3)

In [None]:
enc.summary()

In [None]:
dec.summary()

## Decoder dataset


In [None]:
# create encoder dataset from the old pair and label lists
# (get_lists=True returns the raw pair/label lists instead of the processed siamese dataset)
pair_list, label_list = get_siamese_dataset(training_tasks, eval_tasks, test_tasks, get_lists=True)
encoder_dataset = preprocess.get_dataset_from_lists(pair_list, label_list, for_encoder=True) 

# create decoder dataset from encoder
# NOTE(review): enc has not been trained at this point in the visible cells - confirm that is intended
decoder_x, decoder_y = preprocess.get_decoder_dataset(enc, encoder_dataset)

In [None]:
def get_encoder_decoder(input_shape, b1_filters, b2_filters, b3_filters):
  """
  Create an encoder decoder "normal" architecture, with residual connections.

  The encoder is built with (b1, b2, b3) filters and the decoder with the
  same counts in reverse order, so the two halves mirror each other.

  args:
  input_shape: shape of a single input board; used both for the autoencoder
  input and for the encoder it wraps.
  b-_filters: num of filters of the - encoder block

  returns:
  A tf.keras.Model (board --> reconstructed board) compiled with
  pixelwise_error_loss / adam / pixelwise_auc.
  """
  # honor the requested input shape instead of always assuming [*BOARD_SIZE, 1]
  # NOTE: get_decoder takes no shape argument, so the output size is whatever it builds
  inp = tf.keras.layers.Input(input_shape)

  # encoder
  encoder = get_encoder(input_shape, b1_filters, b2_filters, b3_filters)

  # dense representation
  dense_rep = encoder(inp)

  # decoder (filter counts reversed to mirror the encoder)
  decoder = get_decoder(b3_filters, b2_filters, b1_filters)
  pred_board = decoder(dense_rep)

  model = tf.keras.Model(inp, pred_board)

  # compile
  model.compile(loss=pixelwise_error_loss, optimizer='adam', metrics=[pixelwise_auc])
  return model

def get_encoder_from_autoencoder(auto_encoder):
  """
  Get the encoder part of the autoencoder as a Sequential model.

  The result wraps the autoencoder's first two layers; for a model built by
  get_encoder_decoder these are expected to be the input layer and the
  encoder sub-model. The layers are reused, not copied.
  """

  ae_layers = auto_encoder.layers
  input_layer = ae_layers[0]
  encoder_model = ae_layers[1]

  return tf.keras.models.Sequential([input_layer, encoder_model])

def get_decoder_from_autoencoder(auto_encoder):
  """
  Get the decoder part of the autoencoder as a Sequential model.

  The result feeds a fresh DENSE_REP_SIZE input into the autoencoder's third
  layer; for a model built by get_encoder_decoder this is expected to be the
  decoder sub-model. The layer is reused, not copied.
  """

  # cut some corners here.. should inherit input shape from autoencoder
  dense_input = tf.keras.layers.Input(shape=[DENSE_REP_SIZE])
  decoder_model = auto_encoder.layers[2]

  return tf.keras.models.Sequential([dense_input, decoder_model])

In [None]:
# Build the full autoencoder on BOARD_SIZE single-channel boards with encoder filters (1, 2, 3).
ed = get_encoder_decoder([*BOARD_SIZE, 1], 1 ,2 ,3)

In [None]:
# Train the autoencoder to reproduce its own input: decoder_y is used as both x and y; 10% is held out for validation.
ed_hist = ed.fit(x=decoder_y, y=decoder_y, batch_size=8, shuffle=True, epochs=20, validation_split=0.1) # get image --> predict image

In [None]:
## check predictions

In [None]:
# take the last 10 boards of the decoder targets as a visual sanity check
boards = decoder_y[-10:]

# predict
predictions = ed.predict(boards)

# reshape
# (plotting_shape_board / plot_decoder_boards are not defined in the visible cells;
#  presumably they come from `from metrics import *` - verify)
boards = [plotting_shape_board(board) for board in boards]
predictions = [plotting_shape_board(board) for board in predictions]
pairs = zip(boards, predictions)

# plot every (original, reconstruction) pair
plot_decoder_boards(list(pairs))


# Interleaved training (decoder/encoder/decoder..)

In [None]:
def copy_decoder_to_siamese(siamese, decoder):
  """
  Copy the kernels of the decoder into the encoder of the siamese networks model.

  Decoder layers are walked in reverse order and matched by position with
  the layers of siamese.layers[2] (the siamese encoder). Only layers holding
  one or two weight arrays are copied, which skips weightless layers and
  batch normalization layers. The target layer receives the transposed
  decoder kernel and an all-zero bias of its current bias shape.

  Args:
  siamese: siamese networks model whose third layer is the encoder model.
  decoder: decoder model to read the kernels from.
  """

  encoder_layers = siamese.layers[2].layers # siamese encoder layers

  for position, source in enumerate(reversed(decoder.layers)):
    weight_count = len(source.weights)
    if weight_count == 0 or weight_count > 2:
      continue # nothing to copy, or a BN layer

    # a small leading kernel dimension marks a deconv layer, a large one a dense layer
    is_deconv = source.weights[0].shape[0] < 100

    target = encoder_layers[position]
    source_kernel = source.get_weights()[0]
    zero_bias = np.zeros(target.get_weights()[1].shape) # init new bias

    if is_deconv:
      new_kernel = source_kernel.transpose([1, 0, 2, 3]) # swap the two leading kernel axes
    else:
      new_kernel = source_kernel.transpose() # plain matrix transpose for dense kernels

    target.set_weights([new_kernel, zero_bias])


def copy_siamese_to_decoder(siamese, decoder):
  """
  Copy the kernels of the siamese networks model's encoder into the decoder.

  Decoder layers are walked in reverse order and matched by position with
  the layers of siamese.layers[2] (the siamese encoder). Only decoder layers
  holding one or two weight arrays are updated, which skips weightless
  layers and batch normalization layers. The decoder layer receives the
  transposed encoder kernel and an all-zero bias of its current bias shape.

  Args:
  siamese: siamese networks model whose third layer is the encoder model.
  decoder: decoder model to write the kernels into.
  """

  encoder_layers = siamese.layers[2].layers # siamese encoder layers

  for position, target in enumerate(reversed(decoder.layers)):
    weight_count = len(target.weights)
    if weight_count == 0 or weight_count > 2:
      continue # nothing to set, or a BN layer

    # a small leading kernel dimension marks a deconv layer, a large one a dense layer
    is_deconv = target.weights[0].shape[0] < 100

    source_kernel = encoder_layers[position].get_weights()[0]
    zero_bias = np.zeros(target.get_weights()[1].shape) # init new bias

    if is_deconv:
      new_kernel = source_kernel.transpose([1, 0, 2, 3]) # swap the two leading kernel axes
    else:
      new_kernel = source_kernel.transpose() # plain matrix transpose for dense kernels

    target.set_weights([new_kernel, zero_bias])


In [None]:
# create new models with same weights
EPOCHS = 30 # number of outer iterations of the interleaved training loop below
# NOTE(review): get_siamese_networks_model is not defined in the visible cells - confirm where it comes from
sn = get_siamese_networks_model([*BOARD_SIZE, 1])
# NOTE(review): this call does not match the get_decoder(b1_filters, b2_filters, b3_filters)
# signature defined in this notebook (it passes a model plus a copy_encoder_weights keyword);
# it presumably targets an older/other get_decoder - verify before running this cell
decoder = get_decoder(sn, copy_encoder_weights=False)

In [None]:
from tqdm import tqdm
SN_BATCH_SIZE = 64
DEC_BATCH_SIZE = 4
DATA_SPLIT = 5 # each fit() call below covers 1/DATA_SPLIT of the hard-coded step counts
# NOTE(review): these two values are computed but the fit() calls below use the literals
# 780 and 24000 instead - confirm which one is intended
SN_STEPS_PER_EPOCH = len(x) // SN_BATCH_SIZE
DEC_STEPS_PER_EPOCH = len(decoder_x) // DEC_BATCH_SIZE

# Interleaved training: alternate between the siamese model and the decoder,
# copying the shared kernels across after every half-step.
# The two loops use distinct variable names so the inner one no longer clobbers the epoch counter.
for epoch in range(EPOCHS):
  print("training... epoch num: {}".format(epoch))

  for _round in range(40):
    # train siamese
    _ = sn.fit(x=x, y=y, epochs=1, batch_size=SN_BATCH_SIZE, shuffle=True, steps_per_epoch=(780//DATA_SPLIT))

    # copy weights
    copy_siamese_to_decoder(sn, decoder)

    # get new decoder dataset (encoder dataset is the same as before)
    # NOTE(review): called unqualified here but as preprocess.get_decoder_dataset earlier,
    # and with the siamese model rather than an encoder - confirm both are intended
    decoder_x, decoder_y = get_decoder_dataset(sn, encoder_dataset)

    # train decoder (a smaller batch size worked noticeably better here)
    _ = decoder.fit(x=decoder_x, y=decoder_y, epochs=1, batch_size=DEC_BATCH_SIZE, shuffle=True, steps_per_epoch=(24000//DATA_SPLIT))

    # copy weights
    copy_decoder_to_siamese(sn, decoder)

# Predicting from difference in boards

In [None]:
# Split the first training task into its train/test input and output boards.
# NOTE(review): neither `pad` nor get_task_boards is defined in the visible cells - confirm they are in scope before running
task_training_input, task_training_output, task_test_input, task_test_output = get_task_boards(training_tasks[0], pad=pad, divide_sets=True)

In [None]:
EXAMPLE = 99 # index of the siamese pair used as the anchor of the toy example
# toy dataset (x is the dataset for the siamese networks)
# anchor comes from the first member of pair EXAMPLE, the other three from the
# second member of the following three pairs
# NOTE(review): presumably those are successive rotations of the anchor - depends on the
# pair ordering produced by preprocess; verify
anchor = x[0][EXAMPLE]
rotate_once_anchor = x[1][EXAMPLE + 1]
rotate_twice_anchor = x[1][EXAMPLE + 2]
rotate_three_anchor = x[1][EXAMPLE + 3]

# stack the four boards into one batch, in rotation order
toy = np.stack([anchor, rotate_once_anchor, rotate_twice_anchor, rotate_three_anchor])

# plot
fig, axs = plt.subplots(1,4)
plot_board(plotting_shape_board(anchor), axs[0], title='0')
plot_board(plotting_shape_board(rotate_once_anchor), axs[1], title='1')
plot_board(plotting_shape_board(rotate_twice_anchor), axs[2], title='2')
plot_board(plotting_shape_board(rotate_three_anchor), axs[3], title='3')

## define predictor

In [None]:
def get_predictor(decoder):
  """
  Build a predictor which takes a board vector and a rules vector and
  predicts the output board.

  The two DENSE_REP_SIZE inputs are summed and the sum is passed through
  the given decoder.

  Args:
  decoder --> trained decoder

  Returns:
  A tf.keras.Model with inputs [board_vec, rules_vec], compiled with
  pixelwise_error_loss / adam / pixelwise_auc.
  """

  # inputs: dense representation of the board and of the transformation to apply
  board_vec = Input([DENSE_REP_SIZE])
  rules_vec = Input([DENSE_REP_SIZE])

  # applying the rules is a plain vector addition in representation space
  prediction = decoder(board_vec + rules_vec)

  predictor = tf.keras.Model(inputs=[board_vec, rules_vec], outputs=prediction)
  predictor.compile(loss=pixelwise_error_loss, optimizer='adam', metrics=[pixelwise_auc])

  return predictor

In [None]:
## encode all toy dataset
encoder = get_encoder_from_autoencoder(ed)
encoded_boards = encoder.predict(toy)

# get difference between each encoded board and the one before it
diff = encoded_boards[1:] - encoded_boards[:-1]

#diff = np.stack([diff[2], diff[0], diff[1]])

# predict difference
# each board vector is paired with its own difference to the next board, so the
# predictor's summed input equals the next board's encoding
predictor = get_predictor(get_decoder_from_autoencoder(ed))
predictions = predictor.predict([encoded_boards[:-1], diff])

In [None]:
# show the three predicted boards side by side
fig, axs = plt.subplots(1, 3)
for col in range(3):
  plot_board(plotting_shape_board(predictions[col]), axs[col])

In [None]:
# decode the four encoded toy boards directly and show them side by side
# NOTE(review): dec is the stand-alone decoder built earlier, not the one inside the
# trained autoencoder `ed` - confirm this is the decoder meant here
p = dec.predict(encoded_boards)
fig, axs = plt.subplots(1, 4)
for col in range(4):
  plot_board(plotting_shape_board(p[col]), axs[col])

In [None]:
# permutate: pair each of the first two encoded boards with all three differences
diffs = np.stack([diff[0], diff[1], diff[2],diff[0], diff[1], diff[2]])
eb = np.stack([encoded_boards[0], encoded_boards[0], encoded_boards[0], encoded_boards[1], encoded_boards[1], encoded_boards[1]])
# the stacked toy boards live in `toy`; the name `toys` is never defined in the visible cells
t = np.stack([toy[1], toy[1], toy[1], toy[2], toy[2], toy[2]])