In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import sys
import time
import json
import random
import numpy as np
import pandas as pd
import itertools
import scipy
import gc
import copy

from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib import colors as colors_mat
from scipy.ndimage import label, generate_binary_structure
from numpy.lib.stride_tricks import as_strided
from itertools import product
from skimage import measure
from scipy.spatial.distance import cdist
from scipy.signal import convolve2d
from collections import Counter

# Walk the Kaggle input mount and print every directory found, so the
# available dataset folders show up in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    print(dirname)

/kaggle/input
/kaggle/input/abstraction-and-reasoning-challenge
/kaggle/input/abstraction-and-reasoning-challenge/evaluation
/kaggle/input/abstraction-and-reasoning-challenge/test
/kaggle/input/abstraction-and-reasoning-challenge/training


In [2]:
# Time this cell; the elapsed seconds are printed at the end.
start_time = time.time()
print("Load Initial Data ...")

# Root of the competition data and its three split folders.
data_path = Path('/kaggle/input/abstraction-and-reasoning-challenge/')
training_path = data_path / 'training'
evaluation_path = data_path / 'evaluation'
testing_path = data_path / 'test'

# File names (not full paths) of every entry in each split folder, sorted so
# that positional task indices are stable between runs.
training_tasks = sorted(os.listdir(training_path))
evaluation_tasks = sorted(os.listdir(evaluation_path))
testing_tasks = sorted(os.listdir(testing_path))
# Submission template, indexed by its 'output_id' column.
submission = pd.read_csv(data_path / 'sample_submission.csv', index_col='output_id')

print("--- %s seconds ---" % (time.time() - start_time))

Load Initial Data ...
--- 0.0291292667388916 seconds ---


In [3]:
start_time = time.time()
print("Load Data Functions ...")

def flattener(pred):
    """Serialise a predicted grid into a pipe-delimited string.

    The nested list produced by ``pred.tolist()`` is rendered with ``str`` and
    then rewritten by a fixed sequence of textual substitutions, so that a 2-D
    grid such as ``[[1, 2], [3, 4]]`` becomes ``'|12|34|'``.

    Parameters:
        pred: array-like object exposing ``tolist()`` (e.g. a numpy array).

    Returns:
        The flattened string representation.
    """
    text = str(list(pred.tolist()))

    # Order matters: separators are stripped first, then brackets become pipes.
    substitutions = ((', ', ''), ('[[', '|'), ('][', '|'), (']]', '|'))
    for old, new in substitutions:
        text = text.replace(old, new)

    return text

def build_trainlist(task):
    """Collect the training pairs of a task as ``uint8`` arrays.

    Parameters:
        task: mapping with a ``"train"`` entry; each sample in it provides
            ``"input"`` and ``"output"`` grids.

    Returns:
        A list of ``(input_grid, output_grid)`` tuples, one per training
        sample, each grid being an independent ``uint8`` numpy array.
    """
    return [
        (
            np.array(sample["input"]).astype('uint8').copy(),
            np.array(sample["output"]).astype('uint8').copy(),
        )
        for sample in task["train"]
    ]

def build_testlist(task):
    """Collect the test inputs of a task as ``uint8`` arrays.

    Parameters:
        task: mapping with a ``"test"`` entry; each sample in it provides an
            ``"input"`` grid.

    Returns:
        A list with one independent ``uint8`` numpy array per test sample
        (plain arrays, not tuples).
    """
    return [
        np.array(sample["input"]).astype('uint8').copy()
        for sample in task["test"]
    ]

def load_data(p, phase=None):
    """Read one task JSON file and convert its grids to numpy arrays.

    Parameters:
        p: path of the JSON file. When ``phase`` names a split, ``p`` is
            taken relative to ``data_path / phase``.
        phase: ``'training'``, ``'test'`` or ``'evaluation'`` to resolve ``p``
            inside that split folder; any other value uses ``p`` as given.

    Returns:
        A dict with keys ``'test'`` and ``'train'``; each value is a list of
        dicts whose values are numpy arrays built from the JSON lists.

    Raises:
        AssertionError: if the file's top-level keys are not exactly
            ``{'test', 'train'}`` or a training sample's keys are not exactly
            ``{'input', 'output'}``.
    """
    if phase in {'training', 'test', 'evaluation'}:
        p = data_path / phase / p

    task = json.loads(Path(p).read_text())
    assert set(task) == {'test', 'train'}

    def to_arrays(sample):
        # Convert every value of one sample (input / output grid) to an array.
        return {key: np.array(value) for key, value in sample.items()}

    train_samples = []
    for sample in task['train']:
        assert set(sample) == {'input', 'output'}
        train_samples.append(to_arrays(sample))

    # Test samples are not checked for an 'output' key.
    test_samples = [to_arrays(sample) for sample in task['test']]

    return dict(test=test_samples, train=train_samples)

print("--- %s seconds ---" % (time.time() - start_time))

Load Data Functions ...
--- 0.00074005126953125 seconds ---


In [4]:
start_time = time.time()
print("Load Data Files ...")

# Load every task file discovered in each split folder.
# The lists are driven by the directory listings instead of hard-coded counts
# (400 / 400 / 100), so a split with fewer files no longer raises IndexError
# and a split with more files is loaded completely.
train_task_data = [load_data(task_file, phase='training') for task_file in training_tasks]

eval_task_data = [load_data(task_file, phase='evaluation') for task_file in evaluation_tasks]

test_task_data = [load_data(task_file, phase='test') for task_file in testing_tasks]
    
print("--- %s seconds ---" % (time.time() - start_time))

Load Data Files ...
--- 1.4455881118774414 seconds ---


In [5]:
start_time = time.time()
print("Load Checking Functions")

# Ten-entry colour map used to render grids; entry k is the colour drawn for
# cell value k.
cmap = colors_mat.ListedColormap(
    ['#000000', '#0074D9','#FF4136','#2ECC40','#FFDC00',
     '#AAAAAA', '#F012BE', '#FF851B', '#7FDBFF', '#870C25'])
# Fixed 0..9 normalisation so a value maps to the same colour in every plot,
# regardless of which values a particular grid contains.
norm = colors_mat.Normalize(vmin=0, vmax=9)
# Human-readable name for each cell value (list index == cell value).
num2color = ["black", "blue", "red", "green", "yellow", "gray", "magenta", "orange", "sky", "brown"]
# Inverse lookup: colour name -> cell value.
color2num = {c: n for n, c in enumerate(num2color)}

def _show_grid(ax, grid, title):
    """Draw one grid on ``ax`` with the shared colour map and one tick per cell."""
    ax.imshow(grid, cmap=cmap, norm=norm)
    ax.set_title(title)
    ax.set_yticks(list(range(grid.shape[0])))
    ax.set_xticks(list(range(grid.shape[1])))


def check_p(task, pred_func):
    """Plot input, expected output and prediction for every sample of a task.

    One column is drawn per sample (train samples first, then test samples)
    with three rows: input grid, expected output grid and the grid returned
    by ``pred_func`` for that input.

    Parameters:
        task: mapping with ``"train"`` and ``"test"`` lists; each sample has
            an ``"input"`` grid and normally an ``"output"`` grid.
        pred_func: callable taking a ``uint8`` input grid and returning the
            predicted grid (an array with a 2-D ``shape``).

    Returns:
        None. The figure is created through ``plt.subplots``.
    """
    samples = [('Train', i, t) for i, t in enumerate(task["train"])]
    samples += [('Test', i, t) for i, t in enumerate(task["test"])]

    n = len(samples)
    if n == 0:
        # Nothing to draw; a zero-column subplot grid is not valid.
        return

    # squeeze=False keeps `axs` two-dimensional even with a single column.
    fig, axs = plt.subplots(3, n, figsize=(4*n, 12), dpi=50, squeeze=False)
    plt.subplots_adjust(wspace=0.3, hspace=0.3)

    # NOTE: the previous version also built a full `Task` here and never used
    # it; that construction (object detection on every grid) was dropped.
    for col, (phase, i, t) in enumerate(samples):
        t_in = np.array(t["input"]).astype('uint8')
        t_pred = pred_func(t_in)

        _show_grid(axs[0][col], t_in, f'{phase}-{i} in')

        if "output" in t:
            t_out = np.array(t["output"]).astype('uint8')
            _show_grid(axs[1][col], t_out, f'{phase}-{i} out')
        else:
            # Samples without a known solution: leave the panel empty instead
            # of failing with KeyError.
            axs[1][col].set_title(f'{phase}-{i} out (n/a)')
            axs[1][col].axis('off')

        _show_grid(axs[2][col], t_pred, f'{phase}-{i} pred')

print("--- %s seconds ---" % (time.time() - start_time))

Load Checking Functions
--- 0.0015671253204345703 seconds ---


In [6]:
start_time = time.time()
print("Load DSL Helper Functions (Colors)")

# Check Whether Two Objects Cover Exactly the Same Coordinates
def coord_match(grid1, grid2):
    """Return True when both objects list the same coordinates in the same order.

    Parameters:
        grid1, grid2: mappings with a ``'coords'`` entry holding a sequence of
            coordinate pairs.

    Returns:
        bool: True only if both coordinate sequences have the same length and
        are pairwise equal.
    """
    coords1, coords2 = grid1['coords'], grid2['coords']

    # zip() stops at the shorter sequence, so without this check a coordinate
    # list that is a prefix of the other one would be reported as a match.
    if len(coords1) != len(coords2):
        return False

    return all(np.array_equal(i, j) for i, j in zip(coords1, coords2))

# Ensure No Duplicate Objects by Coordinates
def search_array(arr, arr_data):
    """Tell whether ``arr`` already has a coordinate match inside ``arr_data``.

    Parameters:
        arr: object mapping to look for (compared through ``coord_match``).
        arr_data: iterable of object mappings already collected.

    Returns:
        bool: True as soon as one element of ``arr_data`` matches ``arr``,
        False when none does (including when ``arr_data`` is empty).
    """
    for candidate in arr_data:
        if coord_match(candidate, arr):
            return True
    return False
    
# Return Grid Color (%) Dictionary
# Example: {0: 0.666, 1: 0.333}
def get_colors_percentages(grid):
    """Compute the fraction of cells occupied by each value present in ``grid``.

    Parameters:
        grid: numpy array; the cell total is taken as
            ``grid.shape[0] * grid.shape[1]``.

    Returns:
        dict mapping each distinct value (in ascending order, as produced by
        ``np.unique``) to its count divided by the cell total.
    """
    total_cells = grid.shape[0] * grid.shape[1]
    values, occurrences = np.unique(grid, return_counts=True)
    return {
        value: count * 1.0 / total_cells
        for value, count in zip(values, occurrences)
    }

# Return Background Color in Grid
def get_background_color(grid):
    """Return the dominant colour of ``grid`` when it qualifies as background.

    The most frequent value is reported as background only if all of these
    hold: the grid contains at least one 0 cell, at least two distinct values
    are present, and the most frequent value covers at least 25% of the
    cells. In every other case 0 is returned.

    Parameters:
        grid: numpy array of non-negative integers.

    Returns:
        int: the background colour, or 0 when no colour qualifies or the
        grid cannot be counted (empty, non-integer or negative values).
    """
    try:
        cnt = np.bincount(grid.flatten())
    except (AttributeError, TypeError, ValueError):
        # Not an integer array / negative values: keep the old best-effort 0.
        return 0

    # Empty grid, or no 0 cell at all: no background is reported.
    if cnt.size == 0 or cnt[0] < 1:
        return 0

    # Index k of `cnt` is the count of colour k, so the dominant colour is the
    # argmax itself (the previous `i + 1` returned the *next* colour, which may
    # not even occur in the grid). argmax keeps the first colour on ties.
    dominant = int(np.argmax(cnt))

    has_two_colors = np.count_nonzero(cnt) >= 2
    covers_quarter = cnt[dominant] >= grid.size * 0.25
    if has_two_colors and covers_quarter:
        return dominant

    return 0

print("--- %s seconds ---" % (time.time() - start_time))

Load DSL Helper Functions (Colors)
--- 0.0006289482116699219 seconds ---


In [53]:
start_time = time.time()
print("Load DSL Helper Functions (Objects)")
  
# Separate Objects by Distance
def object_distance(grid):
    """Split the non-zero cells of ``grid`` into connected objects.

    Non-zero cells are grouped with ``scipy.ndimage.label`` using the
    structure from ``generate_binary_structure(2, 2)``; each group is
    described by its bounding box.

    Parameters:
        grid: 2-D numpy array.

    Returns:
        A list with one dict per component, in label order. ``'coords'`` is
        the list of every ``[row, col]`` inside the bounding box (row-major)
        and ``'obj'`` is the bounding-box slice taken from a copy of ``grid``.
    """
    working = grid.copy()
    connectivity = generate_binary_structure(2, 2)
    labelled, n_components = label((working != 0), structure=connectivity)

    found = []
    for component_id in range(1, n_components + 1):
        rows, cols = np.where(labelled == component_id)
        row_lo, row_hi = rows.min(), rows.max()
        col_lo, col_hi = cols.min(), cols.max()

        # Every cell of the bounding box, not only the cells of the component.
        box_coords = [
            [r, c]
            for r in range(row_lo, row_hi + 1)
            for c in range(col_lo, col_hi + 1)
        ]

        found.append({
            'coords': box_coords,
            'obj': working[row_lo: row_hi + 1, col_lo: col_hi + 1],
        })

    return found
        
# Separate Objects by Color/Distance
def object_color_distance(grid):
    """Split ``grid`` into connected objects, one colour at a time.

    Every colour other than 0 and the value returned by
    ``get_background_color`` is labelled separately with
    ``scipy.ndimage.label`` (structure from
    ``generate_binary_structure(2, 2)``); each component is described by its
    bounding box.

    Parameters:
        grid: 2-D numpy array.

    Returns:
        A list of dicts with ``'coords'`` (every ``[row, col]`` of the
        bounding box, row-major) and ``'obj'`` (the bounding-box slice of a
        copy of ``grid``). An empty list is returned when more than 50
        components were found in total.
    """
    working = grid.copy()
    connectivity = generate_binary_structure(2, 2)

    present_colors = np.unique(working)
    bg_color = get_background_color(working)

    found = []
    total_components = 0

    for color in present_colors:
        # Skip black and the detected background colour.
        if color in [0, bg_color]:
            continue

        labelled, n_components = label((working == color), structure=connectivity)
        total_components += n_components

        for component_id in range(1, n_components + 1):
            rows, cols = np.where(labelled == component_id)
            row_lo, row_hi = rows.min(), rows.max()
            col_lo, col_hi = cols.min(), cols.max()

            box_coords = [
                [r, c]
                for r in range(row_lo, row_hi + 1)
                for c in range(col_lo, col_hi + 1)
            ]

            found.append({
                'coords': box_coords,
                'obj': working[row_lo: row_hi + 1, col_lo: col_hi + 1],
            })

    # Too many components: report nothing at all.
    return found if total_components <= 50 else []

# Separate Objects in Grid
def object_detection(grid):
    """Run both object detectors on ``grid`` and merge their results.

    ``object_distance`` is run first, then ``object_color_distance``; an
    object is kept only if ``search_array`` finds no coordinate match for it
    among the objects kept so far.

    Parameters:
        grid: 2-D numpy array.

    Returns:
        The list of kept object dicts, in detection order.
    """
    unique_objects = []

    for detector in (object_distance, object_color_distance):
        for candidate in detector(grid):
            if search_array(candidate, unique_objects):
                continue
            unique_objects.append(candidate)

    return unique_objects

print("--- %s seconds ---" % (time.time() - start_time))

Load DSL Helper Functions (Objects)
--- 0.0012440681457519531 seconds ---


In [110]:
start_time = time.time()
print("Load DSL Helper Functions (Attributes)")

def get_similar_attributes(attribute_list):
    """Tell whether every value in ``attribute_list`` equals the first one.

    Parameters:
        attribute_list: sequence of comparable values.

    Returns:
        bool: False if any later element differs from the first element,
        True otherwise (including for empty and single-element sequences).
    """
    return not any(
        attribute_list[0] != other
        for other in attribute_list[1:]
    )

def get_different_attributes(attribute_list):
    """Tell whether all values in ``attribute_list`` are pairwise different.

    The previous version only compared each value with the first element, so
    a list such as ``[1, 2, 2]`` was reported as "all different". Every pair
    is now compared.

    Parameters:
        attribute_list: sequence of comparable values (compared with ``==``,
            so unhashable values are supported).

    Returns:
        bool: False if any two elements are equal, True otherwise (including
        for empty and single-element sequences).
    """
    for position, value in enumerate(attribute_list):
        for later_value in attribute_list[position + 1:]:
            if value == later_value:
                return False

    return True

# Return Similarity Scores
def get_similarities(grid1, grid2):
    """Count the cells where both grids hold the same non-zero value.

    Cells are compared position by position from the top-left corner. When
    the shapes differ, only the overlapping window is compared. The previous
    version relied on a bare ``except`` and stopped at the first out-of-range
    index, returning an arbitrary partial count.

    Parameters:
        grid1, grid2: 2-D array-likes.

    Returns:
        int: number of positions in the overlap where the values are equal
        and non-zero; 0 when either input is not 2-D.
    """
    first = np.asarray(grid1)
    second = np.asarray(grid2)

    # Inputs that are not 2-D produced 0 before (through the swallowed
    # exception); keep that result explicitly.
    if first.ndim != 2 or second.ndim != 2:
        return 0

    rows = min(first.shape[0], second.shape[0])
    cols = min(first.shape[1], second.shape[1])
    window1 = first[:rows, :cols]
    window2 = second[:rows, :cols]

    return int(np.count_nonzero((window1 == window2) & (window2 != 0)))

# Return Closest Object Mapping
def get_object_mapping(grid, grid_list):
    """Rank the objects of ``grid_list`` by how closely they resemble ``grid``.

    The score of a candidate is the value returned by ``get_similarities``
    for the two grids, plus one point each for: identical grid shape,
    identical coordinate list, identical grid contents.

    Parameters:
        grid: object exposing ``.grid`` (numpy array) and ``.coords``.
        grid_list: iterable of objects exposing the same two attributes.

    Returns:
        A list of ``[candidate_copy, score]`` pairs (shallow copies of the
        candidates) for every candidate with a score of at least 1, sorted by
        descending score; candidates with equal scores keep their input order.
    """
    mappings = []

    # Find Input/Output Mappings
    for object_ in grid_list:
        object_copy = copy.copy(object_)

        # Calculate/Modify Similarities
        similarities = get_similarities(grid.grid, object_copy.grid)
        array_flag = np.array_equal(grid.grid, object_copy.grid)
        shape_flag = grid.grid.shape == object_copy.grid.shape
        # zip() truncates to the shorter list, so the lengths must be compared
        # first; otherwise a prefix of the coordinates counts as identical.
        coord_flag = len(grid.coords) == len(object_copy.coords) and all(
            np.array_equal(i, j) for i, j in zip(grid.coords, object_copy.coords)
        )

        similarities += int(shape_flag) + int(coord_flag) + int(array_flag)

        # Add Mapping to List
        if similarities >= 1:
            mappings.append([object_copy, similarities])

    # Sort Mappings By Similarities (stable, highest score first)
    mappings.sort(key=lambda pair: pair[1], reverse=True)

    return mappings
                 
# Return Attribute Ranking of Object
def get_relative_ranking(attribute, grid, grid_list):
    """Return the rank of ``grid`` among ``grid_list`` for one attribute.

    Parameters:
        attribute: key looked up in each object's ``attributes`` dict.
        grid: object whose rank is wanted.
        grid_list: objects providing the values to rank against.

    Returns:
        int: position of ``grid``'s value in the values of ``grid_list``
        sorted in descending order (0 = largest; equal values share the
        position of their first occurrence).

    Raises:
        ValueError: if ``grid``'s value does not occur among the values of
            ``grid_list``.
    """
    ranked_values = sorted(
        (candidate.attributes[attribute] for candidate in grid_list),
        reverse=True,
    )
    return ranked_values.index(grid.attributes[attribute])
        
print("--- %s seconds ---" % (time.time() - start_time))

Load DSL Helper Functions (Attributes)
--- 0.0009763240814208984 seconds ---


In [111]:
start_time = time.time()
print("Load DSL Helper Functions (Entity)")

# Fundamental Entity (Tensors, Objects, etc). 
# Contains all Basic Methods acting on Task Samples.
class Entity():
    """Base wrapper around a grid, shared by whole tensors and detected objects.

    Attributes set by ``__init__``:
        grid: the wrapped numpy array.
        coords: coordinate list of a detected object, or None for a raw grid.
        percentages: colour-fraction dict of a detected object, or None for a
            raw grid.
        object_data: list of ``Section`` instances built from the sibling
            object descriptions passed in (empty when none are given).

    ``compute_attributes`` adds the ``attributes`` dict.
    """

    def __init__(self, object_, object_data=None):
        """Wrap a raw grid or an object description.

        Parameters:
            object_: either a dict with ``"obj"`` (grid) and ``"coords"``
                entries, or the grid itself.
            object_data: optional iterable of object descriptions used later
                for relative rankings; each one is wrapped in a ``Section``.
        """
        if isinstance(object_, dict):
            self.grid = object_["obj"]
            self.coords = object_["coords"]
            self.percentages = get_colors_percentages(self.grid)

        else:
            self.grid = object_
            # Defined for every Entity so attribute access never raises
            # AttributeError; only detected objects carry real values.
            self.coords = None
            self.percentages = None

        self.object_data = []
        if object_data is not None:
            for sibling in object_data:
                # Section.__init__ already computes the attributes, so no
                # second compute_attributes() call is needed here.
                self.object_data.append(Section(sibling))

    def compute_attributes(self):
        """Populate ``self.attributes`` with size and relative-rank values.

        Always sets ``grid_length``, ``grid_width`` and ``grid_surface`` from
        ``self.grid.shape``. When ``self.object_data`` is non-empty, also sets
        ``relative_length``, ``relative_width`` and ``relative_surface`` using
        ``get_relative_ranking`` against those objects.
        """
        self.attributes = {}
        self.attributes["grid_length"] = self.grid.shape[0]
        self.attributes["grid_width"] = self.grid.shape[1]
        self.attributes["grid_surface"] = self.grid.shape[0] * self.grid.shape[1]

        if len(self.object_data) >= 1:
            self.attributes["relative_length"] = get_relative_ranking("grid_length", self, self.object_data)
            self.attributes["relative_width"] = get_relative_ranking("grid_width", self, self.object_data)
            self.attributes["relative_surface"] = get_relative_ranking("grid_surface", self, self.object_data)
        
print("--- %s seconds ---" % (time.time() - start_time))

Load DSL Helper Functions (Entity)
--- 0.0006208419799804688 seconds ---


In [112]:
start_time = time.time()
print("Load DSL Helper Functions (Tensor)")

# Extends Entity Class
# Contains Data for Sections of Grid
class Section(Entity):
    """An ``Entity`` whose attributes are computed as soon as it is built."""

    def __init__(self, section_data, object_data=None):
        """Initialise through ``Entity`` and compute the attributes right away.

        Parameters:
            section_data: forwarded to ``Entity.__init__`` as ``object_``.
            object_data: forwarded to ``Entity.__init__`` unchanged.
        """
        super(Section, self).__init__(section_data, object_data=object_data)
        self.compute_attributes()

    def compute_attributes(self):
        """Delegate to ``Entity.compute_attributes`` without additions."""
        super(Section, self).compute_attributes()

# Extends Entity Class
# Contains Entire Data for Input/Output
class Tensor(Entity):
    """An ``Entity`` for a complete grid, together with its detected objects.

    Attributes:
        objects: list of ``Section`` instances, one per object returned by
            ``object_detection`` for the grid.
    """

    def __init__(self, grid):
        """Wrap ``grid``, detect its objects and compute the attributes."""
        super(Tensor, self).__init__(grid)
        self.objects = []
        self.compute_features()
        self.compute_attributes()

    def compute_features(self):
        """Fill ``self.objects`` with one ``Section`` per detected object.

        Every section receives the complete detection list as its
        ``object_data``.
        """
        detected = object_detection(self.grid)
        for description in detected:
            section = Section(description, detected)
            # Section.__init__ has already computed the attributes; this call
            # recomputes the same values.
            section.compute_attributes()
            self.objects.append(section)

    def compute_attributes(self):
        """Delegate to ``Entity.compute_attributes`` without additions."""
        super(Tensor, self).compute_attributes()

print("--- %s seconds ---" % (time.time() - start_time))

Load DSL Helper Functions (Tensor)
--- 0.0007569789886474609 seconds ---


In [120]:
start_time = time.time()
print("Load DSL Helper Functions (Task)")

# Fundamental Class for ALL Tasks
# Contains all Basic Methods acting on Tasks.
class Task():
    """Holds the tensors of one task and the object mappings derived from them.

    Attributes:
        task_idx: identifier passed by the caller.
        train_tensors: list of ``[input_tensor, output_tensor]`` pairs.
        test_tensors: list of ``[input_tensor]`` single-element lists.
        color_mappings / agent_mappings: input object -> output object dicts
            filled by ``object_mapping``.
        color_attributes: first colour key of the mapped output object ->
            list of input objects, filled by ``color_groupings``.
        agent_attributes: initialised empty; not written by this class.
        color_rule: attribute name selected by ``different_attributes``, or
            None.
        agent_rule: initialised to None; not written by this class.
    """

    # Initialize Task Data
    def __init__(self, train_data, test_data, task_idx=0):
        """Build tensors for every sample.

        Parameters:
            train_data: iterable of ``(input_grid, output_grid)`` pairs.
            test_data: iterable of input grids.
            task_idx: identifier stored on the instance.
        """
        # Identifying Information
        self.task_idx = task_idx

        # Lists of Train/Test Tensors
        self.train_tensors = []
        self.test_tensors = []

        # Dictionaries/Attributes for Object Mappings
        self.color_mappings = {}
        self.agent_mappings = {}
        self.color_attributes = {}
        self.agent_attributes = {}

        # Unique Rules that Determine Output
        self.color_rule = None
        self.agent_rule = None

        # Compute Train/Output Tensors
        for t_in, t_out in train_data:
            tensor_in = Tensor(t_in)
            tensor_out = Tensor(t_out)
            tensor_in.compute_attributes()
            tensor_out.compute_attributes()
            self.train_tensors.append([tensor_in, tensor_out])

        # Compute Test Tensors
        for t_in in test_data:
            tensor_in = Tensor(t_in)
            tensor_in.compute_attributes()
            self.test_tensors.append([tensor_in])

    def similar_attributes(self):
        """List the attributes that are constant within a colour group.

        Returns:
            A list of attribute names; a name is appended once for every
            colour group of ``self.color_attributes`` in which all objects
            share the same value, so names may repeat.
        """
        similar_attributes = []
        for color, object_data in self.color_attributes.items():
            attributes_to_check = list(object_data[0].attributes.keys())
            for attribute in attributes_to_check:
                attribute_data = [object_.attributes[attribute] for object_ in object_data]
                if get_similar_attributes(attribute_data):
                    similar_attributes.append(attribute)

        return similar_attributes

    def different_attributes(self, similar_attributes):
        """Select ``self.color_rule`` among the candidate attributes.

        For each candidate, the value of the first object of every colour
        group is collected; if ``get_different_attributes`` accepts those
        values the candidate becomes ``self.color_rule``. The last accepted
        candidate wins.

        Parameters:
            similar_attributes: attribute names to test.
        """
        for attribute in similar_attributes:
            attribute_values = []
            for color, object_data in self.color_attributes.items():
                object_attributes = [object_.attributes[attribute] for object_ in object_data]
                attribute_values.append(object_attributes[0])
            if get_different_attributes(attribute_values):
                self.color_rule = attribute

    def object_mapping(self, t_in, t_out):
        """Map every input object to its best-matching output object.

        The best match is the first entry returned by ``get_object_mapping``.
        The pair is recorded in ``color_mappings`` and/or ``agent_mappings``
        depending on whether the grids and the coordinates are identical.

        Parameters:
            t_in, t_out: tensors exposing an ``objects`` list.
        """
        # Determine Object Mappings
        for object_in in t_in.objects:
            object_mappings = get_object_mapping(object_in, t_out.objects)

            if len(object_mappings) >= 1:
                object_out = object_mappings[0][0]

                # Determine Type of Mapping
                array_flag = np.array_equal(object_in.grid, object_out.grid)
                length_flag = len(object_in.coords) == len(object_out.coords)
                # zip() truncates to the shorter list, so the coordinates only
                # match when the lengths match as well (length_flag used to be
                # computed but ignored).
                coord_flag = length_flag and all(
                    np.array_equal(i, j) for i, j in zip(object_in.coords, object_out.coords)
                )

                # Similar = Same Grid, Same Coords
                if array_flag and coord_flag:
                    self.color_mappings[object_in] = object_out
                    self.agent_mappings[object_in] = object_out
                # Color = Different Grid, Same Coords
                elif not array_flag and coord_flag:
                    self.color_mappings[object_in] = object_out
                # Agent = Same Grid, Different Coords
                elif array_flag and not coord_flag:
                    self.agent_mappings[object_in] = object_out

    def color_groupings(self):
        """Group mapped input objects by output colour and derive the rule.

        The group key is the first key of the output object's
        ``percentages`` dict. After grouping, ``similar_attributes`` and
        ``different_attributes`` are run and ``self.color_rule`` is printed.
        """
        for object_in, object_out in self.color_mappings.items():
            color_key = list(object_out.percentages.keys())[0]
            self.color_attributes.setdefault(color_key, []).append(object_in)

        attribute_data = self.similar_attributes()
        self.different_attributes(attribute_data)
        print(self.color_rule)

    # Compute Attribute Differences
    def compute_differences(self):
        """Compute object mappings for same-shape training pairs, then the rule.

        Training pairs whose input and output grids differ in shape are
        skipped. Results accumulate on the instance, so calling this twice
        adds the grouped objects a second time.
        """
        # Calculate Object Mappings
        for t_in, t_out in self.train_tensors:
            if t_in.grid.shape == t_out.grid.shape:
                self.object_mapping(t_in, t_out)

        self.color_groupings()
                
# Fundamental Class for Generated Programs
# Contains all Basic Methods acting on Programs
class Program():
    """Container for a candidate program and its bookkeeping values.

    Attributes:
        task_idx: identifier passed by the caller.
        train_data / test_data: the data handed to the constructor, stored
            as given.
        functions: list, initially empty.
        similarity: initially 0.
        accuracy: initially 0.
    """

    # Initialize Program Data
    def __init__(self, train_data, test_data, task_idx=0):
        """Store the inputs and reset the program state."""
        # Identifying Information
        self.task_idx = task_idx

        # Train/Test data, kept by reference
        self.train_data, self.test_data = train_data, test_data

        # Program Data/Accuracy
        self.functions = list()
        self.similarity = self.accuracy = 0

print("--- %s seconds ---" % (time.time() - start_time))

Load DSL Helper Functions (Task)
--- 0.0011816024780273438 seconds ---


In [121]:
# Index (into train_task_data) of the single training task analysed below.
task_n = 9

# Disabled loop header: enabling it (and indenting the statements below)
# would run the same analysis for every training task.
#for task_n, task in enumerate(train_task_data):
print("Current Task: {}".format(task_n))

# Generate Task for Given Input
# Train pairs and test inputs are converted to uint8 grids first; building the
# Task runs object detection on every grid, and compute_differences() prints
# the colour rule it selected (or None).
train_data = build_trainlist(train_task_data[task_n])
test_data = build_testlist(train_task_data[task_n])
task_data = Task(train_data, test_data, task_idx=task_n)
task_data.compute_differences()

# Generate Program to Solve Given Task
# The Program only stores the data here; nothing else is done with it in this
# cell.
task_program = Program(train_data, test_data, task_idx=task_n)
print("")

Current Task: 9
relative_surface

