- make genetic algorithm

In [1]:
import os
import json
import random
import inspect
import itertools
import numpy as np
import pandas as pd
from pathlib import Path
from skimage import measure
from matplotlib import colors
import matplotlib.pyplot as plt
from numpy.lib.stride_tricks import as_strided

# Root of the Kaggle competition dataset; each sub-folder is listed below.
data_path = Path('/kaggle/input/abstraction-and-reasoning-challenge/')
training_path = data_path / 'training'
evaluation_path = data_path / 'evaluation'
test_path = data_path / 'test'
# Sorted directory listings, so the entry order is deterministic between runs.
training_tasks = sorted(os.listdir(training_path))
evaluation_tasks = sorted(os.listdir(evaluation_path))
test_tasks = sorted(os.listdir(test_path))

# Colour lookup tables: list index -> colour name, and the inverse mapping.
num2color = ["black", "blue", "red", "green", "yellow", "gray", "magenta", "orange", "sky", "brown"]
color2num = {c: n for n, c in enumerate(num2color)}

In [2]:
def create_df(folder_path):
    """Load every task file in `folder_path` and describe it in a DataFrame.

    Each entry of the folder is parsed as JSON and must contain the keys
    'train' and 'test'; every training example must contain 'input' and
    'output' grids (lists of rows).

    The returned frame has one row per task, with the file name, the parsed
    task, the number of train/test pairs, a set of grid-size descriptors
    (constant height/width, size ratios) and a set of colour descriptors
    (colour sums, number of distinct colours, presence of colour 0), all
    computed from the training pairs only.
    """
    task_names_list = sorted(os.listdir(folder_path))
    task_list = []
    for task_name in task_names_list:
        with open(str(folder_path / task_name), 'r') as f:
            task_list.append(json.load(f))

    # ---- helpers working on the training pairs of a single task ----
    def heights(task, side):
        return [len(example[side]) for example in task['train']]

    def widths(task, side):
        return [len(example[side][0]) for example in task['train']]

    def is_constant(values):
        return len(set(values)) == 1

    def height_ratios(task):
        return [len(example['input']) / len(example['output']) for example in task['train']]

    def width_ratios(task):
        return [len(example['input'][0]) / len(example['output'][0]) for example in task['train']]

    def color_sum(grid):
        return sum(sum(np.array(grid)))

    def edge_sum(grid):
        cells = np.array(grid)
        return sum(cells[0, :]) + sum(cells[:, 0]) + sum(cells[-1, :]) + sum(cells[:, -1])

    def holds_for_every_pair(task, relation):
        return np.all([int(relation(example['input'], example['output']))
                       for example in task['train']])

    def single_value_or_minus_one(diffs):
        distinct = np.unique(np.array(diffs))
        return distinct[0] if len(distinct) == 1 else -1

    df = pd.DataFrame()
    df['task_name'] = task_names_list
    df['task'] = task_list
    tasks = df['task']
    df['number_of_train_pairs'] = tasks.apply(lambda task: len(task['train']))
    df['number_of_test_pairs'] = tasks.apply(lambda task: len(task['test']))

    # Compare image sizes (first for the inputs, then for the outputs)
    for side in ('input', 'output'):
        df[side + 's_all_have_same_height'] = tasks.apply(
            lambda task: int(is_constant(heights(task, side))))
        df[side + 's_all_have_same_width'] = tasks.apply(
            lambda task: int(is_constant(widths(task, side))))
        df[side + 's_all_have_same_shape'] = (
            df[side + 's_all_have_same_height'] * df[side + 's_all_have_same_width'])
        df[side + '_height_if_constant'] = tasks.apply(
            lambda task: len(task['train'][0][side])
            if is_constant(heights(task, side)) else np.nan)
        df[side + '_width_if_constant'] = tasks.apply(
            lambda task: len(task['train'][0][side][0])
            if is_constant(widths(task, side)) else np.nan)

    df['in_each_pair_shape_doesnt_change'] = tasks.apply(
        lambda task: np.prod([
            int(len(example['input'][0]) == len(example['output'][0])
                and len(example['input']) == len(example['output']))
            for example in task['train']
        ])
    )
    df['in_each_pair_shape_ratio_is_the_same'] = tasks.apply(
        lambda task: is_constant(width_ratios(task)) * is_constant(height_ratios(task)))
    df['o/i_height_ratio_if_constant'] = tasks.apply(
        lambda task: len(task['train'][0]['output']) / len(task['train'][0]['input'])
        if is_constant(height_ratios(task)) else np.nan)
    df['o/i_width_ratio_if_constant'] = tasks.apply(
        lambda task: len(task['train'][0]['output'][0]) / len(task['train'][0]['input'][0])
        if is_constant(width_ratios(task)) else np.nan)

    # Colour-based descriptors
    df["same_color_sum"] = tasks.apply(
        lambda task: holds_for_every_pair(
            task, lambda grid_in, grid_out: color_sum(grid_in) == color_sum(grid_out)))
    df["same_color_sum_in_edge"] = tasks.apply(
        lambda task: holds_for_every_pair(
            task, lambda grid_in, grid_out: edge_sum(grid_in) == edge_sum(grid_out)))
    df["io_color_kind_diff"] = tasks.apply(
        lambda task: [len(np.unique(np.array(example['input'])))
                      - len(np.unique(np.array(example['output'])))
                      for example in task['train']])
    df["io_color_kind_diff_constant"] = df['io_color_kind_diff'].apply(single_value_or_minus_one)
    df["output_not_include_0"] = tasks.apply(
        lambda task: np.all([np.all(np.array(example['output']) > 0)
                             for example in task['train']]))
    df["increase_color_sum"] = tasks.apply(
        lambda task: holds_for_every_pair(
            task, lambda grid_in, grid_out: color_sum(grid_in) < color_sum(grid_out)))
    df["decrease_color_sum"] = tasks.apply(
        lambda task: holds_for_every_pair(
            task, lambda grid_in, grid_out: color_sum(grid_in) > color_sum(grid_out)))

    return df

# Build one descriptive frame per dataset split (reads every task file of the split).
training_descriptive_df = create_df(training_path)
evaluation_descriptive_df = create_df(evaluation_path)
test_descriptive_df = create_df(test_path)

In [3]:
def classification(row):
    """Assign a task (one row of a descriptive frame) to a class 1..8.

    Classes 1-4 require that every pair keeps its shape and that both
    output/input size ratios equal 1:
        1 - colour sum unchanged
        2 - colour sum increases and some output contains colour 0
        3 - colour sum increases and no output contains colour 0
        4 - colour sum decreases
    Classes 5-7 require that some pair changes shape:
        5 - colour sum decreases
        6 - colour sum increases
        7 - colour sum unchanged
    Everything else falls into class 8.
    """
    same_sum = row.same_color_sum == 1
    grows = row.increase_color_sum == 1
    shrinks = row.decrease_color_sum == 1

    shape_kept = (row["in_each_pair_shape_doesnt_change"] == 1
                  and row["o/i_height_ratio_if_constant"] == 1
                  and row["o/i_width_ratio_if_constant"] == 1)
    if shape_kept:
        if same_sum:
            return 1
        if grows and row.output_not_include_0 == 0:
            return 2
        if grows and row.output_not_include_0 == 1:
            return 3
        if shrinks:
            return 4

    if row["in_each_pair_shape_doesnt_change"] == 0:
        if shrinks:
            return 5
        if grows:
            return 6
        if same_sum:
            return 7

    return 8
# Label every task of each split with its class (1..8) as computed by classification().
training_descriptive_df["class"] = training_descriptive_df.apply(lambda x: classification(x), axis=1)
evaluation_descriptive_df["class"] = evaluation_descriptive_df.apply(lambda x: classification(x), axis=1)
test_descriptive_df["class"] = test_descriptive_df.apply(lambda x: classification(x), axis=1)

# Domain Specific Language (DSL)

We will build a domain-specific language (DSL) specialized in processing lists of images. To make it easy to chain keywords of this language together, each *function* provided by the language takes one or more images and transforms them into zero, one, or more images. The final result of a program is therefore a list of images.

The DSL therefore consists of a collection of functions of type `np.array -> [np.array]` and `[np.array] -> [np.array]`.

The first kind of function takes an image and produces a list of images (for example, the image split by colour). The second kind of function takes a list of images and produces a new list (for example, an intersection).

In [4]:
def neighbours(cur_row, cur_col, nrows, ncols): # function for pickup objects function
    """Return the up-to-8 neighbouring cells of (cur_row, cur_col).

    Each neighbour is a two-element list [row, col].  A neighbour is left out
    when the current cell lies on the corresponding border of an
    nrows x ncols grid.  Order of the result: top, bottom, left, right,
    top-right, top-left, bottom-right, bottom-left.
    """
    has_top = cur_row != 0
    has_bottom = cur_row != nrows - 1
    has_left = cur_col != 0
    has_right = cur_col != ncols - 1

    # (is the neighbour available?, row offset, column offset)
    candidates = (
        (has_top, -1, 0),
        (has_bottom, 1, 0),
        (has_left, 0, -1),
        (has_right, 0, 1),
        (has_top and has_right, -1, 1),
        (has_top and has_left, -1, -1),
        (has_bottom and has_right, 1, 1),
        (has_bottom and has_left, 1, -1),
    )
    return [[cur_row + d_row, cur_col + d_col]
            for available, d_row, d_col in candidates if available]

def make_group(loc, h, w): # function for pickup objects function
    """Collect the cells of `loc` that are 8-connected to its first cell.

    `loc` is an array of [row, col] coordinates inside an h x w grid.
    Returns `(group, remain)`: `group` is a list of [row, col] lists in the
    order they were reached (breadth-first from loc[0]); `remain` is a NumPy
    array of the coordinates that were not reached.
    """
    seed = loc[0].tolist()
    group = [seed]
    frontier = [seed]
    unvisited = loc[1:].tolist()
    while frontier:
        row, col = frontier[0][0], frontier[0][1]
        del frontier[0]
        for cell in neighbours(row, col, h, w):
            if cell in unvisited:
                group.append(cell)
                frontier.append(cell)
                unvisited.remove(cell)
    return group, np.array(unvisited)

def find_inside(points): # general function
    """List the cells lying strictly between the extreme points of each row.

    `points` is a sequence of [x, y] pairs.  For every distinct x (ascending),
    every [x, k] with min(y) < k < max(y) over the points sharing that x is
    appended to the result.
    """
    first_coords = np.array(points)[:, 0]
    inside = []
    for x in np.unique(first_coords).tolist():
        members = np.where(first_coords == x)[0].tolist()
        ys = [points[idx][1] for idx in members]
        low, high = min(ys), max(ys)
        if high - low > 1:
            inside.extend([x, k] for k in range(low + 1, high))
    return inside

# for tile and symmetry -------
def in_out_diff(t_in, t_out):
    """Return `t_out - t_in` on a canvas large enough to hold both grids.

    Both grids are anchored at the top-left corner; cells covered by only one
    of them are treated as 0 in the other.  The result is a float array.
    """
    rows_in, cols_in = t_in.shape
    rows_out, cols_out = t_out.shape
    canvas_shape = (max(rows_in, rows_out), max(cols_in, cols_out))
    delta = np.zeros(canvas_shape)
    delta[:rows_in, :cols_in] -= t_in
    delta[:rows_out, :cols_out] += t_out
    return delta

def check_symmetric(a):
    """Encode the symmetries of `a` as a product of primes.

    Starting from 1 the result is multiplied by
        2 if a equals its transpose (main diagonal),
        3 if a equals its anti-transpose (anti-diagonal),
        5 if a equals its up/down flip,
        7 if a equals its left/right flip.
    Returns 0 if any of the checks raises.
    """
    try:
        checks = (
            (2, lambda: a.T),
            (3, lambda: np.flip(a).T),
            (5, lambda: np.flipud(a)),
            (7, lambda: np.fliplr(a)),
        )
        code = 1
        for prime, transformed in checks:
            if np.array_equal(a, transformed()):
                code *= prime
        return code
    except:
        return 0
    
def bbox(a):
    """Return (rmin, rmax, cmin, cmax) of the non-zero cells of `a`.

    The four values are the first and last indices of the rows / columns that
    contain a non-zero cell.  If that lookup fails (e.g. `a` has no non-zero
    cell) the fallback (0, a.shape[0], 0, a.shape[1]) is returned.
    """
    try:
        occupied_rows = np.where(np.any(a, axis=1))[0]
        occupied_cols = np.where(np.any(a, axis=0))[0]
        rmin, rmax = occupied_rows[[0, -1]]
        cmin, cmax = occupied_cols[[0, -1]]
        return rmin, rmax, cmin, cmax
    except:
        return 0,a.shape[0],0,a.shape[1]
    
def cmask(t_in):
    """Pick a colour in 0..9 from the extent reported by bbox() for its cells.

    For each colour a 0/1 map of the cells equal to that colour is built and
    passed to bbox().  A colour qualifies when the area derived from the
    returned bounds is larger than 2 and the map sums to exactly that area
    inside those bounds; among qualifying colours the one with the smallest
    area (below 999) wins.  Returns 0 when no colour qualifies.
    """
    smallest_area = 999
    chosen = 0
    for color in range(10):
        flags = t_in.copy().astype('int8')
        # Same three in-order assignments as a chained tuple assignment:
        flags[flags == color] = -1   # tag the cells of this colour
        flags[flags > 0] = 0         # clear every other positive value
        flags[flags < 0] = 1         # tagged (negative) cells become 1
        bounds = bbox(flags)
        area = (bounds[1] - bounds[0]) * (bounds[3] - bounds[2])
        filled = (flags[bounds[0]:bounds[1], bounds[2]:bounds[3]]).sum()
        if 2 < area < smallest_area and filled == area:
            smallest_area = area
            chosen = color
    return chosen

def mask_rect(a):
    """Return a boolean mask of the cells of `a` that end up marked with 2.

    `a` is copied and cast to uint8; the copy is scanned row by row over all
    cells except the last row and last column, and cells are marked in place
    with the value 2 according to the four rules below.  Because marking is
    done in place, later cells see marks set on earlier ones.  Finally every
    remaining 1 is cleared and `m == 2` is returned.

    NOTE(review): for i == 0 or j == 0 the expressions m[i-1, j] / m[i, j-1]
    use index -1, which NumPy resolves to the last row / column - confirm
    that this wrap-around is intended.
    """
    r,c = a.shape
    m = a.copy().astype('uint8')
    for i in range(r-1):
        for j in range(c-1):
            # Rule 1: the 2x2 block starting at (i, j) holds four equal values >= 1.
            if m[i,j]==m[i+1,j]==m[i,j+1]==m[i+1,j+1]>=1:m[i,j]=2
            # Rule 2: this cell and the one below are 1 and the left neighbour is marked.
            if m[i,j]==m[i+1,j]==1 and m[i,j-1]==2:m[i,j]=2
            # Rule 3: this cell and the one to the right are 1 and the upper neighbour is marked.
            if m[i,j]==m[i,j+1]==1 and m[i-1,j]==2:m[i,j]=2
            # Rule 4: this cell is 1 and both the upper and the left neighbours are marked.
            if m[i,j]==1 and m[i-1,j]==m[i,j-1]==2:m[i,j]=2
    # Unmarked cells that are still 1 are dropped.
    m[m==1]=0
    return (m==2)

def crop_min(t_in):
    """Crop `t_in` to the bounding box of its least frequent colour.

    The colour with the smallest non-zero count (via np.bincount) is looked
    up and the smallest rectangle containing all of its cells is returned as
    a slice of `t_in`.

    `t_in` itself is returned unchanged when the crop cannot be computed,
    e.g. when several colours tie for the smallest count or when the input
    is not a 2-D array of non-negative integers.
    """
    try:
        counts = np.bincount(t_in.flatten(), minlength=10)
        rarest = np.where(counts == np.min(counts[np.nonzero(counts)]))[0]
        # `rarest` is a 1-D index array.  Calling int() on it directly relies
        # on the size-1-array-to-scalar conversion deprecated in NumPy 1.25;
        # .item() is the supported spelling and still raises ValueError when
        # several colours tie, which keeps the "return input unchanged" path.
        c = int(rarest.item())
        coords = np.argwhere(t_in==c)
        x_min, y_min = coords.min(axis=0)
        x_max, y_max = coords.max(axis=0)
        return t_in[x_min:x_max+1, y_min:y_max+1]
    except Exception:
        # Best effort by design: any failure falls back to the uncropped input.
        return t_in
    
def call_pred_train(t_in, t_out, pred_func):
    """Run `pred_func` on a training pair and collect the features it needs.

    A feature dict `feat` is built from the pair:
      's_out' - shape of `t_out`
      'diff'  - in_out_diff(t_in, t_out) when both shapes match, otherwise a
                (rows, cols) tuple of shape differences (input minus output)
      'cm'    - largest input value among changed cells when shapes match,
                otherwise cmask(t_in)
      'sym'   - check_symmetric(t_out)
    The names of pred_func's 2nd/3rd positional parameters select which
    features are passed to it.

    Returns `(t_pred, feat)` where t_pred has been resized to t_out.shape and
    feat additionally holds 'sizeok'.  On any exception `(t_in, feat)` is
    returned with whatever entries of `feat` were filled before the failure.

    NOTE(review): for a one-parameter pred_func only `pred_func(t_in)` is
    returned (no feat, no resize), unlike the other branches - confirm that
    callers expect this.
    """
    try:
        feat = {}
        feat['s_out'] = t_out.shape
        if t_out.shape==t_in.shape:
            diff = in_out_diff(t_in,t_out)
            feat['diff'] = diff
            # .max() raises on an empty selection (no changed cell); that is
            # caught below and yields (t_in, feat) without 'cm' and 'sym'.
            feat['cm'] = t_in[diff!=0].max()
        else:
            feat['diff'] = (t_in.shape[0]-t_out.shape[0],t_in.shape[1]-t_out.shape[1])
            feat['cm'] = cmask(t_in)
        feat['sym'] = check_symmetric(t_out)
        # Parameter names after the first are used as keys into `feat`.
        args = inspect.getfullargspec(pred_func).args
        if len(args)==1:
            return pred_func(t_in)
        elif len(args)==2:
            t_pred = pred_func(t_in,feat[args[1]])    
        elif len(args)==3:
            t_pred = pred_func(t_in,feat[args[1]],feat[args[2]])
        # With 0 or more than 3 parameters t_pred is unbound here; the
        # resulting error is swallowed by the except clause below.
        feat['sizeok'] = len(t_out)==len(t_pred)
        t_pred = np.resize(t_pred,t_out.shape)
        return t_pred, feat
    except:
        return t_in, feat

def call_pred_test(t_in, pred_func, feat):
    """Call `pred_func` on `t_in`, feeding it features selected by name.

    The names of pred_func's 2nd and 3rd positional parameters are used as
    keys into `feat`.  Functions with one, two or three positional parameters
    are supported; the prediction is returned as is.
    """
    param_names = inspect.getfullargspec(pred_func).args
    arity = len(param_names)
    if arity == 1:
        return pred_func(t_in)
    if arity == 2:
        t_pred = pred_func(t_in, feat[param_names[1]])
    elif arity == 3:
        t_pred = pred_func(t_in, feat[param_names[1]], feat[param_names[2]])
    return t_pred

def get_tile(img ,mask):
    """Try to extract one repeating tile from `img`, ignoring masked cells.

    `img` is copied as int8 and the cells selected by `mask` are set to -1.
    A vertical period `r` is searched in the first column without masked
    cells and a horizontal period `c` in the first row without masked cells;
    the grid is then copied onto itself at those periods so that masked cells
    get filled from unmasked ones.  The slice a[r:2*r, c:2*c] is returned.

    On any exception the top-left 1x1 slice of the working copy is returned.
    NOTE(review): if the failure happens before `a` is assigned (e.g. `img`
    is not 2-D) the except branch itself raises because `a` is unbound.
    """
    try:
        m,n = img.shape
        a = img.copy().astype('int8')
        a[mask] = -1
        r=c=0
        # Vertical period: use the first column that has no masked cell.
        for x in range(n):
            if np.count_nonzero(a[0:m,x]<0):continue
            # First r >= 2 whose leading r cells equal the next r cells.
            for r in range(2,m):
                if 2*r<m and (a[0:r,x]==a[r:2*r,x]).all():break
            # NOTE(review): r is the loop variable above, so it stays below m
            # even when no period matched - the else branch looks unreachable.
            if r<m:break
            else: r=0
        # Horizontal period: same search along the first row without masked cells.
        for y in range(m):
            if np.count_nonzero(a[y,0:n]<0):continue
            for c in range(2,n):
                if 2*c<n and (a[y,0:c]==a[y,c:2*c]).all():break
            if c<n:break
            else: c=0
        if c>0:
            # Copy between columns c apart, always from a column without
            # masked cells to its partner.
            for x in range(n-c):
                if np.count_nonzero(a[:,x]<0)==0:
                    a[:,x+c]=a[:,x]
                elif np.count_nonzero(a[:,x+c]<0)==0:
                    a[:,x]=a[:,x+c]
        if r>0:
            # Same propagation between rows r apart.
            for y in range(m-r):
                if np.count_nonzero(a[y,:]<0)==0:
                    a[y+r,:]=a[y,:]
                elif np.count_nonzero(a[y+r,:]<0)==0:
                    a[y,:]=a[y+r,:]
        return a[r:2*r,c:2*c]
    except:
        return a[0:1,0:1]
    
def patch_image(t_in,s_out,cm=0):
    """Try to fill the cells of `t_in` that hold the mask colour `cm`.

    Strategy, in order:
      1. Tiling: extract a tile with get_tile() and, if tiling it over the
         whole grid reproduces every unmasked cell, return the tiled grid
         (only attempted when `s_out` equals the input shape).
      2. Up to six rounds of symmetry filling (element-wise maximum with
         flipped / transposed copies, accepted only if no unmasked cell
         changes) followed by patch matching: every remaining masked region
         is compared with same-sized windows of the grid (and their flips /
         transposes) and filled from the first window that agrees on the
         known cells.

    Returns the patched grid when `s_out` equals its shape; otherwise the
    values found at the originally masked positions, resized to the shape of
    crop_min() applied to that mask.  Returns [] if anything raises.
    """
    try:
        t = t_in.copy()
        ty,tx=t.shape
        # For a non-zero mask colour only the cells kept by mask_rect() are
        # used as mask for the tiling attempt.
        if cm>0:
            m = mask_rect(t==cm)
        else:
            m = (t==cm)   
        tile = get_tile(t ,m)
        if tile.size>2 and s_out==t.shape:
            # Repeat the tile past the grid size, then cut back to (ty, tx).
            rt = np.tile(tile,(1+ty//tile.shape[0],1+tx//tile.shape[1]))[0:ty,0:tx]
            if (rt[~m]==t[~m]).all():
                return rt
        for i in range(6):
            m = (t==cm)
            # Shift values so that masked cells become 0 and np.maximum can
            # only overwrite them with larger, i.e. known, values.
            t -= cm
            if tx==ty:
                # Diagonal symmetries are only tried on square grids.
                a = np.maximum(t,t.T)
                if (a[~m]==t[~m]).all():t=a.copy()
                a = np.maximum(t,np.flip(t).T)
                if (a[~m]==t[~m]).all():t=a.copy()
            a = np.maximum(t,np.flipud(t))
            if (a[~m]==t[~m]).all():t=a.copy()
            a = np.maximum(t,np.fliplr(t))
            if (a[~m]==t[~m]).all():t=a.copy()
            t += cm
            m = (t==cm)
            # Label the connected regions that are still masked.
            lms = measure.label(m.astype('uint8'))
            for l in range(1,lms.max()+1):
                lm = np.argwhere(lms==l)
                lm = np.argwhere(lms==l)
                # Window around the region, padded by one cell on each side.
                # NOTE(review): x (column) bounds are clipped with t.shape[0]
                # and y (row) bounds with t.shape[1]; this looks swapped for
                # non-square grids - confirm.
                x_min = max(0,lm[:,1].min()-1)
                x_max = min(lm[:,1].max()+2,t.shape[0])
                y_min = max(0,lm[:,0].min()-1)
                y_max = min(lm[:,0].max()+2,t.shape[1])
                gap = t[y_min:y_max,x_min:x_max]
                sy,sx=gap.shape
                if i==1:
                    # Second round only: the window height is set to its width.
                    # The halved sy is overwritten again just below.
                    sy//=2
                    y_max=y_min+sx
                gap = t[y_min:y_max,x_min:x_max]
                sy,sx=gap.shape
                # View of all (sy, sx) windows, one per start cell of the grid.
                # NOTE(review): as_strided performs no bounds checking, and
                # windows starting near the bottom/right edge extend past the
                # grid - verify that this cannot read unrelated memory.
                allst = as_strided(t, shape=(ty,tx,sy,sx),strides=2*t.strides)    
                allst = allst.reshape(-1,sy,sx)
                # Keep only candidate windows without any masked cell.
                allst = np.array([a for a in allst if np.count_nonzero(a==cm)==0])
                # gm marks the known (unmasked) cells of the window.
                gm = (gap!=cm)
                for a in allst:
                    if sx==sy:
                        # Transposed candidates are only usable for square windows.
                        fpd = a.T
                        fad = np.flip(a).T
                        # In the second round selected corners are excluded from the comparison.
                        if i==1:gm[sy-1,0]=gm[0,sx-1]=False
                        if (fpd[gm]==gap[gm]).all():
                            gm = (gap!=cm)
                            np.putmask(gap,~gm,fpd)
                            t[y_min:y_max,x_min:x_max] = gap
                            break
                        if i==1:gm[0,0]=gm[sy-1,sx-1]=False
                        if (fad[gm]==gap[gm]).all():
                            gm = (gap!=cm)
                            np.putmask(gap,~gm,fad)
                            t[y_min:y_max,x_min:x_max] = gap
                            break 
                    fud = np.flipud(a)
                    flr = np.fliplr(a)
                    if i==1:gm[sy-1,0]=gm[0,sx-1]=gm[0,0]=gm[sy-1,sx-1]=False
                    # Try the candidate as is, then flipped up/down, then left/right;
                    # on a match only the masked cells of the window are overwritten.
                    if (a[gm]==gap[gm]).all():
                        gm = (gap!=cm)
                        np.putmask(gap,~gm,a)
                        t[y_min:y_max,x_min:x_max] = gap
                        break
                    elif (fud[gm]==gap[gm]).all():
                        gm = (gap!=cm)
                        np.putmask(gap,~gm,fud)
                        t[y_min:y_max,x_min:x_max] = gap
                        break
                    elif (flr[gm]==gap[gm]).all():
                        gm = (gap!=cm)
                        np.putmask(gap,~gm,flr)
                        t[y_min:y_max,x_min:x_max] = gap
                        break
        if s_out==t.shape:
            return t
        else:
            # Output has another shape: return only the values that were
            # filled in at the originally masked positions.
            m = (t_in==cm)
            return np.resize(t[m],crop_min(m).shape)
    except:
        return []

## DSL Implementation

We start with the functions that take *one image* and produce *a list of images*.

In [5]:
# np.array -> [np.array]
def groupByColor_unlifted(pixmap):
    """Split an image into one image per colour.

    For every colour from 1 up to the largest value in `pixmap`, an image of
    the same shape is produced that keeps only the cells of that colour
    (everything else is 0).  Colours that do not occur are skipped.
    """
    highest = int(pixmap.max())
    layers = []
    for color in range(1, highest + 1):
        layer = (pixmap == color) * color
        if np.any(layer):
            layers.append(layer)
    return layers

# np.array -> [np.array]
def cropToContent_unlifted(pixmap):
    """Crop an image to the bounding box of its non-zero cells.

    Returns a one-element list with the cropped view, or [] when the image
    has no non-zero cell.
    """
    filled = np.argwhere(pixmap)
    if len(filled) == 0:
        return []
    # Smallest / largest (row, col) over all non-zero cells
    low = filled.min(axis=0)
    high = filled.max(axis=0)
    rows = slice(low[0], high[0] + 1)
    cols = slice(low[1], high[1] + 1)
    return [pixmap[rows, cols]]

# np.array -> [np.array]
def splitH_unlifted(pixmap):
    """Split an image into its top and bottom halves.

    When the number of rows is odd, the middle row is dropped.
    """
    rows = pixmap.shape[0]
    half = rows // 2
    # Odd heights skip the middle row: the bottom part starts one row later.
    bottom_start = half + rows % 2
    return [pixmap[:half,:], pixmap[bottom_start:,:]]

# np.array -> [np.array]
def negative_unlifted(pixmap):
    """Return the negative of an image, painted in the image's largest colour.

    Zero cells become the largest value of `pixmap` (at least 1); non-zero
    cells become 0.
    """
    paint = max(pixmap.max(), 1)
    empty_cells = np.logical_not(pixmap).astype(int)
    return [empty_cells * paint]

# np.array -> [np.array]
def rotation_unlifted(pixmap):
    """Return the image rotated counter-clockwise by 90, 180 and 270 degrees."""
    # Each rotation works on its own fresh copy of the input.
    return [np.rot90(np.array(pixmap), quarter_turns) for quarter_turns in (1, 2, 3)]
        
def unrotation_unlifted(pixmap):
    """Return the image rotated clockwise by 90, 180 and 270 degrees."""
    # Negative quarter turns rotate clockwise; each uses a fresh copy.
    return [np.rot90(np.array(pixmap), -quarter_turns) for quarter_turns in (1, 2, 3)]

# np.array -> [np.array]
def flip_unlifted(pixmap):
    """Return the left/right flip, the up/down flip and the transpose of an image."""
    operations = (np.fliplr, np.flipud, np.transpose)
    # Each operation gets its own fresh copy of the input.
    return [operation(np.array(pixmap)) for operation in operations]

# np.array -> [np.array]
def splitV_unlifted(pixmap):
    """Split an image into its left and right halves.

    When the number of columns is odd, the middle column is dropped.
    """
    cols = pixmap.shape[1]
    half = cols // 2
    # Odd widths skip the middle column: the right part starts one column later.
    right_start = half + cols % 2
    return [pixmap[:,:half], pixmap[:,right_start:]]

def copy_horizontal_unlifted(pixmap):
    """Duplicate every row of the image (each row appears twice in a row).

    Returns [] if the operation fails.
    """
    try:
        doubled_rows = pixmap.repeat(2, axis=0)
    except:
        return []
    return [doubled_rows]
    
def copy_vertical_unlifted(pixmap):
    """Duplicate every column of the image (each column appears twice in a row).

    Returns [] if the operation fails.
    """
    try:
        doubled_cols = pixmap.repeat(2, axis=1)
    except:
        return []
    return [doubled_cols]

def crop_inside_unlifted(pixmap):
    """Crop to the interior of the bounding box of the non-zero cells.

    The outermost row and column on every side of the bounding box are
    removed.  Returns [] when the image has no non-zero cell or on failure.
    """
    try:
        filled = np.argwhere(pixmap)
        if len(filled) == 0:
            return []
        # Smallest / largest (row, col) over all non-zero cells
        low = filled.min(axis=0)
        high = filled.max(axis=0)
        # Exclude the border of the bounding box itself
        rows = slice(low[0] + 1, high[0])
        cols = slice(low[1] + 1, high[1])
        return [pixmap[rows, cols]]
    except:
        return []

def expand_twice_unlifted(arr):
    """Scale the image up by 2: every cell becomes a 2x2 block.

    Returns [] if the operation fails.
    """
    try:
        widened = arr.repeat(2, axis=1)
        return [widened.repeat(2, axis=0)]
    except:
        return []

def expand_three_unlifted(arr):
    """Scale the image up by 3: every cell becomes a 3x3 block.

    Returns [] if the operation fails.
    """
    try:
        widened = arr.repeat(3, axis=1)
        return [widened.repeat(3, axis=0)]
    except:
        return []
    
def expand_four_unlifted(arr):
    """Scale the image up by 4: every cell becomes a 4x4 block.

    Returns [] if the operation fails.
    """
    try:
        widened = arr.repeat(4, axis=1)
        return [widened.repeat(4, axis=0)]
    except:
        return []

def horizontal_combi_twice_unlifted(arr):
    """Place two copies of the image side by side.

    Returns [] if the concatenation fails.
    """
    try:
        side_by_side = np.concatenate([arr] * 2, axis=1)
    except:
        return []
    return [side_by_side]

def horizontal_combi_three_unlifted(arr):
    """Place three copies of the image side by side.

    Returns [] if the concatenation fails.
    """
    try:
        side_by_side = np.concatenate([arr] * 3, axis=1)
    except:
        return []
    return [side_by_side]

def vertical_combi_twice_unlifted(arr):
    """Stack two copies of the image on top of each other.

    Returns [] if the concatenation fails.
    """
    try:
        stacked = np.concatenate([arr] * 2, axis=0)
    except:
        return []
    return [stacked]

def vertical_combi_three_unlifted(arr):
    """Stack three copies of the image on top of each other.

    Returns [] if the concatenation fails.
    """
    try:
        stacked = np.concatenate([arr] * 3, axis=0)
    except:
        return []
    return [stacked]

def kronecker_expansion_unlifted(arr):
    """Place a copy of the image at every position where the image is non-zero.

    Computed as the Kronecker product of the mask `arr > 0` with `arr`.
    Returns [] if the operation fails.
    """
    try:
        return [np.kron(arr > 0, arr)]
    except:
        return []


    
def expansion_fliplr_unlifted(pixmap):
    """Join the image with its left/right mirror image.

    Returns two images: [image | mirror] and [mirror | image], or [] on failure.
    """
    try:
        mirror = np.fliplr(np.array(pixmap))
        arrangements = ((pixmap, mirror), (mirror, pixmap))
        return [np.concatenate(pair, axis=1) for pair in arrangements]
    except:
        return []

def expansion_flipud_unlifted(pixmap):
    """Join the image with its up/down mirror image.

    Returns two images: image above mirror and mirror above image, or [] on
    failure.
    """
    try:
        mirror = np.flipud(np.array(pixmap))
        arrangements = ((pixmap, mirror), (mirror, pixmap))
        return [np.concatenate(pair, axis=0) for pair in arrangements]
    except:
        return []

# np.array -> [np.array]
def overlapV_unlifted(pixmap):
    """Overlay the left and right halves of an image.

    The image is cut into a left and a right half (the middle column is
    dropped when the width is odd).  Returns three int32 images holding the
    cell-wise AND, OR and XOR of "is non-zero" in both halves, or [] on
    failure.
    """
    try:
        width = pixmap.shape[1]
        half = width // 2
        left_filled = pixmap[:, :half] > 0
        right_filled = pixmap[:, half + width % 2:] > 0
        combiners = (np.logical_and, np.logical_or, np.logical_xor)
        return [combine(left_filled, right_filled).astype("int32") for combine in combiners]
    except:
        return []

# np.array -> [np.array]
def overlapH_unlifted(pixmap):
    """Overlay the top and bottom halves of an image.

    The image is cut into a top and a bottom half (the middle row is dropped
    when the height is odd).  Returns three int32 images holding the
    cell-wise AND, OR and XOR of "is non-zero" in both halves, or [] on
    failure.
    """
    try:
        height = pixmap.shape[0]
        half = height // 2
        top_filled = pixmap[:half, :] > 0
        bottom_filled = pixmap[half + height % 2:, :] > 0
        combiners = (np.logical_and, np.logical_or, np.logical_xor)
        return [combine(top_filled, bottom_filled).astype("int32") for combine in combiners]
    except:
        return []

def three_diagonal_pattern_unlifted(x):
    """Fill the whole image with a 3-colour diagonal pattern.

    Cells are grouped by (row + col) % 3.  Each group takes the colour of the
    last non-zero cell of that group met in a row-major scan (0 if the group
    has none), and every cell of the result is painted in its group's colour.
    Returns [] on failure.
    """
    try:
        H, W = x.shape
        palette = [0, 0, 0]
        # np.ndindex walks the cells in row-major order
        for row, col in np.ndindex(H, W):
            value = x[row, col]
            if value != 0:
                palette[(row + col) % 3] = value
        painted = x.copy()
        for row, col in np.ndindex(H, W):
            painted[row, col] = palette[(row + col) % 3]
        return [painted]
    except:
        return []

def expand_by_unique_colors_unlifted(pixmap):
    """Scale the image up by its number of distinct non-zero colours.

    With k distinct non-zero colours, every cell becomes a k x k block.
    Returns [] on failure.
    """
    try:
        grid = pixmap.copy()
        factor = len(np.unique(grid[grid > 0]))
        taller = grid.repeat(factor, axis=0)
        return [taller.repeat(factor, axis=1)]
    except:
        return []

def kronecker_negative_expansion_unlifted(arr):
    """Place the negative of the image at every non-zero position of the image.

    The negative has the image's largest colour (at least 1) where the image
    is 0, and 0 elsewhere.  Returns [] on failure.
    """
    try:
        occupied = arr > 0
        paint = max(arr.max(), 1)
        inverted = np.logical_not(arr).astype(int) * paint
        return [np.kron(occupied, inverted)]
    except:
        return []

def kronecker_nn_expansion_unlifted(arr):
    """Place the negative of the image at every zero position of the image.

    The negative has the image's largest colour (at least 1) where the image
    is 0, and 0 elsewhere; it serves both as the placement mask and as the
    tile.  Returns [] on failure.
    """
    try:
        paint = max(arr.max(), 1)
        inverted = np.logical_not(arr).astype(int) * paint
        return [np.kron(inverted > 0, inverted)]
    except:
        return []

def kronecker_minimum_position_unlifted(arr):
    """Place a copy of the image at every cell holding its rarest value.

    The rarest value is the one with the smallest count; when several values
    tie, the largest of them is used.  Returns [] on failure.
    """
    try:
        values, counts = np.unique(arr, return_counts=True)
        # np.unique sorts the values, so [-1] picks the largest tied value
        rarest = values[counts == counts.min()][-1]
        return [np.kron(arr == rarest, arr)]
    except:
        return []

def kronecker_maximum_position_unlifted(arr):
    """Place a copy of the image at every cell holding its most common value.

    The most common value is the one with the largest count; when several
    values tie, the largest of them is used.  Returns [] on failure.
    """
    try:
        values, counts = np.unique(arr, return_counts=True)
        # np.unique sorts the values, so [-1] picks the largest tied value
        commonest = values[counts == counts.max()][-1]
        return [np.kron(arr == commonest, arr)]
    except:
        return []

def crop_from_left_unlifted(pixmap):
    """Crop one bounding box per colour, ordered from left to right.

    Colours are ordered by the first column in which they appear (ascending
    colour value within a column); colour 0 is ignored.  For each colour the
    image is cropped to the bounding box of that colour's cells.  Returns []
    on failure.
    """
    try:
        H, W = pixmap.shape
        ordered_colors = []
        for col in range(W):
            for color in np.unique(pixmap[:, col]):
                if color != 0 and color not in ordered_colors:
                    ordered_colors.append(color)

        crops = []
        for color in ordered_colors:
            cells = np.argwhere(pixmap == color)
            if len(cells) == 0:
                continue
            # Smallest / largest (row, col) of this colour's cells
            low = cells.min(axis=0)
            high = cells.max(axis=0)
            crops.append(pixmap[low[0]:high[0] + 1, low[1]:high[1] + 1])
        return crops
    except:
        return []

def crop_from_top_unlifted(pixmap):
    """Crop one bounding box per colour, ordered from top to bottom.

    Colours are ordered by the first row in which they appear (ascending
    colour value within a row); colour 0 is ignored.  For each colour the
    image is cropped to the bounding box of that colour's cells.  Returns []
    on failure.
    """
    try:
        H, W = pixmap.shape
        ordered_colors = []
        for row in range(H):
            for color in np.unique(pixmap[row, :]):
                if color != 0 and color not in ordered_colors:
                    ordered_colors.append(color)

        crops = []
        for color in ordered_colors:
            cells = np.argwhere(pixmap == color)
            if len(cells) == 0:
                continue
            # Smallest / largest (row, col) of this colour's cells
            low = cells.min(axis=0)
            high = cells.max(axis=0)
            crops.append(pixmap[low[0]:high[0] + 1, low[1]:high[1] + 1])
        return crops
    except:
        return []

def split_quarter_unlifted(pixmap):
    """Split an image into its four quadrants.

    Order: top-left, top-right, bottom-left, bottom-right.  The middle row
    and/or middle column is dropped when the corresponding size is odd.
    Returns [] on failure.
    """
    try:
        height, width = pixmap.shape
        top_end, left_end = height // 2, width // 2
        # Odd sizes skip the middle line: the second half starts one later.
        bottom_start = top_end + height % 2
        right_start = left_end + width % 2
        return [
            pixmap[:top_end, :left_end],
            pixmap[:top_end, right_start:],
            pixmap[bottom_start:, :left_end],
            pixmap[bottom_start:, right_start:],
        ]
    except:
        return []
    
def expansion_negative_fliplr_unlifted(pixmap):
    """Join the image with the negative of its left/right mirror image.

    The negative has the mirror's largest colour (at least 1) where the
    mirror is 0, and 0 elsewhere.  Returns [negative | image] and
    [image | negative], or [] on failure.
    """
    try:
        # Evaluated only so that inputs without a second dimension fail here.
        _ = pixmap.shape[1]
        mirror = np.fliplr(np.array(pixmap))
        paint = max(mirror.max(), 1)
        inverted = np.logical_not(mirror).astype(int) * paint
        arrangements = ([inverted, pixmap], [pixmap, inverted])
        return [np.concatenate(pair, axis=1) for pair in arrangements]
    except:
        return []

def expansion_negative_flipud_unlifted(pixmap):
    """Join the image with the negative of its up/down mirror image.

    The negative has the mirror's largest colour (at least 1) where the
    mirror is 0, and 0 elsewhere.  Returns negative above image and image
    above negative, or [] on failure.
    """
    try:
        # Evaluated only so that inputs without a second dimension fail here.
        _ = pixmap.shape[1]
        mirror = np.flipud(np.array(pixmap))
        paint = max(mirror.max(), 1)
        inverted = np.logical_not(mirror).astype(int) * paint
        arrangements = ([inverted, pixmap], [pixmap, inverted])
        return [np.concatenate(pair, axis=0) for pair in arrangements]
    except:
        return []

def expansion_by_unique_num_unlifted(pixmap):
    """ Crop an image to fit exactly the non 0 pixels """
    try: 
        ans = pixmap.copy()
        unique, counts = np.unique(pixmap, return_counts=True)
        num_dict = dict(zip(unique, counts))
        num_dict.pop(0, None)
        num = len(num_dict)
        for i in range(num-1):
            ans = np.concatenate([ans, pixmap], axis=1)
        tmp = ans.copy()
        for i in range(num-1):
            ans = np.concatenate([ans, tmp], axis=0)
        return [ans]
    except:
        return []

def clockwise_expansion_unlifted(pixmap):
    """Build a 2x2 mosaic out of the image and three rotated copies of it.

    Layout of the result:
        upper-left  : the original image
        upper-right : np.rot90(image)      (90 degrees counter-clockwise)
        lower-left  : np.rot90(image, -2)  (180 degrees)
        lower-right : np.rot90(image, -1)  (90 degrees clockwise)

    The quarters only fit together for square images; any other input
    (including non 2-D input) yields [] instead of raising, like the other
    `_unlifted` helpers.

    Returns a one-element list with the mosaic, or [].
    """
    try:
        grid = np.array(pixmap)
        if grid.ndim != 2:
            return []
        upper = np.concatenate([pixmap, np.rot90(grid)], axis=1)
        lower = np.concatenate([np.rot90(grid, -2), np.rot90(grid, -1)], axis=1)
        return [np.concatenate([upper, lower], axis=0)]
    except Exception:
        return []

def Hoverlap_check_unlifted(pixmap):
    """Split the image into a left and a right half and try to merge them.

    For an odd width the middle column belongs to neither half. If some
    position is non-zero (> 0) in both halves, the left half is returned
    unchanged; otherwise the element-wise sum of the two halves is returned.

    Returns a one-element list, or [] if anything fails.
    """
    try:
        width = pixmap.shape[1]
        half = width // 2
        left = pixmap[:, :half]
        # `width % 2` skips the middle column when the width is odd.
        right = pixmap[:, half + width % 2:]
        collisions = np.logical_and(left > 0, right > 0).astype("int32")
        return [left] if np.any(collisions) else [left + right]
    except:
        return []
    
def Voverlap_check_unlifted(pixmap):
    """Split the image into a top and a bottom half and try to merge them.

    For an odd height the middle row belongs to neither half. If some
    position is non-zero (> 0) in both halves, the top half is returned
    unchanged; otherwise the element-wise sum of the two halves is returned.

    Returns a one-element list, or [] if anything fails.
    """
    try:
        height = pixmap.shape[0]
        half = height // 2
        top = pixmap[:half, :]
        # `height % 2` skips the middle row when the height is odd.
        bottom = pixmap[half + height % 2:, :]
        collisions = np.logical_and(top > 0, bottom > 0).astype("int32")
        return [top] if np.any(collisions) else [top + bottom]
    except:
        return []
    
def horizontal_combi_five_unlifted(arr):
    """Lay out five copies of the image side by side, alternating original and mirror.

    The resulting row of tiles is: original, left-right mirror, original,
    left-right mirror, original.

    Returns a one-element list, or [] if anything fails.
    """
    try:
        base = arr.copy()
        mirror = np.fliplr(base)
        return [np.concatenate([base, mirror, base, mirror, arr], axis=1)]
    except:
        return []

def expansion_8directions_unlifted(pixmap):
    """Surround the image with mirrored copies of itself, giving a 3x3 mosaic.

    The centre tile is the original. The tiles left and right of it are
    left-right mirrors, the tiles above and below are top-bottom mirrors, and
    the four corner tiles are the image rotated by 180 degrees.

    Returns a one-element list, or [] if anything fails.
    """
    try:
        grid = np.array(pixmap)
        rows_flipped = np.flipud(grid)
        cols_flipped = np.fliplr(grid)
        half_turn = np.rot90(grid, 2)
        outer_band = np.concatenate([half_turn, rows_flipped, half_turn], axis=1)
        centre_band = np.concatenate([cols_flipped, pixmap, cols_flipped], axis=1)
        return [np.concatenate([outer_band, centre_band, outer_band], axis=0)]
    except:
        return []

def addition_by_size_unlifted(pixmap):
    """Tile the image H times horizontally and H times vertically, H being its row count.

    Returns a one-element list with the tiled image, or [] if anything fails.
    """
    try:
        repeats = pixmap.shape[0]
        strip = pixmap.copy()
        if repeats > 1:
            strip = np.concatenate([pixmap] * repeats, axis=1)
        tiled = strip.copy()
        if repeats > 1:
            tiled = np.concatenate([strip] * repeats, axis=0)
        return [tiled]
    except:
        return []

def pickup_objects_unlifted(pixmap):
    """Cut out the bounding box of every group of non-zero pixels.

    The coordinates of all pixels > 0 are handed to `make_group` (defined
    elsewhere in the notebook) again and again until no coordinates remain.
    From the way its result is used here, `make_group` is expected to return
    (coordinates of one group, remaining coordinates) - verify against its
    definition.

    Returns one crop of `pixmap` per group (views, not copies), or [] when the
    image has no pixel > 0 or anything fails.
    """
    try:
        pending = np.argwhere(pixmap > 0)
        height, width = pixmap.shape
        if len(pending) == 0:
            return []

        # Peel off one group of coordinates per call.
        groups = []
        while True:
            group, pending = make_group(pending, height, width)
            groups.append(group)
            if len(pending) == 0:
                break

        crops = []
        for group in groups:
            coords = np.array(group)
            first_row, last_row = np.min(coords[:, 0]), np.max(coords[:, 0])
            first_col, last_col = np.min(coords[:, 1]), np.max(coords[:, 1])
            crops.append(pixmap[first_row:last_row + 1, first_col:last_col + 1])
        return crops
    except:
        return []
    
def make_copy_in_right_side_unlifted(pixmap):
    """Mirror every non-zero pixel to the right of the right-most non-zero column.

    A pixel in column c is reflected to column 2 * max_col - c + 1, where
    max_col is the right-most column holding a pixel > 0. Reflections that
    fall outside the image are dropped. Every reflected pixel is painted with
    the maximum value of the image, whatever the colour of its source pixel.

    The input is left untouched; the drawing happens on a copy.

    Returns a one-element list with the new image, or [] when the image has
    no pixel > 0 or anything fails.
    """
    try:
        rows, cols = np.nonzero(pixmap > 0)
        if cols.size == 0:
            return []
        paint = np.max(pixmap)
        mirrored_cols = 2 * cols.max() - cols + 1
        fits = mirrored_cols < pixmap.shape[1]
        result = pixmap.copy()  # never draw on the caller's array
        result[rows[fits], mirrored_cols[fits]] = paint
        return [result]
    except Exception:
        return []
    
def find_separation_lines_unlifted(pixmap):
    """Summarise a grid that is partitioned by uniform rows/columns, one cell per region.

    A row (or column) is treated as a separation line when all of its values
    are identical. The result array has one more row than there are separating
    rows and one more column than there are separating columns. It is created
    with np.zeros, so its dtype is float.

    Returns a one-element list holding the summary array, or [] if anything
    raises along the way.
    """
    try:
        h,w = pixmap.shape
        # Row indices whose values are all identical.
        horizontal_loc = []
        for i in range(h):
            if len(np.unique(pixmap[i,:])) == 1:
                horizontal_loc.append(i)
        # Column indices whose values are all identical.
        vertical_loc = []
        for j in range(w):
            if len(np.unique(pixmap[:,j])) == 1:
                vertical_loc.append(j)
        ans = np.zeros([len(horizontal_loc)+1, len(vertical_loc)+1])
        # Rows / columns that are NOT separation lines.
        h_element = [i for i in range(h) if i not in horizontal_loc]
        w_element = [i for i in range(w) if i not in vertical_loc]
        # Values of every pixel lying on neither a separating row nor a separating column.
        # (This local name shadows the `colors` module imported from matplotlib at the top of the notebook.)
        colors = [pixmap[i][j] for i in h_element for j in w_element]
        
        # All non-separator pixels share one value: fill the whole summary with it.
        if len(np.unique(colors)) == 1:
            for i in range(len(horizontal_loc)+1):
                for j in range(len(vertical_loc)+1):
                    ans[i][j] = colors[0]
            return [ans]
        else:
            # Otherwise fill the summary cell by cell. np.unique returns sorted values,
            # so `[0]` picks the smallest value found in each region.
            for i in range(len(horizontal_loc)):
                for j in range(len(vertical_loc)): 
                    if i == len(horizontal_loc)-1:
                        # Last separating row: only the last separating column is handled here;
                        # for any other j this branch does nothing.
                        # NOTE(review): the `elif`/`else` below pair with the `if i == ...` above,
                        # not with this inner `if` - confirm that is the intended nesting.
                        if j == len(vertical_loc)-1:
                            ans[i,j] = np.unique(pixmap[horizontal_loc[i-1]+1:horizontal_loc[i], vertical_loc[j-1]+1:vertical_loc[j]])[0] 
                            ans[i+1,j] = np.unique(pixmap[horizontal_loc[i-1]+1:horizontal_loc[i], vertical_loc[j]+1:])[0] 
                            ans[i, j+1] = np.unique(pixmap[horizontal_loc[i]+1:, vertical_loc[j-1]+1:vertical_loc[j]])[0] 
                            ans[i+1, j+1] = np.unique(pixmap[horizontal_loc[i]+1:, vertical_loc[j]+1:])[0] 
                    elif j == 0:
                        # NOTE(review): when i == 0, horizontal_loc[i-1] is horizontal_loc[-1]
                        # (the last separator) - confirm this wrap-around is intended.
                        ans[i,j] = np.unique(pixmap[horizontal_loc[i-1]+1:horizontal_loc[i], :vertical_loc[j]])[0] 
                        ans[i+1,j] = np.unique(pixmap[horizontal_loc[i]+1:, :vertical_loc[j]])[0] 
                    else:
                        ans[i,j] = np.unique(pixmap[horizontal_loc[i-1]+1:horizontal_loc[i], vertical_loc[j-1]+1:vertical_loc[j]])[0] 
                        ans[i+1,j] = np.unique(pixmap[horizontal_loc[i]+1:, vertical_loc[j-1]+1:vertical_loc[j]])[0]          
                else:
                    # NOTE(review): this `else` is aligned with `for j`, which makes it a
                    # for-else clause: it runs once after the inner loop finishes, with `j`
                    # left at its last value. It looks like it may have been meant as the
                    # `else` of `if i == len(horizontal_loc)-1` - confirm before changing.
                    if j == 0:
                        ans[i,j] = np.unique(pixmap[:horizontal_loc[i], :vertical_loc[j]])[0]
                    elif j == len(vertical_loc)-1:
                        ans[i,j] = np.unique(pixmap[:horizontal_loc[i], vertical_loc[j-1]+1:vertical_loc[j]])[0] 
                        ans[i,j+1] = np.unique(pixmap[:horizontal_loc[i], vertical_loc[j]+1:])[0] 
                    else:
                        ans[i,j] = np.unique(pixmap[:horizontal_loc[i], vertical_loc[j-1]+1:vertical_loc[j]])[0]            
        return [ans]
    except:
        # Any failure (for example np.unique(...)[0] on an empty slice) gives no result.
        return []
    
def unique_center_to_edge_unlifted(pixmap):
    """Draw a one-pixel frame in the image's single non-zero colour on a blank canvas.

    The image must contain exactly one distinct non-zero value. The result has
    the same shape and dtype as the input, is 0 everywhere except on its
    outermost rows and columns, which carry that value.

    The previous implementation painted the frame with the *number of pixels*
    of that colour (the dict value) instead of the colour itself (the dict key).

    Returns a one-element list, or [] when the image has zero or several
    non-zero colours, or when anything fails.
    """
    try:
        present = np.unique(pixmap)
        present = present[present != 0]
        if present.size != 1:
            return []
        border_color = present[0]
        framed = np.zeros_like(pixmap)
        framed[0, :] = border_color
        framed[-1, :] = border_color
        framed[:, 0] = border_color
        framed[:, -1] = border_color
        return [framed]
    except Exception:
        return []

We now write functions that take a list of images and transform it to a new list. ([np.array] -> [np.array])

In [6]:
def identity(x: [np.array]):
    """Return the list of images untouched (the do-nothing instruction)."""
    return x

def tail(x):
    """Drop the first image; a list with fewer than two images is returned as is."""
    return x[1:] if len(x) > 1 else x

def init(x):
    """Keep only the first image; a list with fewer than two images is returned as is."""
    return x[:1] if len(x) > 1 else x

def union(x):
    """Combine all images of the list with a pixel-wise bitwise OR.

    A list with fewer than two images is returned unchanged. All images must
    share one shape, otherwise the result is []. Any error also yields [].
    """
    try:
        if len(x) < 2:
            return x

        # Every image has to match the shape of the first one.
        reference = tuple(x[0].shape)
        if any(tuple(img.shape) != reference for img in x[1:]):
            return []

        stacked = np.array(x).astype(int)
        return [np.bitwise_or.reduce(stacked)]
    except:
        return []

def intersect(x):
    """Mark with 1 the pixels whose product over all images is positive, 0 elsewhere.

    A list with fewer than two images is returned unchanged. All images must
    share one shape, otherwise the result is [].
    """
    if len(x) < 2:
        return x

    # Every image has to match the shape of the first one.
    reference = tuple(x[0].shape)
    if any(tuple(img.shape) != reference for img in x[1:]):
        return []

    common = np.prod(np.array(x), axis=0) > 0
    return [common.astype(int)]

def sortByColor(xs):
    """Order the non-empty images by their largest pixel value, smallest first."""
    non_empty = [img for img in xs if len(img.reshape(-1)) > 0]
    non_empty.sort(key=lambda img: img.max())
    return non_empty

def sortByWeight(xs):
    """Order the non-empty images by their number of pixels > 0, fewest first."""
    non_empty = [img for img in xs if len(img.reshape(-1)) > 0]
    non_empty.sort(key=lambda img: (img > 0).sum())
    return non_empty

def reverse(x):
    """Return the images in the opposite order (last image first)."""
    backwards = slice(None, None, -1)
    return x[backwards]

def connect_inlist(pixmap):
    """Glue all images of the list together, side by side and/or on top of each other.

    With at least two images:
      * if all images have the same number of rows, their horizontal
        concatenation is added to the result;
      * if all images have the same number of columns, their vertical
        concatenation is added to the result.

    Returns a list with zero, one or two images; [] if anything fails.
    """
    try:
        joined = []
        if len(pixmap) >= 2:
            heights = {img.shape[0] for img in pixmap}
            widths = {img.shape[1] for img in pixmap}
            if len(heights) == 1:
                joined.append(np.concatenate(list(pixmap), axis=1))
            if len(widths) == 1:
                joined.append(np.concatenate(list(pixmap), axis=0))
        return joined
    except:
        return []
    
def find_horizontal_asymmetric_objects(x):
    """Keep the images that are NOT equal to their own left-right mirror.

    The previous version halved the width and then compared column i with
    column (half - 1 - i), which only checked the left half against itself;
    e.g. [[1, 2, 2, 1]] was reported as asymmetric. The whole image is now
    compared with its mirror.

    Returns the asymmetric images in their original order. If any element is
    not a 2-D array, or anything else fails, the result is [].
    """
    try:
        asymmetric = []
        for picture in x:
            if picture.ndim != 2:
                return []
            if not np.array_equal(picture, np.fliplr(picture)):
                asymmetric.append(picture)
        return asymmetric
    except Exception:
        return []

def find_horizontal_symmetric_objects(x):
    """Keep the images that are equal to their own left-right mirror.

    The previous version halved the width and then compared column i with
    column (half - 1 - i), which only checked the left half against itself;
    e.g. [[1, 2, 2, 1]] was rejected. The whole image is now compared with
    its mirror.

    Returns the symmetric images in their original order. If any element is
    not a 2-D array, or anything else fails, the result is [].
    """
    try:
        symmetric = []
        for picture in x:
            if picture.ndim != 2:
                return []
            if np.array_equal(picture, np.fliplr(picture)):
                symmetric.append(picture)
        return symmetric
    except Exception:
        return []

def find_vertical_asymmetric_objects(x):
    """Keep the images that are NOT equal to their own top-bottom mirror.

    The previous version halved the height and then compared row i with
    row (half - 1 - i), which only checked the top half against itself.
    The whole image is now compared with its mirror.

    Returns the asymmetric images in their original order. If any element is
    not a 2-D array, or anything else fails, the result is [].
    """
    try:
        asymmetric = []
        for picture in x:
            if picture.ndim != 2:
                return []
            if not np.array_equal(picture, np.flipud(picture)):
                asymmetric.append(picture)
        return asymmetric
    except Exception:
        return []

def find_vertical_symmetric_objects(x):
    """Keep the images that are equal to their own top-bottom mirror.

    The previous version halved the height and then compared row i with
    row (half - 1 - i), which only checked the top half against itself.
    The whole image is now compared with its mirror.

    Returns the symmetric images in their original order. If any element is
    not a 2-D array, or anything else fails, the result is [].
    """
    try:
        symmetric = []
        for picture in x:
            if picture.ndim != 2:
                return []
            if np.array_equal(picture, np.flipud(picture)):
                symmetric.append(picture)
        return symmetric
    except Exception:
        return []

def find_element_with_smallest_noise_objects(x):
    """Keep the image(s) with the fewest pixels that differ from their most frequent value.

    The "noise" of an image is its pixel count minus the count of its most
    common value. Every image whose noise equals the minimum is returned, in
    the original order. Any error yields [].
    """
    try:
        noise = []
        for picture in x:
            _, counts = np.unique(picture, return_counts=True)
            noise.append(counts.sum() - counts.max())
        if not noise:
            return []
        lowest = min(noise)
        return [picture for picture, level in zip(x, noise) if level == lowest]
    except:
        return []
    
def find_element_with_largest_noise_objects(x):
    """Keep the image(s) with the most pixels that differ from their most frequent value.

    The "noise" of an image is its pixel count minus the count of its most
    common value. Every image whose noise equals the maximum is returned, in
    the original order. Any error yields [].
    """
    try:
        noise = []
        for picture in x:
            _, counts = np.unique(picture, return_counts=True)
            noise.append(counts.sum() - counts.max())
        if not noise:
            return []
        highest = max(noise)
        return [picture for picture, level in zip(x, noise) if level == highest]
    except:
        return []

def find_objects_with_largest_uniques(x):
    """Keep the image(s) that contain the largest number of distinct values.

    Every image tied for the maximum is returned, in the original order.
    Any error yields [].
    """
    try:
        variety = [np.unique(picture).size for picture in x]
        if not variety:
            return []
        most = max(variety)
        return [picture for picture, amount in zip(x, variety) if amount == most]
    except:
        return []

def find_objects_with_smallest_uniques(x):
    """Keep the image(s) that contain the smallest number of distinct values.

    Every image tied for the minimum is returned, in the original order.
    Any error yields [].
    """
    try:
        variety = [np.unique(picture).size for picture in x]
        if not variety:
            return []
        fewest = min(variety)
        return [picture for picture, amount in zip(x, variety) if amount == fewest]
    except:
        return []

def find_unique_objects(pixmap):
    """Keep the images that occur an odd number of times in the list.

    Images are processed in order: an image equal to one already kept removes
    that one, otherwise it is kept. Lists with fewer than three images give [].

    Equality is tested with np.array_equal, so images of different shapes are
    simply different. The former `np.all(a == b)` test broadcast across shapes
    (a 1x1 image "matched" any image filled with the same value) and, on
    recent NumPy versions, raises for incompatible shapes, which turned the
    whole result into [].

    Returns the surviving images, or [] on any error.
    """
    try:
        if len(pixmap) < 3:
            return []
        kept = []
        for candidate in pixmap:
            twin = next(
                (k for k, seen in enumerate(kept) if np.array_equal(candidate, seen)),
                None,
            )
            if twin is None:
                kept.append(candidate)
            else:
                kept.pop(twin)
        return kept
    except Exception:
        return []
    
def find_same_objects(pixmap):
    """Return the first image left once the `find_unique_objects` results are removed.

    For every image reported by `find_unique_objects`, the first equal image
    is removed from a copy of the list; the first remaining image (if any) is
    returned. Lists with fewer than three images give [].

    Equality is tested with np.array_equal so that images of different shapes
    never match and never raise (the former `np.all(a == b)` test broadcast
    across shapes or raised on recent NumPy versions).

    Returns a one-element list, or [] when nothing remains or anything fails.
    """
    try:
        if len(pixmap) < 3:
            return []
        remaining = list(pixmap)
        for single in find_unique_objects(pixmap):
            # A missing match raises StopIteration, which falls through to [] below.
            position = next(
                k for k, img in enumerate(remaining) if np.array_equal(img, single)
            )
            remaining.pop(position)
        return [remaining[0]] if remaining else []
    except Exception:
        return []
    
def overlap_by_maximum_value(pixmap):
    """Overlay all images, keeping the largest value at every pixel.

    Requires at least two 2-D images that all share one shape. The result
    keeps the dtype of the first image; the inputs are not modified.

    Returns a one-element list, or [] when the requirements are not met or
    anything fails.
    """
    try:
        if len(pixmap) < 2:
            return []
        if any(img.ndim != 2 for img in pixmap):
            return []
        if len({img.shape for img in pixmap}) != 1:
            return []
        merged = pixmap[0].copy()
        for layer in pixmap[1:]:
            # Slice assignment keeps merged's dtype, as element-wise assignment did.
            merged[...] = np.maximum(merged, layer)
        return [merged]
    except Exception:
        return []
    
def overlap_by_minimum_value(pixmap):
    """Overlay all images, keeping the smallest value at every pixel.

    Requires at least two 2-D images that all share one shape. The result
    keeps the dtype of the first image; the inputs are not modified.

    Unlike the previous version, malformed input (for example 1-D arrays) no
    longer raises: it yields [] like `overlap_by_maximum_value` does.

    Returns a one-element list, or [] when the requirements are not met or
    anything fails.
    """
    try:
        if len(pixmap) < 2:
            return []
        if any(img.ndim != 2 for img in pixmap):
            return []
        if len({img.shape for img in pixmap}) != 1:
            return []
        merged = pixmap[0].copy()
        for layer in pixmap[1:]:
            # Slice assignment keeps merged's dtype, as element-wise assignment did.
            merged[...] = np.minimum(merged, layer)
        return [merged]
    except Exception:
        return []
    
def represent_only_inside(pixmap):
    """Redraw each image so that only its "inside" cells are coloured.

    For every image, the sorted coordinates of its pixels > 0 are passed to
    `find_inside` (defined elsewhere in the notebook). The cells it returns
    are painted with the image's maximum value on an otherwise blank canvas.
    Presumably `find_inside` returns [row, col] pairs enclosed by the given
    points - verify against its definition.

    The input images are no longer overwritten; a new array is produced for
    each of them.

    Returns one redrawn image per input image, or [] if anything fails.
    """
    try:
        redrawn = []
        for pic in pixmap:
            occupied = sorted(np.argwhere(pic > 0).tolist())
            interior = find_inside(occupied)
            color = np.max(pic)
            canvas = np.zeros_like(pic)  # draw on a fresh array, not on the caller's image
            for cell in interior:
                canvas[cell[0], cell[1]] = color
            redrawn.append(canvas)
        return redrawn
    except Exception:
        return []
    
def combine_into_one_piece(pixmap):
    """Fit the smaller images into the holes (zeros) of the largest image.

    The image with the largest area is the canvas. In every other image the
    zeros are replaced by the canvas's maximum value. Each of these pieces is
    then slid over the canvas in row-major order and pasted at the first
    position where the number of zeros under it equals the number of cells in
    which piece and canvas differ. Pieces are tried from largest to smallest
    area; a piece that fits nowhere is skipped.

    The input images are not modified (the previous version overwrote the
    zeros of the smaller images in place).

    Returns a one-element list with the filled canvas, or [] if anything fails.
    """
    try:
        areas = [img.shape[0] * img.shape[1] for img in pixmap]
        order = sorted(range(len(areas)), key=lambda k: areas[k], reverse=True)
        canvas = pixmap[order[0]].copy()
        height, width = canvas.shape
        filler = np.max(canvas)  # taken before any piece is pasted

        for idx in order[1:]:
            piece = pixmap[idx].copy()  # work on a copy, never on the caller's image
            piece[piece == 0] = filler
            piece_h, piece_w = piece.shape
            placed = False
            for row in range(height - piece_h + 1):
                for col in range(width - piece_w + 1):
                    window = canvas[row:row + piece_h, col:col + piece_w]
                    holes = np.sum(window == 0)
                    changed = np.sum(piece - window != 0)
                    if holes == changed:
                        canvas[row:row + piece_h, col:col + piece_w] = piece
                        placed = True
                        break
                if placed:
                    break
        return [canvas]
    except Exception:
        return []
    
def combine_into_one_piece2(pixmap):
    """Stamp the smaller images onto the largest one, anchored on matching colours.

    The image with the largest area is copied and its most frequent value is
    replaced by 0. For every other image (largest area first), the first cell
    of the canvas - in row-major order - whose value equals that image's
    maximum is located, and the image is written into the 3x3 window centred
    on that cell. Images without a matching cell are skipped.

    Returns a one-element list with the canvas, or [] if anything fails
    (for instance when an image does not fit the 3x3 window).
    """
    try:
        areas = [img.shape[0] * img.shape[1] for img in pixmap]
        order = sorted(range(len(areas)), key=lambda k: areas[k], reverse=True)
        canvas = pixmap[order[0]].copy()

        # Blank out the dominant value (first one in sorted order on ties).
        values, counts = np.unique(canvas, return_counts=True)
        dominant = values[np.argmax(counts)]
        canvas[canvas == dominant] = 0

        height, width = canvas.shape  # only a 2-D canvas is accepted
        for idx in order[1:]:
            stamp = pixmap[idx]
            anchors = np.argwhere(canvas == np.max(stamp))
            if len(anchors) > 0:
                row, col = anchors[0]
                canvas[row - 1:row + 2, col - 1:col + 2] = stamp
        return [canvas]
    except:
        return []
    
def combine_hole_objects(pixmap):
    """Concatenate the images that contain a 0, in every possible order.

    Only images whose minimum value is 0 take part. For each permutation of
    them, two results are produced: the images joined side by side and the
    images stacked vertically.

    Returns the list of all these combinations (two per permutation), or []
    if there is no such image or any concatenation fails.
    """
    # Permutations recipe: https://note.nkmk.me/python-math-factorial-permutations-combinations/
    try:
        holed = [img for img in pixmap if np.min(img) == 0]
        combos = []
        for order in itertools.permutations(range(len(holed)), len(holed)):
            # With no image at all, order[0] raises and the function returns [].
            side_by_side = stacked = holed[order[0]]
            for idx in order[1:]:
                side_by_side = np.concatenate([side_by_side, holed[idx]], axis=1)
                stacked = np.concatenate([stacked, holed[idx]], axis=0)
            combos.append(side_by_side)
            combos.append(stacked)
        return combos
    except:
        return []

## Composition of functions

It is important to make sure we can chain both kinds of functions. To compose two functions `f` and `g` of type `[np.array] -> [np.array]`, we simply call `g(f([input_image]))`.

But each function of type `np.array -> [np.array]` requires some extra work: we need to generate a *lifted version* of it. A function `f: np.array -> [np.array]` can be turned into a function of type `[np.array] -> [np.array]` by applying `f` to each image of the input list and concatenating the results.

If you want to know more about the `lift` concept, have a look at the concept of [*monads*](https://en.wikipedia.org/wiki/Monad_%28functional_programming%29). We are indeed using the *list monad*.

In [7]:
def lift(fct):
    """Turn a function `image -> [image]` into a function `[image] -> [image]`.

    The lifted function applies `fct` to every image of its input list and
    returns all the results concatenated into one flat list. Its `__name__`
    is `fct.__name__` with a trailing `_unlifted` replaced by `_lifted`.
    """
    import re

    def lifted_function(xs):
        per_image = [fct(x) for x in xs]
        return list(itertools.chain.from_iterable(per_image))

    # Readable name, used when a program is printed.
    lifted_function.__name__ = re.sub('_unlifted$', '_lifted', fct.__name__)
    return lifted_function

# Lifted versions of every `*_unlifted` helper: each name below is a
# `[np.array] -> [np.array]` function produced by `lift`, ready to be used as
# one instruction of a program.
cropToContent = lift(cropToContent_unlifted)
groupByColor = lift(groupByColor_unlifted)
splitH = lift(splitH_unlifted)
negative = lift(negative_unlifted)
rotation = lift(rotation_unlifted)
unrotation = lift(unrotation_unlifted)
flip = lift(flip_unlifted)
splitV = lift(splitV_unlifted)
copy_horizontal = lift(copy_horizontal_unlifted)
copy_vertical = lift(copy_vertical_unlifted)
crop_inside = lift(crop_inside_unlifted)
expand_twice = lift(expand_twice_unlifted)
expand_three = lift(expand_three_unlifted)
expand_four = lift(expand_four_unlifted)
horizontal_combi_twice = lift(horizontal_combi_twice_unlifted)
# NOTE(review): this name does not follow the `horizontal_combi_*` pattern of its
# neighbours (it lifts horizontal_combi_three_unlifted). Check how it is referenced
# elsewhere before renaming it.
horizontal_three_twice = lift(horizontal_combi_three_unlifted)
vertical_combi_twice = lift(vertical_combi_twice_unlifted)
vertical_combi_three = lift(vertical_combi_three_unlifted)
kronecker_expansion = lift(kronecker_expansion_unlifted)
expansion_fliplr = lift(expansion_fliplr_unlifted)
expansion_flipud = lift(expansion_flipud_unlifted)
overlapV = lift(overlapV_unlifted)
overlapH = lift(overlapH_unlifted)
three_diagonal_pattern = lift(three_diagonal_pattern_unlifted)
expand_by_unique_colors = lift(expand_by_unique_colors_unlifted)
kronecker_negative_expansion = lift(kronecker_negative_expansion_unlifted)
kronecker_nn_expansion = lift(kronecker_nn_expansion_unlifted)
kronecker_minimum_position = lift(kronecker_minimum_position_unlifted)
kronecker_maximum_position = lift(kronecker_maximum_position_unlifted)
crop_from_left = lift(crop_from_left_unlifted)
crop_from_top = lift(crop_from_top_unlifted)
split_quarter = lift(split_quarter_unlifted)
expansion_negative_fliplr = lift(expansion_negative_fliplr_unlifted)
expansion_negative_flipud = lift(expansion_negative_flipud_unlifted)
expansion_by_unique_num = lift(expansion_by_unique_num_unlifted)
clockwise_expansion = lift(clockwise_expansion_unlifted)
Hoverlap_check = lift(Hoverlap_check_unlifted)
Voverlap_check = lift(Voverlap_check_unlifted)
horizontal_combi_five = lift(horizontal_combi_five_unlifted)
expansion_8directions = lift(expansion_8directions_unlifted)
addition_by_size = lift(addition_by_size_unlifted)
pickup_objects = lift(pickup_objects_unlifted)
make_copy_in_right_side = lift(make_copy_in_right_side_unlifted)
find_separation_lines = lift(find_separation_lines_unlifted)
unique_center_to_edge = lift(unique_center_to_edge_unlifted)

# Program evaluation


We define our building blocks for programs (the functions in our DSL). We define a program as a list of functions from our DSL: `program: [[np.array] -> [np.array]]`. The instructions of a program are executed *from left to right*. This means that if we want to first `splitByColor` and then compute the `negative` of the image, we need to write `[splitByColor, negative]`, in this order.

In [8]:
def program_desc(program):
    """Describe a program as its instruction names joined by ' >> ', in execution order."""
    return ' >> '.join(step.__name__ for step in program)

# Example: build a four-instruction program and print its human-readable description.
program = [splitH, groupByColor, negative, intersect]
print(program_desc(program))

splitH_lifted >> groupByColor_lifted >> negative_lifted >> intersect


In [9]:
def evaluate(program: [], input_image: np.array):
    """Run a program on one input image and return the list of images it produces.

    The input is converted to a numpy array and wrapped in a one-element list.
    Each instruction of the program is then applied in order to the current
    list; after every instruction, images with a zero-sized first or second
    dimension are discarded. As soon as no image is left, [] is returned.
    """
    grid = np.array(input_image)
    assert isinstance(grid, np.ndarray)

    images = [grid]
    for step in program:
        produced = step(images)
        # Drop degenerate (empty) images before the next instruction.
        images = [img for img in produced if img.shape[0] > 0 and img.shape[1] > 0]
        if not images:
            return []
    return images

In [10]:
# Demo: load one training task (index 21 of the sorted task list) and show its
# first training pair, then the objects `pickup_objects` extracts from the input.
task_file = str(training_path / training_tasks[21])
with open(task_file, 'r') as f:
    task = json.load(f)
print(np.array(task['train'][0]['input']))
print(np.array(task['train'][0]['output']))
# Last expression of the cell: the list of images produced by the one-instruction program.
evaluate(program=[pickup_objects], input_image=task['train'][0]['input'])

[[0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 6 6 0 0 0]
 [0 0 0 5 0 0 0 5 0 0 0]
 [0 0 4 4 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 7 0 0 0 0]
 [0 0 0 0 0 5 7 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]]
[[6 6 7]
 [0 5 7]
 [4 4 0]]


[array([[6, 6],
        [0, 5]]),
 array([[0, 5],
        [4, 4]]),
 array([[0, 7],
        [5, 7]])]

# Program generation (Genetic Algorithm)
We now have a simple and powerful language to express various transformations on images. But someone or something still has to write the actual program that solves a task. In this part, we implement a naive but somewhat efficient genetic algorithm that is able to find the solution to a task by itself.

The strategy will be as follow:

* We generate random programs with one node, and then run them. We keep the best solutions (the *elites* of our population).
* Starting from these best solutions, we create new programs through mutation. We evaluate them again and update our collection of elites.
* We repeat this process again and again... until a solution is found.
---
Since we use multiple fitness functions, our approach can be qualified as [multi-objective](https://en.wikipedia.org/wiki/Multi-objective_optimization): we try to optimise multiple objectives at the same time. Our *elites* can be understood as an approximation of the Pareto surface (the collection of Pareto-optimal solutions). In our specific case, when a solution to the task exists in our DSL, there exists a global minimum that is smaller than any other candidate. In such a case the Pareto surface is reduced to a single point. Nevertheless, this is a good image to keep in mind to understand what the collection of *elites* represents.

# Fitness

To help our algorithm progress in the right direction, we need a way to give a score to an existing program. The smaller is the score of the program, the closer we are to the solution. One can think of this score as a distance of our program to the optimal solution.

Notice that one can think of this problem as a minimization problem (minimize `score`) or as a maximization problem (maximize `-score`). In machine learning it is common to minimize a distance, whereas in the genetic algorithm literature you can read that we maximize the fitness of an agent^1. Both conventions work perfectly, but it is more convenient to choose one and stick to it. Therefore, we will MINIMIZE the score of our programs.

Because we can't really come up with one single good score function that would describe well the progression of the algorithm on all tasks of the dataset, we evaluate how our program performs on different aspects through a collection of score functions.

^1: The reason you see maximization and positive score in Genetic Programming literature is that you need all your values to be positive in order to build a probability distribution over your population. Since we use an elitist algorithm instead of a sampling of the population for reproduction, we do not need this restriction.

In [11]:
def are_two_images_equals(a, b):
    """Tell whether two images have the same shape and every pixel differs by less than 1."""
    same_shape = tuple(a.shape) == tuple(b.shape)
    return same_shape and bool((np.abs(b - a) < 1).all())

def is_all_solution(program, task, verbose=True):
    """Check that, for every sample, the expected output is among ALL images the program produces.

    `task` is an iterable of samples, each a mapping with 'input' and
    'output' grids. `verbose` is accepted but not used. A sample for which
    the program produces no image makes the check fail.
    """
    for sample in task:
        given = np.array(sample['input'])
        expected = np.array(sample['output'])

        produced = evaluate(program, given)
        if len(produced) < 1:
            return False

        # Every produced image is a candidate (no limit on how many are inspected).
        matches = [are_two_images_equals(img, expected) for img in produced]
        if not any(matches):
            return False

    return True
def is_solution(program, task, verbose=True):
    """Check that, for every sample, the expected output is among the first 3 images produced.

    `task` is an iterable of samples, each a mapping with 'input' and
    'output' grids. `verbose` is accepted but not used. A sample for which
    the program produces no image makes the check fail.
    """
    for sample in task:
        given = np.array(sample['input'])
        expected = np.array(sample['output'])

        produced = evaluate(program, given)
        if len(produced) < 1:
            return False

        # Only the three first outputs count as answers.
        matches = [are_two_images_equals(img, expected) for img in produced[:3]]
        if not any(matches):
            return False

    return True

In [12]:
def width_fitness(predicted, expected_output):
    """How close the predicted image is to having the right width. Less is better.

    The width is the number of columns, i.e. `shape[1]` (the previous version
    compared `shape[0]`, the number of rows).
    """
    return np.abs(predicted.shape[1] - expected_output.shape[1])

def height_fitness(predicted, expected_output):
    """How close the predicted image is to having the right height. Less is better.

    The height is the number of rows, i.e. `shape[0]` (the previous version
    compared `shape[1]`, the number of columns).
    """
    return np.abs(predicted.shape[0] - expected_output.shape[0])

def activated_pixels_fitness(p, e):
    """Count the positions where exactly one of the two images has a pixel > 0. Less is better.

    Both images are aligned on their top-left corner inside a canvas large
    enough to hold either of them; cells outside an image count as 0.
    """
    rows = max(p.shape[0], e.shape[0])
    cols = max(p.shape[1], e.shape[1])
    canvas = np.zeros((rows, cols), dtype=int)
    canvas[:p.shape[0], :p.shape[1]] = (p > 0).astype(int)
    canvas[:e.shape[0], :e.shape[1]] -= (e > 0).astype(int)
    mismatch = canvas != 0
    return mismatch.sum()

def colors_fitness(p, e):
    """Count the values that appear in only one of the two images. Less is better."""
    predicted_colors = np.unique(p)
    expected_colors = np.unique(e)
    shared = len(np.intersect1d(predicted_colors, expected_colors))
    only_predicted = len(predicted_colors) - shared
    only_expected = len(expected_colors) - shared
    return only_predicted + only_expected

def colors_num_sum_fitness(p, e):
    """Absolute difference between the sums of all pixel values of the two images. Less is better."""
    # Unpacking the shapes restricts both arguments to 2-D images.
    _p_rows, _p_cols = p.shape
    _e_rows, _e_cols = e.shape
    predicted_total = np.sum(np.sum(p))
    expected_total = np.sum(np.sum(e))
    return np.abs(predicted_total - expected_total)

# Fitness functions applied to every prediction. activated_pixels_fitness is
# defined above but is currently left out of the list.
fitness_functions = [height_fitness, width_fitness,  colors_fitness, colors_num_sum_fitness]
# One weight per entry of fitness_functions, in the same order.
coefficients = [1, 1, 1, 1]

In [13]:
# ([[np.array] -> [np.array]], Taks) -> (int, int, ..., int)
def evaluate_fitness(program, task, coefficients):
    """Take a program and a task, and return its fitness score (less is better).

    For every sample of the task the program is run once on the sample input:
      * if it produces no image, every fitness function is charged a penalty
        of 500 for that sample;
      * otherwise every fitness function contributes its best (smallest)
        value over all produced images.
    Contributions are summed over the samples, so every sample counts (the
    previous version overwrote the running score with the value of the last
    sample that produced an image).

    Parameters:
        program: list of `[np.array] -> [np.array]` functions.
        task: iterable of samples, each a mapping with 'input' and 'output' grids.
        coefficients: one weight per entry of the global `fitness_functions`.

    Returns:
        The weighted sum of the per-function scores.
    """
    score = np.zeros((len(fitness_functions)))

    for sample in task:
        given = np.array(sample['input'])
        expected = np.array(sample['output'])

        # The outputs do not depend on the fitness function: run the program once per sample.
        images = evaluate(program, given)
        if images == []:
            score += 500  # penalize the absence of prediction on every objective
            continue

        for index, fitness_function in enumerate(fitness_functions):
            score[index] += min(fitness_function(image, expected) for image in images)

    total_score = sum([score[di] * coefficients[di] for di in range(len(score))])
    return total_score

# GA main algorithm
Now that we can compare two programs, we need a way to generate some of them. We generate them randomly from a pool of the best candidates.
For the initial run, and also to be able to evaluate fresh candidates, we also allow the spontaneous generation of newborn one-instruction programs.

In [14]:
def build_candidates(allowed_nodes=[identity], best_candidates=[], nb_candidates=50): 
    """
    Create a pool of fresh candidates using the `allowed_nodes`.
    The pool contains a mix of new single-instruction programs
    and mutations of the best candidates.

    Returns `nb_candidates` programs (lists of functions), randomly shuffled.
    """
    # Extensions are only made while len(program) < length_limit - 1.
    # NOTE(review): with a limit of 2 only empty programs qualify, so programs
    # never grow beyond one instruction - confirm this is intended.
    length_limit = 2
    pool = []

    def random_node():
        return random.choice(allowed_nodes)

    while len(pool) < nb_candidates:
        # Five brand new one-instruction programs per round.
        for _ in range(5):
            pool.append([random_node()])

        for elite in best_candidates:
            if len(elite) < length_limit - 1:
                # One extra instruction on the right, then one on the left.
                pool.append(elite + [random_node()])
                pool.append([random_node()] + elite)
            # Replace one randomly chosen instruction in a copy of the elite.
            # The replacement is drawn before the position, as in a plain
            # `copy[randrange(...)] = random_node()` statement.
            replacement = random_node()
            mutant = list(elite)
            mutant[random.randrange(0, len(elite))] = replacement
            pool.append(mutant)

    # Shuffle, then truncate if there are too many candidates.
    np.random.shuffle(pool)
    return pool[:nb_candidates]

In [15]:
def random_node(allowed_nodes):
    """Return one element of `allowed_nodes`, picked uniformly at random."""
    chosen = random.choice(allowed_nodes)
    return chosen

def generate_init_dsl(dsls, nums): # generate initial species
    """
    Build the initial population of single-instruction programs.

    Parameters
    ----------
    dsls : sequence
        Pool of instructions to draw from.
    nums : int
        Number of programs to generate.

    Returns
    -------
    list of list
        `nums` programs, each a one-element list holding an instruction
        drawn uniformly at random from `dsls` (empty when `nums <= 0`).

    Raises
    ------
    IndexError
        If `dsls` is empty and `nums > 0` (raised by `random.choice`).
    """
    new_candidates = []
    while len(new_candidates) < nums:
        # Draw from the supplied pool (same draw as `random_node(dsls)`);
        # the previous argument-less `random_node()` call raised a TypeError.
        new_candidates.append([random.choice(dsls)])
    return new_candidates

def generate_roulette(dsls, task): # generate score for roulette selection
    """
    Compute roulette-wheel selection probabilities for a list of programs.

    Each program is scored with `evaluate_fitness(program, task, coefficients)`
    (`coefficients` is read from the notebook's global scope), weighted by
    `1 / (score + 1)`, and the weights are normalised by their sum.

    Returns
    -------
    numpy.ndarray
        One probability per program, in the order of `dsls`.
    """
    # NOTE(review): assumes scores are never -1 (division by zero) — confirm
    # against evaluate_fitness.
    scores = [evaluate_fitness(program, task, coefficients) for program in dsls]
    inverse_scores = [1 / (score + 1) for score in scores]
    normaliser = np.sum(inverse_scores)

    roulette = np.zeros(len(scores))
    for position, weight in enumerate(inverse_scores):
        roulette[position] = weight / normaliser
    return roulette

def roulette_choice(dsls, task): # select good dsls to keep
    """
    Draw two programs from `dsls` by roulette-wheel selection.

    The draw is made with replacement, using the probabilities returned by
    `generate_roulette(dsls, task)`, so the same program may be returned twice.

    Returns
    -------
    list
        The two selected programs.
    """
    probabilities = generate_roulette(dsls, task)
    picked_positions = np.random.choice(
        len(probabilities), size=2, replace=True, p=probabilities
    )
    selected = []
    for position in picked_positions:
        selected.append(dsls[position])
    return selected

# https://stackoverflow.com/questions/16547820/crossover-different-length-genotypes
def simple_crossover(parent1, parent2): # crossover
    """
    One-point crossover between two programs of possibly different lengths.

    A cut index is drawn uniformly from `1 .. min(len) - 1`, so both pieces
    taken from the shorter parent are non-empty, and the tails of the two
    parents are swapped at that index.

    Parameters
    ----------
    parent1, parent2 : list
        Programs (sequences supporting slicing and `+`).

    Returns
    -------
    tuple
        `(child1, child2)` where `child1` is the head of `parent1` followed
        by the tail of `parent2`, and `child2` the reverse. When the shorter
        parent has fewer than two instructions there is no valid cut index
        and copies of the parents are returned unchanged.
    """
    shortest = min(len(parent1), len(parent2))
    if shortest < 2:
        return parent1[:], parent2[:]
    # randrange excludes its upper bound: (1, shortest) yields 1..shortest-1.
    # The former (1, shortest - 1) raised for length-2 parents and never used
    # the last cut index.
    cross_point = random.randrange(1, shortest)
    child1 = parent1[:cross_point] + parent2[cross_point:]
    child2 = parent2[:cross_point] + parent1[cross_point:]
    return child1, child2
    
def mutation(programs, num_mutation, p_value, allowed_nodes): # mutation
    """
    Produce mutated copies of randomly chosen programs.

    `num_mutation` attempts are made; each attempt succeeds with probability
    `p_value`. A successful attempt picks one program at random, copies it,
    and replaces one randomly chosen instruction of the copy with a random
    element of `allowed_nodes`.

    Parameters
    ----------
    programs : sequence of sequences
        Population to pick from. The programs themselves are never modified.
    num_mutation : int
        Number of mutation attempts.
    p_value : float
        Probability, in [0, 1], that an attempt produces a mutant.
    allowed_nodes : sequence
        Instructions that may be written into a program.

    Returns
    -------
    list of list
        The mutants (between 0 and `num_mutation` programs). Empty when
        `programs` is empty.
    """
    mutants = []
    if len(programs) == 0:
        return mutants
    for _ in range(num_mutation):
        if np.random.choice(2, p=[1 - p_value, p_value]) != 1:
            continue
        # Work on a copy so the caller's population is left untouched and
        # repeated picks of the same program do not alias each other.
        mutant = list(random.choice(programs))
        if not mutant:
            continue  # nothing to mutate in an empty program
        # randrange(len) covers every position, including the last one; the
        # former randrange(0, len - 1) skipped it and raised on length-1 programs.
        mutant[random.randrange(len(mutant))] = random.choice(allowed_nodes)
        mutants.append(mutant)
    return mutants

# Solve the task
This is the last step to our genetic algorithm. We have all the building blocks:
 * Generating both new programs and mutations of existing solutions
 * Evaluating the fitness score of a program
 * Comparing two programs to know if one performs better than the other
 * Detecting when a solution was found
 
We can now write a function that will keep generating programs with increasing complexity until a solution is found.
Using our partial order, we are going to keep the best candidates. Because the order is partial,
there is no bound on how many incomparable candidates we may have at a given iteration.

In [16]:
def _select_candidate_nodes(class_num, flg):
    """Return the `(lifted_nodes, plain_nodes)` instruction lists used for a task class."""
    if 1 <= class_num <= 4:
        # np.array -> [np.array]
        lifted_nodes = [three_diagonal_pattern, unique_center_to_edge]
        # [np.array] -> [np.array]
        plain_nodes = [
            tail, init, union, intersect, sortByColor, sortByWeight, reverse, cropToContent,
            groupByColor, negative, rotation, unrotation, flip, represent_only_inside,
        ]
        return lifted_nodes, plain_nodes

    if 5 <= class_num <= 7:
        if flg:
            # np.array -> [np.array]
            lifted_nodes = [pickup_objects]
            # [np.array] -> [np.array]
            plain_nodes = [
                find_element_with_smallest_noise_objects, find_element_with_largest_noise_objects,
                find_horizontal_asymmetric_objects, find_horizontal_symmetric_objects,
                find_vertical_asymmetric_objects, find_vertical_symmetric_objects,
                find_objects_with_largest_uniques, find_objects_with_smallest_uniques,
                find_unique_objects, find_same_objects,
            ]
            return lifted_nodes, plain_nodes

        # np.array -> [np.array]
        lifted_nodes = [
            splitH, splitV,
            overlapV, overlapH, split_quarter, crop_from_left, crop_from_top,
            copy_horizontal, copy_vertical,
            kronecker_negative_expansion, kronecker_maximum_position,
            kronecker_minimum_position, kronecker_expansion,
            expansion_fliplr, expansion_8directions, expansion_flipud, expand_twice, expand_three,
            expand_four, expand_by_unique_colors, expansion_negative_fliplr,
            expansion_negative_flipud,
            expansion_by_unique_num, clockwise_expansion,
            horizontal_combi_twice, horizontal_three_twice, horizontal_combi_five,
            vertical_combi_twice, vertical_combi_three, find_separation_lines, crop_inside,
            addition_by_size, make_copy_in_right_side,
        ]
        # [np.array] -> [np.array]
        plain_nodes = [
            tail, init, union, intersect, sortByColor, sortByWeight, reverse,
            cropToContent, groupByColor, negative, represent_only_inside,
            combine_into_one_piece, combine_into_one_piece2, combine_hole_objects,
            overlap_by_maximum_value, overlap_by_minimum_value, connect_inlist,
        ]
        return lifted_nodes, plain_nodes

    # Any other class
    # np.array -> [np.array]
    lifted_nodes = [
        overlapV, overlapH, crop_from_left, crop_from_top, copy_horizontal, copy_vertical,
        kronecker_expansion, kronecker_negative_expansion,
        kronecker_minimum_position, kronecker_maximum_position,
        expansion_negative_fliplr, expansion_negative_flipud, expansion_by_unique_num,
        clockwise_expansion, expansion_8directions, expansion_fliplr, expansion_flipud,
        expand_twice, expand_three, expand_four, expand_by_unique_colors,
        horizontal_combi_twice, horizontal_three_twice, horizontal_combi_five,
        vertical_combi_twice, vertical_combi_three,
        find_separation_lines, crop_inside, addition_by_size, make_copy_in_right_side,
    ]
    # [np.array] -> [np.array]
    plain_nodes = [
        tail, init, union, intersect, sortByColor, sortByWeight, reverse,
        cropToContent, groupByColor, negative, rotation, unrotation, flip,
        combine_into_one_piece, combine_into_one_piece2, combine_hole_objects,
        overlap_by_maximum_value, overlap_by_minimum_value,
    ]
    return lifted_nodes, plain_nodes


def build_model(task, class_num, coefficients, flg, generations=5, verbose=True):
    """
    Search for a program that solves `task`.

    For each generation, candidates are produced by `build_candidates` from
    the instruction pool of the task class, scored with `evaluate_fitness`,
    and merged into a `{score: program}` dictionary of retained candidates.
    The search stops as soon as a retained candidate satisfies `is_solution`.

    Parameters
    ----------
    task
        Task data handed to `evaluate_fitness` and `is_solution`.
    class_num
        Task class; selects the instruction pool (1-4, 5-7, or anything else).
    coefficients
        Weights forwarded to `evaluate_fitness`.
    flg
        For classes 5-7 only: when truthy, use the object-picking
        instruction pool instead of the general one.
    generations : int
        Maximum number of generations to run.
    verbose : bool
        Currently unused.

    Returns
    -------
    The first retained program accepted by `is_solution`, or `None` when no
    solution was found within `generations` generations.
    """
    lifted_nodes, plain_nodes = _select_candidate_nodes(class_num, flg)
    node_pool = lifted_nodes + plain_nodes

    retained = {}  # A dictionary of {score: candidate}
    for _ in range(generations):
        for candidate in build_candidates(node_pool, retained.values()):
            score = evaluate_fitness(candidate, task, coefficients)

            # The new candidate evicts every retained program it matches or beats...
            beaten_scores = [kept_score for kept_score in retained if score <= kept_score]
            for kept_score in beaten_scores:
                del retained[kept_score]
            # ...and is then stored under its own score.
            # NOTE(review): a candidate scoring worse than every retained score is
            # stored as well (nothing is evicted, but it is still added), so the
            # dictionary is not limited to the single best score. Confirm this is
            # intended.
            retained[score] = candidate

        # For each retained candidate, we look if we have an answer
        for program in retained.values():
            if is_solution(program, task):
                return program
    return None

In [17]:
def flattener(pred):
    """
    Serialise a grid (list of rows) into the submission string format.

    The rows are concatenated digit by digit and delimited by `|`, e.g.
    `[[1, 2], [3, 4]]` becomes `'|12|34|'`.
    """
    text = str(list(pred))
    # Order matters: separators are removed first, then brackets become pipes.
    substitutions = ((', ', ''), ('[[', '|'), ('][', '|'), (']]', '|'))
    for old, new in substitutions:
        text = text.replace(old, new)
    return text

# Load the submission template and index it by `output_id`, so that rows can
# be addressed by their output id.
sample_sub = pd.read_csv(data_path / 'sample_submission.csv').set_index('output_id')

In [18]:
# Select the task folder and its descriptive dataframe for the chosen mode.
mode = "eval"
if mode == 'eval':
    task_path = evaluation_path
    df = evaluation_descriptive_df
elif mode == 'train':
    task_path = training_path
    df = training_descriptive_df
elif mode == 'test':
    task_path = test_path
    df = test_descriptive_df
else:
    # Fail early with a clear message instead of a NameError on `task_path` below.
    raise ValueError(f"Unknown mode: {mode!r} (expected 'eval', 'train' or 'test')")

all_task_ids = sorted(os.listdir(task_path))
for i, task_name in enumerate(all_task_ids):
    flg = False
    task_id = Path(task_name).stem  # file name without its ".json" suffix
    task_file = str(task_path / task_name)
    class_num = df[df.task_name == task_name]["class"].values[0]
    with open(task_file, 'r') as f:
        task = json.load(f)

    # tile and symmetry check ---
    train_ans_for_tile_and_symmetry = []
    for t in task["train"]:
        t_in, t_out = np.array(t["input"]).astype('uint8'), np.array(t["output"]).astype('uint8')
        t_pred, feat = call_pred_train(t_in, t_out, patch_image)
        # array_equal also compares shapes, so a prediction of the wrong size
        # counts as a miss instead of being broadcast or raising.
        train_ans_for_tile_and_symmetry.append(np.array_equal(t_pred, t_out))
    # ---------------------------

    if np.all(train_ans_for_tile_and_symmetry): # tile and symmetry ------
        print("data no: " + str(i) + " class: " + str(class_num) + ", Found program: Tile and symmetry in "+str(task_file))
        if mode == 'test':
            for task_num in range(len(task["test"])):
                t_in = np.array(task["test"][task_num]["input"]).astype('uint8')
                # `feat` is the value returned for the last training pair above.
                t_pred = call_pred_test(t_in, patch_image, feat)
                t_pred = flattener(t_pred.astype(int).tolist())
                sample_sub.loc[f'{task_id}_{task_num}','output'] = t_pred

    else: # DSL + GA -----------------------------------------------------
        # Use the object-picking instruction pool when `pickup_objects` alone
        # passes `is_all_solution` on the training pairs.
        if is_all_solution([pickup_objects], task['train'], verbose=True):
            flg = True
        program = build_model(task['train'], class_num, coefficients, flg, verbose=True)
        if program is None:
            print("data no: " + str(i) + " class: " + str(class_num) + ", No program was found in "+str(task_file))
        else:
            print("data no: " + str(i) + " class: " + str(class_num) + ", Found program in "+str(task_file) + " " + str(program_desc(program)))
            if mode == 'test':
                for task_num in range(len(task["test"])):
                    images = evaluate(program, np.array(task['test'][task_num]["input"]))
                    images = images[:3] # The solution should be in the 3 first outputs
                    preds_list = [flattener(image.astype(int).tolist()) for image in images]
                    sample_sub.loc[f'{task_id}_{task_num}','output'] = ' '.join(preds_list)

data no: 0 class: 6, No program was found in /kaggle/input/abstraction-and-reasoning-challenge/evaluation/00576224.json
data no: 1 class: 4, No program was found in /kaggle/input/abstraction-and-reasoning-challenge/evaluation/009d5c81.json
data no: 2 class: 2, No program was found in /kaggle/input/abstraction-and-reasoning-challenge/evaluation/00dbd492.json
data no: 3 class: 4, No program was found in /kaggle/input/abstraction-and-reasoning-challenge/evaluation/03560426.json
data no: 4 class: 2, No program was found in /kaggle/input/abstraction-and-reasoning-challenge/evaluation/05a7bcf2.json
data no: 5 class: 4, No program was found in /kaggle/input/abstraction-and-reasoning-challenge/evaluation/0607ce86.json
data no: 6 class: 6, Found program in /kaggle/input/abstraction-and-reasoning-challenge/evaluation/0692e18c.json kronecker_negative_expansion_lifted
data no: 7 class: 2, No program was found in /kaggle/input/abstraction-and-reasoning-challenge/evaluation/070dd51e.json
data no: 8 

In [19]:
# Write the submission file, then preview its first rows.
sample_sub.to_csv(path_or_buf='submission.csv')
sample_sub.head(n=5)

Unnamed: 0_level_0,output
output_id,Unnamed: 1_level_1
00576224_0,|32|78| |32|78| |00|00|
009d5c81_0,|00000000000000|00000888888888|00000800080808|...
00dbd492_0,|00000000000222220000|02222222220200020000|020...
03560426_0,|0000000000|0000000000|0000000000|0000000000|0...
05a7bcf2_0,|000000000020000000080000000000|00000000002220...
