In [1]:
import csv, re
import numpy as np
import os
import json
from sklearn.preprocessing import normalize
import pickle
import math
from random import shuffle

## Read data

In [2]:
def parse_table_metadata(file_name="imdb-metadata.json"):
    f = open(file_name)
    columns_all = []
    tables_all = []
    for line in f:
        metadata = json.loads(line)
        table_name = metadata['name']
        columns = metadata['columns']
        tables_all.append((table_name, metadata['length'], metadata['size']))
        for k in columns.keys():
            columns_all.append((table_name + '.' + k, columns[k]))

    # normalize columns
    numerical = []
    minValues = []
    maxValues = []
    nullss = []
    distincts = []
    for _,i in columns_all:
        numerical.append(i['numerical'])
        minValues.append(i['minValue'])
        maxValues.append(i['maxValue'])
        nullss.append(i['nulls'])
        distincts.append(i['distinct'])
    minValues = normalize([minValues], norm="max")
    maxValues = normalize([maxValues], norm="max")
    nullss = normalize([nullss], norm="max")
    distincts = normalize([distincts], norm="max")

    normalizes_column = {}
    for i, (k, _) in enumerate(columns_all):
        if(numerical[i] == 1):
            normalizes_column[k] = np.array((numerical[i], minValues[0][i], maxValues[0][i], nullss[0][i], distincts[0][i]))
        else:
            normalizes_column[k] = np.array((numerical[i], 0, 1, nullss[0][i], distincts[0][i]))
            
    normalized_table = normalize([x[1:] for x in tables_all], axis=0)
    normalizes_table = {}
    for i in range(len(normalized_table)):
        normalizes_table[tables_all[i][0]] = np.array((normalized_table[i][0],normalized_table[i][1]))
    return (normalizes_column, normalizes_table)

normalizes_column, normalizes_table = parse_table_metadata("imdb-metadata.json")
table_column_dict = {}
for i in normalizes_column.keys():
    table_column_dict[i.split(".")[1]] = i.split(".")[0]
    
table_column_dict # column -> table

{'name': 'name',
 'name_pcode_nf': 'name',
 'name_pcode_cf': 'name',
 'person_id': 'person_info',
 'md5sum': 'title',
 'id': 'title',
 'imdb_index': 'title',
 'surname_pcode': 'name',
 'episode_of_id': 'title',
 'production_year': 'title',
 'episode_nr': 'title',
 'note': 'person_info',
 'phonetic_code': 'title',
 'kind_id': 'title',
 'title': 'title',
 'movie_id': 'movie_link',
 'season_nr': 'title',
 'nr_order': 'cast_info',
 'role_id': 'cast_info',
 'person_role_id': 'cast_info',
 'imdb_id': 'title',
 'kind': 'kind_type',
 'country_code': 'company_name',
 'name_pcode_sf': 'company_name',
 'subject_id': 'complete_cast',
 'status_id': 'complete_cast',
 'info': 'person_info',
 'keyword': 'keyword',
 'link': 'link_type',
 'company_id': 'movie_companies',
 'company_type_id': 'movie_companies',
 'info_type_id': 'person_info',
 'keyword_id': 'movie_keyword',
 'linked_movie_id': 'movie_link',
 'link_type_id': 'movie_link',
 'gender': 'name',
 'role': 'role_type',
 'series_years': 'title'}

In [3]:
condition_dim = 20

data_dirs = ['../../imdb-6-16-2-3-48-one-round-1/', '../../imdb-6-16-2-3-48-one-round-missing-1/','../../imdb-6-16-2-3-48-one-round-2/', '../../imdb-6-16-2-3-48-one-round-missing-2/']
dir_resources = [{'n_executor': 6, 'g_mem': 16, 'n_core':2, 'n_worker':3, 'parallelism': 48},{'n_executor': 6, 'g_mem': 16, 'n_core':2, 'n_worker':3, 'parallelism': 48},\
{'n_executor': 6, 'g_mem': 16, 'n_core':2, 'n_worker':3, 'parallelism': 48},{'n_executor': 6, 'g_mem': 16, 'n_core':2, 'n_worker':3, 'parallelism': 48},\
    {'n_executor': 6, 'g_mem': 16, 'n_core':2, 'n_worker':3, 'parallelism': 48},{'n_executor': 6, 'g_mem': 16, 'n_core':2, 'n_worker':3, 'parallelism': 48}]
#{'n_executor': 6, 'g_mem': 16, 'n_core':2, 'n_worker':3, 'parallelism': 48},
#{'n_executor': 6, 'g_mem': 16, 'n_core':4, 'n_worker':3, 'parallelism': 96}]

query_ids = []
plan_ids = []
physical_plans = []
times = []
resources = []
group_idxs = [] # record the dir that a data is from

valid = [] # to check if the query is successfully executed, otherwise assign it a time of 1.5 * max
for idx, data_dir in enumerate(data_dirs):
    for file in os.listdir(data_dir):
        csvreader = csv.reader(open(data_dir + file))
        for row in csvreader:
            if len(row) > 3:
                valid.append((row[0], row[1]))

for idx, data_dir in enumerate(data_dirs):
    r = re.compile(r'.*?([a-zA-Z].*)')
    for file in os.listdir(data_dir):
        csvreader = csv.reader(open(data_dir + file))
        for row in csvreader:
            if len(row) > 3:
                physical_plans.append(row[4])
                query_ids.append(row[0])
                plan_ids.append(row[1])
                times.append(row[3])        
                resources.append(dir_resources[idx])
                group_idxs.append(idx)
            else:
                if not (row[0], row[1]) in valid:
                    print(row[0], row[1])
                    physical_plans.append(row[2])
                    query_ids.append(row[0])
                    plan_ids.append(row[1])
                    times.append(-1)        
                    resources.append(dir_resources[idx])
                    group_idxs.append(idx)                   

physical_plans = [[i for i in p.split("\n") if not i.startswith("==")] for p in physical_plans]

print("=== Number of valid physical plans: {}".format(len(valid)))
print("=== Number of physical plans: {}".format(len(physical_plans)))

zipped = list(zip(physical_plans, query_ids, plan_ids, times, resources, group_idxs))

d = {}
for i in zipped:
    d[(i[1], i[2])] = i 

trains = []
vals= []
tests = []
dir = 'imdb-sets/'

with open(dir+'train.txt','r') as f:
    for i in f.readlines():
        q_id = i.strip().split(" ")[0]
        p_id = i.strip().split(" ")[1]
        trains.append(d[(q_id, p_id)])

with open(dir+'val.txt','r') as f:
    for i in f.readlines():
        q_id = i.strip().split(" ")[0]
        p_id = i.strip().split(" ")[1]
        vals.append(d[(q_id, p_id)])
        
with open(dir+'test.txt','r') as f:
    for i in f.readlines():
        q_id = i.strip().split(" ")[0]
        p_id = i.strip().split(" ")[1]
        tests.append(d[(q_id, p_id)])

zipped = trains + vals + tests 
physical_plans, query_ids, plan_ids, times, resources, group_idxs = list(zip(*zipped))

4-0 49
4-6 35
4-6 54
4-17 51
4-48 33
4-6 46
4-3 48
4-6 58
4-6 34
4-22 32
4-6 35
4-44 51
4-6 51
4-55 33
4-44 48
4-6 33
4-6 16
4-0 33
4-44 35
4-22 40
3-29 25
4-6 63
4-48 50
4-22 56
3-29 17
4-22 41
4-44 32
4-47 40
=== Number of valid physical plans: 1756
=== Number of physical plans: 1784


In [4]:
# zipped = list(zip(physical_plans, query_ids, plan_ids, times, resources, group_idxs))
# zipped.sort(key=lambda x: int(x[3]))
# print(zipped[0][3])


# train = []
# val = []
# test = []

# idx = 0

# while idx + 10 < len(zipped):
#     chunk = zipped[idx:idx+10]
#     shuffle(chunk)
#     train = train + [(i[1],i[2]) for i in chunk[:8]]
#     val.append((chunk[8][1],chunk[8][2]))
#     test.append((chunk[9][1],chunk[9][2]))
#     idx += 10

# train = train + [(i[1],i[2]) for i in zipped[idx:]]

# # for idx, i in enumerate(zipped):
# #     if idx%5 == 0:
# #         val.append((i[1], i[2]))
# #     elif idx%2==0:
# #         test.append((i[1], i[2]))
# #     else:
# #         train.append((i[1], i[2]))
        
# dir = 'imdb-sets/'

# with open(dir+'train.txt','w') as f:
#     for i in train:
#         f.write(str(i[0]) + ' '+str(i[1])+'\n')
# with open(dir+'val.txt','w') as f:
#     for i in val:
#         f.write(str(i[0]) + ' '+str(i[1])+'\n')
# with open(dir+'test.txt','w') as f:
#     for i in test:
#         f.write(str(i[0]) + ' '+str(i[1])+'\n')
        


In [5]:
for i, e in enumerate(times):
    if e != -1:
        print(i)
        break
print("== Example:")
print("Query Id: {}".format(query_ids[i]))
print("Plan Id: {}".format(plan_ids[i]))
print("Time： {}".format(times[i]))
print("Resources: {}".format(resources[i]))
print("Group Idx: {}".format(group_idxs[i]))
for e in physical_plans[i]:
    print(e)


16
== Example:
Query Id: 1-37
Plan Id: 0
Time： 7155
Resources: {'n_executor': 6, 'g_mem': 16, 'n_core': 2, 'n_worker': 3, 'parallelism': 48}
Group Idx: 0
HashAggregate (10)
+- HashAggregate (9)
   +- Project (8)
      +- BroadcastHashJoin Inner BuildLeft (7)
         :- Project (3)
         :  +- Filter (2)
         :     +- Scan csv  (1)
         +- Project (6)
            +- Filter (5)
               +- Scan csv  (4)


(1) Scan csv 
Output [3]: [id#192, kind_id#195, production_year#196]
Batched: false
Location: InMemoryFileIndex [hdfs://11.167.227.34:9000/datasets/imdb/title.csv]
PushedFilters: [IsNotNull(kind_id), IsNotNull(production_year), LessThan(kind_id,7), EqualTo(production_year,2006), IsNotNull(id)]
ReadSchema: struct<id:int,kind_id:int,production_year:int>

(2) Filter
Input [3]: [id#192, kind_id#195, production_year#196]
Condition : ((((isnotnull(kind_id#195) AND isnotnull(production_year#196)) AND (kind_id#195 < 7)) AND (production_year#196 = 2006)) AND isnotnull(id#192))


In [6]:
def split_plan(operations): # operations: list of string
    # split skeleton and details
    skeleton = []
    detail = []
    flag = False
    for o in operations:
        if(len(o) > 0):
            if(o[0] == "("):
                flag = True
            if(flag == False):
                skeleton.append(o)
            else:
                detail.append(o)
    
    return skeleton, detail

class TreeNode:
  def __init__(self, value, idx):
    self.value = value 
    self.children = [] 
    self.idx = idx
 
  def add_child(self, child_node):
    self.children.append(child_node) 

  def __repr__(self, level=0):
    ret = "  "*level+repr(self.value)+"\n"
    for child in self.children:
        ret += child.__repr__(level+1)
    return ret

# build a tree from the strings  
def parse_skeleton(skeleton):
    nodes = []
    for (i, o) in enumerate(skeleton):
        # each line is a node, find its parent by back-tracking ":"
        level = 0
        for (j, l) in enumerate(o):
            if(l == "-"):
                level = j
                break
        # construct node
        name = re.sub(r"[:* +-]", '', o)
        idx = int(name.split("(")[-1][:-1]) - 1 # the index starts from 1, make it start from 0
        name = name.split("(")[0]
        node = TreeNode(name, idx)
        # find its parent
        if(o.strip(" :").startswith("+-")):
            x = i-1
            for x in range(i-1, 0, -1):
                if(skeleton[x][level-1] != ":"):
                    break
            nodes[x].add_child(node)          
        elif(o.strip(" :").startswith("-")):
            nodes[-1].add_child(node)
        # add node to nodes
        nodes.append(node)
    return nodes[0], nodes # the root of the tree and a list of all nodes

# generate the structure matrix
def gen_struct_matrix(nodes):
    nodes_len = len(nodes)
    matrix = np.zeros([nodes_len, nodes_len])
    for node in nodes:
        for child in node.children:
            matrix[node.idx, child.idx] = 1 # is parent
            matrix[child.idx, node.idx] = -1 # is child
    return matrix

In [7]:
for i in physical_plans:
    for j in i:
        if "Subqueries" in j:
            print(i)
            break

In [8]:
max([int(i) for i in times])

266383

## Parse physical plans

In [9]:
from torchtext.vocab import vocab
from collections import Counter, OrderedDict

class Operation:
    def __init__(self, s):
        idx, operator = self.get_idx_operator(s)
        self.idx = idx
        self.operator = operator
        self.auxi = self.get_auxi(s)
    def __repr__(self):
        return "({}) {} \n  {}\n".format(self.idx, self.operator, self.auxi)
    def get_idx_operator(self, s):
        idx = int(s[0].split(")")[0][1:])
        operator = s[0].split(")")[1].split("[")[0].strip()
        return idx, operator
    def get_auxi(self, s):
        auxi_dict = {}
        for i in s[1:]:
            k = re.sub(r'[^a-zA-Z]', '', i.split(":")[0])
            v = ":".join(i.split(":")[1:]).strip() 
            v = re.sub(r'#[0-9]+', '', v.replace("[","").replace("]","")) # remove "#xx" and "[]"
            if k!="Condition": 
                v = v.split(", ") # take the key words
            else: v = [v]
            auxi_dict[k] = v
            
        return auxi_dict
    
def gen_operator_vocab(operations):
    operators = []
    for o in operations:
        operators += [i.operator for i in o]
    counter = Counter(operators)
    sorted_by_freq_tuples = sorted(counter.items(), key=lambda x: x[1], reverse=True)
    ordered_dict = OrderedDict(sorted_by_freq_tuples)
    v = vocab(ordered_dict, specials=["<unk>"])
    v.set_default_index(v["<unk>"])
    return v

def gen_detail_vocab(operations):
    words = []
    for o in operations:
        for s in o:
            for k,v in s.auxi.items():
                words.append(k)
                for x in v:
                    words.append(x)
    counter = Counter(words)
    sorted_by_freq_tuples = sorted(counter.items(), key=lambda x: x[1], reverse=True)
    ordered_dict = OrderedDict(sorted_by_freq_tuples)
    v = vocab(ordered_dict, specials=["<unk>"])
    v.set_default_index(v["<unk>"])
    return v

def gen_all_vocab(operations):
    words = []
    for o in operations:
        words += [i.operator for i in o]
        for s in o:
            for k,v in s.auxi.items():
                words.append(k)
                for x in v:
                    words.append(x)
    counter = Counter(words)
    sorted_by_freq_tuples = sorted(counter.items(), key=lambda x: x[1], reverse=True)
    ordered_dict = OrderedDict(sorted_by_freq_tuples)
    v = vocab(ordered_dict, specials=["<unk>"])
    v.set_default_index(v["<unk>"])
    return v

# parse details
def parse_detail(detail):
    operations = [[detail[0]]]
    parsed_operations = []
    for line in detail[1:]:
        if(line.startswith("(")):
            operations.append([line])
        else:
            last = operations[-1]
            operations[:-1].append(last.append(line))
    for operation in operations:
        parsed_operations.append(Operation(operation))
    return parsed_operations # return a list of operations, each of which contains operator and auxiliary info

# operation to words
def op_to_vector(operation, v):
#     debug = [operation.operator]
    words = [v.lookup_indices([operation.operator])]
    for i in operation.auxi.keys():
        words.append(v.lookup_indices([str(i)]))
        words.append(v.lookup_indices(operation.auxi[i]))
#         debug.append(i)
#         debug.append(operation.auxi[i])
#     print(debug)
    words = [item for sublist in words for item in sublist]
    return words

In [10]:
# test all
# take the information of each operator (not formatted)
structures = []
details = []
skeleton_lens = []
for plan in physical_plans:
    skeleton, detail = split_plan(plan)
    # print(skeleton)
    _, nodes = parse_skeleton(skeleton)
    skeleton_lens.append(len(skeleton))
    structures.append(gen_struct_matrix(nodes))
    details.append(parse_detail(detail))
print(len(structures))
# print(structures[-1])
print(len(details))
# print(details[-1])

v = gen_operator_vocab(details)
all_v = gen_all_vocab(details)
print("operator vocab length: {}".format(len(v)))
print(v.get_itos())
print("all vocab length: {}".format(len(all_v)))
from copy import deepcopy
details_copy =deepcopy(details)

print(min(skeleton_lens))
print(max(skeleton_lens))

1784
1784
operator vocab length: 8
['<unk>', 'Project', 'Scan csv', 'Filter', 'BroadcastHashJoin', 'SortMergeJoin', 'HashAggregate', 'SortAggregate']
all vocab length: 892
10
25


In [11]:
# test the structure
a = zip(structures, details)
for i in a:
    if 10 < len(i[0]) < 20:
        print(i)
        break

(array([[ 0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 1.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  1.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  1.,  0., -1.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0., -1.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0., -1.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0., -1.,  0.,
      

In [12]:
# extract key information and reformat it
# key information: table, column, predicate
class Condition: 
    def __init__(self, column, operator, operand):
        self.column = column
        self.operator = operator
        self.operand = operand
    def __repr__(self):
        return "Condition <{} {} {}>".format(self.column, self.operator, self.operand)

def reformat_scanparquet(operation):
    auxi = operation.auxi
    table = auxi["Location"][0].split("/")[-1]
    columns = auxi["Output"]
    columns_w_table = [table+"."+c for c in columns]
    new_auxi = {}
    new_auxi["Table"] = [table]
    new_auxi["Columns"] = columns_w_table
    conditions = []
    if auxi.get("PushedFilters"):
        for i in auxi["PushedFilters"]:
            if len(i.split("(")) > 1:
                operator = i.split("(")[0]
                objects = i.split("(")[1][:-1].split(",")
                o = table+"."+objects[0]
                if(len(objects) > 1):
                    operand = objects[1]
                    if(operand.isnumeric()):
                        operand = float(operand)
                    elif(operand in columns):
                        operand = table+"."+operand
                    else:
                        pass
                        # print("unsupported operand: {}".format(operand))
                else: 
                    operand = ''
                conditions.append(Condition(o, operator, operand))
        # new_auxi["Condition"] = conditions
        new_auxi["Condition"] = []
        for i in auxi["PushedFilters"]:
            new_auxi["Condition"] += i.split(", ")
    operation.auxi = new_auxi
    return operation

def reformat_scancsv(operation):
    auxi = operation.auxi
    table = auxi["Location"][0].split("/")[-1].split(".")[0]
    columns = [i.replace("L","") for i in auxi["Output"]]
    columns_w_table = [table+"."+c for c in columns]
    new_auxi = {}
    new_auxi["Table"] = [table]
    new_auxi["Columns"] = columns_w_table
    conditions = []
    
    if auxi.get('PushedFilters'):
        # hard code for wrongly split
        pushedfilters = []
        i = 0
        while i < len(auxi["PushedFilters"]):
            e = auxi["PushedFilters"][i]
            if e.startswith("In"): 
                pushedfilters.append(e+","+auxi["PushedFilters"][i+1])
                i += 2
            else: 
                pushedfilters.append(e)
                i += 1
                
        # print(pushedfilters)
        for i in pushedfilters:
            operator = i.split("(")[0]
            objects = i.split("(")[1][:-1].split(",")
            o = table+"."+objects[0]
            if(len(objects) > 1):
                operand = objects[1]
                if(operand.isnumeric()):
                    operand = float(operand)
                elif(operand in columns):
                    operand = table+"."+operand
                else:
                    pass
                    # print("unsupported operand: {}".format(operand))
            else: 
                operand = ''
            conditions.append(Condition(o, operator, operand))
        # new_auxi["Condition"] = conditions
        new_auxi["Condition"] = []
        for i in auxi["PushedFilters"]:
            new_auxi["Condition"] += i.split(", ")
    operation.auxi = new_auxi
    return operation

def reformat_logicalrelation(operation, parents):
    auxi = operation.auxi
    columns = [i for i in auxi['Arguments'] if not i in ['parquet', 'true', 'false']]
    columns = [i[:-1] for i in columns if i.endswith("L")]
    tables = [table_column_dict[i] for i in columns]
    auxi['Table'] = list(dict.fromkeys(tables))
    auxi['Columns'] = [table_column_dict[i]+'.'+i for i in columns]
    return operation

def reformat_filter(operation, parents):
    parent = parents[0]
    table = parent.auxi["Table"]
    auxi = operation.auxi
    auxi["Table"] = table
    if auxi.get('Input'):
        columns = [i.replace("L","") for i in auxi["Input"]]
        auxi['Columns'] = [table[0]+"."+i for i in columns]
    else:
        auxi['Columns'] = parent.auxi["Columns"]
    # todo parse conditions
    if auxi.get("Condition"):
        auxi['Condition'] = [i.replace("L", "") for i in auxi['Condition']]
    else:
        auxi['Condition'] = []
    return operation

def reformat_project(operation, parents): 
    # project acts like glue - it records the tables and comlumns of the ancestors 
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = []
    output =  auxi["Output"] if auxi.get("Output") else auxi["Arguments"]
    for i in output:
        for j in columns:
            if i.replace("L", "") in j:
                auxi['Columns'].append(j)
                break
    return operation

def reformat_exchange(operation, parents):
    parent = parents[0]
    table = parent.auxi["Table"]
    auxi = operation.auxi
    auxi["Table"] = table
    auxi['Columns'] = [table[0]+"."+i.strip('L') for i in auxi["Input"]]
    return operation

def reformat_reusedexchange(operation, parents):
    auxi = operation.auxi
    columns = [i.strip('L') for i in auxi["Output"]]
    tables = [table_column_dict[i] for i in columns]
    auxi['Table'] = list(dict.fromkeys(tables))
    auxi['Columns'] = [table_column_dict[i]+'.'+i for i in columns]
    return operation

def reformat_hashagg(operation, parents):
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = columns
    auxi['Condition'] = [i.replace("L", "") for i in [" AND ".join([i for i in \
        auxi['Functions'] + auxi['AggregateAttributes']])]]
    return operation    

def reformat_sortagg(operation, parents):
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = columns
    auxi['Condition'] = [i.replace("L", "") for i in [" AND ".join([i for i in \
        auxi['Functions'] + auxi['AggregateAttributes']])]]
    return operation      

def reformat_agg(operation, parents):
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = columns
    auxi['Condition'] = []
    return operation  

def reformat_sort(operation, parents):
    # print(parents)
    parent = parents[0]
    table = parent.auxi["Table"]
    auxi = operation.auxi
    columns = [i.replace("L","") for i in auxi["Input"]]
    auxi["Table"] = table
    auxi['Columns'] = [table[0]+"."+i for i in columns]
    auxi["Condition"] = [" AND ".join(auxi['Arguments'])]
    return operation  

def reformat_smjoin(operation, parents):
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = []
    for i in auxi["Leftkeys"] + auxi["Rightkeys"]:
        for j in columns:
            if i.replace("L","") in j:
                auxi['Columns'].append(j)
    return operation

def reformat_join(operation, parents):  
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = []
    c = [i.replace("L","").replace("(","").replace(")","") for i in auxi['Arguments'][1].split(" = ")] # todo: currently hardcoded
    for i in c:
        for j in columns:
            if i in j:
                auxi['Columns'].append(j)
                break
    return operation 
    
def reformat_bchjoin(operation, parents):
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = []
    for i in auxi["Leftkeys"] + auxi["Rightkeys"]:
        i = i.replace("L", "")
        for j in columns:
            if i in j:
                auxi['Columns'].append(j)
                break
    return operation 

def reformat_schjoin(operation, parents):
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = []
    for i in auxi["Leftkeys"] + auxi["Rightkeys"]:
        i = i.replace("L", "")
        for j in columns:
            if i in j:
                auxi['Columns'].append(j)
                break
    return operation 

def reformat_cartesianproduct(operation, parents):
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = []
    c = [i.strip() for i in parent.auxi["Columns"][0].replace("L","").replace("(","").replace(")","").split("=")]
    for i in c:
        for j in columns:
            if i in j:
                auxi['Columns'].append(j)
                break
    return operation

def reformat_bcexchange(operation, parents):
    tables = []
    columns = []
    for parent in parents:
        tables += parent.auxi["Table"]
        columns += parent.auxi["Columns"]
    auxi = operation.auxi
    auxi["Table"] = tables
    auxi['Columns'] = []
    for i in auxi["Input"]:
        for j in columns:
            if i in j:
                auxi['Columns'].append(j)
                break
    return operation


def reformat(operation, structure):
    parent_indices = [i + 1 for i, x in enumerate(structure[operation.idx - 1]) if x == 1]
    parent_operations = [operation_dict[i] for i in parent_indices]

    if(operation.operator) == "Scan csv":
        reformat_scancsv(operation)
    elif(operation.operator) == "Scan parquet":
        reformat_scanparquet(operation)
    elif(operation.operator) == "Filter":
        reformat_filter(operation, parent_operations)
    elif(operation.operator) == "Project":
        reformat_project(operation, parent_operations)
    elif(operation.operator) == "Exchange":
        reformat_exchange(operation, parent_operations)
    elif(operation.operator) == "ReusedExchange":
        reformat_reusedexchange(operation, parent_operations)
    elif(operation.operator) == "HashAggregate":
        reformat_hashagg(operation, parent_operations)
    elif(operation.operator) == "SortAggregate":
        reformat_sortagg(operation, parent_operations)
    elif(operation.operator) == "Aggregate":
        reformat_agg(operation, parent_operations)
    elif(operation.operator) == "Sort":
        reformat_sort(operation, parent_operations)
    elif(operation.operator) == "SortMergeJoin":
        reformat_smjoin(operation, parent_operations)
    elif(operation.operator) == "BroadcastExchange":
        reformat_bcexchange(operation, parent_operations)
    elif(operation.operator) == "BroadcastHashJoin":
        reformat_bchjoin(operation, parent_operations)
    elif(operation.operator) == "ShuffledHashJoin":
        reformat_schjoin(operation, parent_operations)
    elif(operation.operator) == "LogicalRelation":
        reformat_logicalrelation(operation, parent_operations)
    elif(operation.operator) == "Join":
        reformat_join(operation, parent_operations)
    elif(operation.operator) == "CartesianProduct":
        reformat_cartesianproduct(operation, parent_operations)       
    else:
        print("Unseen operation: {}".format(operation.operator))
        print(operation)
        

details =deepcopy(details_copy)
for i, o in enumerate(details):
    print(i, o)
    print(structures[i])
    operation_dict = {}
    for d in o:
        # print(d)
        operation_dict[d.idx] = d # cannot combine the two, otherwise cannot find parents sometimes
    for d in o:
        reformat(d, structures[i])

0 [(1) Scan csv 
  {'Output': ['id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(id)'], 'ReadSchema': ['struct<id:int>']}
, (2) Filter 
  {'Input': ['id'], 'Condition': ['isnotnull(id)']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id']}
, (4) Scan csv 
  {'Output': ['movie_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int>']}
, (5) Filter 
  {'Input': ['movie_id'], 'Condition': ['isnotnull(movie_id)']}
, (6) Project 
  {'Output': ['movie_id'], 'Input': ['movie_id']}
, (7) BroadcastHashJoin 
  {'Leftkeys': ['id'], 'Rightkeys': ['movie_id'], 'Joincondition': ['None']}
, (8) Project 
  {'Output': ['id'], 'Input': ['id', 'movie_id']}
, (9) Scan csv 
  {'Output': ['movie_id', 'role_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs:/

 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]]
90 [(1) Scan csv 
  {'Output': ['id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(id)'], 'ReadSchema': ['struct<id:int>']}
, (2) Filter 
  {'Input': ['id'], 'Condition': ['isnotnull(id)']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id']}
, (4) Scan csv 
  {'Output': ['movie_id', 'info_type_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_info.csv'], 'PushedFilters': ['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,98)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,info_type_id:int>']}
, (5) Filter 
  {'Input': ['movie_id', 'info_type_id'], 'Condition': ['((isnotnull(info_type_id) AND (info_type_id > 98)) AND isnotnull(movie_id))']}
, (6) Project 
  {'Output': ['movie_id'], 'Input': ['movie_id', 'info_type_id']}
, (7) SortMergeJoin 
  {'Leftkeys': ['id'

 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]]
194 [(1) Scan csv 
  {'Output': ['id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(id)'], 'ReadSchema': ['struct<id:int>']}
, (2) Filter 
  {'Input': ['id'], 'Condition': ['isnotnull(id)']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id']}
, (4) Scan csv 
  {'Output': ['movie_id', 'info_type_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_info.csv'], 'PushedFilters': ['IsNotNull(info_type_id)', 'LessThan(info_type_id,15)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,info_type_id:int>']}
, (5) Filter 
  {'Input': ['movie_id', 'info_type_id'], 'Condition': ['((isnotnull(info_type_id) AND (info_type_id < 15)) AND isnotnull(movie_id))']}
, (6) Project 
  {'Output': ['movie_id'], 'Input': ['movie_id', 'info_type_id']}
, (7) BroadcastHashJoin 
  {'Leftkeys': ['i

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]]
338 [(1) Scan csv 
  {'Output': ['id', 'production_year'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(production_year)', 'EqualTo(production_year,1990)', 'IsNotNull(id)'], 'ReadSchema': ['struct<id:int,production_year:int>']}
, (2) Filter 
  {'Input': ['id', 'production_year'], 'Condition': ['((isnotnull(production_year) AND (production_year = 1990)) AND isnotnull(id))']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id', 'production_year']}
, (4) Scan csv 
  {'Output': ['movie_id', 'company_type_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(company_type_id)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,company_type_id:int>']}
, (5) Filter 
  {'Input': ['movie_id', 'company_type_i

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

   1.  0.]]
527 [(1) Scan csv 
  {'Output': ['id', 'kind_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(kind_id)', 'EqualTo(kind_id,7)', 'IsNotNull(id)'], 'ReadSchema': ['struct<id:int,kind_id:int>']}
, (2) Filter 
  {'Input': ['id', 'kind_id'], 'Condition': ['((isnotnull(kind_id) AND (kind_id = 7)) AND isnotnull(id))']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id', 'kind_id']}
, (4) Scan csv 
  {'Output': ['movie_id', 'company_type_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(company_type_id)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,company_type_id:int>']}
, (5) Filter 
  {'Input': ['movie_id', 'company_type_id'], 'Condition': ['((isnotnull(company_type_id) AND (company_type_id < 2)) AND isnotnull(movie_id))']}
, (6) Project 
  {

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

   1.  0.]]
612 [(1) Scan csv 
  {'Output': ['id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(id)'], 'ReadSchema': ['struct<id:int>']}
, (2) Filter 
  {'Input': ['id'], 'Condition': ['isnotnull(id)']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id']}
, (4) Scan csv 
  {'Output': ['movie_id', 'info_type_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_info.csv'], 'PushedFilters': ['IsNotNull(info_type_id)', 'LessThan(info_type_id,15)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,info_type_id:int>']}
, (5) Filter 
  {'Input': ['movie_id', 'info_type_id'], 'Condition': ['((isnotnull(info_type_id) AND (info_type_id < 15)) AND isnotnull(movie_id))']}
, (6) Project 
  {'Output': ['movie_id'], 'Input': ['movie_id', 'info_type_id']}
, (7) BroadcastHashJoin 
  {'Leftkeys': ['id'], 'Rightkeys': ['movie_id'], 'Joincondition': ['N

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

   0.  0.  0.  0.  0.  1.  0.]]
978 [(1) Scan csv 
  {'Output': ['id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(id)'], 'ReadSchema': ['struct<id:int>']}
, (2) Filter 
  {'Input': ['id'], 'Condition': ['isnotnull(id)']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id']}
, (4) Scan csv 
  {'Output': ['movie_id', 'company_id', 'company_type_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,525)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,company_id:int,company_type_id:int>']}
, (5) Filter 
  {'Input': ['movie_id', 'company_id', 'company_type_id'], 'Condition': ['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id > 525)) AND (company_type_id < 2)) AND isno

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

   1.  0.]]
1062 [(1) Scan csv 
  {'Output': ['id', 'kind_id', 'production_year'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,2004)', 'IsNotNull(id)'], 'ReadSchema': ['struct<id:int,kind_id:int,production_year:int>']}
, (2) Filter 
  {'Input': ['id', 'kind_id', 'production_year'], 'Condition': ['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 2004)) AND isnotnull(id))']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id', 'kind_id', 'production_year']}
, (4) Scan csv 
  {'Output': ['movie_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int>']}
, (5) Filter 
  {'Input': ['movie_id'], 'Condition': ['isnotnull(movie_

]
[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.

1113 [(1) Scan csv 
  {'Output': ['id', 'kind_id', 'production_year'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,1)', 'LessThan(production_year,2012)', 'IsNotNull(id)'], 'ReadSchema': ['struct<id:int,kind_id:int,production_year:int>']}
, (2) Filter 
  {'Input': ['id', 'kind_id', 'production_year'], 'Condition': ['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 1)) AND (production_year < 2012)) AND isnotnull(id))']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id', 'kind_id', 'production_year']}
, (4) Scan csv 
  {'Output': ['movie_id', 'company_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(company_id)', 'GreaterThan(company_id,7851)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,company_id:int>

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

   1.  0.]]
1238 [(1) Scan csv 
  {'Output': ['id', 'production_year'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(production_year)', 'EqualTo(production_year,1958)', 'IsNotNull(id)'], 'ReadSchema': ['struct<id:int,production_year:int>']}
, (2) Filter 
  {'Input': ['id', 'production_year'], 'Condition': ['((isnotnull(production_year) AND (production_year = 1958)) AND isnotnull(id))']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id', 'production_year']}
, (4) Scan csv 
  {'Output': ['movie_id', 'company_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(company_id)', 'GreaterThan(company_id,12740)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,company_id:int>']}
, (5) Filter 
  {'Input': ['movie_id', 'company_id'], 'Condition': ['((isnotnull(company_id) AND (company_id > 12740))

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

   1.  0.]]
1350 [(1) Scan csv 
  {'Output': ['id', 'production_year'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(production_year)', 'GreaterThan(production_year,1963)', 'IsNotNull(id)'], 'ReadSchema': ['struct<id:int,production_year:int>']}
, (2) Filter 
  {'Input': ['id', 'production_year'], 'Condition': ['((isnotnull(production_year) AND (production_year > 1963)) AND isnotnull(id))']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id', 'production_year']}
, (4) Scan csv 
  {'Output': ['movie_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int>']}
, (5) Filter 
  {'Input': ['movie_id'], 'Condition': ['isnotnull(movie_id)']}
, (6) Project 
  {'Output': ['movie_id'], 'Input': ['movie_id']}
, (7) SortMergeJoin 
  {'Leftkeys': ['id'], 'Rightkeys': [

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

   0.  0.  0.  0.  0.  1.  0.]]
1417 [(1) Scan csv 
  {'Output': ['id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(id)'], 'ReadSchema': ['struct<id:int>']}
, (2) Filter 
  {'Input': ['id'], 'Condition': ['isnotnull(id)']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id']}
, (4) Scan csv 
  {'Output': ['movie_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/cast_info.csv'], 'PushedFilters': ['IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int>']}
, (5) Filter 
  {'Input': ['movie_id'], 'Condition': ['isnotnull(movie_id)']}
, (6) Project 
  {'Output': ['movie_id'], 'Input': ['movie_id']}
, (7) BroadcastHashJoin 
  {'Leftkeys': ['id'], 'Rightkeys': ['movie_id'], 'Joincondition': ['None']}
, (8) Project 
  {'Output': ['id'], 'Input': ['id', 'movie_id']}
, (9) Scan csv 
  {'Output': ['movie_id'], 'Batched': ['false'], 'Location': ['InMemo

   1.  0.]]
1485 [(1) Scan csv 
  {'Output': ['id', 'kind_id', 'production_year'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,1992)', 'IsNotNull(id)'], 'ReadSchema': ['struct<id:int,kind_id:int,production_year:int>']}
, (2) Filter 
  {'Input': ['id', 'kind_id', 'production_year'], 'Condition': ['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 1992)) AND isnotnull(id))']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id', 'kind_id', 'production_year']}
, (4) Scan csv 
  {'Output': ['person_id', 'movie_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/cast_info.csv'], 'PushedFilters': ['IsNotNull(person_id)', 'LessThan(person_id,1816925)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<person_id:int,movie_id:int

   0.  0.  0.  0.  0.  1.  0.]]
1540 [(1) Scan csv 
  {'Output': ['id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(id)'], 'ReadSchema': ['struct<id:int>']}
, (2) Filter 
  {'Input': ['id'], 'Condition': ['isnotnull(id)']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id']}
, (4) Scan csv 
  {'Output': ['movie_id', 'company_id', 'company_type_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,11323)', 'EqualTo(company_type_id,2)', 'IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int,company_id:int,company_type_id:int>']}
, (5) Filter 
  {'Input': ['movie_id', 'company_id', 'company_type_id'], 'Condition': ['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id > 11323)) AND (company_type_id = 2)) AND 

[[ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  1.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0. -1.  0.  0.  

   1.  0.]]
1643 [(1) Scan csv 
  {'Output': ['id', 'kind_id', 'production_year'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/title.csv'], 'PushedFilters': ['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,2011)', 'IsNotNull(id)'], 'ReadSchema': ['struct<id:int,kind_id:int,production_year:int>']}
, (2) Filter 
  {'Input': ['id', 'kind_id', 'production_year'], 'Condition': ['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 2011)) AND isnotnull(id))']}
, (3) Project 
  {'Output': ['id'], 'Input': ['id', 'kind_id', 'production_year']}
, (4) Scan csv 
  {'Output': ['movie_id'], 'Batched': ['false'], 'Location': ['InMemoryFileIndex hdfs://11.167.227.34:9000/datasets/imdb/movie_companies.csv'], 'PushedFilters': ['IsNotNull(movie_id)'], 'ReadSchema': ['struct<movie_id:int>']}
, (5) Filter 
  {'Input': ['movie_id'], 'Condition': ['isnotnull(movie_

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [13]:
# test
print(len(details))
for d in details:
    for i in d:
        if i.auxi.get('Columns'):
            c = i.auxi['Columns']
            for j in c:
                if 'L' in j:
                    print(j)

1784


## Word2Vec for predicate keywords

In [14]:
# ## trying to represent a condition as a tree and use max/min pooling on and/or
# ## is it good? info vanishing? previously was averaging vectors for each word

# # A generic class from which all operators derive
# class Node(list):
#     @property
#     def label(self):
#         return self.__class__.__name__

#     def __repr__(self):
#         return self.label + super().__repr__()

# # Subclass for each operator: no additional logic is needed 
# class AND(Node): pass
# class OR(Node): pass

# def parse(s):
#     try:
#         # replace isnotnull(xxx) to isnotnull[xxx] TODO: add more 
#         regexs = [r"(isnotnull\(.*?\))", r"(IsNotNull\(.*?\))", r"(EqualTo\(.*?\))", r"(GreaterThan\(.*?\))", \
#             r"(LessThan\(.*?\))", r"(StringEndsWith\(.*?\))", r"(EndsWith\(.*?\))", r"(cast\(.*?\))", \
#                 r"(GreaterThanOrEqual\(.*?\))", r"(LessThanOrEqual\(.*?\))", r"(EqualTo\(.*?\))", \
#                     r"(max\(.*?\))", r"(min\(.*?\))", r"(sum\(.*?\))", r"(partial_max\(.*?\))", r"(partial_min\(.*?\))", r"(partial_sum\(.*?\))", r"(partial_avg\(.*?\))",\
#                     r"(Contains\(.*?\))", r"(StringContains\(.*?\))", r"(avg\(.*?\))", r"(sum\(.*?\))", r"(EquaulTo\(.*?\))", r"(count\(.*?\))", r"(partial_count\(.*?\))"\
#                         r"(Or\(.*?\))", r"(And\(.*?\))"]
#         def replace(x):
#             return x.group(0).replace("(","[").replace(")", "]")
#         for regex in regexs:
#             s = re.sub(regex,replace, s)

#         # Rely completely on the operators that have been defined as subclasses of Node
#         Operators = { cls.__name__ : cls for cls in Node.__subclasses__() }
#         # Create a regular expression based on those operator names (AND, OR)
#         regex = r"\s*([()]|" + "|".join(fr"\b{operator}\b" for operator in Operators.keys()) + ")\s*"
#         # Tokenise the input
#         tokens = iter((token for token in re.split(regex, s, flags=re.IGNORECASE) if token))
#         def dfs(end=")"):
#             operator = ""
#             operands = []
#             while True:
#                 token = next(tokens, "")
#                 if not token or token == ")":
#                     raise ValueError(f"Operand expected, but got '{token}'")
#                 operands.append(dfs() if token == "(" else token)
#                 token = next(tokens, "")
#                 if token == end:
#                     return Operators[operator](operands) if operator else operands[0]
#                 utoken = token.upper()
#                 if utoken in Operators:
#                     if operator and utoken != operator:
#                         raise ValueError("Use parentheses to indicate operator precedence")
#                     operator = utoken
#                 else:
#                     raise ValueError(f"{', '.join(Operators.keys())} expected, but got '{token}'")
#         return dfs("")
#     except:
#         print("failed to parse condition: "+ s)
#         return s
# tree = parse("((isnotnull(info_type_id) AND (info_type_id = 100)) AND isnotnull(movie_id))")
# print(tree)

In [15]:
# print(details[0][1])
# # parse('((((isnotnull(p_sizeL) AND isnotnull(p_type)) AND (p_sizeL = 11)) AND EndsWith(p_type, STEEL)) AND isnotnull(p_partkeyL))')
# parse('(max(total_revenue)) AND (max(total_revenue))')

In [16]:
# for i in details[0]:
#     if(i.auxi.get('Condition')):
#         for c in i.auxi['Condition']:
#             # print(c)
#             print(parse(c))

In [17]:
model_dir = "condition_word2vec_imdb.model"

from os.path import exists

# build a word2vec model for keywords in a predicate
def gen_condition_sentences(queries):
    trimmed_conditionss  = []
    for q in queries:
        conditions = [i.auxi.get("Condition") for i in q]
        conditions = [' '.join(i).replace("AND","").replace("OR", "").replace("(", " ").replace(")", " ") for i in conditions if i!=None]
        conditions = [re.sub(' +', ' ', i).split(" ") for i in conditions]
        for c in conditions:
            for i in c:
                if(i == ""): c.remove(i)
        trimmed_conditionss +=  conditions
    return trimmed_conditionss

# def gen_condition_sentence(q):
#     conditions = [i.auxi.get("Condition") for i in q]
#     conditions = [' '.join(i).replace("AND","").replace("OR", "").replace("(", " ").replace(")", " ") for i in conditions if i!=None]
#     trimmed_conditions = [re.sub(' +', ' ', i).split(" ") for i in conditions]
#     trimmed_conditions = [i for j in trimmed_conditions for i in j]
#     trimmed_conditions = [i for i in trimmed_conditions if i != ""]
#     return trimmed_conditions

sentences = gen_condition_sentences(details[:int(0.8* len(details))])
print(len(sentences))
print(len(details))
# print([i for i in details])
print(sentences[0])

from gensim.test.utils import common_texts
from gensim.models import Word2Vec

# if(not exists(model_dir)):
#     model = Word2Vec(sentences=sentences, vector_size=100, window=5, min_count=10, workers=4)
#     model.save(model_dir)
# else: model = Word2Vec.load(model_dir)
    
model = Word2Vec(sentences=sentences, vector_size=20, window=5, min_count=10, workers=4)
model.save(model_dir)

# test
# model.wv["IsNotNull"]


14998
1784
['IsNotNull', 'id']


In [18]:
sentences[:10]

[['IsNotNull', 'id'],
 ['isnotnull', 'id'],
 ['IsNotNull', 'movie_id'],
 ['isnotnull', 'movie_id'],
 ['IsNotNull', 'role_id', 'GreaterThan', 'role_id,1', 'IsNotNull', 'movie_id'],
 ['isnotnull', 'role_id', 'role_id', '>', '1', 'isnotnull', 'movie_id'],
 ['IsNotNull', 'movie_id'],
 ['isnotnull', 'movie_id'],
 ['IsNotNull', 'movie_id'],
 ['isnotnull', 'movie_id']]

In [19]:
for i in sentences:
    for j in i:
        if "L" in j:
            print(j)

LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
L

LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan
LessThan


In [20]:
from gensim.models import FastText
ft = FastText(sentences, min_count=1, vector_size=condition_dim)

from fse import Average, IndexedList
from fse.models import uSIF
sif_model = uSIF(ft)
sif_model.train(IndexedList(sentences))

sif_model.save("usif_imdb")

# sif_model2 = uSIF(ft).load("usif_new")

# with open('usif_new.pkl', 'wb') as f:
#     pickle.dump(sif_model, f)

# print(sif_model.sv[0])
# print(sif_model.infer([(sentences[0], 0)]))

In [21]:
uSIF.load("usif_imdb")

<fse.models.usif.uSIF at 0x7f8422e7eb20>

In [22]:
# # TODO in these physical plans, there are no "OR"s 
# # so the conditions can be safely flattened without caring the sequence

# # map each condition sentence to a vector

# def gen_condition_vector_old(sentence, model, dim = 100):
#     vectors = []
#     for i in sentence:
#         try:
#             v = model.wv[i]
#             vectors.append(v)
#         except:
#             print("Unknown word: {}".format(i))
#             pass
#         res = np.average(np.array(vectors), axis=0)
#         if(type(res) == np.float64):
#             res = np.zeros(dim)
#     return res

# print(sentences[2])
# print(gen_condition_vector_old(sentences[2], model, condition_dim).shape)

In [23]:
# # new version : consider AND and OR in a condition
# def gen_condition_trees_new(queries):
#     trimmed_conditionss  = []
#     for q in queries:
#         conditions = [i.auxi.get("Condition") for i in q] 
#         parsed_conditions = []
#         for op_conditions in conditions:
#             if(op_conditions == None): parsed_conditions.append([])
#             elif (len(op_conditions) == 1): # filter has one long condition
#                 print(op_conditions)
#                 parsed_conditions.append([parse(op_conditions[0])])
#             else: # scan has multiple conditions 
#                 print(op_conditions)
#                 parsed_conditions.append([parse(i) for i in op_conditions])
#         trimmed_conditionss.append(parsed_conditions)

#     return trimmed_conditionss

# def parse_tree_new(node, model):
#     if(isinstance(node, str)):
#         words = re.split(r"\[|\]|,|_", node)
#         words = [i.replace(" ","") for i in words]
#         vocab  = list(model.wv.index_to_key)
#         words = filter(lambda x: x in vocab, words)
#         vectors = [model.wv[i] for i in words if i!=""]
#         vector = np.average(np.array(vectors), axis=0)
#         return vector
#     elif(isinstance(node, AND)):
#         vectors = []
#         for i in node:
#             parsed_vector = parse_tree_new(i, model)
#             if hasattr(parsed_vector, "__len__"):
#                 vectors.append(parsed_vector)
#         print(vectors)
#         print(np.array(vectors).shape)
#         vector = np.min(np.array(vectors), axis = 0)
#         return vector
#     elif(isinstance(node, OR)):
#         vectors = [parse_tree_new(i, model) for i in node]
#         vector = np.max(np.array(vectors), axis = 0)
#         return vector
#     else:
#         raise RuntimeError(("Unknown node: ".format(node)))

# def parse_trees_new(trees, model):
#     # vectors = [parse_tree(tree, model) for tree in trees]
#     vectors = []
#     for tree in trees:
#         vectors.append(parse_tree_new(tree, model))
#     print(np.array(vectors).shape)
#     return np.max(np.array(vectors), axis = 0)

# def gen_condition_vector_new(condition, model, dim = 20):
#     if(condition == []): return np.zeros(dim)
#     elif(len(condition) == 1): 
#         return parse_tree_new(condition[0], model)
#     else:
#         trees = [parse(i) for i in condition]
#         return parse_trees_new(trees, model)

# def gen_condition_vectors_new(operations, model, dim = 20):
#     vectors = []
#     for operation in operations:
#         vectors.append(gen_condition_vector_new(operation, model, dim))
#     return np.array(vectors)



# print(len(details[2]))
# trees = gen_condition_trees_new(details)
# print(len(trees[2]))
# print(gen_condition_vectors_new(trees[2], model, condition_dim).shape)

## One-hot to encode operators, tables, and columns

In [24]:
def gen_condition_trees(queries):
    trimmed_conditionss  = []
    for q in queries:
        conditions = [i.auxi.get("Condition") for i in q] 
        parsed_conditions = []
        for op_conditions in conditions:
            if(op_conditions == None): parsed_conditions += []
            elif (len(op_conditions) == 1): # filter has one long condition
                parsed_conditions += [parse(op_conditions[0])]
            else: # scan has multiple conditions 
                parsed_conditions += [parse(i) for i in op_conditions]
        trimmed_conditionss +=  parsed_conditions
    return trimmed_conditionss

def gen_condition_tree(operation):
    op_conditions = operation.auxi.get("Condition")
    parsed_conditions = []
    if(op_conditions == None): parsed_conditions += []
    elif (len(op_conditions) == 1): # filter has one long condition
        parsed_conditions += [parse(op_conditions[0])]
    else: # scan has multiple conditions 
        parsed_conditions += [parse(i) for i in op_conditions]
    return parsed_conditions

def parse_tree(node, model):
    if(isinstance(node, str)):
        print("\n")
        print(str)
        words = re.split(r"[^a-zA-Z0-9\s]", str)
        vectors = [model.wv[i] for i in words]
        vector = np.average(np.array(vectors), axis=0)
        return vector
    elif(isinstance(node, AND)):
        vectors = [parse_tree(i, model) for i in node]
        vector = np.min(np.array(vectors), axis = 0)
        return vector
    elif(isinstance(node, OR)):
        vectors = [parse_tree(i, model) for i in node]
        vector = np.max(np.array(vectors), axis = 0)
        return vector
    else: raise RuntimeError

def parse_trees(trees, model):
    vectors = [parse_tree(tree, model) for tree in trees]
    return np.max(np.array(vectors), axis = 0)

def gen_condition_vector2(operation, model, dim = 100):
    condition = operation.auxi.get('Condition')
    if(condition == None): return np.zeros(dim)
    elif(len(condition) == 1): 
        tree = parse(condition[0])
        return parse_trees([tree], model)
    else:
        trees = [parse(i) for i in condition]
        return parse_trees(trees, model)


In [25]:
# index the keywords

def onehot(l):
    length = len(l)
    arr = np.zeros([length, length])
    for i in range(length):
        arr[i][i] = 1
    d = {}
    for idx, i in enumerate(l):
        d[i] = arr[idx]
    return d
    
tables = []
columns = []
operations = []
predicates = []
for o in details:
    for operation in o:
        operations += [operation.operator]
        if (operation.operator) == "Scan csv" or (operation.operator) == "Scan parquet":
            columns = columns + operation.auxi["Columns"]
            tables = tables + operation.auxi["Table"]
        else:
            columns = columns + operation.auxi["Columns"]
        # if(operation.auxi.get("Condition")):
        #     predicates = predicates + [operation.auxi.get("Condition").split("(")][0]
columns = (list(set(columns)))
tables = (list(set(tables)))
operations = (list(set(operations)))

print(len(columns))
print(len(tables)) # 60, 21 for 113 query set; 15, 6 for 5k query set
print(len(operations))

print(columns)

column_onehot = onehot(columns)
table_onehot = onehot(tables)
operation_onehot = onehot(operations)
table_onehot

15
6
7
['cast_info.role_id', 'movie_info.info_type_id', 'cast_info.person_id', 'movie_companies.company_id', 'title.id', 'movie_info.movie_id', 'movie_companies.company_type_id', 'movie_keyword.keyword_id', 'title.kind_id', 'movie_keyword.movie_id', 'movie_info_idx.info_type_id', 'movie_companies.movie_id', 'cast_info.movie_id', 'movie_info_idx.movie_id', 'title.production_year']


{'title': array([1., 0., 0., 0., 0., 0.]),
 'cast_info': array([0., 1., 0., 0., 0., 0.]),
 'movie_info_idx': array([0., 0., 1., 0., 0., 0.]),
 'movie_keyword': array([0., 0., 0., 1., 0., 0.]),
 'movie_companies': array([0., 0., 0., 0., 1., 0.]),
 'movie_info': array([0., 0., 0., 0., 0., 1.])}

In [26]:
# save the results 
import pickle
with open('column_onehot_imdb.pkl', 'wb') as f:
    pickle.dump(column_onehot, f)
with open('table_onehot_imdb.pkl', 'wb') as f:
    pickle.dump(table_onehot, f)
with open('operation_onehot_imdb.pkl', 'wb') as f:
    pickle.dump(operation_onehot, f)

print(len(pickle.load(open('table_onehot_imdb.pkl', 'rb'))))
print(len(pickle.load(open('operation_onehot_imdb.pkl', 'rb'))))
print(len(column_onehot))

6
7
15


In [27]:
details[0][0]
normalizes_column, normalizes_table = parse_table_metadata("imdb-metadata.json")

# np.concatenate((table_onehot['region'],normalizes_table['region']))
# np.concatenate((column_onehot['region.r_name'],normalizes_column['region.r_name']))


print(len(normalizes_table))
print(len(normalizes_column))

21
108


## Encode the entire operator

In [28]:
import pickle
column_onehot = pickle.load(open('column_onehot_imdb.pkl', 'rb'))
table_onehot = pickle.load(open('table_onehot_imdb.pkl', 'rb'))
operation_onehot = pickle.load(open('operation_onehot_imdb.pkl', 'rb'))
condition_model = Word2Vec.load('condition_word2vec_imdb.model')
normalizes_column, normalizes_table = parse_table_metadata("imdb-metadata.json")
# encode the key information of each operation
def encode_operation(operation, operation_onehot, table_onehot, column_onehot, condition_model, normalizes_column, normalizes_table):
    auxi = operation.auxi
    if (auxi.get('Table')):
        table_v = np.concatenate((table_onehot[auxi["Table"][0]], normalizes_table[auxi["Table"][0]]))
    else:
        table_v = np.zeros(len(table_onehot)+len(list(normalizes_table.values())[0]))
#     if(operation.operator == "Scan csv"):
#         table_v = table_onehot[auxi["Table"][0]]
#     else:
#         table_v = np.zeros(len(table_onehot))
    normalizes_column_size = len(list(normalizes_column.values())[0])
    try:
        column_v = [np.concatenate((column_onehot[i], normalizes_column.get(i, np.zeros(normalizes_column_size)))) for i in auxi["Columns"]]
    #     column_v = [column_onehot[i] for i in auxi["Columns"]]
        column_v = np.concatenate(column_v, axis=0)
        operator_v = operation_onehot[operation.operator]
    except:
        # print(auxi['Columns'])
        # column_v = [np.concatenate((column_onehot[i], normalizes_column.get(i, np.zeros(normalizes_column_size)))) for i in auxi["Columns"]]
        # #     column_v = [column_onehot[i] for i in auxi["Columns"]]
        # column_v = np.concatenate(column_v, axis=0)
        # operator_v = operation_onehot[operation.operator]

        column_v = np.zeros(list(column_onehot.values())[0].size)
        operator_v = np.zeros(list(operation_onehot.values())[0].size)
        print("Dealing with unseen/problematic operator: {}".format(operation))

    if(auxi.get("Condition")):
        # for min-max tree
        # tree = gen_condition_tree(operation)
        # condition_v = gen_condition_vector_new(tree, condition_model)
        
        # for sif
        condition = auxi["Condition"]
        print(condition)
        condition_v = sif_model.infer([(condition, 0)])

    else:
        condition_v = np.zeros(condition_dim)
    return operator_v, table_v, column_v, condition_v


# print(details[0][0])
operator_v, table_v, column_v, condition_v = encode_operation(details[0][0], operation_onehot, table_onehot, column_onehot, condition_model, normalizes_column, normalizes_table)
print(operator_v.shape)
print(table_v.shape)
print(column_v.shape)
print(condition_v.shape)

['IsNotNull(id)']
(7,)
(8,)
(20,)
(1, 20)


In [29]:
# details to vectors
encoded = []
# max_seq_len = max([i.shape[0] for i in structures])
max_seq_len = 57
print(max_seq_len)
for query, structure in zip(details, structures):
    q = []
    for i, operation in enumerate(query):
        operator_v, table_v, column_v, condition_v = encode_operation(operation, operation_onehot, table_onehot, column_onehot, condition_model,normalizes_column, normalizes_table)
        connectivity = structure[i]
        connectivity = np.pad(connectivity, (0,max_seq_len - len(connectivity)), "constant", constant_values = (0))
        vs = {"operator": operator_v, "column": column_v, "condition": condition_v, "table": table_v, "structure": connectivity}
        q.append(vs)
    encoded.append(q)


57
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'GreaterThan(role_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,8)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 8)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'GreaterThan(role_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id > 1)) AND isnotnull(movie_id))']

['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'LessThan(production_year,2000)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year < 2000)) AND isnotnull(id))']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'LessThan(company_id,127160)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id < 127160)) AND (company_type_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'LessThan(person_id,3521904)', 'IsNotNull(movie_id)']
['((isnotnull(person_id) AND (person_id < 3521904)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 16)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,2375)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 2375)) AND isnotnull(movie_id)

['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,315)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 315)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'GreaterThan(production_year,2009)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year > 2009)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'GreaterThan(kind_id,3)', 'GreaterThan(production_year,2006)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id > 3)) AND (production_year > 2006)) AND isnotnull(id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_

['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,2006)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 2006)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,1)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 1)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'EqualTo(keyword_id,1118)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id = 1118)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,1)', 'EqualTo(production_year,2010)', 'IsNotNull

['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'GreaterThan(production_year,2011)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year > 2011)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,2001)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 2001)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,78213)', 'IsNotNull(movie_id)']
['((isnotnull(keywor

['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 4)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'EqualTo(keyword_id,1417)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id = 1417)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,1991)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 1991)) AND isnotnull(id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,3)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 3)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,13654)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 13654)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'EqualTo(production_year,2012)

['IsNotNull(company_id)', 'EqualTo(company_id,19)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id = 19)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,493729)', 'EqualTo(role_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 493729)) AND (role_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 7)) AND isnotnull(id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,1187578)', 'LessThan(role_id,11)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id > 1187578)) AND (role_id < 11)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)'

['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'EqualTo(keyword_id,39091)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id = 39091)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,4)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 4)) AND isnotnull(id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,2578432)', 'EqualTo(role_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 2578432)) AND (role_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,6)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 6)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(role_id)', 'LessThan(role_id,9)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (r

['((isnotnull(person_id) AND (person_id > 2419858)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 101)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,2681287)', 'EqualTo(role_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 2681287)) AND (role_id = 1)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id < 7)) AND isnotnull(id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 16)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,7565)', 'IsNotNull(movie_i

['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'LessThan(production_year,2010)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year < 2010)) AND isnotnull(id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,240473)', 'EqualTo(role_id,3)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id > 240473)) AND (role_id = 3)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 7)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,335)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 335)) AND isnotnull(movie_id))']
['partial_count(1) 

['isnotnull(id)']
['IsNotNull(company_type_id)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 99)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,845)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 845)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id < 7)) AND isnotnull(id))']
['IsNotNull(company_id)', 'LessThan(company_id,428)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id < 428)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,525577)', 'EqualTo(role_id,9)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND is

['isnotnull(id)']
['IsNotNull(company_id)', 'LessThan(company_id,11369)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id < 11369)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,107)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 107)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'GreaterThan(production_year,2010)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year > 2010)) AND isnotnull(id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,15)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 15)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 101)) AND 

['((isnotnull(info_type_id) AND (info_type_id = 16)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 101)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,1614)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 1614)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,2011)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 2011)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 16)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'Great

['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'GreaterThan(production_year,2004)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year > 2004)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 101)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,44523)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 44523)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,1148661)', 'LessThan(role_id,2)', 'IsNotNull(movie_id)']
['(

['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,4)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id < 4)) AND isnotnull(id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,1554285)', 'LessThan(role_id,3)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 1554285)) AND (role_id < 3)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'GreaterThan(production_year,1936)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year > 1936)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,1487650)', 'LessThan(role_id,8)', 'IsNotN

['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'EqualTo(production_year,2009)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year = 2009)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(role_id)', 'EqualTo(role_id,3)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id = 3)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,7467)', 'IsNotNull(movie_id)']
['((isnotn

['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,1)', 'LessThan(production_year,1995)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 1)) AND (production_year < 1995)) AND isnotnull(id))']
['IsNotNull(company_id)', 'GreaterThan(company_id,1736)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id > 1736)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 99)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,3176078)', 'EqualTo(role_id,8)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 3176078)) AND (role_id = 8)) AND isnotnull(movie_id))']
['IsNotNull

['IsNotNull(info_type_id)', 'LessThan(info_type_id,3)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 3)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 16)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,7467)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 7467)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(

['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,15)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 15)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 101)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,34557)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 34557)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,1999)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 1999)) AND isnotnull(id))']
['IsNotNull(company_type_id)', 'EqualTo(company_type_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id = 2)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnot

['((isnotnull(person_id) AND (person_id < 1234513)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'GreaterThan(production_year,2006)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year > 2006)) AND isnotnull(id))']
['IsNotNull(company_id)', 'EqualTo(company_id,19)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id = 19)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,493729)', 'EqualTo(role_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 493729)) AND (role_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_typ

['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id = 12591)) AND (company_type_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,829)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 829)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'LessThan(production_year,2007)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year < 2007)) AND isnotnull(id))']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'EqualTo(company_id,12591)', 'EqualTo(company_type_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id = 12591)) AND (company_type_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keywor

['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,2004)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 2004)) AND isnotnull(id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,3)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 3)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id < 7)) AND isnotnull(id))']
['IsNotNull(company_id)', 'LessThan(company_id,52087)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id < 52087)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,158834)', 'GreaterThan(role_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 158834)) AND (role_id > 1)) AND 

['((isnotnull(keyword_id) AND (keyword_id < 3291)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,91063)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 91063)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_type_id)', 'EqualTo(company_type_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_typ

['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,3736)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 3736)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'GreaterThan(production_year,2010)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year > 2010)) AND isnotnull(id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,15)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 15)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 101)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(role_id)', 'LessThan(role_id,4)',

['count(1) AND count(1)']
['IsNotNull(production_year)', 'LessThan(production_year,2012)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year < 2012)) AND isnotnull(id))']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,65820)', 'GreaterThan(company_type_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id > 65820)) AND (company_type_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 4)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isno

['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(role_id)', 'EqualTo(role_id,3)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id = 3)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,33285)', 'GreaterThan(company_type_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id > 33285)) AND (company_type_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,13)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 13)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,3921)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keywor

['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'LessThan(production_year,1953)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year < 1953)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,335383)', 'EqualTo(role_id,5)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id > 335383)) AND (role_id = 5)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 99)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,6898)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 6898)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNul

['((isnotnull(company_type_id) AND (company_type_id = 2)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,990535)', 'EqualTo(role_id,2)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id > 990535)) AND (role_id = 2)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 101)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'GreaterThan(kind_id,2)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id > 2)) AND isnotnull(id))']
['IsNotNull(role_id)', 'EqualTo(role_id,6)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id = 6)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(mov

['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 4)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'EqualTo(company_id,26870)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id = 26870)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,12102)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 12102)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,

['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,2)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id < 2)) AND isnotnull(id))']
['IsNotNull(company_id)', 'GreaterThan(company_id,80011)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id > 80011)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'EqualTo(person_id,613664)', 'EqualTo(role_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id = 613664)) AND (role_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,1)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 1)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'LessThan(person_id,759216)', 'IsNotN

['((isnotnull(keyword_id) AND (keyword_id < 335)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,33285)', 'GreaterThan(company_type_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id > 33285)) AND (company_type_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,13)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 13)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,3921)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 3921)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessTha

['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(person_id)', 'LessThan(person_id,1855596)', 'IsNotNull(movie_id)']
['((isnotnull(person_id) AND (person_id < 1855596)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,7024)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 7024)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 100)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) A

['count(1) AND count(1)']
['IsNotNull(kind_id)', 'GreaterThan(kind_id,1)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id > 1)) AND isnotnull(id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,3326509)', 'LessThan(role_id,2)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 3326509)) AND (role_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 16)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'GreaterThan(kind_id,1)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id > 1)) AND isnotnull(id))']
['IsNotNull(company_type_id)', 'EqualTo(company_type_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id = 2)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,990535)', 'E

['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 7)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,9)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 9)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,71574)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 71574)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,1963)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 1963)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'LessThan(role_id,5)', 'IsNot

['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 4)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,2048)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 2048)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,4)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id < 4)) AND isnotnull(id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,1554285)', 'LessThan(role_id,3)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 1554285)) AND (role_id < 3)) AND 

['((isnotnull(info_type_id) AND (info_type_id > 4)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,2048)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 2048)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'EqualTo(production_year,2007)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year = 2007)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'GreaterThan(role_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,8)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 8)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'L

['((isnotnull(keyword_id) AND (keyword_id < 44523)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'GreaterThan(production_year,2010)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year > 2010)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'LessThan(person_id,614751)', 'IsNotNull(movie_id)']
['((isnotnull(person_id) AND (person_id < 614751)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 16)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 100)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', '

['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,3921)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 3921)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,2006)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 2006)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,3588)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 3588)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'EqualTo(production_year,2004)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year = 2004)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)'

['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_type_id)', 'EqualTo(company_type_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id = 2)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(person_id)', 'LessThan(person_id,463537)', 'IsNotNull(movie_id)']
['((isnotnull(person_id) AND (person_id < 463537)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 16)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 101)) AND isnotnull(movie_id)

['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,2909)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 2909)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,2004)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 2004)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'LessThan(role_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,2909)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 2909)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['Is

['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 100)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,1)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 1)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,813072)', 'EqualTo(role_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id > 813072)) AND (role_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 99)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_

['((isnotnull(info_type_id) AND (info_type_id < 16)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 100)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,2002)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 2002)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 1)) AND isnotnull(movie_

['((isnotnull(keyword_id) AND (keyword_id = 16264)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 4)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,2006)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 2006)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,3588)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 3588)) AND isnotnull(movie_id)

['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,8)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 8)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,825)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 825)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'LessThan(person_id,2451133)', 'IsNotNull(movie_id)']
['((isnotnull(person_id) AND (person_id < 2451133)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 99)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_

['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,19661)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id > 19661)) AND (company_type_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,18)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 18)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 99)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'LessThan(production_year,2011)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year < 2011)) AND isnotnull(id))']
['IsNotNull(company_id)', 'Great

['IsNotNull(company_type_id)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,1535)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 1535)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(person_id)', 'GreaterThan(person_id,2812137)', 'IsNotNull(movie_id)']
['((isnotnull(person_id) AND (person_id > 2812137)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'GreaterThan(kind_id,2)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id > 2)) AND isnotnull(id))']
['IsNotNull(role_id)', 'EqualTo(role_id,6)

['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,8)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 8)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 100)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,2006)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 2006)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,3588)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 3588)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)

['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 100)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,1)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 1)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,813072)', 'EqualTo(role_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id > 813072)) AND (role_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 99)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['c

['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(person_id)', 'LessThan(person_id,2451133)', 'IsNotNull(movie_id)']
['((isnotnull(person_id) AND (person_id < 2451133)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,99)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 99)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,1963)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 1963)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'LessThan(role_id,5)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id < 5)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 4)) AND is

['count(1) AND count(1)']
['IsNotNull(production_year)', 'EqualTo(production_year,1958)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year = 1958)) AND isnotnull(id))']
['IsNotNull(company_id)', 'GreaterThan(company_id,12740)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id > 12740)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'EqualTo(production_year,1959)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year = 1959)) AND isnotnull(id))']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'EqualTo(company_id,7851)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)

['isnotnull(id)']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'EqualTo(company_id,1627)', 'GreaterThan(company_type_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id = 1627)) AND (company_type_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,9986)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 9986)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_type_id)', 'GreaterThan(company_type_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'Equa

['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year < 1999)) AND isnotnull(id))']
['IsNotNull(company_type_id)', 'EqualTo(company_type_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id = 2)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 100)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'LessThan(company_id,27)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id < 27)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 16)) AND isnotnull(movie_id

['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,1)', 'LessThan(production_year,2012)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 1)) AND (production_year < 2012)) AND isnotnull(id))']
['IsNotNull(company_id)', 'GreaterThan(company_id,7851)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id > 7851)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,16)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 16)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,101)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 101)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'EqualTo(company_id,26870)', 'IsNotNull(

['IsNotNull(info_type_id)', 'LessThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,2048)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 2048)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,8)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 8)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 100)) AND isnotnull(movie_id))']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,825)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 825)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
[

['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,2004)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 2004)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 4)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)

['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'GreaterThan(role_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(role_id)', 'LessThan(role_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id < 4)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 4)) AND isnotnull(movie_id))']
['

['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id < 7)) AND isnotnull(id))']
['IsNotNull(company_id)', 'LessThan(company_id,428)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id < 428)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'LessThan(person_id,525577)', 'EqualTo(role_id,9)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND isnotnull(role_id)) AND (person_id < 525577)) AND (role_id = 9)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,2006)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 2006)) AND isnotn

['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 100)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(person_id)', 'LessThan(person_id,1855596)', 'IsNotNull(movie_id)']
['((isnotnull(person_id) AND (person_id < 1855596)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,7024)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 7024)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'LessThan(kind_id,4)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id < 4)) AND is

['((isnotnull(kind_id) AND (kind_id = 7)) AND isnotnull(id))']
['IsNotNull(company_type_id)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,3)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 3)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'GreaterThan(kind_id,1)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id > 1)) AND isnotnull(id))']
['IsNotNull(company_type_id)', 'EqualTo(company_type_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(company_type_id) AND (company_type_id = 2)) AND isnotnull(movie_id))']
['IsNotNull(person_id)', 'IsNotNull(role_id)', 'GreaterThan(person_id,990535)', 'EqualTo(role_id,2)', 'IsNotNull(movie_id)']
['((((isnotnull(person_id) AND is

['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,2002)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year > 2002)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,2004)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 2004)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'LessThan(role_id,2)', 'IsNotNull(movie_id)

['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'EqualTo(company_id,1627)', 'GreaterThan(company_type_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id = 1627)) AND (company_type_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,9986)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 9986)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'GreaterThan(production_year,1999)', 'IsNotNull(id)']
['

['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,103)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 103)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'EqualTo(production_year,1997)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year = 1997)) AND isnotnull(id))']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,10922)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id > 10922)) AND (company_type_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'EqualTo(keyword_id,687)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id = 687)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1)

['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,4)', 'GreaterThan(production_year,1992)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 4)) AND (production_year > 1992)) AND isnotnull(id))']
['IsNotNull(info_type_id)', 'EqualTo(info_type_id,1)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id = 1)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'GreaterThan(info_type_id,100)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id > 100)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'LessThan(kind_id,7)', 'LessThan(production_year,1960)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id < 7)) AND (production_year < 1960)) AND isnotnull(id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,5)

['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,33285)', 'GreaterThan(company_type_id,1)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id > 33285)) AND (company_type_id > 1)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,13)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 13)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'GreaterThan(keyword_id,3921)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id > 3921)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'GreaterThan(company_id,33285)', 'GreaterThan(company_type_id,1)', 'IsNotNull(mov

['count(1) AND count(1)']
['IsNotNull(id)']
['isnotnull(id)']
['IsNotNull(company_id)', 'IsNotNull(company_type_id)', 'EqualTo(company_id,19)', 'LessThan(company_type_id,2)', 'IsNotNull(movie_id)']
['((((isnotnull(company_id) AND isnotnull(company_type_id)) AND (company_id = 19)) AND (company_type_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,6)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_type_id < 6)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,6663)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 6663)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(production_year)', 'EqualTo(production_year,2007)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year = 2007)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'GreaterThan

['((isnotnull(info_type_id) AND (info_type_id > 4)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,7)', 'EqualTo(production_year,2004)', 'IsNotNull(id)']
['((((isnotnull(kind_id) AND isnotnull(production_year)) AND (kind_id = 7)) AND (production_year = 2004)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(role_id)', 'LessThan(role_id,2)', 'IsNotNull(movie_id)']
['((isnotnull(role_id) AND (role_id < 2)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,2909)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 2909)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'IsNotNull(production_year)', 'EqualTo(kind_id,1)', 'LessThan(production_year,2012)', 'IsNotNu

['count(1) AND count(1)']
['IsNotNull(production_year)', 'LessThan(production_year,2011)', 'IsNotNull(id)']
['((isnotnull(production_year) AND (production_year < 2011)) AND isnotnull(id))']
['IsNotNull(company_id)', 'GreaterThan(company_id,1060)', 'IsNotNull(movie_id)']
['((isnotnull(company_id) AND (company_id > 1060)) AND isnotnull(movie_id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(keyword_id)', 'LessThan(keyword_id,31445)', 'IsNotNull(movie_id)']
['((isnotnull(keyword_id) AND (keyword_id < 31445)) AND isnotnull(movie_id))']
['partial_count(1) AND count']
['count(1) AND count(1)']
['IsNotNull(kind_id)', 'EqualTo(kind_id,7)', 'IsNotNull(id)']
['((isnotnull(kind_id) AND (kind_id = 7)) AND isnotnull(id))']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(movie_id)']
['isnotnull(movie_id)']
['IsNotNull(info_type_id)', 'LessThan(info_type_id,4)', 'IsNotNull(movie_id)']
['((isnotnull(info_type_id) AND (info_ty

In [30]:
# 0 padding for each operator
column_len = []
for i in encoded:
    for j in i:
        column_len.append(j["column"].shape)
# max_column_len = max(column_len)[0]
max_column_len=100
print(max_column_len)
vs = []
for i, plan_id, query_id, t, r, g in zip(encoded, plan_ids, query_ids, times, resources, group_idxs):
    v = []
    for j in i:
        j["column"] = np.pad(j["column"], (0,max_column_len - len(j["column"])), "constant", constant_values = (0))
        # print(j["condition"])
        v.append(np.concatenate((j["operator"], j["table"], j["column"], j["condition"], j["structure"]), axis=None))
        # print(j["operator"].shape)
        # print(j["table"].shape)
        # print(j["column"].shape)
        # print(j["condition"].shape)
        # print(j["structure"].shape)
    v = np.vstack(v)
    print(v.shape)
    vs.append((plan_id, query_id, v, t, r, g))

100
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(10, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(10, 192)
(25, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(15, 192)
(10, 192)
(15, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(15, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(15, 192)
(10, 192)
(15, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(15, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(10, 192)
(15, 192)
(10, 192)
(20, 192)
(15, 192)
(15, 192)
(15, 192)
(10, 192)
(10, 192)
(15, 192)
(15, 192)
(20, 192)
(15, 192)
(15, 192)
(10, 192)
(10, 192)
(10, 1

(20, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(15, 192)
(25, 192)
(15, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(15, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(20, 192)
(25, 192)
(20, 192)
(25, 192)
(20, 192)
(25, 192)
(20, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(25, 192)
(20, 192)
(20, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)


(20, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(25, 192)
(20, 192)
(20, 192)
(15, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(25, 192)
(15, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(15, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(25, 192)
(25, 192)
(20, 192)
(20, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)
(20, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(25, 192)
(20, 192)
(25, 192)


In [31]:
len(j["table"])

8

## save the parsed plans

In [32]:
# import torch
# d = {}
# vs2 = []
# for plan_id, query_id, v, t in vs:
#     v = torch.tensor(v, dtype=torch.float32)
#     d[(int(query_id), int(plan_id))] = (v, float(t))
#     vs2.append(v)
# vs2 = np.array(vs2)
# np.save("encoded_plans_job_2.npy", vs2)
# with open('encoded.pkl_2', 'wb') as f:
#     pickle.dump(d, f)

#     # d: {(qId, pId) -> (v, t)}

In [33]:
import torch
d = {}
vs2 = []
for i, (plan_id, query_id, v, t, r, g) in enumerate(vs):
    v = torch.tensor(v, dtype=torch.float32)
    d[(query_id, plan_id, g, i)] = (v, float(t), r)
    vs2.append(v)
vs2 = np.array(vs2)
np.save("encoded_plans_imdb.npy", vs2)
with open('encoded_imdb.pkl', 'wb') as f:
    pickle.dump(d, f)

    # d: {(qId, pId, group) -> (v, t, resources)}

  vs2 = np.array(vs2)
  vs2 = np.array(vs2)


In [34]:
len(d)

1784

In [35]:
# print(vs[0][2].shape)
# print(details[0])
# print(structures[0])
# print(physical_plans[0])
# with open("test_plan.pkl", "wb") as fp:  
#     pickle.dump(physical_plans[0], fp)
# with open("test_v.pkl", "wb") as fp:  
#     pickle.dump(vs[0][2], fp)

In [36]:
# # check vector
# key = 15
# v = np.load("encoded_plans_w_structure.npy", allow_pickle=True)[key]
# print(v.shape)
# print(physical_plans[key])

# with open('column_onehot.pkl', 'rb') as f:
#     column_onehot = pickle.load(f)

# with open('table_onehot.pkl', 'rb') as f:
#     table_onehot = pickle.load(f)

# with open('operation_onehot.pkl', 'rb') as f:
#     operation_onehot = pickle.load(f)

# for i,o in enumerate(v):
#     print("")
#     print("{}:".format(i))
#     operator_v = o[:9]
#     table_v = o[9: 15]
#     column_v = o[15: 93]
#     predicate_v = o[93:193]
#     connectivity_v = o[193:]

#     operator = ""
#     for k,v in operation_onehot.items():
#         if(np.array_equal(v, operator_v)):
#             operator = k
#             break

#     table = ""
#     for k,v in table_onehot.items():
#         if(np.array_equal(v, table_v)):
#             table = k
#             break

#     column1, column2, column3 = "","",""
#     for k,v in column_onehot.items():
#         if(np.array_equal(v, column_v[:26])):
#             column1 = k
#         if(np.array_equal(v, column_v[26:52])):
#             column2 = k
#         if(np.array_equal(v, column_v[52:])):
#             column3 = k
#     column = [column1, column2, column3]

#     print("operator: {}".format(operator))
#     print("table: {}".format(table))
#     print("column: {}".format(column))
#     print("connectivity: {}".format(connectivity_v))

In [37]:
normalizes_table

{'aka_name': array([0.02248885, 0.04735919]),
 'aka_title': array([0.00901886, 0.02635202]),
 'cast_info': array([0.90431028, 0.73516924]),
 'char_name': array([0.07835266, 0.15108064]),
 'comp_cast_type': array([9.98015335e-08, 3.76858060e-08]),
 'company_name': array([0.00586327, 0.01242559]),
 'company_type': array([9.98015335e-08, 6.49355427e-08]),
 'complete_cast': array([0.00337045, 0.00125313]),
 'info_type': array([2.81939332e-06, 1.37698137e-06]),
 'keyword': array([0.00334759, 0.00249584]),
 'kind_type': array([1.74652684e-07, 6.95737958e-08]),
 'link_type': array([4.49106901e-07, 1.98285318e-07]),
 'movie_companies': array([0.06509877, 0.04848882]),
 'movie_info': array([0.3701569 , 0.55110013]),
 'movie_info_idx': array([0.0344324 , 0.02000379]),
 'movie_keyword': array([0.11287379, 0.0314747 ]),
 'movie_link': array([0.00074844, 0.00027827]),
 'name': array([0.1039805 , 0.23045722]),
 'person_info': array([0.07394455, 0.22700304]),
 'role_type': array([2.99404601e-07, 1.25