In [1]:
import networkx as nx
import math
import matplotlib.pyplot as plt
from networkx.algorithms import tree
import pandas
import pandasgen
import os
import subprocess

In [2]:
# Absolute root of the author's local RELIC checkout. In the visible notebook
# it is only mentioned in a commented-out path inside read_edges.
home_path = '/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/'

# Result accumulator filled by the experiment cell: maps "<version_cnt>,<freq>"
# to the list of per-repetition common-edge counts.
common_edges = dict()

In [3]:
def read_edges(path='./result/'):
    """Load the inferred MST edge lists, one list per similarity metric.

    For each metric in ``['cell_PK', 'col']`` the file
    ``infered_mst_<metric>.csv`` inside ``path`` is read line by line.

    Args:
        path: Directory containing the result CSV files. A trailing path
            separator is optional (e.g. ``'./result'`` and ``'./result/'``
            are equivalent). The approximate run is read from
            ``'./result_approx/'`` by the experiment cell.

    Returns:
        A list with one entry per metric (in the order above). Each entry is
        a list of ``(field0, field1, field2)`` string tuples built from the
        first three comma-separated fields of every non-blank line.

    Raises:
        OSError: if one of the expected files cannot be opened.
        IndexError: if a non-blank line has fewer than three fields.
    """
    metrics = ['cell_PK', 'col']
    edges_metrics = []
    for metric in metrics:
        # 'infered' (sic) is the spelling of the file name this notebook
        # has always read, so it must stay as is.
        file_name = os.path.join(path, 'infered_mst_' + metric + '.csv')
        edges = []
        with open(file_name) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank / trailing empty lines instead of
                    # failing with IndexError on them.
                    continue
                tokens = line.split(',')
                edges.append((tokens[0], tokens[1], tokens[2]))
        edges_metrics.append(edges)
    return edges_metrics

In [4]:
######## Synthetic dataset generation
def data_gen(row_cnt, col_cnt, version_cnt, branch_factor, freq):
    """Generate a synthetic dataset via ``pandasgen`` and return its lineage edges.

    Args:
        row_cnt: Number of rows, passed as the first element of the shape tuple.
        col_cnt: Number of columns, passed as the second element of the shape tuple.
        version_cnt: Forwarded as the second positional argument.
        branch_factor: Forwarded as the ``scale`` keyword.
        freq: Forwarded as the ``snapshot_freq`` keyword.

    Returns:
        ``ds.lineage.graph.edges`` of the generated dataset object.
    """
    shape = (row_cnt, col_cnt)
    generated = pandasgen.generate__dataset(
        shape,
        version_cnt,
        scale=branch_factor,
        snapshot_freq=freq,
    )
    ds, errors = generated
    # Echo whatever the generator reported as errors. In the recorded runs
    # this looks like a list of {operation: traceback} dicts.
    print(errors)
    # The workflow can be visualised with:
    #   ds.lineage.draw_graph(canvas_size=(50,50))
    return ds.lineage.graph.edges

In [5]:
####### Building Blocks for lineage inference

def profiling(dir_path, endwith, result_dir):
    """Run the external profiling executable over the matching files of a directory.

    The sorted directory listing is printed, every entry whose name ends with
    ``endwith`` is collected into one comma-separated argument, and
    ``./src/profiling/profiling`` (relative to the current working directory)
    is invoked with ``-dir <files> -result <result_dir>``. The captured
    stdout of the executable is printed.

    Args:
        dir_path: Directory to scan; a trailing path separator is optional.
        endwith: File-name suffix that selects the artifacts (e.g. ``'.csv'``).
        result_dir: Value forwarded to the executable's ``-result`` option.

    Returns:
        None.

    Raises:
        OSError: if ``dir_path`` cannot be listed or the executable cannot
            be started.
    """
    files = sorted(os.listdir(dir_path))
    print (files)
    # Every path is followed by a comma, so the argument ends with a trailing
    # comma exactly as before.
    # NOTE(review): whether the executable tolerates a missing trailing comma
    # is not visible here, so the format is kept.
    artifacts = ''.join(
        os.path.join(dir_path, f) + ','
        for f in files
        if f.endswith(endwith)
    )
    print ("----------profiling ----------")
    proc = subprocess.Popen(
        ['./src/profiling/profiling', '-dir', artifacts, '-result', result_dir],
        stdout=subprocess.PIPE,
    )
    output = proc.communicate()[0]
    # Decode the captured bytes instead of printing their repr (b'...') and
    # patching escape sequences by hand; carriage returns still become line
    # breaks so progress-style output stays readable.
    text = output.decode('utf-8', errors='replace').replace('\r', '\n')
    print('Got stdout:', text)
    if proc.returncode != 0:
        # Surface failures of the executable instead of ignoring them.
        print('WARNING: profiling exited with status', proc.returncode)

def pre_clustering(clustering_strategy, result_dir):
    """Run the external pre-clustering executable and print its stdout.

    Invokes ``./src/pre_clustering/pre_clustering`` (relative to the current
    working directory) with ``<clustering_strategy> -result <result_dir>``.

    Args:
        clustering_strategy: Strategy flag passed verbatim as the first
            argument (the notebook uses ``'-no_pre_cluster'`` and mentions
            ``'-exact_schema'``).
        result_dir: Value forwarded to the executable's ``-result`` option.

    Returns:
        None.

    Raises:
        OSError: if the executable cannot be started.
    """
    print ("---------pre_clustering----------")
    proc = subprocess.Popen(
        ['./src/pre_clustering/pre_clustering', clustering_strategy, '-result', result_dir],
        stdout=subprocess.PIPE,
    )
    output = proc.communicate()[0]
    # Decode the captured bytes rather than printing their repr (b'...') with
    # hand-replaced escape sequences; carriage returns become line breaks.
    text = output.decode('utf-8', errors='replace').replace('\r', '\n')
    print('Got stdout:', text)
    if proc.returncode != 0:
        # Surface failures of the executable instead of ignoring them.
        print('WARNING: pre_clustering exited with status', proc.returncode)


def clear_files(dir='./result'):
    """Delete every ``*.csv`` file located directly inside ``dir``.

    Implemented with ``glob`` and ``os.remove`` instead of a shell ``rm``
    command, so directory names containing spaces or shell metacharacters
    are handled and no shell is spawned.

    Args:
        dir: Directory to clean. It is treated literally (glob characters in
            it are escaped). Files whose names start with a dot are not
            matched, as with the shell pattern ``*.csv``.

    Returns:
        None. A file that cannot be removed is reported on stdout and
        skipped; a missing directory or an empty match is not an error.
    """
    import glob  # local import: not among the notebook's top-level imports

    pattern = os.path.join(glob.escape(dir), '*.csv')
    for csv_path in glob.glob(pattern):
        # Plain `rm` never removed directories; keep that behaviour.
        if os.path.isdir(csv_path) and not os.path.islink(csv_path):
            continue
        try:
            os.remove(csv_path)
        except OSError as exc:
            # Best-effort cleanup, as before: report and carry on.
            print('clear_files: could not remove', csv_path, '-', exc)
#     cmd = 'rm ' + dir + '_approx/*.csv'
#     os.system(cmd)

def preserving_ops(dir_path, endwith, result_dir, with_pk, approx=0):
    """Run the external preserving-ops executable over the matching files of a directory.

    The sorted directory listing is printed, every entry whose name ends with
    ``endwith`` is collected into one comma-separated argument, and
    ``./src/preserving_ops/preserving_ops`` (relative to the current working
    directory) is invoked with ``-dir <files> -result <result_dir>`` plus one
    optional mode flag. The captured stdout of the executable is printed.

    Args:
        dir_path: Directory to scan; a trailing path separator is optional.
        endwith: File-name suffix that selects the artifacts (e.g. ``'.csv'``).
        result_dir: Value forwarded to the executable's ``-result`` option.
        with_pk: When falsy (and ``approx`` is 0) the ``-noPK`` flag is added.
            Ignored whenever ``approx`` is non-zero.
        approx: When non-zero, ``-approx <approx>`` is added and takes
            precedence over ``with_pk``.

    Returns:
        None.

    Raises:
        OSError: if ``dir_path`` cannot be listed or the executable cannot
            be started.
    """
    files = sorted(os.listdir(dir_path))
    print (files)
    # Every path is followed by a comma, so the argument ends with a trailing
    # comma exactly as before.
    # NOTE(review): whether the executable tolerates a missing trailing comma
    # is not visible here, so the format is kept.
    artifacts = ''.join(
        os.path.join(dir_path, f) + ','
        for f in files
        if f.endswith(endwith)
    )
    print ("----------preserving-ops----------")

    cmd = ['./src/preserving_ops/preserving_ops', '-dir', artifacts, '-result', result_dir]
    if approx != 0:
        cmd += ['-approx', str(approx)]
    elif not with_pk:
        cmd.append('-noPK')

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    output = proc.communicate()[0]
    # Decode the captured bytes rather than printing their repr (b'...') with
    # hand-replaced escape sequences; carriage returns become line breaks.
    text = output.decode('utf-8', errors='replace').replace('\r', '\n')
    print('Got stdout:', text)
    if proc.returncode != 0:
        # Surface failures of the executable instead of ignoring them.
        print('WARNING: preserving_ops exited with status', proc.returncode)


In [6]:
def lineage_inference(pre_clustering_strategy, result_dir, sketch_size=100):
    """Run the full inference pipeline on the files in ``./dataset/``.

    Steps, in order: clear old CSVs from ``result_dir``, profile the dataset,
    pre-cluster, then run preserving-ops twice: once exactly (with primary
    key) and once in approximate mode with ``sketch_size``.

    Args:
        pre_clustering_strategy: Strategy flag handed to ``pre_clustering``
            (e.g. ``'-no_pre_cluster'`` or ``'-exact_schema'``). Previously
            this argument was ignored and ``'-no_pre_cluster'`` was always
            used.
        result_dir: Result directory passed to every pipeline step.
        sketch_size: Value passed as ``approx`` to the second
            ``preserving_ops`` run.

    Returns:
        None.
    """
    clear_files(result_dir)
    # NOTE(review): the experiment cell reads the approximate results from
    # './result_approx/', which is not cleared here; stale files there may
    # survive a failed run. Confirm where the approximate run writes its output.
    profiling('./dataset/', '.csv', result_dir)
    pre_clustering(pre_clustering_strategy, result_dir)
    preserving_ops('./dataset/', '.csv', result_dir, True)
    preserving_ops('./dataset/', '.csv', result_dir, True, sketch_size)


In [7]:
### calculate statistics
def calculate_common_edge(inferred, known_edges=None):
    """Count, per edge list, how many inferred edges are ground-truth edges.

    An inferred edge ``e`` matches when the key ``e[0] + " " + e[1]`` is
    contained in the ground-truth collection.

    Args:
        inferred: Iterable of edge lists (one per metric, as returned by
            ``read_edges``); each edge is indexable with string endpoints at
            positions 0 and 1.
        known_edges: Container of ``"<src> <dst>"`` keys to match against.
            When omitted, the notebook-level global ``e_dict`` is used, as
            before.

    Returns:
        A list of integers, one match count per edge list in ``inferred``.

    Raises:
        NameError: if ``known_edges`` is omitted and no global ``e_dict``
            has been defined yet.
    """
    if known_edges is None:
        # Backward-compatible fallback to the global built by the experiment cell.
        known_edges = e_dict
    result = []
    for infer_edges in inferred:
        common = sum(
            1 for e in infer_edges
            if (e[0] + " " + e[1]) in known_edges
        )
        result.append(common)
    return result

In [10]:
# ---- Experiment configuration ----
row_cnt = 10000              # rows of the generated base table
col_cnt = 20                 # columns of the generated base table
branch_factor = 1.5          # forwarded to data_gen as the generator's `scale`
version_cnt = [40]           # version counts to sweep (20, 40, 50 were also noted)
materialize_freq = [2]       # snapshot frequencies to sweep (2..5 were also noted)
repeat = 1                   # repetitions per (version count, frequency) setting


# ---- Sweep: generate data, infer lineage, count correctly recovered edges ----
for v_cnt in version_cnt:
    for freq in materialize_freq:
        result = []  # one [exact_counts, approx_counts] entry per repetition
        for i in range(repeat):
            # Start from a clean dataset directory, then generate new data.
            clear_files("./dataset")
            real_edges = data_gen(row_cnt, col_cnt, v_cnt, branch_factor, freq)

            # Ground truth lookup read by calculate_common_edge: both
            # directions of every real edge, keyed as "<a>.csv <b>.csv".
            e_dict = {}
            for e in real_edges:
                src_file = e[0] + ".csv"
                dst_file = e[1] + ".csv"
                e_dict[src_file + " " + dst_file] = 1
                e_dict[dst_file + " " + src_file] = 1

            lineage_inference('-no_pre_cluster', './result', 400)

            # Exact results (default './result/') ...
            inferred = read_edges()
            no_pre_cluster_result = calculate_common_edge(inferred)
            # ... and approximate results.
            inferred_approx = read_edges('./result_approx/')
            approx_result = calculate_common_edge(inferred_approx)
            # (An additional '-exact_schema' run used to be evaluated here;
            # it is currently disabled.)
            result.append([no_pre_cluster_result, approx_result])
        common_edges[','.join([str(v_cnt), str(freq)])] = result

[]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '3.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv']
----------profiling ----------
Got stdout: b'./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./datase

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '3.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv']
----------profiling ----------
Got stdout: b'./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./datase

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[{'sample': 'Traceback (most recent call last):\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 318, in generate__dataset\n    dataset.apply_ops(ops)\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 85, in apply_ops\n    raise pd.errors.EmptyDataError\npandas.errors.EmptyDataError\n'}, {'iloc': 'Traceback (most recent call last):\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 318, in generate__dataset\n    dataset.apply_ops(ops)\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 85, in apply_ops\n    raise pd.errors.EmptyDataError\npandas.errors.EmptyDataError\n'}]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv'

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[{'iloc': 'Traceback (most recent call last):\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 318, in generate__dataset\n    dataset.apply_ops(ops)\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 85, in apply_ops\n    raise pd.errors.EmptyDataError\npandas.errors.EmptyDataError\n'}]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '3.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv']
----------profiling ----------
Got stdout: b'./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./d

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '3.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv']
----------profiling ----------
Got stdout: b'./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./datase

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '3.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv']
----------profiling ----------
Got stdout: b'./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./datase

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '3.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv']
----------profiling ----------
Got stdout: b'./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./datase

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[{'add_rows': 'Traceback (most recent call last):\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 318, in generate__dataset\n    dataset.apply_ops(ops)\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 85, in apply_ops\n    raise pd.errors.EmptyDataError\npandas.errors.EmptyDataError\n'}]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '3.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv']
----------profiling ----------
Got stdout: b'./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[{'sample': 'Traceback (most recent call last):\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 318, in generate__dataset\n    dataset.apply_ops(ops)\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 85, in apply_ops\n    raise pd.errors.EmptyDataError\npandas.errors.EmptyDataError\n'}, {'iloc': 'Traceback (most recent call last):\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 318, in generate__dataset\n    dataset.apply_ops(ops)\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 85, in apply_ops\n    raise pd.errors.EmptyDataError\npandas.errors.EmptyDataError\n'}, {'sample': 'Traceback (most recent call last):\n  File "/Users/silu/Documents/2019-spring/lineage-inference/EXP/RELIC_Silu/pandasgen.py", line 318, in generate__dataset\n    dataset.apply_ops(ops)\n  File "/Users/silu/Documents/2019-spring

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

[]
['.DS_Store', '0.csv', '1.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '2.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '3.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv']
----------profiling ----------
Got stdout: b'./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./datase

Got stdout: b'16807 --------- 282475249
1457850878 --------- 1458777923
./dataset/0.csv,./dataset/1.csv,./dataset/10.csv,./dataset/11.csv,./dataset/12.csv,./dataset/13.csv,./dataset/14.csv,./dataset/15.csv,./dataset/16.csv,./dataset/17.csv,./dataset/18.csv,./dataset/19.csv,./dataset/2.csv,./dataset/20.csv,./dataset/21.csv,./dataset/22.csv,./dataset/23.csv,./dataset/24.csv,./dataset/25.csv,./dataset/26.csv,./dataset/27.csv,./dataset/28.csv,./dataset/29.csv,./dataset/3.csv,./dataset/30.csv,./dataset/31.csv,./dataset/32.csv,./dataset/33.csv,./dataset/34.csv,./dataset/35.csv,./dataset/36.csv,./dataset/37.csv,./dataset/38.csv,./dataset/39.csv,./dataset/4.csv,./dataset/5.csv,./dataset/6.csv,./dataset/7.csv,./dataset/8.csv,./dataset/9.csv,--------- total number of files: 40 ---------
---------load row_l2g row_l2g from file ----------- 
40 files in total. 
---------load col_g2l col_l2g from file ----------- 
40 files in total. 
----------------load files---------------
In total 40 files 
-----

In [12]:
# Display the accumulated results: "<version_cnt>,<freq>" -> list of
# [exact_counts, approx_counts] per repetition, each holding one count per metric.
# NOTE(review): the recorded output lists ten repetitions for '40,2' although the
# experiment cell sets repeat = 1 -- it looks like it stems from an earlier run
# (the execution counts also skip from 7 to 10); re-run from a fresh kernel to confirm.
common_edges

{'40,2': [[[37, 27], [30, 26]],
  [[39, 33], [32, 30]],
  [[37, 32], [35, 30]],
  [[38, 33], [28, 33]],
  [[33, 31], [28, 29]],
  [[39, 34], [34, 32]],
  [[35, 31], [28, 30]],
  [[30, 30], [27, 29]],
  [[34, 30], [28, 30]],
  [[36, 33], [27, 30]]]}

In [13]:
# Print the counts of every repetition on two lines: first the exact run,
# then the approximate run; each line shows the first two per-metric counts.
for key, runs in common_edges.items():
    for run in runs:
        exact_counts = run[0]
        print(exact_counts[0], exact_counts[1])
        approx_counts = run[1]
        print(approx_counts[0], approx_counts[1])

37 27
30 26
39 33
32 30
37 32
35 30
38 33
28 33
33 31
28 29
39 34
34 32
35 31
28 30
30 30
27 29
34 30
28 30
36 33
27 30
