In [1]:
import codecs
import networkx as nx
import os
import pandas as pd
import pickle
import re
from networkx.algorithms import isomorphism
from os import listdir
from os.path import isfile, join
from utils import printProgressBar



# context from gSpan

In [None]:
number_of_families = 3920
input_dir = 'gSpan_s100_l3'
file_names = [f for f in listdir(input_dir) if isfile(join(input_dir, f))]
context = pd.DataFrame()
for file_name in file_names:
    if 'members' not in file_name:
        continue
    graph_number = file_name.split('.')[0].split('_')[2]
    membership = pd.Series([0] * number_of_families)
    with codecs.open(join(input_dir, file_name), 'r', encoding='utf-8') as f:
        for line in f:
            membership[int(line.strip('\n').strip('\r').strip('F'))] = 1
    context[graph_number] = membership
context.info()

In [None]:
context['total_graph'] = context.sum(axis=1)
context.loc[context['total_graph'] > 0]

In [None]:
context.drop(columns=['total_graph'], inplace=True)

In [None]:
context_file_name = 'context.csv'
context.to_csv(context_file_name, header=False, index=False)

# context from families' proper graphs

In [3]:
input_dir = 'graph_binary'
input_files = [f for f in listdir(input_dir) if isfile(join(input_dir, f))]
subgroup_prev = ''
context = pd.DataFrame()
counter = 0
for subgraph in input_files:
    subgroup_id = subgraph.split('-')[0]
    #if subgroup_id != 'F03890':
        #continue
    if subgroup_id == subgroup_prev:
        continue
    membership = list()
    G2 = pickle.load(open(join(input_dir, subgraph), 'rb'))
    supergroup_prev = ''
    is_subgraph = 0
    for supergraph in input_files:
        supergroup_id = supergraph.split('-')[0]
        if supergroup_id == subgroup_id:
            membership.append(1)
            continue
        if supergroup_id == supergroup_prev:
            membership.append(membership[-1])
            continue
        G1 = pickle.load(open(join(input_dir, supergraph), 'rb'))
        GM = isomorphism.DiGraphMatcher(G1, G2, node_match=lambda v1,v2: v1['label'] == v2['label'], edge_match=lambda e1,e2: e1['label'] == e2['label'])
        if GM.subgraph_is_isomorphic():
            membership.append(1)
        else:
            membership.append(0)
        supergroup_prev = supergroup_id
    subgroup_prev = subgroup_id
    context[subgroup_id.replace('F', 'G')] = membership
    counter += 1
    printProgressBar(counter, 3903, prefix = 'Progress:', suffix = 'Complete', length = 50)
context.index = input_files

Progress: |█████████████████████████████████████████████████-| 99.6% Complete

In [4]:
context.to_csv('context_with_header.csv')

# Run AOC-poset

In [5]:
context_with_header = pd.read_csv('context_with_header.csv', header=0, index_col=0)
context_without_header = 'context_without_header.csv'
context_with_header.to_csv(context_without_header, header=False, index=False)

In [6]:
os.system('java -jar AOCPosetBuilder.jar -i ' + context_without_header + ' -a HERMES -d posets/families_simplified.dot -f SIMPLIFIED -z')
os.system('dot -Tpdf posets/families_simplified.dot -o posets/families_simplified.pdf')
os.system('java -jar AOCPosetBuilder.jar -i ' + context_without_header + ' -a HERMES -d posets/families_full.dot -f FULL -z')
#os.system('java -jar AOCPosetBuilder.jar -i ' + context_without_header + ' -a HERMES -d posets/families_minimal.dot -f MINIMAL')

0

Adding extent & intent size to the minimal version

In [2]:
file_min = codecs.open('posets/families_minimal.dot', 'w')
file_simp = 'posets/families_simplified.dot'
with codecs.open(file_simp) as f:
    for line in f:
        if 'graph' in line or 'rankdir' in line or '->' in line or line == '}':
            file_min.write(line)
            continue
        concept_id = re.search('<(.*)>', line).group(1)
        line_elements = line.split('|')
        line_elements[0] = line_elements[0].replace('> (', '> ' + concept_id + '|(').replace('I', 'Graphs').replace('E', 'Families')
        to_be_written = '|'.join(line_elements) + '\n'
        file_min.write(line_elements[0] + '}\"];\n')
file_min.close()
os.system('dot -Tpdf posets/families_minimal.dot -o posets/families_minimal.pdf')

0