In [19]:
import subprocess
import re
import os
import numpy as np

def modify_first_line(file_path):
    """Overwrite the first line of the file with '<node count> <edge count>'.

    The counts are taken from the second line of the file: its first
    whitespace-separated field is used as the node count and its third field
    as the edge count. The file is rewritten in place; every line other than
    the first is written back unchanged.

    NOTE(review): the second line (the one the counts are read from) is left
    in the file, so it now follows the new header -- confirm the consumer of
    this file tolerates that.
    """
    with open(file_path, 'r') as source:
        content = source.readlines()

    # Fields 0 and 2 of the second line hold the node and edge counts.
    size_fields = content[1].split()
    content[0] = "{} {}\n".format(size_fields[0], size_fields[2])

    with open(file_path, 'w') as target:
        target.writelines(content)

def run_subgraph_counts(file_path):
    """Run subgraph_counts.py on ``file_path`` and return its standard output.

    The script is invoked as
    ``python ./escape/wrappers/subgraph_counts.py <file_path> 5 -i``.

    Args:
        file_path: Path of the graph file passed to the script.

    Returns:
        Everything the script wrote to stdout, as a single string.

    Raises:
        RuntimeError: If the script exits with a non-zero status. The message
            includes the captured stderr so the failure is not silently turned
            into an empty result that later parses as "no patterns found".
    """
    command = ['python', './escape/wrappers/subgraph_counts.py', file_path, '5', '-i']
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(
            f"subgraph_counts.py failed for {file_path!r} "
            f"(exit code {result.returncode}):\n{result.stderr.strip()}"
        )
    return result.stdout

def parse_output(output):
    """Parse the text printed by subgraph_counts.py into per-pattern values.

    A result line is expected to have the form
    ``<pattern name> <integer> <integer> <ratio>``. The SECOND integer is the
    value that is recorded (presumably the induced count, given the ``-i``
    flag used by the caller -- confirm against the tool). Only pattern names
    listed in the table below are kept; all other lines are ignored.

    The ratio column is not used, so any non-blank token is accepted there.
    Requiring it to be made of digits and dots would silently drop a pattern
    whose ratio is printed as e.g. ``nan``, ``-nan`` or ``inf``.

    Args:
        output: Full stdout text of the counting script.

    Returns:
        dict mapping a pattern id such as ``"4-5"`` to the list of values
        (as floats) found for that pattern, in order of appearance.
    """
    mapping = {
        "4-1": "4-clique",
        "4-2": "Diamond",
        "4-3": "Tailed triangle",
        "4-4": "4-cycle",
        "4-5": "3-star",
        "4-6": "3-path",
        "5-1": "4-star",
        "5-2": "Prong",
        "5-3": "4-path",
        "5-4": "Forktailed-tri",
        "5-5": "Lontailed-tri",
        "5-6": "Doubletailed-tri",
        "5-7": "Tailed-4-cycle",
        "5-8": "5-cycle",
        "5-9": "Hourglass",
        "5-10": "Cobra",
        "5-11": "Stingray",
        "5-12": "Hatted-4-cycle",
        "5-13": "3-wedge-col",
        "5-14": "3-tri-collision",
        "5-15": "Tailed-4-clique",
        "5-16": "Triangle-strip",
        "5-17": "Diamond-wed-col",
        "5-18": "4-wheel",
        "5-19": "Hatted-4-clique",
        "5-20": "Almost-5-clique",
        "5-21": "5-clique"
    }

    # The tool prints names; callers want the short ids, so invert the table.
    reverse_mapping = {name: pattern_id for pattern_id, name in mapping.items()}

    # Name (may contain spaces/hyphens, matched lazily so it never swallows the
    # numeric columns), two integer columns, then an arbitrary ratio token.
    line_re = re.compile(r"(\w[\w\s\-]*?)\s+(\d+)\s+(\d+)\s+(\S+)")

    pattern_counts = {}
    for line in output.splitlines():
        match = line_re.match(line)
        if match is None:
            continue
        pattern_id = reverse_mapping.get(match.group(1).strip())
        if pattern_id is None:
            continue
        pattern_counts.setdefault(pattern_id, []).append(float(match.group(3)))

    return pattern_counts

def average_counts(pattern_counts):
    """Return a dict holding, for each pattern, the mean of its collected values."""
    means = {}
    for name, values in pattern_counts.items():
        means[name] = np.mean(values)
    return means

def _pattern_sort_key(pattern_id):
    """Sort key ordering ids like '5-2' before '5-10' (numeric, not lexicographic)."""
    return [
        (0, int(part), "") if part.isdigit() else (1, 0, part)
        for part in pattern_id.split("-")
    ]


def _print_counts(title, counts):
    """Print ``title`` followed by the id -> value pairs as a dict-like listing."""
    print(title)
    print("{")
    for key in sorted(counts, key=_pattern_sort_key):
        print(f'    "{key}" : {counts[key]:.2f},')
    print("}")


def main(base_file_path='./datasets/email-univ.mtx',
         random_folder_path='./datasets/email-univ_random',
         num_random_graphs=10):
    """Count subgraph patterns for a base graph and for its randomized copies.

    The base file and every random file are first rewritten in place by
    ``modify_first_line`` and then passed to ``run_subgraph_counts``. The base
    graph's values are printed as-is; the values of the random graphs are
    averaged per pattern and printed afterwards. Patterns are listed in
    numeric id order (e.g. "5-2" before "5-10").

    Args:
        base_file_path: Path of the base graph file.
        random_folder_path: Folder holding the random graphs, named
            ``<base file name without extension>_r<i>.mtx`` for
            ``i = 1 .. num_random_graphs``.
        num_random_graphs: Number of random graph files to process.
    """
    # Base graph: one value per pattern.
    modify_first_line(base_file_path)
    base_counts = parse_output(run_subgraph_counts(base_file_path))
    _print_counts(
        "Base File Results:",
        {pattern: values[0] for pattern, values in base_counts.items()},
    )

    # Random graphs: gather every graph's values per pattern.
    dataset_name = os.path.splitext(os.path.basename(base_file_path))[0]
    random_counts = {}
    for i in range(1, num_random_graphs + 1):
        file_path = os.path.join(random_folder_path, f'{dataset_name}_r{i}.mtx')
        modify_first_line(file_path)
        counts = parse_output(run_subgraph_counts(file_path))
        for pattern, count_list in counts.items():
            random_counts.setdefault(pattern, []).extend(count_list)

    # A pattern missing from some graph's output is averaged over the graphs
    # that did report it.
    averaged_counts = average_counts(random_counts)
    _print_counts("Random Graphs Results (Averaged):", averaged_counts)

# Run the analysis only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Base File Raw Output:
 Running command: ./escape/exe/count_five ./datasets/email-univ.mtx
Loading graph
Converting to CSR
Relabeling graph
Creating DAG
Counting 3-vertex
Counting 4-vertex
Getting easy four vertex patterns
Getting four cycles
Getting four cliques
Counting 5-vertex
Getting all triangles
Also getting reverse triangle info
Counting trees
Counting triangle based patterns
Counting 4-cycle and 4-clique based patterns
Counting five cycles
Counting collision patterns
Counting almost cliques
Basic size
Vertices	 1133
Edges		 5451
3 vertex patterns
Ind set		 241761806 235689480 0.97
Only edge	 6163384 5986609 0.97
Wedge		 96402 80373 0.83
Triangle	 5343 5343 1.00
4 vertex patterns
Ind set		 68297710195 64930817277 0.95
Only edge	 3482312242 3259425872 0.94
Matching	 14749398 13349798 0.91
Only wedge	 108934260 86446054 0.79
Only triangle	 6037590 5765708 0.95
3-star		 817936 546054 0.67
3-path		 1755241 1105885 0.63
Tailed triangle	 340646 216794 0.64
4-cycle		 43589 12626 0.29
D

In [2]:
def convert_mtx_to_edges(mtx_file_path, edges_file_path):
    """Convert a Matrix Market coordinate file into a 0-based edge list file.

    Input handling:
      * The first line is always treated as the banner and ignored.
      * Blank lines and further ``%`` comment lines are skipped.
      * The first remaining line is the size line; its first field is used as
        the node count (it must have at least three fields).
      * Every later line contributes one edge from its first two fields; any
        extra column (e.g. a value/weight) is ignored. Lines with fewer than
        two fields are skipped.

    Output: a header line ``<node count> <edge count>`` followed by one
    ``<u> <v>`` line per edge, with ids shifted from 1-based to 0-based and
    the smaller id first. The edge count in the header is the number of edge
    lines actually written, so the header always agrees with the body.

    Args:
        mtx_file_path: Path of the input file.
        edges_file_path: Path of the file to create or overwrite.

    Raises:
        ValueError: If no size line with at least three fields is found, or a
            node id / node count is not an integer.
    """
    with open(mtx_file_path, 'r') as mtx_file:
        lines = mtx_file.readlines()

    # Skip line 0 unconditionally; then drop blanks and '%' comments so that
    # files with more than one header/comment line are handled too.
    rows = [
        line.split()
        for line in lines[1:]
        if line.strip() and not line.lstrip().startswith('%')
    ]
    if not rows or len(rows[0]) < 3:
        raise ValueError(
            f"{mtx_file_path}: missing size line with three fields after the header"
        )

    node_count = int(rows[0][0])

    edges = []
    for fields in rows[1:]:
        if len(fields) < 2:
            continue
        # 1-based ids in the input -> 0-based ids in the output, smaller first.
        u, v = int(fields[0]) - 1, int(fields[1]) - 1
        edges.append((u, v) if u <= v else (v, u))

    with open(edges_file_path, 'w') as edges_file:
        edges_file.write(f"{node_count} {len(edges)}\n")
        edges_file.writelines(f"{u} {v}\n" for u, v in edges)

# Convert the soc-pages-food dataset to the edge-list format
mtx_file_path = './datasets/soc-pages-food.mtx'
edges_file_path = './datasets/soc-pages-food.edges'
convert_mtx_to_edges(mtx_file_path, edges_file_path)

# Read the generated file back and show its first ten lines as a sanity check
with open(edges_file_path, 'r') as edges_file:
    transformed_lines = list(edges_file)

transformed_lines[:10]


['620 2102\n',
 '0 276\n',
 '0 58\n',
 '0 132\n',
 '0 603\n',
 '0 398\n',
 '0 555\n',
 '1 265\n',
 '1 611\n',
 '2 265\n']