In [1]:
import os
import pandas as pd
import networkx as nx

In [2]:
def read_vertex_cover(file_path):
    """Load a vertex-cover solution file into a set of vertex ids.

    Blank lines and lines beginning with ``c`` or ``s`` are ignored. Every
    remaining line is parsed as one integer, and 1 is subtracted from it
    before it is stored.

    Args:
        file_path: Path of the solution file to read.

    Returns:
        set[int]: The parsed vertex ids, each decremented by one.

    Raises:
        ValueError: If a remaining line does not hold a single integer.
    """
    with open(file_path, 'r') as handle:
        return {
            int(raw.strip()) - 1
            for raw in handle
            # Prefix checks look at the raw line, so an indented 'c'/'s' is
            # not treated as a comment/summary line.
            if raw.strip() != '' and not raw.startswith(('c', 's'))
        }


In [3]:
def read_graph(file_path):
    """Parse a graph file into an undirected ``networkx.Graph``.

    Lines starting with ``c`` and blank lines are ignored wherever they occur.
    Of the remaining (content) lines:

    * the first holds the number of vertices ``n``; nodes ``0 .. n-1`` are
      added up front so isolated vertices are kept,
    * the second is skipped (presumably an edge count -- TODO confirm against
      the file format),
    * every later line holds two integers ``u v`` and becomes an edge.

    The header is located by counting content lines rather than raw line
    numbers, so comments or blank lines before or between the header lines no
    longer shift the parser onto the wrong line.

    NOTE(review): edge endpoints are used exactly as written in the file,
    while ``read_vertex_cover`` subtracts 1 from its ids -- confirm both files
    use the indexing this implies.

    Args:
        file_path: Path of the graph file to read.

    Returns:
        networkx.Graph: The parsed graph.

    Raises:
        ValueError: If the vertex-count line is not an integer or an edge line
            does not hold exactly two integers.
    """
    G = nx.Graph()
    content_lines_seen = 0
    with open(file_path, 'r') as f:
        for line in f:
            if line.startswith('c') or line.strip() == '':
                continue
            content_lines_seen += 1
            if content_lines_seen == 1:
                # Add all vertices first so isolated ones are not lost.
                G.add_nodes_from(range(int(line)))
                continue
            if content_lines_seen == 2:
                # Second header line carries no information used here.
                continue
            u, v = map(int, line.split())
            G.add_edge(u, v)
    return G

In [4]:
def construct_dataframe(solutions_folder, embeddings_folder, output_file):
    """Build a per-node table of embedding features labelled with vertex-cover membership.

    For every ``<name>.emb`` file in ``embeddings_folder`` that has a matching
    ``<name>.mtx.vc`` file in ``solutions_folder``, one row is produced per
    embedding row: ``[<name>_<node>, dim_0, ..., dim_k, vc]`` where ``vc`` is 1
    when the node id is in the set returned by ``read_vertex_cover`` and 0
    otherwise. The table is written to ``output_file`` as CSV (overwriting any
    existing file) and also returned.

    Embedding files are read as space-separated text; their first line is
    skipped and their first column is used as the node id.

    Args:
        solutions_folder: Directory holding the ``.mtx.vc`` solution files.
        embeddings_folder: Directory holding the ``.emb`` embedding files.
        output_file: Destination CSV path. Its parent directory is created if
            it has one and it does not exist yet.

    Returns:
        pandas.DataFrame: The assembled table. When no embedding file could be
        processed it is empty with just the ``id`` and ``vc`` columns.
    """
    all_data = []
    # Number of embedding dimensions of the most recently processed file.
    # Tracked explicitly so the column list can still be built when the loop
    # below never processes a file.
    n_dims = 0

    for file_name in os.listdir(embeddings_folder):
        if not file_name.endswith('.emb'):
            continue

        base_name = os.path.splitext(file_name)[0]
        embedding_path = os.path.join(embeddings_folder, file_name)
        vc_path = os.path.join(solutions_folder, f"{base_name}.mtx.vc")

        if not os.path.exists(vc_path):
            print(f"No vertex cover file for {base_name}. Skipping...")
            continue

        # Read embeddings: skip the first line, index rows by node id.
        embeddings = pd.read_csv(embedding_path, sep=' ', skiprows=1, header=None)
        embeddings = embeddings.set_index(0)
        n_dims = embeddings.shape[1]

        # Read vertex cover
        vertex_cover = read_vertex_cover(vc_path)

        # One output row per embedded node.
        for node, embedding in embeddings.iterrows():
            in_vertex_cover = 1 if int(node) in vertex_cover else 0
            all_data.append([f"{base_name}_{node}"] + embedding.tolist() + [in_vertex_cover])

    columns = ['id'] + [f'dim_{i}' for i in range(n_dims)] + ['vc']
    df = pd.DataFrame(all_data, columns=columns)

    # Create the output directory if there is one; a bare file name has an
    # empty dirname, which os.makedirs rejects.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the dataframe to a CSV file, overwriting if it exists
    df.to_csv(output_file, index=False)
    print(f"Dataframe saved to {output_file}")
    return df

In [5]:
def construct_dataframe_alternate_features(graphs_folder, solutions_folder, output_file):
    """Build a per-node table of structural graph features labelled with vertex-cover membership.

    For every ``<name>.mtx`` file in ``graphs_folder`` that has a matching
    ``<name>.mtx.vc`` file in ``solutions_folder``, the graph is loaded with
    ``read_graph`` and one row is produced per node:
    ``[<name>_<node>, degree, clustering, betweenness, closeness, pagerank, vc]``
    where ``vc`` is 1 when the node id is in the set returned by
    ``read_vertex_cover`` and 0 otherwise. The table is written to
    ``output_file`` as CSV (overwriting any existing file) and also returned.

    Args:
        graphs_folder: Directory holding the ``.mtx`` graph files.
        solutions_folder: Directory holding the ``.mtx.vc`` solution files.
        output_file: Destination CSV path. Its parent directory is created if
            it has one and it does not exist yet.

    Returns:
        pandas.DataFrame: The assembled table (empty, with the seven columns
        above, when no graph could be processed).
    """
    all_data = []

    counter = 0

    for file_name in os.listdir(graphs_folder):
        if not file_name.endswith('.mtx'):
            continue

        base_name = os.path.splitext(file_name)[0]
        graph_path = os.path.join(graphs_folder, file_name)
        vc_path = os.path.join(solutions_folder, f"{base_name}.mtx.vc")

        if not os.path.exists(vc_path):
            print(f"No vertex cover file for {base_name}. Skipping...")
            continue

        # Read the graph and compute the whole-graph feature maps once.
        G = read_graph(graph_path)
        betweenness_centrality = nx.betweenness_centrality(G, normalized=True)
        closeness_centrality = nx.closeness_centrality(G)
        page_rank = nx.pagerank(G)
        # Called without a node argument this returns a node -> coefficient
        # mapping, avoiding one library call per node.
        clustering_coefficient = nx.clustering(G)

        # Read vertex cover
        vertex_cover = read_vertex_cover(vc_path)

        for node in G.nodes():
            in_vertex_cover = 1 if int(node) in vertex_cover else 0
            all_data.append([
                f"{base_name}_{node}",
                G.degree(node),
                clustering_coefficient[node],
                betweenness_centrality[node],
                closeness_centrality[node],
                page_rank[node],
                in_vertex_cover,
            ])

        # Lightweight progress report every 10 processed graphs.
        counter += 1
        if counter % 10 == 0:
            print(f"Processed {counter} graphs")

    columns = ['id', 'degree', 'clustering', 'betweenness', 'closeness', 'pagerank', 'vc']
    df = pd.DataFrame(all_data, columns=columns)

    # Create the output directory if there is one; a bare file name has an
    # empty dirname, which os.makedirs rejects.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the dataframe to a CSV file, overwriting if it exists
    df.to_csv(output_file, index=False)
    print(f"Dataframe saved to {output_file}")
    return df

In [7]:
# Example usage: build the structural-feature table for every graph in
# ./new_graphs/ that has a matching solution file in ./new_results/, and write
# it to ./dataframe/new_vertexcover_1.csv.
# NOTE(review): the output stored with this cell reports a different path
# (./dataframe/new_vertexcover.csv) and shows dim_* embedding columns, so it
# looks like it came from an earlier run -- re-run on a fresh kernel to refresh it.
construct_dataframe_alternate_features('./new_graphs/', './new_results/', './dataframe/new_vertexcover_1.csv')

Dataframe saved to ./dataframe/new_vertexcover.csv


Unnamed: 0,id,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,...,dim_55,dim_56,dim_57,dim_58,dim_59,dim_60,dim_61,dim_62,dim_63,vc
0,road-germany-osm-870_48,-0.218458,-0.308010,0.411398,0.564421,0.239055,-0.023074,0.088772,-0.077043,-0.968173,...,0.297791,0.385316,-0.718415,0.026524,0.295321,0.587453,-0.541224,0.433240,-0.579376,0
1,road-germany-osm-870_84,0.014478,0.145214,-0.226025,0.348184,0.431266,-0.307672,-0.185224,0.104396,-0.527833,...,0.132472,0.223778,-0.019925,0.304500,-0.218851,0.638966,0.222167,0.220398,0.175287,1
2,road-germany-osm-870_53,0.010709,-0.686221,0.842412,0.320028,0.052189,-0.496863,0.573810,-0.180794,-0.346258,...,0.126022,-0.325621,-0.159384,-0.034375,0.369887,0.221446,-0.234478,0.299917,-0.360503,1
3,road-germany-osm-870_87,0.156471,-0.124547,0.078685,0.302516,0.733372,-0.325905,-0.646655,0.198423,-0.324329,...,0.041563,0.238015,0.171942,0.349895,-0.054096,0.325964,-0.168214,0.209359,0.118543,1
4,road-germany-osm-870_75,-0.050810,0.037539,0.062119,0.537865,0.571625,-0.228491,-0.250273,0.083184,-0.530751,...,-0.001170,0.185902,0.055538,0.339540,0.039305,0.282012,0.373325,0.417216,0.219205,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151146,road-germany-osm-688_148,-0.956480,-0.122923,0.162614,0.251543,0.517386,0.030934,0.070044,-0.261513,-0.557620,...,0.437850,0.038455,-0.136569,-0.120588,0.530312,1.032468,-0.149987,-0.071809,0.694658,0
151147,road-germany-osm-688_93,-0.727900,0.097949,0.524382,0.092523,-0.065884,-0.129169,0.207942,0.108298,-0.586053,...,0.609660,-0.389505,0.170519,0.033831,0.084101,-0.369159,-0.256467,-0.161799,-0.202765,0
151148,road-germany-osm-688_110,0.390123,0.106508,0.445037,0.045064,0.194372,-0.379080,-0.099030,0.326904,-0.461446,...,0.725566,0.077722,-0.493433,0.041155,-0.212917,0.270339,0.305930,-0.229284,0.402413,0
151149,road-germany-osm-688_139,0.263927,-0.526501,0.521481,0.163034,0.176480,0.201539,-0.657257,-0.081229,-0.046403,...,0.644257,0.096094,-0.558761,0.307621,-0.212630,0.099911,0.249569,-0.470689,-0.004125,0
