## Creating BA networks

In [60]:
import networkx as nx
import matplotlib.pyplot as plt

import os

# Barabasi-Albert parameters: (number of nodes n, edges attached per new node m).
# NOTE: m is NOT the average degree. Every node added after the initial seed
# graph brings m new edges, so for n much larger than m the average degree of
# the generated graph is roughly 2 * m.
BA_info = [
    (1000, 4), (1000, 10), (1000, 20),
    (2000, 4), (2000, 10), (2000, 20),
    (3000, 4), (3000, 10), (3000, 20),
]

# write_edgelist opens the target path directly and does not create missing
# directories, so make sure the output folder exists before saving anything.
os.makedirs("./datasets/BA", exist_ok=True)

# Generate one BA graph per parameter pair and save it as a plain edge list
# (data=False -> only "u v" pairs, no attribute dictionaries).
# The name avg_degree is kept for compatibility; it holds the BA parameter m.
for num_nodes, avg_degree in BA_info:
    ba_graph = nx.barabasi_albert_graph(n=num_nodes, m=avg_degree)
    nx.write_edgelist(ba_graph, f"./datasets/BA/ba_edgelist_{num_nodes}_{avg_degree}.edges", data=False)

# Optional: visualise the last generated graph.
# nx.draw(ba_graph, with_labels=False, node_color='lightblue', edge_color='gray', node_size=5, font_size=8)
# plt.show()


## Load all network graphs and show info

In [72]:
import os
import networkx as nx
import pandas as pd

directory = "./datasets/"
graph_info = []

# Walk the dataset tree and collect one row of summary statistics for every
# ".edges" file. A file that fails to load or analyse is reported on stdout
# and skipped so that the remaining files are still processed.
for dirpath, _, files in os.walk(directory):
    for filename in files:
        # Guard clause: anything that is not an edge list is ignored.
        if not filename.endswith(".edges"):
            continue

        try:
            # dirpath (not the root directory) gives the correct nested path.
            edge_file = os.path.join(dirpath, filename)
            g = nx.read_edgelist(edge_file, comments="%", nodetype=int)

            n_nodes = g.number_of_nodes()
            n_edges = g.number_of_edges()
            degree_values = [d for _, d in g.degree()]

            # Each edge contributes to the degree of two endpoints.
            if n_nodes > 0:
                mean_degree = 2 * n_edges / n_nodes
            else:
                mean_degree = 0
            highest_degree = max(degree_values, default=0)
            lowest_degree = min(degree_values, default=0)

            # Average clustering is only evaluated for non-empty graphs.
            if n_nodes > 0:
                clustering = nx.average_clustering(g)
            else:
                clustering = 0

            component_count = nx.number_connected_components(g)

            # Largest connected component: node set and induced subgraph.
            giant_nodes = max(nx.connected_components(g), key=len, default=set())
            giant = g.subgraph(giant_nodes)

            # Share (in percent) of nodes / edges inside the largest component.
            if n_nodes > 0:
                giant_node_pct = (len(giant_nodes) / n_nodes) * 100
            else:
                giant_node_pct = 0
            if n_edges > 0:
                giant_edge_pct = (giant.number_of_edges() * 100) / n_edges
            else:
                giant_edge_pct = 0

            if n_nodes > 0:
                graph_density = nx.density(g)
            else:
                graph_density = 0

            # Column names double as the header row of the exported sheet.
            row = {
                "graph G": os.path.splitext(filename)[0],
                "#Nodes n": n_nodes,
                "#Edges e": n_edges,
                "Avg Degree <e>": mean_degree,
                "Max Degree e_max": highest_degree,
                "Min Degree e_min": lowest_degree,
                "Avg Clustering Coefficient c": clustering,
                "#subgraphs s": component_count,
                "Portion Nodes in Largest Component Gn%": giant_node_pct,
                "Portion Edges in Largest Component Ge%": giant_edge_pct,
                "Density d": graph_density,
            }
            graph_info.append(row)
        except Exception as err:
            # Best effort: report the problem together with the file name.
            print(err, filename)

# One row per successfully analysed graph, exported without the index column.
df = pd.DataFrame(graph_info)
df.to_excel("graph_info.xlsx", index=False)
