In [1]:
import networkx as nx
import numpy as np
import matlab.engine

# Helper function to load and analyze a graph
def analyze_graph(file_path, network_name):
    # Step 1: Load the undirected graph using networkx
    G_nx = nx.read_edgelist(file_path, create_using=nx.Graph(), nodetype=int)

    # Step 2: Check the basic properties of the graph
    num_nodes = G_nx.number_of_nodes()
    num_edges = G_nx.number_of_edges()
    
    # Degree statistics
    degrees = np.array([deg for (node, deg) in G_nx.degree()])
    max_degree = np.max(degrees)
    min_degree = np.min(degrees)
    avg_degree = np.mean(degrees)

    # Degree distribution type (simplified: scale-free or unknown)
    if np.max(degrees) > 10 * np.mean(degrees):  # rough heuristic for scale-free
        degree_dist_type = 'Scale-free'
    else:
        degree_dist_type = 'Unknown'

    # Step 3: Check for connectivity (undirected graph)
    is_connected = nx.is_connected(G_nx)
    if not is_connected:
        diameter = 'N/A - Graph not connected'
    else:
        diameter = 'Good'  # Diameter only makes sense for connected graphs
        

    # Print out the required information
    print(f"--- {network_name} ---")
    print(f"n (Number of nodes): {num_nodes}")
    print(f"m (Number of edges): {num_edges}")
    print(f"min(d) (Minimum degree): {min_degree}")
    print(f"max(d) (Maximum degree): {max_degree}")
    print(f"avg(d) (Average degree): {avg_degree:.2f}")
    print(f"Degree distribution type: {degree_dist_type}")
    print(f"Diameter: {diameter}")
    print()

    # Step 4: Compute the Local Clustering Coefficient (LCC) for each node
    lcc_dict = nx.clustering(G_nx)
    lcc_values = np.array(list(lcc_dict.values()))  # Convert to numpy array

    return degrees, lcc_values

# Helper function to plot distributions using MATLAB
def plot_with_matlab(eng, degree, lcc_values, network_name):
    nbins = 50
    
    # Plot Degree Distribution
    eng.workspace['d'] = degree
    figID = 1
    eng.eval(f"plot_distribution(d, '{network_name} Degree Distribution', {nbins}, {figID})", nargout=0)
    
    # Plot LCC Distribution
    eng.workspace['lcc'] = lcc_values
    figID = 2
    eng.eval(f"plot_distribution(lcc, '{network_name} LCC Distribution', {nbins}, {figID})", nargout=0)


In [2]:

eng = matlab.engine.start_matlab()
eng.eval("addpath(genpath('Mcodes'))", nargout=0)

file_path_amazon = 'data/com-amazon.ungraph/com-amazon.ungraph.txt'
degree_amazon, lcc_amazon = analyze_graph(file_path_amazon, "Amazon")
plot_with_matlab(eng, degree_amazon, lcc_amazon, "Amazon")

--- Amazon ---
n (Number of nodes): 334863
m (Number of edges): 925872
min(d) (Minimum degree): 1
max(d) (Maximum degree): 549
avg(d) (Average degree): 5.53
Degree distribution type: Scale-free
Diameter: Good



In [3]:

file_path_facebook = 'data/facebook/facebook_combined.txt'
degree_facebook, lcc_facebook = analyze_graph(file_path_facebook, "Facebook")
plot_with_matlab(eng, degree_facebook, lcc_facebook, "Facebook")

--- Facebook ---
n (Number of nodes): 4039
m (Number of edges): 88234
min(d) (Minimum degree): 1
max(d) (Maximum degree): 1045
avg(d) (Average degree): 43.69
Degree distribution type: Scale-free
Diameter: Good



In [4]:

file_path_dblp = 'data/COM-DBLP/com-dblp.ungraph.txt'
degree_dblp, lcc_dblp = analyze_graph(file_path_dblp, "DBLP")
plot_with_matlab(eng, degree_dblp, lcc_dblp, "DBLP")



--- DBLP ---
n (Number of nodes): 317080
m (Number of edges): 1049866
min(d) (Minimum degree): 1
max(d) (Maximum degree): 343
avg(d) (Average degree): 6.62
Degree distribution type: Scale-free
Diameter: Good



In [5]:
eng.exit()

In [5]:

# Path to the undirected graph file

# CA-CondMat not connected
# Cit-HepPh not connected
# cora
file_path = 'data/com-amazon.ungraph/com-amazon.ungraph.txt'

# Step 1: Load the undirected graph using networkx
G_nx = nx.read_edgelist(file_path, create_using=nx.Graph(), nodetype=int)

# Step 2: Check the basic properties of the graph
num_nodes = G_nx.number_of_nodes()
num_edges = G_nx.number_of_edges()
print(f"Number of nodes: {num_nodes}")
print(f"Number of edges: {num_edges}")

# Step 3: Check for connectivity (undirected graph)
# Check if the graph is connected
is_connected = nx.is_connected(G_nx)
print(f"Is the graph connected? {is_connected}")

# If the graph is not connected, you can also find the number of connected components
if not is_connected:
    num_components = nx.number_connected_components(G_nx)
    print(f"Number of connected components: {num_components}")

# Step 4: Compute the Local Clustering Coefficient (LCC) for each node
lcc_dict = nx.clustering(G_nx)  # returns a dictionary {node: lcc_value}
lcc_values = np.array(list(lcc_dict.values()))  # Convert to numpy array

# degree vector 
degree = np.array([deg for (node, deg) in G_nx.degree()])

Number of nodes: 334863
Number of edges: 925872
Is the graph connected? True


In [6]:
eng = matlab.engine.start_matlab()
eng.eval("addpath(genpath('Mcodes'))", nargout=0)  # Recursively adds all subdirectories

# Assign Python variables to MATLAB workspace
eng.workspace['d'] = degree
nbins = 50
figID = 1

# Corrected eval statement
eng.eval(f"plot_distribution(d, 'com-amazon Degree Distribution', {nbins}, {figID})", nargout=0)


In [7]:
# Assign Python variables to MATLAB workspace
eng.workspace['lcc'] = lcc_values  # Local Clustering Coefficient vector
figID = 2
# Step 6: Call the MATLAB plotting function for LCC distribution
eng.eval(f"plot_distribution(lcc, 'com-amazon LCC Distribution', {nbins}, {figID})", nargout=0)


In [8]:
# Path to the undirected graph file
file_path = 'data/facebook/facebook_combined.txt'

# Step 1: Load the undirected graph using networkx
G_nx = nx.read_edgelist(file_path, create_using=nx.Graph(), nodetype=int)

# Step 2: Check the basic properties of the graph
num_nodes = G_nx.number_of_nodes()
num_edges = G_nx.number_of_edges()
print(f"Number of nodes: {num_nodes}")
print(f"Number of edges: {num_edges}")

# Step 3: Check for connectivity (undirected graph)
# Check if the graph is connected
is_connected = nx.is_connected(G_nx)
print(f"Is the graph connected? {is_connected}")

# If the graph is not connected, you can also find the number of connected components
if not is_connected:
    num_components = nx.number_connected_components(G_nx)
    print(f"Number of connected components: {num_components}")

# Step 4: Compute the Local Clustering Coefficient (LCC) for each node
lcc_dict = nx.clustering(G_nx)  # returns a dictionary {node: lcc_value}
lcc_values = np.array(list(lcc_dict.values()))  # Convert to numpy array

# degree vector 
degree = np.array([deg for (node, deg) in G_nx.degree()])

Number of nodes: 4039
Number of edges: 88234
Is the graph connected? True


In [9]:
eng.eval("clear all", nargout=0)

# Assign Python variables to MATLAB workspace
eng.workspace['d'] = degree
nbins = 50
figID = 1

# Corrected eval statement
eng.eval(f"plot_distribution(d, 'facebook Degree Distribution', {nbins}, {figID})", nargout=0)



In [10]:
# Assign Python variables to MATLAB workspace
eng.workspace['lcc'] = lcc_values  # Local Clustering Coefficient vector
figID = 2
# Step 6: Call the MATLAB plotting function for LCC distribution
eng.eval(f"plot_distribution(lcc, 'facebook LCC Distribution', {nbins}, {figID})", nargout=0)


In [11]:

# Path to the undirected graph file

# CA-CondMat not connected
# Cit-HepPh not connected
# cora
file_path = 'data/COM-DBLP/com-dblp.ungraph.txt'

# Step 1: Load the undirected graph using networkx
G_nx = nx.read_edgelist(file_path, create_using=nx.Graph(), nodetype=int)

# Step 2: Check the basic properties of the graph
num_nodes = G_nx.number_of_nodes()
num_edges = G_nx.number_of_edges()
print(f"Number of nodes: {num_nodes}")
print(f"Number of edges: {num_edges}")

# Step 3: Check for connectivity (undirected graph)
# Check if the graph is connected
is_connected = nx.is_connected(G_nx)
print(f"Is the graph connected? {is_connected}")

# If the graph is not connected, you can also find the number of connected components
if not is_connected:
    num_components = nx.number_connected_components(G_nx)
    print(f"Number of connected components: {num_components}")

# Step 4: Compute the Local Clustering Coefficient (LCC) for each node
lcc_dict = nx.clustering(G_nx)  # returns a dictionary {node: lcc_value}
lcc_values = np.array(list(lcc_dict.values()))  # Convert to numpy array

# degree vector 
degree = np.array([deg for (node, deg) in G_nx.degree()])

Number of nodes: 317080
Number of edges: 1049866
Is the graph connected? True


In [12]:
eng.eval("clear all", nargout=0)

# Assign Python variables to MATLAB workspace
eng.workspace['d'] = degree
nbins = 50
figID = 1

# Corrected eval statement
eng.eval(f"plot_distribution(d, 'com-dblp Degree Distribution', {nbins}, {figID})", nargout=0)



In [13]:
# Assign Python variables to MATLAB workspace
eng.workspace['lcc'] = lcc_values  # Local Clustering Coefficient vector
figID = 2
# Step 6: Call the MATLAB plotting function for LCC distribution
eng.eval(f"plot_distribution(lcc, 'com-dblp LCC Distribution', {nbins}, {figID})", nargout=0)


In [14]:

eng.exit()