In [16]:
# Install required dependencies
!pip install networkx matplotlib pandas numpy scipy


Collecting scipy
  Downloading scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Downloading scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl (38.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.9/38.9 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: scipy
Successfully installed scipy-1.12.0


In [13]:
# Cell 2: Define Analysis Functions

# Import required libraries
import networkx as nx
import pandas as pd
import numpy as np
import os

# Function to calculate network descriptors
def calculate_network_descriptors(G):
    # Convert multigraph to a simple graph by combining parallel edges
    if isinstance(G, nx.MultiGraph):
        G = nx.Graph(G)
    
    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()
    degrees = np.array([degree for node, degree in G.degree()])
    min_degree = degrees.min()
    max_degree = degrees.max()
    avg_degree = degrees.mean()
    avg_clustering = nx.average_clustering(G)
    assortativity = nx.degree_assortativity_coefficient(G)
    
    # Check if the graph is connected before calculating average path length and diameter
    if nx.is_connected(G):
        avg_path_length = nx.average_shortest_path_length(G)
        diameter = nx.diameter(G)
    else:
        avg_path_length = None  # Set to None if the graph is not connected
        diameter = None
    
    return {
        'Number of Nodes': num_nodes,
        'Number of Edges': num_edges,
        'Minimum Degree': min_degree,
        'Maximum Degree': max_degree,
        'Average Degree': round(avg_degree, 4),
        'Average Clustering Coefficient': round(avg_clustering, 4),
        'Assortativity': round(assortativity, 4),
        'Average Path Length': round(avg_path_length, 4) if avg_path_length is not None else None,
        'Diameter': diameter
    }

# Function to read the networks, calculate descriptors, and save the results
def analyze_networks(project_path, results_dir):
    # Create the results directory if it doesn't exist
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    network_descriptors = []

    for root, dirs, files in os.walk(project_path):
        for file in files:
            if file.endswith('.net'):
                network_path = os.path.join(root, file)
                G = nx.read_pajek(network_path)
                G = G.to_undirected()
                G.remove_edges_from(nx.selfloop_edges(G))

                # Calculate network descriptors
                descriptors = calculate_network_descriptors(G)
                descriptors['Network'] = os.path.relpath(network_path, project_path)
                network_descriptors.append(descriptors)

    # Convert the results to a DataFrame and save as a CSV file
    descriptors_df = pd.DataFrame(network_descriptors)
    descriptors_df.to_csv(os.path.join(results_dir, 'network_descriptors.csv'), index=False)

    print(f'Network descriptors have been saved in {results_dir}')

In [14]:
# Cell 3: Set Paths and Run Analysis

# Path to the local project directory
project_path = '/Users/mwr/Downloads/CN_A1_RACCA'

# Directory to save the results
results_dir = '/Users/mwr/Downloads/CN_A1_RACCA_Results'

# Perform the network analysis
main(project_path, results_dir)


Network descriptors and histograms have been saved in /Users/mwr/Downloads/CN_A1_RACCA_Results


In [18]:
# Cell 4: task 2 working with airports_UW.net

# Load the network
network_path = '/Users/mwr/Downloads/CN_A1_RACCA/real/airports_UW.net'
# Read the network
G = nx.read_pajek(network_path)
# Convert the multigraph to a simple graph
G = nx.Graph(G)

# Initialize dictionaries to store the descriptors
# The keys will be the airport codes and the values will be the descriptors
# We'll use the degree, strength, clustering coefficient, betweenness, eigenvector centrality, and PageRank
degree_dict = dict(G.degree())
strength_dict = dict(G.degree(weight='weight'))
clustering_dict = nx.clustering(G)
betweenness_dict = nx.betweenness_centrality(G)
eigenvector_dict = nx.eigenvector_centrality(G)
pagerank_dict = nx.pagerank(G)

# For average and maximum path length, we need to consider each node
avg_path_length_dict = {}
max_path_length_dict = {}

for node in G.nodes:
    # Calculate shortest paths from this node to all others
    path_lengths = nx.single_source_shortest_path_length(G, node)
    # Calculate the average and maximum path lengths
    avg_path_length_dict[node] = sum(path_lengths.values()) / len(path_lengths)
    max_path_length_dict[node] = max(path_lengths.values())

# Create a DataFrame from the dictionaries
nodes_df = pd.DataFrame({
    'Airport': G.nodes(),
    'Degree': pd.Series(degree_dict),
    'Strength': pd.Series(strength_dict),
    'Clustering Coefficient': pd.Series(clustering_dict),
    'Average Path Length': pd.Series(avg_path_length_dict),
    'Maximum Path Length': pd.Series(max_path_length_dict),
    'Betweenness': pd.Series(betweenness_dict),
    'Eigenvector Centrality': pd.Series(eigenvector_dict),
    'PageRank': pd.Series(pagerank_dict)
})

# Format the DataFrame to use 8 decimal places for non-integer descriptors
nodes_df['Clustering Coefficient'] = nodes_df['Clustering Coefficient'].map(lambda x: f'{x:.8f}')
nodes_df['Average Path Length'] = nodes_df['Average Path Length'].map(lambda x: f'{x:.8f}')
nodes_df['Betweenness'] = nodes_df['Betweenness'].map(lambda x: f'{x:.8f}')
nodes_df['Eigenvector Centrality'] = nodes_df['Eigenvector Centrality'].map(lambda x: f'{x:.8f}')
nodes_df['PageRank'] = nodes_df['PageRank'].map(lambda x: f'{x:.8f}')

# Save the DataFrame to a CSV file
nodes_df.to_csv('/Users/mwr/Downloads/CN_A1_RACCA_Results/airports_node_descriptors.csv', index=False)

# Display the DataFrame for the specified airports
specific_airports = ['PAR', 'LON', 'FRA', 'AMS', 'CHI', 'NYC', 'ATL', 'HOU', 
                     'BCN', 'WAW', 'CHC', 'DJE', 'ADA', 'AGU', 'TBO', 'ZVA']

# Filter the DataFrame to include only the specified airports
specific_nodes_df = nodes_df[nodes_df['Airport'].isin(specific_airports)]


# Save the filtered DataFrame to a CSV file
specific_nodes_df.to_csv('/Users/mwr/Downloads/CN_A1_RACCA_Results/specific_airports_node_descriptors.csv', index=False)


In [20]:
# Display the filtered DataFrame
specific_nodes_df

Unnamed: 0,Airport,Degree,Strength,Clustering Coefficient,Average Path Length,Maximum Path Length,Betweenness,Eigenvector Centrality,PageRank
ADA,ADA,7,10704.0,0.71428571,3.63239359,11,1.317e-05,0.01068776,0.00020485
AGU,AGU,7,7678.0,0.76190476,3.6644555,11,5.76e-06,0.00513412,0.00011933
AMS,AMS,192,481335.0,0.14283377,2.73134328,10,0.04049213,0.1714516,0.00538375
ATL,ATL,172,1129605.0,0.1378349,2.91542289,11,0.02489618,0.12207139,0.00860287
BCN,BCN,80,289105.0,0.32848101,3.27307905,11,0.0019323,0.0891462,0.0028165
CHC,CHC,20,64158.5,0.25263158,3.56522941,10,0.00336745,0.00418781,0.00161537
CHI,CHI,184,1329505.0,0.13417676,2.80790492,11,0.0444435,0.13810203,0.01017965
DJE,DJE,20,10198.5,0.7,3.57822001,11,0.00014586,0.03183126,0.00018064
FRA,FRA,237,697513.5,0.11696346,2.68214483,10,0.06557771,0.19554603,0.0077043
HOU,HOU,144,654154.5,0.16336441,2.98313986,11,0.01745704,0.09628632,0.00516273
