In [1]:
import networkx as nx

In [2]:
# Load the channel network from the GraphML file.
G = nx.read_graphml('bioneer2.graphml')

# Inclusive subscriber-count bounds for the channels kept in the analysis.
MIN_SUBSCRIBERS = 50000
MAX_SUBSCRIBERS = 8318210

# Keep only the channels whose 'subscribercount' lies within the bounds.
# Nodes that do not carry the attribute are left out of the subgraph instead
# of aborting the cell with a KeyError.
G = G.subgraph([
    node
    for node, attrs in G.nodes(data=True)
    if 'subscribercount' in attrs
    and MIN_SUBSCRIBERS <= attrs['subscribercount'] <= MAX_SUBSCRIBERS
])

In [3]:
def calculate_homophily(graph, attribute):
    """Return the share of node-to-neighbor links that agree on `attribute`.

    Every node of `graph` is visited and each entry yielded by
    ``graph.neighbors(node)`` is counted as one link. A link is ignored when
    either endpoint lacks the attribute or stores ``None`` for it.

    Parameters
    ----------
    graph : object that iterates over its nodes and exposes ``nodes[...]``
        and ``neighbors(node)``.
    attribute : key looked up in each node's attribute mapping.

    Returns
    -------
    float or None
        matching links / counted links, or ``None`` when no link was counted.
    """
    matching = 0
    counted = 0

    for source in graph:
        source_value = graph.nodes[source].get(attribute)
        if source_value is None:
            continue  # source has no usable value; none of its links count

        for target in graph.neighbors(source):
            target_value = graph.nodes[target].get(attribute)
            if target_value is None:
                continue
            counted += 1
            if source_value == target_value:
                matching += 1

    if counted > 0:
        return matching / counted
    return None


# Homophily ratios for the 'country' and 'Modularity Class' node attributes
country_homophily, modularity_class_homophily = [
    calculate_homophily(G, attribute_name)
    for attribute_name in ('country', 'Modularity Class')
]

# Show both ratios as the cell output
country_homophily, modularity_class_homophily

(0.28642384105960267, 0.6125827814569537)

In [4]:
# Homophily on the raw 'subscribercount' values: a link only counts as
# matching when both endpoints store exactly the same value.
calculate_homophily(G, attribute='subscribercount')

0.0

In [15]:
def calculate_e_i_index_by_subscriber_count(G, threshold):
    """Compute the E-I index of `G` for a two-group split on 'subscribercount'.

    A node belongs to the "above" group when its 'subscribercount' is
    ``>= threshold`` and to the "below" group otherwise. An edge is internal
    when both endpoints fall in the same group and external when they do not.
    Edges with an endpoint that has no 'subscribercount' attribute are ignored.

    Parameters
    ----------
    G : object exposing ``edges()`` and ``nodes[...]``.
    threshold : value the subscriber counts are compared against.

    Returns
    -------
    float or None
        (external - internal) / (external + internal), or ``None`` when no
        edge was counted.
    """
    within = 0
    between = 0

    for src, dst in G.edges():
        src_attrs = G.nodes[src]
        dst_attrs = G.nodes[dst]
        if 'subscribercount' not in src_attrs or 'subscribercount' not in dst_attrs:
            continue

        src_is_large = src_attrs['subscribercount'] >= threshold
        dst_is_large = dst_attrs['subscribercount'] >= threshold

        if src_is_large != dst_is_large:
            between += 1  # endpoints sit on opposite sides of the threshold
        else:
            within += 1  # endpoints sit on the same side of the threshold

    counted = within + between
    if counted == 0:
        return None  # the index is undefined when no edge was counted
    return (between - within) / counted


# Example cut-off that splits the channels into two subscriber-count groups
subscriber_threshold = 3000000

# E-I index of G for that cut-off
e_i_index = calculate_e_i_index_by_subscriber_count(
    G,
    threshold=subscriber_threshold,
)
print("E-I Index:", e_i_index)


E-I Index: -0.38741721854304634


In [4]:
def calculate_homophily_by_country(G, attribute='country'):
    """Return the fraction of edges whose endpoints share an attribute value.

    Each edge yielded by ``G.edges()`` is counted once. Edges where either
    endpoint does not carry `attribute` are ignored.

    Parameters
    ----------
    G : object exposing ``edges()`` and ``nodes[...]``.
    attribute : str, default 'country'
        Node attribute to compare across each edge. The default keeps the
        original country-based behavior; passing another key reuses the same
        computation for a different attribute.

    Returns
    -------
    float or None
        internal edges / counted edges, or ``None`` when no edge was counted.
    """
    internal_edges = 0
    total_edges = 0

    for u, v in G.edges():
        u_attrs, v_attrs = G.nodes[u], G.nodes[v]
        # Only edges with the attribute present on both endpoints are counted
        if attribute in u_attrs and attribute in v_attrs:
            total_edges += 1
            if u_attrs[attribute] == v_attrs[attribute]:
                internal_edges += 1

    if total_edges == 0:
        return None  # homophily is undefined when no edge was counted
    return internal_edges / total_edges
# Calculate the edge-based homophily of G for the 'country' attribute and print it
country_homophily = calculate_homophily_by_country(G)
print("Homophily based on country:", country_homophily)

Homophily based on country: 0.28642384105960267


In [5]:
def calculate_homophily_by_modularity_class(G, attribute='Modularity Class'):
    """Return the fraction of edges whose endpoints share an attribute value.

    Each edge yielded by ``G.edges()`` is counted once. Edges where either
    endpoint does not carry `attribute` are ignored.

    Parameters
    ----------
    G : object exposing ``edges()`` and ``nodes[...]``.
    attribute : str, default 'Modularity Class'
        Node attribute to compare across each edge. The default keeps the
        original 'Modularity Class' behavior; passing another key reuses the
        same computation for a different attribute.

    Returns
    -------
    float or None
        internal edges / counted edges, or ``None`` when no edge was counted.
    """
    internal_edges = 0
    total_edges = 0

    for u, v in G.edges():
        u_attrs, v_attrs = G.nodes[u], G.nodes[v]
        # Only edges with the attribute present on both endpoints are counted
        if attribute in u_attrs and attribute in v_attrs:
            total_edges += 1
            if u_attrs[attribute] == v_attrs[attribute]:
                internal_edges += 1

    if total_edges == 0:
        return None  # homophily is undefined when no edge was counted
    return internal_edges / total_edges

# Calculate the homophily for the Modularity Class attribute and print it.
# NOTE(review): this passes G.to_directed() while the country computation
# passes G itself — presumably so each undirected edge is seen in both
# directions; confirm whether the conversion is actually needed.
modularity_class_homophily = calculate_homophily_by_modularity_class(G.to_directed())
print("Homophily based on Modularity Class:", modularity_class_homophily)

Homophily based on Modularity Class: 0.6125827814569537


In [6]:
# Get the density of G via nx.density and show it as the cell output
density = nx.density(G)
density

0.012422871246400659