- **Interactive Network Graph Visualization**: This code brings network data to life through an interactive visualization that's both informative and easy to navigate. Here’s what it offers:

  - **Visual Impact**: Edges and nodes are scaled to reflect their capacities—thicker edges for larger channels and bigger nodes for higher capacities. This visual cue helps users quickly grasp the relative importance of different parts of the network.

  - **Details on Demand**: Just hover over any part of the graph to see detailed statistics about nodes and channels. This on-the-spot data delivery keeps the interface clean while providing all the details you need.

  - **Engaging User Experience**: Dive deep into the network structure through intuitive interactions like zooming, panning, and clicking on elements for more specifics. It’s designed to make the exploration of complex data not only possible but also enjoyable.




In [4]:
import csv
import igraph as ig
import leidenalg as la
import pickle
import colorsys
import random
import math
import numpy as np

import pandas as pd


from pyvis.network import Network

In [None]:
current_date = datetime.datetime.today().strftime('%Y%m%d')


In [6]:
nodes = pd.read_parquet('nodes_chap2.parquet')
edges = pd.read_parquet('edges_chap2.parquet')

In [60]:
print(nodes.columns)
print(edges.columns)
# print(edges.head(1))
print(nodes.dtypes)

Index(['last_update', 'pub_key', 'alias', 'addresses', 'color', 'features',
       'custom_records', 'node_key', 'source', 'Total_Channels',
       'Category_Counts', 'Total_Capacity', 'Formatted_Total_Capacity',
       'Avg_Channel_Size', 'Median_Channel_Size', 'Mode_Channel_Size',
       'Min_Channel_Size', 'Max_Channel_Size', 'Capacity_Rank',
       'Channel_Count_Rank', 'Capacity_Percentile', 'log_chnlcnt'],
      dtype='object')
Index(['channel_id', 'chan_point', 'last_update', 'node1_pub', 'node2_pub',
       'capacity', 'node1_policy', 'node2_policy', 'custom_records',
       'source_key', 'target_key', 'source', 'target', 'Formatted_Capacity',
       'Channel_Size_Tier', 'Channel_Size_Range', 'log_capacity'],
      dtype='object')
last_update                   int64
pub_key                      object
alias                        object
addresses                    object
color                        object
features                     object
custom_records               object

In [34]:
edges['log_capacity'] = np.log(edges['capacity'] + 1)  # Adding 1 to avoid log(0) which is undefined
nodes['log_chnlcnt'] = np.log(nodes['Total_Channels'] + 1)  

In [68]:

def create_graph_from_df(edges_df, nodes_df):
    """
    Create an igraph graph from edge and node dataframes.

    Parameters:
    - edges_df: DataFrame containing the edge data.
    - node_attributes_df: DataFrame containing the node attributes.
    - filter_condition: Optional function to filter edges_df based on a condition.

    Returns:
    - An igraph Graph object with nodes and edges added and attributes set.
    """
    edges_df = edges_df.copy()
    nodes_df = nodes_df.copy()
    
    # Initialize a graph
    g = ig.Graph()

    # Add vertices
    node_ids = pd.concat([edges_df['source'], edges_df['target']]).unique().astype(str)
    g.add_vertices(node_ids)

    # Add edges with attributes
    for _, row in edges_df.iterrows():
        g.add_edge(str(row['source']), str(row['target']),
                   Channel_Size_Tier=row['Channel_Size_Tier'],
                   capacity=row['capacity'],
                   log_capacity=row['log_capacity'],
                   Formatted_Capacity=row['Formatted_Capacity'])

    # Set node attributes from the node_attributes_df
    nodes_df['node_key'] = nodes_df['node_key'].astype(str)

        
    for node in g.vs:
        attrs = nodes_df[nodes_df['node_key'] == node['name']]
        if not attrs.empty:
            node['alias'] = attrs['alias'].values[0]
            node['Total_Channels'] = attrs['Total_Channels'].values[0]
            node['log_chnlcnt'] = attrs['log_chnlcnt'].values[0]
            node['Category_Counts'] = attrs['Category_Counts'].values[0]
            node['Total_Capacity'] = attrs['Total_Capacity'].values[0]
            node['Formatted_Total_Capacity'] = attrs['Formatted_Total_Capacity'].values[0]
            node['Capacity_Rank'] = attrs['Capacity_Rank'].values[0]
            node['Channel_Count_Rank'] = attrs['Channel_Count_Rank'].values[0]
            node['Capacity_Percentile'] = attrs['Capacity_Percentile'].values[0]
            node['pub_key'] = attrs['pub_key'].values[0]
        else:
            # Setting default values if node details are missing
            node['alias'] = "Unknown"
            node['Total_Channels'] = 0
            node['log_chnlcnt'] = 1
            node['Category_Counts'] = "Unknown"
            node['Total_Capacity'] = 0
            node['Formatted_Total_Capacity'] = "0 sats"
            node['Capacity_Rank'] = "Unknown"
            node['Channel_Count_Rank'] = "Unknown"
            node['Capacity_Percentile'] = "Unknown"
            node['pub_key'] = "Unknown"

    return g


In [72]:
# Creating a graph with edges where the Channel_Size_Tier is 'Freeway'
g = create_graph_from_df(edges_df=edges[edges['Channel_Size_Tier'] == 'Freeway'], nodes_df=nodes)

# Creating a graph with edges where the capacity is greater than 1,000,000
# and only including nodes that have more than 4 channels


gall = create_graph_from_df(edges_df=edges[edges['capacity'] > 3000000], 
                            nodes_df=nodes[(nodes['Total_Channels'] > 4) & (nodes['Total_Capacity'] > 5000000)])

# gallnoviz = create_graph_from_df(edges_df=edges,  nodes_df=nodes)


In [94]:
print(f"Number of vertices: {g.vcount()}")
print(f"Number of edges: {g.ecount()}")


print(f"Number of vertices: {gall.vcount()}")
print(f"Number of edges: {gall.ecount()}")

print(f"Number of vertices: {gallnoviz.vcount()}")
print(f"Number of edges: {gallnoviz.ecount()}")

# 3733632067
# node_attributes[node_attributes['Source'] == 3733632067].head(1)

# fedges_df[(fedges_df['Source'] == 2701564267) | (fedges_df['Target'] == 2701564267)].head(3)
# fedges_df.head(3)


Number of vertices: 116
Number of edges: 606
Number of vertices: 3558
Number of edges: 21617
Number of vertices: 13029
Number of edges: 50340


In [54]:

def add_community_attribute(graph):
    """
    Detects communities in the given igraph graph using the Leiden algorithm
    and adds the community membership as a node attribute.

    Parameters:
    - graph: An igraph Graph object.

    Returns:
    - None; modifies the graph object in place by adding a 'community' node attribute.
    """
    # Detect communities using the Leiden algorithm
    partition = la.find_partition(graph, la.ModularityVertexPartition)
    
    # Add community memberships as a node attribute
    graph.vs['community'] = partition.membership

    return graph


g = add_community_attribute(g)
gall = add_community_attribute(gall)
# gallnoviz = add_community_attribute(gallnoviz)

In [56]:
print("Vertex attributes:", g.vertex_attributes())

print("Attributes for the first two nodes:")
for v in g.vs[:1]:  # Adjust slice for more or fewer nodes
    print(f"Node {v.index} attributes:")
    for attribute in g.vertex_attributes():
        print(f"  {attribute}: {v[attribute]}")
    print()  # Adds a newline for better readability
    
print("Edge attributes:", g.edge_attributes())

print("Attributes for the first two edges:")
for e in g.es[:1]:  # Adjust slice for more or fewer edges
    print(f"Edge from {e.source} to {e.target} attributes:")
    for attribute in g.edge_attributes():
        print(f"  {attribute}: {e[attribute]}")
    print()  # Adds a newline for better readability

Vertex attributes: ['name', 'alias', 'Total_Channels', 'log_chnlcnt', 'Category_Counts', 'Total_Capacity', 'Formatted_Total_Capacity', 'Capacity_Rank', 'Channel_Count_Rank', 'Capacity_Percentile', 'pub_key', 'community']
Attributes for the first two nodes:
Node 0 attributes:
  name: 1625321142
  alias: Bitrefill Routing
  Total_Channels: 516
  log_chnlcnt: 6.248042874508429
  Category_Counts: {'Freeway': 3, 'Highway': 97, 'My Way': 416}
  Total_Capacity: 3088719038.0
  Formatted_Total_Capacity: 30.9 bitcoin
  Capacity_Rank: 53.0
  Channel_Count_Rank: 16.0
  Capacity_Percentile: Top 0.5%
  pub_key: 030c3f19d742ca294a55c00376b3b355c3c90d61c6b6b39554dbc7ac19b141c14f
  community: 0

Edge attributes: ['Channel_Size_Tier', 'capacity', 'log_capacity', 'Formatted_Capacity']
Attributes for the first two edges:
Edge from 0 to 12 attributes:
  Channel_Size_Tier: Freeway
  capacity: 200000000
  log_capacity: 19.11382792951231
  Formatted_Capacity: 2.0 bitcoin



In [58]:
def convert_igraph_to_pyvis(igraph_graph):
    net = Network(notebook=False, height="1200px", width="100%", heading='')

    # Add nodes with additional attributes and community-based coloring
    for node in igraph_graph.vs:
        # Construct tooltip text for node
        node_tooltip = (
            f"Alias: {node['alias']}\n"
            f"Chnl Size Dist: {node['Category_Counts']}\n"
            f"Total Node Capacity: {node['Formatted_Total_Capacity']}\n"
            f"Active Channel Count: {node['Total_Channels']}\n"
            f"Capacity Rank: {node['Capacity_Rank']}\n"
            f"Capacity Percentile: {node['Capacity_Percentile']}\n"
            f"Channel_Count_Rank: {node['Channel_Count_Rank']}\n"
            f"Public Key: {node['pub_key']}"
           
        )
        # Adjust size for visibility, size based on log channel count for demonstration
        net.add_node(
            node.index, label=node['alias'], size=node['log_chnlcnt']*9,
            title=node_tooltip, group=node['community']
        )

    # Add edges with weights and tooltips
    for edge in igraph_graph.es:
        # Construct tooltip text for edge
        edge_tooltip = (
            f"Channel Size: {edge['Formatted_Capacity']}\n"
            f"Chnl Size Category: {edge['Channel_Size_Tier']}"
            
        )

        
        net.add_edge(
            edge.source, edge.target, value=edge['log_capacity'],
            title=edge_tooltip
        )

    return net



g_viz = convert_igraph_to_pyvis(g)

# Set physics for better layout in Pyvis
g_viz.set_options("""
{
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -8000,
      "centralGravity": 0.3,
      "springLength": 100,
      "springConstant": 0.01,
      "damping": 0.09,
      "avoidOverlap": 0.1
    },
    "minVelocity": 0.75,
    "solver": "barnesHut"
  }
}
""")

g_viz.save_graph(f"Freewayjuly23.html")


In [86]:
g_all = convert_igraph_to_pyvis(gall)
g_all.set_options("""
{
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -8000,
      "centralGravity": 0.3,
      "springLength": 100,
      "springConstant": 0.01,
      "damping": 0.09,
      "avoidOverlap": 0.1
    },
    "minVelocity": 0.75,
    "solver": "barnesHut"
  }
}
""")

g_all.save_graph(f"ChnlGt3MNode4chnl5Msatsjuly23.html")

In [92]:
g.save("Freewayjuly23.graphml", format="graphml")
gall.save("ChnlGt3MNode4chnl5Msatsjuly23.graphml", format="graphml")
gallnoviz.save("graphall.graphml", format="graphml")


In [None]:
filtered_df = fedges_df[
    (fedges_df['Channel_Size_number'] > 1000000) &
    (fedges_df['capacity_source'] > 10000000) &
    (fedges_df['capacity_target'] > 10000000) &
    (fedges_df['chnl_cap_ctg'] == 'Freeway')
]

# Create a set of valid vertex names (IDs) for quick lookup
valid_vertices = set(g.vs['name'])

edge_ids = []
for source, target in zip(filtered_df['Source'].astype(str), filtered_df['Target'].astype(str)):
    if source in valid_vertices and target in valid_vertices:
        if g.are_adjacent(source, target):
            edge_ids.append(g.get_eid(source, target))

# Create the subgraph using these edge IDs
subgraph = g.subgraph_edges(edge_ids, delete_vertices=False)