# Elliptic Bitcoin Transaction Network Analysis

In [4]:
# Utility imports
import numpy as np
import pandas as pd
# Graph theory imports
import networkx as nx
# Visualization imports
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Read the three Elliptic CSV files; the features file is read without a header row
class_df = pd.read_csv("elliptic_txs_classes.csv")
edge_df = pd.read_csv("elliptic_txs_edgelist.csv")
feature_df = pd.read_csv("elliptic_txs_features.csv", header=None)

# Name the columns: the first feature column is the transaction ID,
# the remaining ones are numbered feature_1, feature_2, ...
feature_df.columns = ['txID', *(f'feature_{i}' for i in range(1, feature_df.shape[1]))]
class_df.columns = ['txID', 'class']

# Make the join key numeric in both frames
class_df['txID'] = pd.to_numeric(class_df['txID'])
feature_df['txID'] = pd.to_numeric(feature_df['txID'])

# Join labels with features on txID, store the label as a categorical,
# then shuffle the rows reproducibly (fixed random_state) and renumber the index
merged_df = class_df.merge(feature_df, on='txID')
merged_df['class'] = merged_df['class'].astype('category')
merged_df = merged_df.sample(frac=1, random_state=42).reset_index(drop=True)

def get_node_colors(G, merged_df):
    """Assign a display color to every node of ``G`` based on its class.

    Args:
        G: Graph-like object exposing ``nodes()``. Each node ID must be
            convertible with ``int(str(node))``.
        merged_df (pd.DataFrame): Frame with a ``txID`` column (node IDs)
            and a ``class`` column (labels).

    Returns:
        dict: Maps ``str(node)`` to ``'red'`` (class 1, illicit), ``'blue'``
        (class 2, licit) or ``'gray'`` (any other label, or a node that has
        no row in ``merged_df``).

    Raises:
        ValueError: If a node ID cannot be converted to ``int``.
    """
    class_colors = {'1': 'red', '2': 'blue'}  # illicit / licit

    # Build the txID -> class lookup once instead of filtering the whole
    # frame for every node. keep='first' mirrors taking the first matching
    # row when a txID appears more than once.
    unique_rows = merged_df.drop_duplicates(subset='txID', keep='first')
    class_by_tx = dict(zip(unique_rows['txID'].tolist(),
                           unique_rows['class'].tolist()))

    node_colors = {}
    for node in G.nodes():
        node_str = str(node)
        # txID is numeric, so look the node up by its integer value
        node_class = class_by_tx.get(int(node_str))
        # str() lets both '1' and 1 select the same color; a missing node
        # yields None -> 'None' -> gray
        node_colors[node_str] = class_colors.get(str(node_class), 'gray')

    return node_colors

def visualize_network_with_classes(G, merged_df, title='Network Structure', max_nodes=1000):
    """Show an interactive plot of ``G`` with nodes colored by class.

    Args:
        G: networkx graph to draw.
        merged_df (pd.DataFrame): Class information passed on to
            ``get_node_colors``.
        title (str): Figure title.
        max_nodes (int): When ``G`` has more nodes than this, only the
            subgraph induced by the first ``max_nodes`` nodes (in
            ``G.nodes()`` order) is drawn.
    """
    # Cap the drawing size for large graphs
    if len(G.nodes()) > max_nodes:
        G = G.subgraph(list(G.nodes())[:max_nodes])

    # Colors first, then the layout
    node_colors = get_node_colors(G, merged_df)
    pos = nx.spring_layout(G, k=0.5, iterations=50)

    node_ids = list(G.nodes())
    node_x = [pos[n][0] for n in node_ids]
    node_y = [pos[n][1] for n in node_ids]

    # All edges go into one trace; a None entry breaks the line between segments
    edge_x = []
    edge_y = []
    for u, v in G.edges():
        (ux, uy), (vx, vy) = pos[u], pos[v]
        edge_x += [ux, vx, None]
        edge_y += [uy, vy, None]

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y, mode='lines', line_shape='spline',
        opacity=0.3, hoverinfo='none', name='Edges',
        line=dict(color='black', width=0.5),
    )
    node_trace = go.Scatter(
        x=node_x, y=node_y, mode='markers',
        hoverinfo='text',
        hovertext=[f"Node ID: {n}" for n in node_ids],
        marker=dict(
            size=6,
            color=[node_colors.get(str(n), 'gray') for n in node_ids],
            line=dict(width=1, color='black'),
        ),
        name='Nodes',
    )

    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
    fig = go.Figure(data=[edge_trace, node_trace])
    fig.update_layout(
        title=title,
        showlegend=True,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(hidden_axis),
        yaxis=dict(hidden_axis),
    )

    fig.show()

# Build the full undirected graph: one edge per row of the edge list
G = nx.Graph()
G.add_edges_from(edge_df.to_numpy())

visualize_network_with_classes(G, merged_df, title='Network with Class Coloring')

In [5]:
def track_node_transactions(node_id, edge_df, class_df):
    """List every edge that touches ``node_id`` together with the other endpoint.

    Args:
        node_id (int/str): ID of the node to look up.
        edge_df (pd.DataFrame): Edge list with ``txId1`` and ``txId2`` columns.
        class_df (pd.DataFrame): Labels with ``txID`` and ``class`` columns.

    Returns:
        list[dict]: One dict per matching edge, in ``edge_df`` row order, with
        keys ``'counterparty'`` (the other endpoint), ``'counterparty_class'``
        (its label, or ``'Unknown'`` when it has no row in ``class_df``) and
        ``'direction'`` (``'Outgoing'`` when ``node_id`` is in ``txId1``,
        otherwise ``'Incoming'``).
    """
    node_key = str(node_id)

    # Compare as strings on BOTH sides. The ID columns are integers when they
    # come straight from pd.read_csv, and an int column never equals a str
    # key, which made the lookup silently return nothing.
    sources = edge_df['txId1'].astype(str)
    targets = edge_df['txId2'].astype(str)
    is_source = sources == node_key
    involved = is_source | (targets == node_key)

    # Build the label lookup once rather than filtering class_df per edge.
    # setdefault keeps the first label if a txID appears more than once.
    class_by_tx = {}
    for tx_id, label in zip(class_df['txID'], class_df['class']):
        class_by_tx.setdefault(str(tx_id), label)

    transaction_details = []
    matching_rows = zip(
        edge_df.loc[involved, 'txId1'],
        edge_df.loc[involved, 'txId2'],
        is_source[involved],
    )
    for source, target, outgoing in matching_rows:
        counterparty = target if outgoing else source
        transaction_details.append({
            'counterparty': counterparty,
            'counterparty_class': class_by_tx.get(str(counterparty), 'Unknown'),
            'direction': 'Outgoing' if outgoing else 'Incoming',
        })

    return transaction_details


In [8]:
import networkx as nx
import random
import plotly.graph_objects as go
import numpy as np

def extract_directed_subgraph(G, start_node, max_hops=3):
    """
    Extract the breadth-first neighbourhood of a node up to a hop limit.

    Only the edge through which each node is first reached is added, so the
    result is a BFS tree rooted at ``start_node``. For a directed graph both
    outgoing and incoming edges are followed, and every edge keeps the
    direction it has in ``G``.

    Args:
        G (nx.Graph or nx.DiGraph): Input graph
        start_node (int/str): Starting node ID
        max_hops (int): Maximum number of hops from start node

    Returns:
        nx.Graph or nx.DiGraph: New graph of the same class as ``G``
        containing ``start_node`` and the nodes/edges reached within
        ``max_hops``.
    """
    from collections import deque

    # Create a new, empty graph of the same type and seed it with the start node
    subgraph = G.__class__()
    subgraph.add_node(start_node)

    # hasattr(G, 'neighbors') is true for every graph type, and on a DiGraph
    # neighbors() yields successors only, so ask the graph whether it is
    # directed instead.
    directed = G.is_directed()

    visited = {start_node}
    queue = deque([(start_node, 0)])  # popleft() is O(1), unlike list.pop(0)

    while queue:
        current_node, current_hop = queue.popleft()

        # Nodes sitting at the hop limit are kept but not expanded
        if current_hop >= max_hops:
            continue

        # (edge source, edge target, newly reached node)
        if directed:
            steps = [(current_node, nxt, nxt) for nxt in G.successors(current_node)]
            steps += [(prev, current_node, prev) for prev in G.predecessors(current_node)]
        else:
            steps = [(current_node, nbr, nbr) for nbr in G.neighbors(current_node)]

        for source, target, neighbor in steps:
            # FIFO order guarantees the first visit is along a shortest path,
            # so a visited node never needs to be revisited
            if neighbor in visited:
                continue
            subgraph.add_edge(source, target)
            visited.add(neighbor)
            queue.append((neighbor, current_hop + 1))

    return subgraph

def visualize_directed_subgraph(G, start_node, merged_df, get_node_colors_func=None):
    """
    Plot the hop-limited neighbourhood of a node with color-coded, labelled nodes.

    Edges are drawn as plain line segments, one trace per edge.

    Args:
        G (nx.Graph or nx.DiGraph): Input graph
        start_node (int/str): Starting node ID
        merged_df (pd.DataFrame): DataFrame with node class information
        get_node_colors_func (callable, optional): Called as
            ``get_node_colors_func(subgraph, merged_df)``; expected to return
            a dict keyed by ``str(node)``. When omitted every node is gray.
    """
    # Neighbourhood around the start node (default hop limit of the extractor)
    subgraph = extract_directed_subgraph(G, start_node)

    # Resolve node colors, falling back to uniform gray
    if not get_node_colors_func:
        node_colors = dict.fromkeys(map(str, subgraph.nodes()), 'gray')
    else:
        node_colors = get_node_colors_func(subgraph, merged_df)

    # Force-directed layout
    pos = nx.spring_layout(subgraph, k=0.5, iterations=50)

    # One line trace per edge
    edge_traces = [
        go.Scatter(
            x=[pos[u][0], pos[v][0], None],
            y=[pos[u][1], pos[v][1], None],
            mode='lines',
            line=dict(width=1, color='black'),
            hoverinfo='none',
        )
        for u, v in subgraph.edges()
    ]

    # Single trace for all nodes, labelled with their IDs
    node_ids = list(subgraph.nodes())
    node_trace = go.Scatter(
        x=[pos[n][0] for n in node_ids],
        y=[pos[n][1] for n in node_ids],
        mode='markers+text',
        marker=dict(
            size=10,
            color=[node_colors.get(str(n), 'gray') for n in node_ids],
            line=dict(width=1, color='black'),
        ),
        text=[str(n) for n in node_ids],
        textposition='top center',
    )

    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
    fig = go.Figure(data=[*edge_traces, node_trace])
    fig.update_layout(
        title=f'Subgraph from Node {start_node}',
        showlegend=False,
        hovermode='closest',
        xaxis=dict(hidden_axis),
        yaxis=dict(hidden_axis),
    )

    fig.show()

# Utility function to find a random class 1 node
def find_random_class_1_node(class_df):
    """Return the ``txID`` of one randomly chosen row whose ``class`` equals '1'."""
    is_class_1 = class_df['class'] == '1'
    candidates = class_df.loc[is_class_1, 'txID'].tolist()
    return random.choice(candidates)

# Example usage: pick a random transaction labelled class '1' and plot the
# subgraph around it, colored with get_node_colors.
# NOTE: these lines are live code and run when the cell is executed.
random_class_1_node = find_random_class_1_node(class_df)
visualize_directed_subgraph(G, random_class_1_node, merged_df, get_node_colors)

In [6]:
import networkx as nx
import random
import plotly.graph_objects as go
import numpy as np

def extract_directed_subgraph(G, start_node, max_hops=3):
    """
    Extract the subgraph induced by all nodes within a given number of hops
    of a starting node, counting hops in either direction (incoming and outgoing).

    Hop distances are measured on the undirected form of ``G``; the returned
    subgraph is then taken from ``G`` itself, so original edge directions are
    preserved.

    Args:
        G (nx.Graph or nx.DiGraph): Input graph.
        start_node (int/str): Starting node ID.
        max_hops (int): Maximum number of hops (in either direction) from the start node.

    Returns:
        Graph of the same type as ``G``: an independent copy of the induced
        subgraph on the nodes within ``max_hops`` of ``start_node``.
    """
    # to_undirected() without arguments copies the whole graph on every call.
    # A read-only undirected view (or G itself when it is already undirected)
    # is enough for measuring distances.
    undirected = G.to_undirected(as_view=True) if G.is_directed() else G
    hop_distance = nx.single_source_shortest_path_length(
        undirected, start_node, cutoff=max_hops
    )

    # Induce the subgraph on the original graph so edge directions are kept;
    # .copy() detaches the result from G.
    return G.subgraph(list(hop_distance)).copy()

def visualize_directed_subgraph(G, start_node, merged_df, get_node_colors_func=None):
    """
    Plot the hop neighbourhood of a node with color-coded nodes and one arrow per edge.

    Each arrow is drawn from the first to the second endpoint of the edge as
    reported by ``subgraph.edges()``.

    Args:
        G (nx.DiGraph): Input directed graph.
        start_node (int/str): Starting node ID.
        merged_df (pd.DataFrame): DataFrame with node class information.
        get_node_colors_func (callable, optional): Called as
            ``get_node_colors_func(subgraph, merged_df)``; should return a
            dict mapping node IDs (as strings) to color values. When omitted
            every node is gray.
    """
    # Neighbourhood around the start node (default hop limit of the extractor)
    subgraph = extract_directed_subgraph(G, start_node)

    # Resolve node colors, falling back to uniform gray
    if not get_node_colors_func:
        node_colors = dict.fromkeys(map(str, subgraph.nodes()), 'gray')
    else:
        node_colors = get_node_colors_func(subgraph, merged_df)

    # Force-directed layout
    pos = nx.spring_layout(subgraph, k=0.5, iterations=50)

    # Single trace for all nodes, labelled with their IDs
    node_ids = list(subgraph.nodes())
    node_trace = go.Scatter(
        x=[pos[n][0] for n in node_ids],
        y=[pos[n][1] for n in node_ids],
        mode='markers+text',
        marker=dict(
            size=10,
            color=[node_colors.get(str(n), 'gray') for n in node_ids],
            line=dict(width=1, color='black'),
        ),
        text=[str(n) for n in node_ids],
        textposition='top center',
        hoverinfo='text',
    )

    # Edges are rendered as layout annotations: tail (ax, ay) at the first
    # endpoint, head (x, y) at the second, both in data coordinates
    arrow_style = dict(
        xref='x', yref='y', axref='x', ayref='y',
        showarrow=True, arrowhead=3, arrowsize=1, arrowwidth=1,
        arrowcolor='black',
    )
    arrow_annotations = [
        dict(ax=pos[u][0], ay=pos[u][1], x=pos[v][0], y=pos[v][1], **arrow_style)
        for u, v in subgraph.edges()
    ]

    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
    fig = go.Figure(data=[node_trace])
    fig.update_layout(
        title=f'Directed Subgraph from Node {start_node}',
        showlegend=False,
        hovermode='closest',
        annotations=arrow_annotations,
        xaxis=dict(hidden_axis),
        yaxis=dict(hidden_axis),
    )

    fig.show()

# Utility function to find a random class 1 node from a DataFrame
def find_random_class_1_node(class_df):
    """Pick, at random, the ``txID`` of a row in ``class_df`` labelled class '1'."""
    tx_ids = class_df['txID']
    labelled_1 = tx_ids[class_df['class'] == '1']
    return random.choice(labelled_1.tolist())

# Example usage: pick a random transaction labelled class '1' and plot the
# subgraph around it with arrow edges, colored with get_node_colors.
# NOTE: these lines are live code and run when the cell is executed.
random_class_1_node = find_random_class_1_node(class_df)
visualize_directed_subgraph(G, random_class_1_node, merged_df, get_node_colors_func=get_node_colors)