# Shuffling (resampling) functions for the temporal (timestamped) networks (graphs)
There are three conceptually different null models correspodning to these shuffling functions - network, link and node level null models. For each, one aspect of the structure is fixed, while the other aspects are randomized.

For example, we might keep the same interactions, but randomize the times at which they occur.

In [44]:
#Importing libraries
import random
import numpy as np
import pandas as pd
import collections

# Resampling (shuffling) the entire edgelist

In [24]:
def shuffle_timestamps_edgelist(edgelist, random_state):
    """
    Shuffles the timestamps of an edge list.
    
    Parameters:
    - edgelist (pd.DataFrame): The edge list with columns including 't_second' for timestamps.
    - random_state (int): A seed for the random number generator to ensure reproducibility.
    
    Returns:
    - pd.DataFrame: The shuffled edge list.
    """
    edgelist = edgelist.copy()
    edgelist['t_second'] = edgelist['t_second'].sample(frac=1, random_state=random_state + 5000).values
    edgelist.reset_index(inplace=True, drop=True)
    return edgelist

In [25]:
edgelist = pd.DataFrame({'from': [1, 2], 'to': [3, 4], 't_second': [100, 200]})
shuffle_timestamps_edgelist(edgelist, random_state=42)

Unnamed: 0,from,to,t_second
0,1,3,200
1,2,4,100


In [30]:
def rewire_shuffle_timestamps_edgelist(edgelist, random_state):
    """
    Shuffles 'from', 'to' nodes, and timestamps in the edgelist to simulate a rewiring of the edges.
    
    Parameters:
    - edgelist (pd.DataFrame): The edge list with 'from', 'to', and 't_second' columns.
    - random_state (int): Seed for randomness to ensure reproducibility.
    
    Returns:
    - pd.DataFrame: The edge list with shuffled nodes and timestamps.
    
    """
    edgelist = edgelist.copy()
    edgelist['from'] = edgelist['from'].sample(frac=1, random_state=random_state).values
    edgelist['to'] = edgelist['to'].sample(frac=1, random_state=random_state + 2000).values
    edgelist['t_second'] = edgelist['t_second'].sample(frac=1, random_state=random_state + 3000).values
    edgelist.reset_index(inplace=True, drop=True)
    return edgelist


In [29]:
edgelist = pd.DataFrame({'from': [1, 2], 'to': [3, 4], 't_second': [100, 200]})
rewire_shuffle_timestamps_edgelist(edgelist, random_state=42)

Unnamed: 0,from,to,t_second
0,2,4,200
1,1,3,100


# Helper Functions

In [37]:
def self_loops_edgelist(df_edges):
    """
    Identifies self-loops in the edge list.
    
    Parameters:
    - df_edges (pd.DataFrame): The edge list with 'from' and 'to' columns.
    
    Returns:
    - list: A list of pairs representing self-loops.
    
        """
    return [[pair[0], pair[1]] for pair in zip(df_edges["from"], df_edges["to"]) if pair[0] == pair[1]]

In [6]:
df_edges = pd.DataFrame({'from': [1, 2, 3], 'to': [1, 2, 4]})
self_loops = self_loops_edgelist(df_edges)
print("The identified self-loops are",self_loops)

The identified self-loops are [[1, 1], [2, 2]]


In [11]:
def nodes_unique_pairs(df_edges):
    """
    Identifies unique node pairs and shuffles nodes in the edge list.
    
    Parameters:
    - df_edges (pd.DataFrame): The edge list with 'from' and 'to' columns.
    
    Returns:
    - tuple: A tuple containing a list of shuffled nodes and a list of unique node pairs.
    
    """
    link_pairs = list(zip(df_edges["from"], df_edges["to"]))
    nodes = pd.unique(list(df_edges['to']) + list(df_edges['from']))
    np.random.shuffle(nodes)
    unique_pairs = [sorted(pair) for pair in set(tuple(pair) for pair in link_pairs)]
    return nodes, unique_pairs

In [12]:
df_edges = pd.DataFrame({'from': [1, 2, 3], 'to': [3, 1, 2]})
nodes, unique_pairs = nodes_unique_pairs(df_edges)
print('Between the nodes',nodes,'these are the edges unique pairs (edges) that exist',unique_pairs)

Between the nodes [3 1 2] these are the edges unique pairs (edges) that exist [[2, 3], [1, 3], [1, 2]]


# Node-based shuffling (resampling)

In [29]:
def create_node_edgelist(edgelist, node_id):
    """
    Extracts an edge list for a specific node.
    
    Parameters:
    - edgelist (pd.DataFrame): The complete edge list dataframe with 'from' and 'to' columns.
    - node_id: The identifier of the node for which to create an edge list.
    
    Returns:
    - pd.DataFrame: A dataframe containing only the edges involving the specified node.

    """
    edgelist = edgelist.copy()
    node_edgelist = edgelist[(edgelist['from'].astype(str) == str(node_id)) | (edgelist['to'].astype(str) == str(node_id))]
    return node_edgelist.astype(str)


In [30]:
edgelist = pd.DataFrame({'from': [1, 2, 3], 'to': [4, 5, 6], 't_second': [100, 200, 300]})
create_node_edgelist(edgelist, 2)

Unnamed: 0,from,to,t_second
1,2,5,200


In [31]:
def shuffle_node_edgelist(node_edgelist, random_state):
    """
    Shuffles the timestamps of edges involving a specific node.
    
    Parameters:
    - node_edgelist (pd.DataFrame): The edge list of a specific node.
    - random_state (int): Seed for randomness to ensure reproducibility.
    
    Returns:
    - pd.DataFrame: The edge list with shuffled timestamps.
    
    Example:
    >>> node_edgelist = pd.DataFrame({'from': ['2', '2'], 'to': ['3', '4'], 't_second': ['100', '200']})
    >>> shuffle_node_edgelist(node_edgelist, random_state=42)
    """
    node_edgelist = node_edgelist.copy()
    node_edgelist['t_second'] = node_edgelist['t_second'].sample(frac=1, random_state=random_state + 7000).values
    return node_edgelist



In [32]:
node_edgelist = pd.DataFrame({'from': ['2', '2'], 'to': ['3', '4'], 't_second': ['100', '200']})
shuffle_node_edgelist(node_edgelist, random_state=42)


Unnamed: 0,from,to,t_second
0,2,3,200
1,2,4,100


In [33]:
def shuffle_timestamps_at_nodes(df_edges, random_state):
    """
    Shuffles timestamps for all nodes in the edge list.
    
    Parameters:
    - df_edges (pd.DataFrame): The complete edge list.
    - random_state (int): Seed for randomness to ensure reproducibility.
    
    Returns:
    - pd.DataFrame: The edge list with timestamps shuffled at the node level.
    
    Example:
    >>> df_edges = pd.DataFrame({'from': [1, 2, 3], 'to': [3, 1, 2], 't_second': [100, 200, 300]})
    >>> shuffle_timestamps_at_nodes(df_edges, random_state=42)
    """
    df_edges = df_edges.copy()
    nodes, unique_pairs = nodes_unique_pairs(df_edges)
    nodes_shuffled_list_df = []
    for node in nodes:
        node_edgelist = create_node_edgelist(df_edges, node)
        df_edges = df_edges.drop(node_edgelist.index)
        node_edgelist_shuffled = shuffle_node_edgelist(node_edgelist, random_state)
        nodes_shuffled_list_df.append(node_edgelist_shuffled)
    node_edgelist_shuffled = pd.concat(nodes_shuffled_list_df)
    node_edgelist_shuffled.reset_index(inplace=True, drop=True)
    return node_edgelist_shuffled

In [34]:
df_edges = pd.DataFrame({'from': [1, 2, 3], 'to': [3, 1, 2], 't_second': [100, 200, 300]})
shuffle_timestamps_at_nodes(df_edges, random_state=42)

Unnamed: 0,from,to,t_second
0,1,3,300
1,3,2,100
2,2,1,200


In [None]:
def rewire_shuffle_node_edgelist(node_edgelist, node_id, random_state):
    """
    Rewires and shuffles timestamps for edges of a specific node.
    
    Parameters:
    - node_edgelist (pd.DataFrame): Edge list for a specific node.
    - node_id: The node identifier.
    - random_state (int): Seed for randomness to ensure reproducibility.
    
    Returns:
    - pd.DataFrame: The rewire-shuffled edge list for the specified node.
    """
    node_edgelist = node_edgelist.astype(str).copy()
    node_id = str(node_id)
    to_list = list(node_edgelist['to'])
    from_list = list(node_edgelist['from'])
    local_nodes_no_central_node = list(pd.unique(to_list + from_list))
    if node_id in local_nodes_no_central_node:
        local_nodes_no_central_node.remove(node_id)
    np.random.seed(random_state)


    node_edgelist['t_second'] = node_edgelist['t_second'].sample(frac=1, random_state=random_state + 13000).values
    node_edgelist.reset_index(inplace=True, drop=True)
    return node_edgelist

In [28]:
node_edgelist = pd.DataFrame({'from': ['1', '1'], 'to': ['2', '3'], 't_second': ['100', '200']})
rewire_shuffle_node_edgelist(node_edgelist, '1', random_state=42)

Unnamed: 0,from,to,t_second
0,1,2,200
1,1,3,100


In [None]:
def rewire_shuffle_timestamps_at_nodes(df_edges, random_state):
    """
    Applies rewire shuffling to the entire edge list at the node level.
    
    Parameters:
    - df_edges (pd.DataFrame): The complete edge list.
    - random_state (int): Seed for randomness to ensure reproducibility.
    
    Returns:
    - pd.DataFrame: The edge list after rewire shuffling at the node level.
    
    """
    df_edges = df_edges.copy()
    nodes, unique_pairs = nodes_unique_pairs(df_edges)
    nodes_shuffled_list_df = []
    for node in nodes:
        node_edgelist = create_node_edgelist(df_edges, node)
        df_edges = df_edges.drop(node_edgelist.index)
        node_edgelist_shuffled = rewire_shuffle_node_edgelist(node_edgelist, node, random_state)
        nodes_shuffled_list_df.append(node_edgelist_shuffled)
    node_edgelist_shuffled = pd.concat(nodes_shuffled_list_df)
    node_edgelist_shuffled.reset_index(inplace=True, drop=True)
    return node_edgelist_shuffled

In [27]:
df_edges = pd.DataFrame({'from': [1, 2, 3], 'to': [3, 1, 2], 't_second': [100, 200, 300]})
rewire_shuffle_timestamps_at_nodes(df_edges, random_state=42)

Unnamed: 0,from,to,t_second
0,1,3,300
1,3,2,100
2,2,1,200


# Link-based shuffling (resampling)

In [3]:
def create_link_edgelist(edgelist, node_id_1, node_id_2):
    """
    Creates an edge list for edges between two specified nodes.
    
    Parameters:
    - edgelist (pd.DataFrame): The complete edge list with 'from' and 'to' columns.
    - node_id_1: Identifier for the first node.
    - node_id_2: Identifier for the second node.
    
    Returns:
    - pd.DataFrame: A dataframe containing only the edges between the specified nodes.
    """
    edgelist = edgelist.copy()
    link_edgelist = edgelist[((edgelist['from'].astype(str) == str(node_id_1)) & (edgelist['to'].astype(str) == str(node_id_2))) | ((edgelist['from'].astype(str) == str(node_id_2)) & (edgelist['to'].astype(str) == str(node_id_1)))]
    return link_edgelist

In [5]:
edgelist = pd.DataFrame({'from': [1, 2, 3,1], 'to': [4, 5, 6,4], 't_second': [100, 200, 300,400]})
create_link_edgelist(edgelist, 1, 4)

Unnamed: 0,from,to,t_second
0,1,4,100
3,1,4,400


In [7]:
def shuffle_link_edgelist(link_edgelist, random_state):
    """
    Shuffles the timestamps of edges in a link edge list.
    
    Parameters:
    - link_edgelist (pd.DataFrame): The edge list of links between two specific nodes.
    - random_state (int): Seed for randomness to ensure reproducibility.
    
    Returns:
    - pd.DataFrame: The link edge list with shuffled timestamps.
    
    Example:
    >>> link_edgelist = pd.DataFrame({'from': ['1', '4'], 'to': ['4', '1'], 't_second': ['100', '200']})
    >>> shuffle_link_edgelist(link_edgelist, random_state=42)
    """
    link_edgelist = link_edgelist.copy()
    link_edgelist['t_second'] = link_edgelist['t_second'].sample(frac=1, random_state=random_state + 6000).values
    return link_edgelist

In [22]:
link_edgelist = pd.DataFrame({'from': ['1', '4','1'], 'to': ['4', '1','4'], 't_second': ['100', '200','300']})
shuffle_link_edgelist(link_edgelist, random_state=42)

Unnamed: 0,from,to,t_second
0,1,4,100
1,4,1,200
2,1,4,300


In [19]:
def shuffle_timestamps_on_links(df_edges, random_state):
    """
    Shuffles timestamps for all link edges in the edge list.
    
    Parameters:
    - df_edges (pd.DataFrame): The complete edge list.
    - random_state (int): Seed for randomness to ensure reproducibility.
    
    Returns:
    - pd.DataFrame: The edge list with timestamps shuffled for each link.
    
    Example:
    >>> df_edges = pd.DataFrame({'from': [1, 2, 3], 'to': [3, 1, 2], 't_second': [100, 200, 300]})
    >>> shuffle_timestamps_on_links(df_edges, random_state=42)
    """
    df_edges = df_edges.copy()
    nodes, unique_pairs = nodes_unique_pairs(df_edges)
    all_links_list_df = []
    for pair in unique_pairs:
        link_edgelist = create_link_edgelist(df_edges, pair[0], pair[1])
        link_edgelist_shuffled = shuffle_link_edgelist(link_edgelist, random_state)
        all_links_list_df.append(link_edgelist)
    link_edgelist_shuffled = pd.concat(all_links_list_df)
    link_edgelist_shuffled.reset_index(inplace=True, drop=True)
    link_edgelist_shuffled.drop_duplicates(inplace=True)
    return link_edgelist_shuffled


In [20]:
df_edges = pd.DataFrame({'from': [1, 2, 3], 'to': [3, 1, 1], 't_second': [100, 200, 300]})
shuffle_timestamps_on_links(df_edges, random_state=42)

Unnamed: 0,from,to,t_second
0,1,3,100
1,3,1,300
4,2,1,200
