# To run the code, first extract the .zip data in the same directory as this notebook

In [1]:
import matplotlib
matplotlib.use('Agg')
import csv
import matplotlib.pyplot as plt
import matplotlib.path as mpath
import matplotlib.patches as mpatches
import numpy as np
from sys import argv
import networkx as nx


# Creation of co-presence network and dicitonary of ID classes
This is so that I can meet the format requirements defined by Holme in his github:
1. A sorted list of contacts. 
2. Each row has five numbers: time, id-1, id-2, node-type-1, node-type-2. 
3. The node types are 11 class labels 1A, 1B, 2A, etc. and Teachers.


In [5]:
def read_dat_file(file_path):
    """
    Read a .DAT file and return a list of tuples representing edges.
    Each tuple contains three elements: (node_i, node_j, timestamp).
    """
    edges = []
    with open(file_path, 'r') as file:
        for line in file:
            data = line.strip().split()
            if len(data) == 3:
                timestamp, node_i, node_j = data
                edges.append((int(timestamp), int(node_i), int(node_j)))
    return edges

In [89]:
path_copresence = r'contact\tij_LyonSchool.dat'
copresence = read_dat_file(path_copresence)
copresence[:5]

[(31220, 1558, 1567),
 (31220, 1560, 1570),
 (31220, 1567, 1574),
 (31220, 1632, 1818),
 (31220, 1632, 1866)]

In [91]:
# create dictionary mapping each ID to a class from metadata
path_metadata = r'metadata\metadata_LyonSchool.dat'
node_department = []

with open(path_metadata, 'r') as file:
        for line in file:
                id, dep = line.strip().split()
                # id, dep = line
                node_department.append((int(id), dep))
node_department_dict = dict(node_department)

In [94]:
node_department_dict

{1711: 'cpa',
 1752: 'cpa',
 1757: 'cpa',
 1759: 'cpa',
 1760: 'cpa',
 1761: 'cpa',
 1764: 'cpa',
 1766: 'cpa',
 1767: 'cpa',
 1768: 'cpa',
 1770: 'cpa',
 1772: 'cpa',
 1774: 'cpa',
 1775: 'cpa',
 1778: 'cpa',
 1783: 'cpa',
 1787: 'cpa',
 1789: 'cpa',
 1790: 'cpa',
 1792: 'cpa',
 1796: 'cpa',
 1798: 'cpa',
 1799: 'cpa',
 1656: 'cpb',
 1661: 'cpb',
 1663: 'cpb',
 1664: 'cpb',
 1665: 'cpb',
 1666: 'cpb',
 1670: 'cpb',
 1673: 'cpb',
 1674: 'cpb',
 1675: 'cpb',
 1680: 'cpb',
 1681: 'cpb',
 1682: 'cpb',
 1684: 'cpb',
 1687: 'cpb',
 1688: 'cpb',
 1695: 'cpb',
 1696: 'cpb',
 1697: 'cpb',
 1698: 'cpb',
 1765: 'cpb',
 1779: 'cpb',
 1908: 'cpb',
 1912: 'cpb',
 1920: 'cpb',
 1603: 'ce1a',
 1604: 'ce1a',
 1606: 'ce1a',
 1609: 'ce1a',
 1613: 'ce1a',
 1616: 'ce1a',
 1617: 'ce1a',
 1625: 'ce1a',
 1628: 'ce1a',
 1641: 'ce1a',
 1643: 'ce1a',
 1647: 'ce1a',
 1648: 'ce1a',
 1649: 'ce1a',
 1902: 'ce1a',
 1906: 'ce1a',
 1907: 'ce1a',
 1911: 'ce1a',
 1913: 'ce1a',
 1916: 'ce1a',
 1917: 'ce1a',
 1919: 'ce1a'

In [8]:
# augment the copresence network with the department of each node
network_full = [(t, i, j, node_department_dict[i], node_department_dict[j]) for t, i, j in copresence]
# make sure that the network is sorted by time
sorted_network_full = sorted(network_full, key=lambda x: x[0])

In [9]:
# create a dataframe from sorted_network_full
import pandas as pd
from skimpy import skim
df_sorted_network_full = pd.DataFrame(sorted_network_full, columns =['timestamp', 'node_i', 'node_j', 'dep_i', 'dep_j'])
skim(df_sorted_network_full)

# Now the actual modeling of the network as a graph object can commence

In [10]:
# if __name__ == "__main__":
#     global n

#     # if len(argv) != 2:
#     #     print('isage: python3 anime.py [file name]')
#     #     exit()

#     # name = argv[1]

#     # read and construct network
#     ts = read_data(sorted_network_full)

#     g = nx.Graph()

#     # Add nodes to the graph
#     g.add_nodes_from(range(n))

#     # Init layout
#     pos = init_layout(g)

#     # Edge weights
#     ew = {} # in the original code, we use property maps here which allow for direct association of data with edges and vertices
#     thick = {}

#     for u in range(1, n):
#         for v in range(u):
#             if node_class[u] == node_class[v]:
#                 g.add_edge(u, v, weight=group_weight)  # Add edge weight directly

#     # Get node colors

# 	# # edge weights

# 	# #classes = g.new_vertex_property('int')

# 	# for u in range(1,n):
# 	# 	for v in range(u):
# 	# 		if node_class[u] == node_class[v]:
# 	# 			e = g.add_edge(u,v)
# 	# 			ew[e] = group_weight

Event-based Graphs: This method maintains the exact timing of each interaction. Each edge has a timestamp representing when the interaction occurred. Given your goal and the literature, the event-based graph method is most suitable because it retains the complete temporal dynamics of the network. This aligns with Holme’s emphasis on preserving temporal information to understand the network's dynamics fully.



In [11]:
# plot histogram of class labels
# import matplotlib.pyplot as plt
# from collections import Counter



# # Dictionary to store the class for each unique node
# node_class_dict = {}

# # Extract class labels for each node
# for _, node_i, node_j, class_i, class_j in sorted_network_full:
#     node_class_dict[node_i] = class_i
#     node_class_dict[node_j] = class_j

# # Count occurrences of each class label
# class_counts = Counter(node_class_dict.values())

# # Extract labels and counts for plotting
# labels, counts = zip(*class_counts.items())

# # Create the histogram
# plt.figure(figsize=(10, 6))
# plt.bar(labels, counts, color='skyblue')

# # Add titles and labels
# plt.title('Distribution of Class Labels')
# plt.xlabel('Class Label')
# plt.ylabel('Number of Nodes')
# plt.xticks(rotation=45)  # Rotate x-axis labels for better readability

# # Save the plot as an image file
# plt.tight_layout()
# plt.savefig('class_distribution_histogram.png')

# # If you still want to try displaying the plot (e.g., in a Jupyter notebook), use this line
# # plt.show()


In [12]:
import networkx as nx
import matplotlib.pyplot as plt

# # Function to read the .DAT files
# def read_dat_file(file_path):
#     edges = []
#     with open(file_path, 'r') as file:
#         for line in file:
#             data = line.strip().split()
#             if len(data) == 3:
#                 timestamp, node_i, node_j = data
#                 edges.append((int(timestamp), int(node_i), int(node_j)))
#     return edges

# # Load the co-presence data
# path_copresence = 'path_to/tij_LyonSchool.dat'
# copresence = read_dat_file(path_copresence)

# # Load metadata
# path_metadata = 'path_to/metadata_LyonSchool.dat'
# node_department = []
# with open(path_metadata, 'r') as file:
#     for line in file:
#         id, dep = line.strip().split()
#         node_department.append((int(id), dep))
# node_department_dict = dict(node_department)

# # Augment network with department data
# network_full = [(t, i, j, node_department_dict[i], node_department_dict[j]) for t, i, j in copresence]

# # Sort the network by time
# sorted_network_full = sorted(network_full, key=lambda x: x[0])

import heapq
# Create a temporal graph using networkx
G_nx = nx.DiGraph()

# Add nodes with attributes
for node_id, department in node_department:
    G_nx.add_node(node_id, department=department)

# Add edges with timestamps as attributes
for t, i, j, dep_i, dep_j in sorted_network_full:
    G_nx.add_edge(i, j, timestamp=t, department_i=dep_i, department_j=dep_j)

def temporal_shortest_path(G, source, target):
    heap = [(0, source)]
    distances = {node: float('inf') for node in G.nodes}
    distances[source] = 0

    while heap:
        curr_time, u = heapq.heappop(heap)
        if u == target:
            return curr_time
        
        for v in G.neighbors(u):
            edge_time = G[u][v]['timestamp']
            if edge_time >= curr_time and edge_time < distances[v]:
                distances[v] = edge_time
                heapq.heappush(heap, (edge_time, v))
    
    return float('inf')  # No path found

source, target = 1558, 1866
shortest_time = temporal_shortest_path(G_nx, source, target)
print(f"Temporal shortest path time from {source} to {target}: {shortest_time}")


Temporal shortest path time from 1558 to 1866: 39160


# Details with PathPy

In [13]:
import pathpy as pp
import matplotlib.pyplot as plt

# Create a temporal network in pathpy
temporal_network = pp.TemporalNetwork()

# Add edges to the temporal network
for timestamp, node_i, node_j, class_i, class_j in sorted_network_full:
    temporal_network.add_edge(node_i, node_j, timestamp)

# Print basic information about the temporal network
print(temporal_network)

# # Visualize the temporal network
# pp.visualisation.plot(temporal_network, ts_per_frame=1000)
# plt.show()


Nodes:			242
Time-stamped links:	125773
Links/Nodes:		519.7231404958678
Observation period:	[31220, 148120]
Observation length:	 116900 
Time stamps:		 3100 
Avg. inter-event dt:	 37.72184575669571
Min/Max inter-event dt:	 20/54940


In [14]:
df_sorted_network_full

Unnamed: 0,timestamp,node_i,node_j,dep_i,dep_j
0,31220,1558,1567,ce2b,ce2b
1,31220,1560,1570,ce2b,ce2b
2,31220,1567,1574,ce2b,ce2b
3,31220,1632,1818,cm1b,cm1b
4,31220,1632,1866,cm1b,cm1b
...,...,...,...,...,...
125768,148120,1752,1783,cpa,cpa
125769,148120,1752,1789,cpa,cpa
125770,148120,1775,1798,cpa,cpa
125771,148120,1783,1796,cpa,cpa


# Testing TENETO degree and betweenness centrality
https://teneto.readthedocs.io/en/latest/teneto.networkmeasures.html#teneto-networkmeasures

In [15]:
from teneto.networkmeasures import temporal_degree_centrality, temporal_betweenness_centrality

## First create a teneto graph object
For this I had to go through some more preprocessing steps, as teneto cannot handle large integers. I had to create dictionaries, mapping each node and timestamp to a low integer value (their index in a unique array), from this I can finally create a graph object.

In [16]:
import teneto

# df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
df_tnet = df_sorted_network_full[['node_i', 'node_j', 'timestamp']]
# Create a Teneto temporal network object
tnet = teneto.TemporalNetwork()
# rename columns to i, j, t
df_tnet.columns = ['i', 'j', 't']
df_tnet

Unnamed: 0,i,j,t
0,1558,1567,31220
1,1560,1570,31220
2,1567,1574,31220
3,1632,1818,31220
4,1632,1866,31220
...,...,...,...
125768,1752,1783,148120
125769,1752,1789,148120
125770,1775,1798,148120
125771,1783,1796,148120


In [17]:
unique_i = df_tnet['i'].unique()
unique_j = df_tnet['j'].unique()

# Concatenate and convert to list
unique_values = list(set(np.concatenate((unique_i, unique_j))))
unique_values_dict = {value: index for index, value in enumerate(unique_values)}


In [18]:
unique_t = df_tnet['t'].unique()
unique_times_dict = {value: index for index, value in enumerate(unique_t)}

In [19]:
# for each entry in column i and column j in df_tnet, replace the value with the corresponding value in node_dict
df_tnet_copy = df_tnet.copy()
df_tnet_copy['i'] = df_tnet['i'].apply(lambda x: unique_values_dict[x])
df_tnet_copy['j'] = df_tnet['j'].apply(lambda x: unique_values_dict[x])
df_tnet_copy['t'] = df_tnet['t'].apply(lambda x: unique_times_dict[x])
df_tnet_copy

Unnamed: 0,i,j,t
0,9,14,0
1,10,15,0
2,14,17,0
3,36,141,0
4,36,165,0
...,...,...,...
125768,103,123,3099
125769,103,125,3099
125770,118,131,3099
125771,123,129,3099


### I then copied the values to a list of edges, since this was handled better by the teneto library

In [20]:
# Create list of edges
edges = df_tnet_copy[['i', 'j', 't']].values.tolist()
edges

[[9, 14, 0],
 [10, 15, 0],
 [14, 17, 0],
 [36, 141, 0],
 [36, 165, 0],
 [53, 66, 0],
 [142, 150, 0],
 [142, 165, 0],
 [9, 14, 1],
 [14, 17, 1],
 [36, 141, 1],
 [36, 165, 1],
 [53, 66, 1],
 [94, 143, 1],
 [139, 145, 1],
 [142, 150, 1],
 [9, 13, 2],
 [9, 14, 2],
 [10, 15, 2],
 [13, 14, 2],
 [13, 17, 2],
 [14, 17, 2],
 [36, 165, 2],
 [53, 66, 2],
 [94, 143, 2],
 [114, 123, 2],
 [135, 137, 2],
 [135, 147, 2],
 [229, 2, 3],
 [9, 13, 3],
 [9, 14, 3],
 [13, 14, 3],
 [13, 17, 3],
 [14, 17, 3],
 [36, 141, 3],
 [36, 142, 3],
 [36, 165, 3],
 [53, 66, 3],
 [105, 107, 3],
 [135, 137, 3],
 [210, 232, 4],
 [9, 14, 4],
 [14, 17, 4],
 [31, 38, 4],
 [36, 165, 4],
 [53, 66, 4],
 [105, 108, 4],
 [139, 145, 4],
 [206, 225, 5],
 [206, 234, 5],
 [213, 234, 5],
 [225, 234, 5],
 [228, 236, 5],
 [229, 1, 5],
 [31, 38, 5],
 [39, 41, 5],
 [53, 66, 5],
 [103, 119, 5],
 [108, 123, 5],
 [112, 115, 5],
 [113, 124, 5],
 [117, 119, 5],
 [206, 216, 6],
 [206, 220, 6],
 [206, 225, 6],
 [208, 213, 6],
 [208, 228, 6],
 [21

In [21]:
tnet = teneto.TemporalNetwork(from_edgelist=edges)
tnet

<teneto.classes.network.TemporalNetwork at 0x1ebf56bfd50>

# check if I can do betweenness using teneto!

In [22]:
# temporal_betweenness_centrality(tnet, calc='overtime')

## Now I could finally compute the degree centrality. It gives me an array that contains the degree centrality for each node in my transformed data. Hence, the centrality on index 2 would correspond to the centrality of the node in  unique_values_dict that has as it's value 2

In [23]:
temp_deg_centrality = temporal_degree_centrality(tnet, axis=0, calc='overtime', communities=None, decay=0, ignorediagonal=True)

In [24]:
unique_values_dict.items()

dict_items([(1538, 0), (1539, 1), (1545, 2), (1546, 3), (1548, 4), (1549, 5), (1551, 6), (1552, 7), (1555, 8), (1558, 9), (1560, 10), (1562, 11), (1563, 12), (1564, 13), (1567, 14), (1570, 15), (1572, 16), (1574, 17), (1578, 18), (1579, 19), (1580, 20), (1585, 21), (1592, 22), (1594, 23), (1601, 24), (1603, 25), (1604, 26), (1606, 27), (1609, 28), (1613, 29), (1616, 30), (1617, 31), (1618, 32), (1625, 33), (1628, 34), (1630, 35), (1632, 36), (1637, 37), (1641, 38), (1643, 39), (1647, 40), (1648, 41), (1649, 42), (1650, 43), (1653, 44), (1656, 45), (1661, 46), (1663, 47), (1664, 48), (1665, 49), (1666, 50), (1668, 51), (1670, 52), (1673, 53), (1674, 54), (1675, 55), (1680, 56), (1681, 57), (1682, 58), (1684, 59), (1685, 60), (1687, 61), (1688, 62), (1695, 63), (1696, 64), (1697, 65), (1698, 66), (1700, 67), (1702, 68), (1704, 69), (1705, 70), (1706, 71), (1707, 72), (1708, 73), (1709, 74), (1710, 75), (1711, 76), (1712, 77), (1713, 78), (1714, 79), (1715, 80), (1718, 81), (1719, 82), (1

In [25]:
# create a dict with the node as key and centrality as value
# temp_deg_centrality_dict = {node: centrality for node, centrality in enumerate(temp_deg_centrality)}
temp_deg_centrality_dict = {}
for key, value in unique_values_dict.items():
    temp_deg_centrality_dict[key] = temp_deg_centrality[value]

In [26]:
temp_deg_centrality_dict

{1538: 239.0,
 1539: 438.0,
 1545: 754.0,
 1546: 419.0,
 1548: 374.0,
 1549: 422.0,
 1551: 1412.0,
 1552: 1607.0,
 1555: 1173.0,
 1558: 1891.0,
 1560: 1254.0,
 1562: 741.0,
 1563: 248.0,
 1564: 650.0,
 1567: 784.0,
 1570: 430.0,
 1572: 707.0,
 1574: 1013.0,
 1578: 229.0,
 1579: 1162.0,
 1580: 539.0,
 1585: 91.0,
 1592: 79.0,
 1594: 763.0,
 1601: 254.0,
 1603: 811.0,
 1604: 733.0,
 1606: 1235.0,
 1609: 384.0,
 1613: 2121.0,
 1616: 296.0,
 1617: 1358.0,
 1618: 259.0,
 1625: 1605.0,
 1628: 1049.0,
 1630: 234.0,
 1632: 381.0,
 1637: 40.0,
 1641: 501.0,
 1643: 479.0,
 1647: 464.0,
 1648: 734.0,
 1649: 344.0,
 1650: 681.0,
 1653: 154.0,
 1656: 989.0,
 1661: 1277.0,
 1663: 2005.0,
 1664: 1225.0,
 1665: 1755.0,
 1666: 1708.0,
 1668: 81.0,
 1670: 530.0,
 1673: 1101.0,
 1674: 821.0,
 1675: 1343.0,
 1680: 722.0,
 1681: 870.0,
 1682: 603.0,
 1684: 1049.0,
 1685: 312.0,
 1687: 297.0,
 1688: 900.0,
 1695: 834.0,
 1696: 361.0,
 1697: 1531.0,
 1698: 817.0,
 1700: 580.0,
 1702: 1385.0,
 1704: 1088.0,
 

### In conclusion: Teneto isn't handling my graph as undirected or smth. Fuck that shit


# Now trying with the Temporal_Metrics file
for this I need to use the NetworkX objects, which I define using 

In [27]:
G = nx.MultiGraph()
for row in df_sorted_network_full.iterrows():
    G.add_edge(row[1]['node_i'], row[1]['node_j'], time=row[1]['timestamp'], dep_i=row[1]['dep_i'], dep_j=row[1]['dep_j'])
list(G.edges(data=True))

[(1558, 1567, {'time': 31220, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31240, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31260, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31280, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31300, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31340, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31560, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31580, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31600, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31660, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31700, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31880, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31920, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31940, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 32000, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 15

## Define a smaller graph using only the first 8h of the dataset, for betweenness centrality sake

In [29]:
df_sorted_network_full

Unnamed: 0,timestamp,node_i,node_j,dep_i,dep_j
0,31220,1558,1567,ce2b,ce2b
1,31220,1560,1570,ce2b,ce2b
2,31220,1567,1574,ce2b,ce2b
3,31220,1632,1818,cm1b,cm1b
4,31220,1632,1866,cm1b,cm1b
...,...,...,...,...,...
125768,148120,1752,1783,cpa,cpa
125769,148120,1752,1789,cpa,cpa
125770,148120,1775,1798,cpa,cpa
125771,148120,1783,1796,cpa,cpa


In [30]:
# create a dataframe from df_sorted_network_full with only the rows where timestamp is larger than 34820 and smaller than 38420
df_sorted_network_full_FIRST_hour = df_sorted_network_full[(df_sorted_network_full['timestamp'] > 31220) & (df_sorted_network_full['timestamp'] < 34820)]


len(df_sorted_network_full_FIRST_hour), len(df_sorted_network_full)

(4298, 125773)

In [31]:
G_firstHOUR = nx.MultiGraph()
for row in df_sorted_network_full_FIRST_hour.iterrows():
    G_firstHOUR.add_edge(row[1]['node_i'], row[1]['node_j'], time=row[1]['timestamp'], dep_i=row[1]['dep_i'], dep_j=row[1]['dep_j'])
list(G_firstHOUR.edges(data=True))

[(1558, 1567, {'time': 31240, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31260, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31280, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31300, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31340, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31560, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31580, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31600, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31660, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31700, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31880, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31920, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 31940, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 32000, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 1567, {'time': 32060, 'dep_i': 'ce2b', 'dep_j': 'ce2b'}),
 (1558, 15

In [32]:
len(G_firstHOUR.edges(data=True))

4298

In [33]:
# Faris code didn't work properly, so I did this:
from itertools import combinations
from collections import defaultdict, deque
import networkx as nx
from pandas import Timestamp
# closeness centrality made by chat
def temporal_closeness_centrality(G):
    closeness = {}
    nodes = list(G.nodes())
    
    for s in nodes:
        distance = {node: float('inf') for node in nodes}
        distance[s] = 0
        Q = deque([(s, 0)])  # (node, current_time)
        
        while Q:
            current_node, current_time = Q.popleft()
            neighbors = G.successors(current_node) if G.is_directed() else G.neighbors(current_node)
            
            for neighbor in neighbors:
                # Iterate over all edges between current_node and neighbor
                for key in G[current_node][neighbor]:
                    edge_time = G[current_node][neighbor][key]['time'] # take the time of the edge
                    if isinstance(edge_time, Timestamp):
                        print('edge_time is a timestamp')
                        edge_time = edge_time.timestamp()  # Convert Timestamp to seconds
                    # if edge is after current time but before any distance to neighbor previously recorded:
                    if current_time <= edge_time < distance[neighbor]: 
                        distance[neighbor] = edge_time
                        Q.append((neighbor, edge_time))
        
        total_distance = sum([d for d in distance.values() if d != float('inf')])
        reachable_nodes = len([d for d in distance.values() if d != float('inf')]) - 1
        # closeness[s] = total_distance / reachable_nodes if reachable_nodes > 0 else 0
        closeness[s] = (1/ (total_distance / reachable_nodes)) if reachable_nodes > 0 else 0
    
    return closeness

In [34]:
import Temporal_Metrics as tm
import importlib
importlib.reload(tm)


<module 'Temporal_Metrics' from 'c:\\Users\\20214658\\Desktop\\Uni\\THESIS\\Code\\BEP-Temporal-Networks\\infectious_socio_patterns\\Temporal_Metrics.py'>

## Calculate metrics for first hour

In [35]:
import Temporal_Metrics as tm
import importlib
importlib.reload(tm)

temp_degree_firstHOUR = tm.temporal_degree(G_firstHOUR)
# temp_closeness = tm.temporal_closeness(G) # give inf values for some nodes and isn't the reciprocal
temp_closeness_firstHOUR = tm.temporal_closeness(G_firstHOUR)

betweenness_firstHOUR = tm.temporal_betweenness(G_firstHOUR)


## After caluclating the metrics, put them in a dataframe so I can also group by class and get the statistics of metrics per class as well as per node

In [36]:
import pandas as pd

# Create DataFrame from temp_degree dictionary
df_metrics = pd.DataFrame(list(temp_degree_firstHOUR.items()), columns=['node_id', 'temp_degree'])

# Add new column that maps node_id to corresponding value in node_department_dict
df_metrics['closeness'] = df_metrics['node_id'].map(temp_closeness_firstHOUR)
df_metrics['betweenness'] = df_metrics['node_id'].map(betweenness_firstHOUR)
df_metrics['department'] = df_metrics['node_id'].map(node_department_dict)
df_metrics

Unnamed: 0,node_id,temp_degree,closeness,betweenness,department
0,1558,100,0.000003,0.000168,ce2b
1,1567,96,0.000003,0.000626,ce2b
2,1564,109,0.000003,0.000088,ce2b
3,1579,19,0.000003,0.000021,ce2b
4,1574,121,0.000003,0.000345,ce2b
...,...,...,...,...,...
223,1749,26,0.000003,0.000240,ce1b
224,1883,15,0.000003,0.000152,ce1b
225,1855,23,0.000003,0.000280,ce1b
226,1861,38,0.000003,0.000000,ce1b


### Descriptive statistics of the network metrics

In [37]:
descriptive_stats = df_metrics.describe()
print(descriptive_stats)

           node_id  temp_degree     closeness  betweenness
count   228.000000   228.000000  2.280000e+02   228.000000
mean   1684.508772    37.701754  4.073794e-06     0.000858
std     145.273932    28.212513  2.078941e-06     0.001907
min    1426.000000     1.000000  1.271000e-07     0.000000
25%    1557.250000    16.000000  2.822196e-06     0.000107
50%    1703.000000    33.000000  3.054975e-06     0.000318
75%    1801.250000    51.250000  4.957888e-06     0.000737
max    1922.000000   184.000000  8.429616e-06     0.016184


## put metrics of first hour into df

In [39]:
import pandas as pd

# Create DataFrame from temp_degree dictionary
df_metrics_H1 = pd.DataFrame(list(temp_degree_firstHOUR.items()), columns=['node_id', 'temp_degree'])

# Add new column that maps node_id to corresponding value in node_department_dict
df_metrics_H1['closeness'] = df_metrics_H1['node_id'].map(temp_closeness_firstHOUR)
df_metrics_H1['betweenness'] = df_metrics_H1['node_id'].map(betweenness_firstHOUR)
df_metrics_H1['department'] = df_metrics_H1['node_id'].map(node_department_dict)
df_metrics_H1

Unnamed: 0,node_id,temp_degree,closeness,betweenness,department
0,1558,100,0.000003,0.000168,ce2b
1,1567,96,0.000003,0.000626,ce2b
2,1564,109,0.000003,0.000088,ce2b
3,1579,19,0.000003,0.000021,ce2b
4,1574,121,0.000003,0.000345,ce2b
...,...,...,...,...,...
223,1749,26,0.000003,0.000240,ce1b
224,1883,15,0.000003,0.000152,ce1b
225,1855,23,0.000003,0.000280,ce1b
226,1861,38,0.000003,0.000000,ce1b


In [40]:
descriptive_stats = df_metrics_H1[['temp_degree', 'closeness', 'betweenness']].describe().loc[['mean', 'std', 'min', '25%', '50%', '75%', 'max']]

# Display as LaTeX table
print(descriptive_stats.to_latex())

\begin{tabular}{lrrr}
\toprule
 & temp_degree & closeness & betweenness \\
\midrule
mean & 37.701754 & 0.000004 & 0.000858 \\
std & 28.212513 & 0.000002 & 0.001907 \\
min & 1.000000 & 0.000000 & 0.000000 \\
25% & 16.000000 & 0.000003 & 0.000107 \\
50% & 33.000000 & 0.000003 & 0.000318 \\
75% & 51.250000 & 0.000005 & 0.000737 \\
max & 184.000000 & 0.000008 & 0.016184 \\
\bottomrule
\end{tabular}



In [41]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
avg_degree_by_department = df_metrics_H1.groupby('department')['temp_degree'].mean().reset_index()

# Plotting the bar plot for average closeness by department
plt.figure(figsize=(12, 6))
sns.barplot(data=avg_degree_by_department, x='department', y='temp_degree', palette='pastel')
plt.title('Average Temporal Degree Centrality by Department')
plt.xlabel('Department')
plt.ylabel('Average Temporal Degree Centrality')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
plt.savefig('LyonSchool_degree_H1.png')

  plt.show()


In [42]:
avg_closeness_by_department = df_metrics_H1.groupby('department')['closeness'].mean().reset_index()

# Plotting the bar plot for average closeness by department
plt.figure(figsize=(12, 6))
sns.barplot(data=avg_closeness_by_department, x='department', y='closeness', palette='pastel')
plt.title('Average Temporal Closeness Centrality by Department')
plt.xlabel('Department')
plt.ylabel('Average Temporal Closeness Centrality')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
plt.savefig('LyonSchool_closeness_H1.png')

  plt.show()


In [43]:
avg_betweenness_by_department = df_metrics_H1.groupby('department')['betweenness'].mean().reset_index()

# Plotting the bar plot for average closeness by department
plt.figure(figsize=(12, 6))
sns.barplot(data=avg_betweenness_by_department, x='department', y='betweenness', palette='pastel')
plt.title('Average Temporal Betweenness Centrality by Department')
plt.xlabel('Department')
plt.ylabel('Average Temporal Betweenness Centrality')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
plt.savefig('LyonSchool_betweenness_H1.png')

  plt.show()


In [44]:
import matplotlib.pyplot as plt
import seaborn as sns

# Group by department and calculate mean metrics
department_metrics = df_metrics.groupby('department').mean()

# Plotting average Temporal Degree Centrality by department
plt.figure(figsize=(12, 8))
sns.barplot(x=department_metrics.index, y=department_metrics['temp_degree'])
plt.title('Average Temporal Degree Centrality by Department', fontsize=18)
plt.xlabel('Department', fontsize=14)
plt.ylabel('Average Temporal Degree Centrality', fontsize=14)
plt.xticks(rotation=45)
plt.show()


  plt.show()


In [45]:
# Plotting average Betweenness by department
plt.figure(figsize=(12, 8))
sns.barplot(x=department_metrics.index, y=department_metrics['betweenness'])
plt.title('Average Temporal Betweenness Centrality by Department', fontsize=18)
plt.xlabel('Department', fontsize=14)
plt.ylabel('Average Temporal Betweenness Centrality', fontsize=14)
plt.xticks(rotation=45)
plt.show()


  plt.show()


In [46]:
# Plotting average closeness by department
plt.figure(figsize=(12, 8))
sns.barplot(x=department_metrics.index, y=department_metrics['closeness'])
plt.title('Average Temporal Closeness Centrality by Department', fontsize=18)
plt.xlabel('Department', fontsize=14)
plt.ylabel('Average Temporal Closeness Centrality', fontsize=14)
plt.xticks(rotation=45)
plt.show()


  plt.show()


## Plotting the distributions of the measures

In [50]:
# plot a histogram showing the distribution of temporal degrees in temp_degree
import matplotlib.pyplot as plt
matplotlib.use('TkAgg')  # or 'Qt5Agg', 'nbAgg', etc.


# Extract the temporal degrees from the dictionary
degrees = list(temp_degree_firstHOUR.values())

# Plot the histogram
plt.hist(degrees, bins=len(set(temp_degree_firstHOUR.values())), edgecolor='black')

# Set labels and title
plt.xlabel('Temporal Degree')
plt.ylabel('Frequency')
plt.title('Distribution of Temporal Degrees LyonSchool network (First Hour)')

# Display the histogram
plt.show()


# Make visualizations of the first hour by getting snapshots for each 15 minute time-step

In [51]:
# create a dataframe from df_sorted_network_full with only the rows where timestamp is larger than 34820 and smaller than 38420
df_sorted_network_full_15 = df_sorted_network_full[(df_sorted_network_full['timestamp'] > 31220) & (df_sorted_network_full['timestamp'] < 32120)]
df_sorted_network_full_30 = df_sorted_network_full[(df_sorted_network_full['timestamp'] > 32120) & (df_sorted_network_full['timestamp'] < 33020)]
df_sorted_network_full_45 = df_sorted_network_full[(df_sorted_network_full['timestamp'] > 33020) & (df_sorted_network_full['timestamp'] < 33920)]
df_sorted_network_full_60 = df_sorted_network_full[(df_sorted_network_full['timestamp'] > 33920) & (df_sorted_network_full['timestamp'] < 34820)]


print(len(df_sorted_network_full_15), len(df_sorted_network_full_30), len(df_sorted_network_full_45), len(df_sorted_network_full_60), len(df_sorted_network_full))

816 973 1070 1353 125773


In [52]:
# make a nx.MultiGraph for each time period
G_15 = nx.MultiGraph()
for row in df_sorted_network_full_15.iterrows():
    G_15.add_edge(row[1]['node_i'], row[1]['node_j'], time=row[1]['timestamp'], dep_i=row[1]['dep_i'], dep_j=row[1]['dep_j'])

G_30 = nx.MultiGraph()
for row in df_sorted_network_full_30.iterrows():
    G_30.add_edge(row[1]['node_i'], row[1]['node_j'], time=row[1]['timestamp'], dep_i=row[1]['dep_i'], dep_j=row[1]['dep_j'])

G_45 = nx.MultiGraph()
for row in df_sorted_network_full_45.iterrows():
    G_45.add_edge(row[1]['node_i'], row[1]['node_j'], time=row[1]['timestamp'], dep_i=row[1]['dep_i'], dep_j=row[1]['dep_j'])

G_60 = nx.MultiGraph()
for row in df_sorted_network_full_60.iterrows():
    G_60.add_edge(row[1]['node_i'], row[1]['node_j'], time=row[1]['timestamp'], dep_i=row[1]['dep_i'], dep_j=row[1]['dep_j'])


In [53]:
# Create a common layout for all graphs to maintain consistent node positions
pos = nx.spring_layout(G_firstHOUR)  # Calculate layout for the first graph and reuse it

# Plotting
fig, axes = plt.subplots(2, 2, figsize=(15, 15))

# Plot 15-minute graph
nx.draw(G_15, pos, ax=axes[0, 0], with_labels=True, node_color='lightblue', edge_color='gray', alpha=0.7)
axes[0, 0].set_title('15 Minutes')

# Plot 30-minute graph
nx.draw(G_30, pos, ax=axes[0, 1], with_labels=True, node_color='lightgreen', edge_color='gray', alpha=0.7)
axes[0, 1].set_title('30 Minutes')

# Plot 45-minute graph
nx.draw(G_45, pos, ax=axes[1, 0], with_labels=True, node_color='lightcoral', edge_color='gray', alpha=0.7)
axes[1, 0].set_title('45 Minutes')

# Plot 60-minute graph
nx.draw(G_60, pos, ax=axes[1, 1], with_labels=True, node_color='lightsalmon', edge_color='gray', alpha=0.7)
axes[1, 1].set_title('60 Minutes')

plt.tight_layout()
plt.show()

plt.savefig('LyonSchool_15_30_45_60.png')


# Save graphs to pickle files

In [54]:
import pickle

def save_graph_to_pickle(graph, filename):
    with open(filename, 'wb') as f:
        pickle.dump(graph, f)

# Save each graph to a pickle file
save_graph_to_pickle(G_15, 'G_15.pkl')
save_graph_to_pickle(G_30, 'G_30.pkl')
save_graph_to_pickle(G_45, 'G_45.pkl')
save_graph_to_pickle(G_60, 'G_60.pkl')
save_graph_to_pickle(G_firstHOUR, 'G_firstHOUR.pkl')
save_graph_to_pickle(G, 'G_FULL.pkl')

print("Graphs saved to pickle files successfully.")


Graphs saved to pickle files successfully.
