In [None]:
import sys
import os



import numpy as np
from SynthTempNetwork import Individual, SynthTempNetwork
from TemporalNetwork import ContTempNetwork, StaticTempNetwork
from FlowStability import SparseClustering, FlowIntegralClustering, run_multi_louvain, avg_norm_var_information
import parallel_clustering
import pickle

import matplotlib.pyplot as plt
import matplotlib
from matplotlib.gridspec import GridSpec
import matplotx

from scipy.sparse import (lil_matrix, dok_matrix, diags, eye, isspmatrix_csr, isspmatrix,
                          csr_matrix, coo_matrix, csc_matrix)
from scipy.sparse.linalg import eigsh

from SparseStochMat import sparse_autocov_mat

In [None]:
# Attribute names handed to ContTempNetwork.load through `attributes_list`.
_net_rw_attributes = [
    'node_to_label_dict',
    'events_table',
    'times',
    'time_grid',
    'num_nodes',
    '_overlapping_events_merged',
    'start_date',
    'node_label_array',
    'male_array',
    'female_array',
    'node_first_start_array',
    'node_last_end_array',
    'node_class_array',
    'datetimes',
]

# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
net_rw = ContTempNetwork.load(
    '/home/b/skoove/Desktop/entropy/paper_data/socio_pat_primary_school/primaryschoolnet',
    attributes_list=_net_rw_attributes,
)

In [None]:
import pandas as pd

# Tab-separated contact list without a header row; the column names are supplied here.
_contact_columns = ['time', 'id1', 'id2', 'class1', 'class2']
df = pd.read_csv(
    '../paper_data/socio_pat_primary_school/primaryschool.csv',
    sep='\t',
    header=None,
    names=_contact_columns,
)

In [None]:
# Split `time` (presumably seconds — TODO confirm) into whole multiples of 3600
# and the remainder expressed in units of 60.
_whole_hours, _remainder = df['time'] // 3600, df['time'] % 3600
df['hour'] = _whole_hours
df['minute'] = _remainder / 60

In [None]:
df['hour']

In [None]:
# Network times divided by 3600 (hours, if `times` is in seconds — TODO confirm).
net_times_hours = net_rw.times / 3600


def _first_index_after(hour):
    """Position of the first entry of net_times_hours greater than `hour`.

    np.argmax on the boolean mask returns 0 when no entry qualifies.
    """
    return np.argmax(net_times_hours > hour)


flag10, flag12, flag14, flag16, flagday1 = (
    _first_index_after(hour) for hour in (10, 12, 14, 16, 18)
)
print(flag10, flag12, flag14, flag16, flagday1)

In [None]:
net_rw.times

In [None]:
print(net_times_hours[960])
print(net_rw.times[960])

In [None]:
# One matshow figure per window of event-time indices.
_index_windows = [(0, 240), (240, 600), (600, 960), (960, 1556)]
for _first, _last in _index_windows:
    _window_adjacency = net_rw.compute_static_adjacency_matrix(
        start_time=net_rw.times[_first],
        end_time=net_rw.times[_last],
    )
    plt.matshow(_window_adjacency.toarray())
    if (_first, _last) == (600, 960):
        # Only the 600-960 window carries a colorbar.
        plt.colorbar()

In [None]:
# Aggregated adjacency between time indices 600 and 960, shown on a log scale.
_lunch_adjacency = net_rw.compute_static_adjacency_matrix(
    start_time=net_rw.times[600],
    end_time=net_rw.times[960],
).toarray()
# Zero entries become 1 so that their logarithm is 0 instead of -inf.
_lunch_adjacency[_lunch_adjacency == 0] = 1
lunch_break = np.log(_lunch_adjacency)

plt.matshow(lunch_break)
plt.colorbar()
#plt.savefig('/home/b/skoove/Desktop/primary_school/a_matrix_lunch_break.png', format='png', dpi=300) 

In [None]:
import networkx as nx

# Build the graph from the raw aggregated adjacency, not from `lunch_break`.
# `lunch_break` holds log-scaled values for plotting: an entry equal to 1
# becomes log(1) = 0, and networkx only creates edges for non-zero entries,
# so such edges would silently vanish from the connected-component analysis.
graph_lunch_break = nx.Graph(
    net_rw.compute_static_adjacency_matrix(
        start_time=net_rw.times[600],
        end_time=net_rw.times[960],
    ).toarray()
)

In [None]:
nx.number_connected_components(graph_lunch_break)

In [None]:
[len(c) for c in sorted(nx.connected_components(graph_lunch_break), key=len, reverse=True)]

# Figure Flow stability - Hourly Aggregation

In [None]:
from datetime import timedelta, datetime

# Hourly bin edges from 2009-10-01 09:00 to 2009-10-02 17:00.
start_datetime = datetime(2009,10,1,9,0,0)
end_datetime = datetime(2009,10,2,17,0,0)
# 'h' is the hourly frequency alias; the upper-case 'H' spelling is deprecated
# in recent pandas releases and emits a FutureWarning.
daterange = pd.date_range(start=start_datetime, end=end_datetime, freq='h')

# The same bin edges expressed relative to the network's start date and start time.
timerange = [(date_i - net_rw.start_date).total_seconds() + net_rw.start_time for date_i in daterange]

adjacencies = []
time_slices = []
for ts, te in zip(timerange[:-1], timerange[1:]):
    A = net_rw.compute_static_adjacency_matrix(start_time=ts,
                                               end_time=te).toarray()
    # Hours whose matrix is entirely zero are skipped, so positions in
    # `adjacencies` / `time_slices` do not map one-to-one onto clock hours.
    if A.any():
        adjacencies.append(A)
        time_slices.append((ts, te))

In [None]:
time_slices[3][1] / 3600

In [None]:
time_slices[0][0] / 3600

In [None]:
# Sum of the hourly matrices at list positions 3 and 4.
# NOTE(review): the name suggests 12:00-14:00; this only holds if no earlier
# hour was dropped as empty — check against time_slices.
_adjacency_3_plus_4 = adjacencies[3] + adjacencies[4]
graph_12_14 = nx.Graph(_adjacency_3_plus_4)
nx.number_connected_components(graph_12_14)

In [None]:
from FlowStability import (norm_mutual_information, 
                               Partition, static_clustering,
                               norm_var_information)
from itertools import combinations

In [None]:
num_repeat = 3

# Values passed as `t` to static_clustering for every hourly adjacency matrix.
# A first grid, np.round(np.logspace(-1,1,num=20),2), used to be assigned here
# and was overwritten on the very next line without being read; that dead
# assignment has been removed.
ts = np.round(np.logspace(1,1.699,num=10),2)

#%%
static_clust_list = []

for i, A in enumerate(adjacencies):
    print(i)
    # presents, = (A.sum(1) > 0).nonzero()
    # A = A[presents,:][:,presents]

    static_clust_scan = dict()
    for t in ts:
        print(t)
        clusts = []
        stabs = []

        # Repeat the Louvain search num_repeat times for this value of t.
        for _ in range(num_repeat):
            stat_clust = static_clustering(A, t=t, linearized=True)
            stat_clust.find_louvain_clustering()

            clusts.append(stat_clust.partition.cluster_list)
            stabs.append(stat_clust.compute_stability())

        static_clust_scan[t] = {
            # cluster list of the run with the highest stability
            'best_clust': clusts[np.argmax(stabs)],
            # mean norm_var_information over all pairs of runs
            'nvarinf': np.mean([norm_var_information(c1,c2) for c1,c2 in combinations(clusts,2)]),
            # mean number of clusters over the runs
            'avg_nclust': np.mean([len(c) for c in clusts]),
        }

    static_clust_list.append(static_clust_scan)

In [None]:
# Average cluster count of the snapshot at list position 4, for every scanned t.
_scan_of_slice_4 = static_clust_list[4]
for i in ts:
    print(_scan_of_slice_4[i]['avg_nclust'])

# Clustering

In [None]:
lamdas_growing = np.logspace(-5,0,200)

In [None]:
# Forward clustering results for the 0-240 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_rw0_240 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot0_240'
multi_res_rw0_240 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_rw0_240}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw0_240[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element
# (presumably partitions with more than one cluster — TODO confirm).
avg_nclusters_forw_rw0_240 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw0_240[lamda] if len(c) > 1]
    avg_nclusters_forw_rw0_240.append(np.mean(_sizes))

NVI_forw_rw0_240 = [avg_norm_var_information(multi_res_rw0_240[lamda])
                    for lamda in lamdas_growing]

In [None]:
# Forward clustering results for the 240-600 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_rw240_600 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot240_600'
multi_res_rw240_600 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_rw240_600}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw240_600[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element.
avg_nclusters_forw_rw240_600 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw240_600[lamda] if len(c) > 1]
    avg_nclusters_forw_rw240_600.append(np.mean(_sizes))

NVI_forw_rw240_600 = [avg_norm_var_information(multi_res_rw240_600[lamda])
                      for lamda in lamdas_growing]

In [None]:
# Forward clustering results for the 600-960 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_rw600_960 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot600_960'
multi_res_rw600_960 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_rw600_960}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw600_960[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element.
avg_nclusters_forw_rw600_960 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw600_960[lamda] if len(c) > 1]
    avg_nclusters_forw_rw600_960.append(np.mean(_sizes))

NVI_forw_rw600_960 = [avg_norm_var_information(multi_res_rw600_960[lamda])
                      for lamda in lamdas_growing]

In [None]:
# Forward clustering results for the 960-1320 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_rw960_1320 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot960_1320'
multi_res_rw960_1320 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_rw960_1320}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw960_1320[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element.
avg_nclusters_forw_rw960_1320 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw960_1320[lamda] if len(c) > 1]
    avg_nclusters_forw_rw960_1320.append(np.mean(_sizes))

NVI_forw_rw960_1320 = [avg_norm_var_information(multi_res_rw960_1320[lamda])
                       for lamda in lamdas_growing]

In [None]:
# NOTE(review): this cell is a line-for-line duplicate of the preceding
# 960-1320 loading cell; it reloads the same pickle files and recomputes the
# same three variables. It can be deleted.
multi_res_rw960_1320 = {}
for lamda in lamdas_growing:
    with open(f'//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot960_1320/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
        
    multi_res_rw960_1320[lamda] = cluster
avg_nclusters_forw_rw960_1320 = [np.mean([len(c) for c in \
                   multi_res_rw960_1320[lamda] if len(c)>1]) for lamda in lamdas_growing]

NVI_forw_rw960_1320 = [avg_norm_var_information(multi_res_rw960_1320[lamda]) for lamda in lamdas_growing]

In [None]:
# Forward clustering results for the 1320-1556 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_rw1320_1556 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot1320_1556'
multi_res_rw1320_1556 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_rw1320_1556}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw1320_1556[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element.
avg_nclusters_forw_rw1320_1556 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw1320_1556[lamda] if len(c) > 1]
    avg_nclusters_forw_rw1320_1556.append(np.mean(_sizes))

NVI_forw_rw1320_1556 = [avg_norm_var_information(multi_res_rw1320_1556[lamda])
                        for lamda in lamdas_growing]

In [None]:
fig, axs = plt.subplots(3,2, sharex=False, figsize=(10, 7))

# One entry per panel:
# (axes position, panel caption, NVI curve, average-number-of-clusters curve).
# NOTE(review): the grid has six cells but only five panels; axs[2,1] stays empty.
_forward_panels = [
    ((0, 0), '(A): 08:30-10:00 (0-240)', NVI_forw_rw0_240, avg_nclusters_forw_rw0_240),
    ((0, 1), '(B): 10:00-12:00 (240-600)', NVI_forw_rw240_600, avg_nclusters_forw_rw240_600),
    ((1, 0), '(C): 12:00-14:00 (600-960)', NVI_forw_rw600_960, avg_nclusters_forw_rw600_960),
    ((1, 1), '(D):14:00-16:00 (960-1320)', NVI_forw_rw960_1320, avg_nclusters_forw_rw960_1320),
    ((2, 0), '(E):16:00-17:30 (1320-1556)', NVI_forw_rw1320_1556, avg_nclusters_forw_rw1320_1556),
]

_twin_axes = []
for _position, _caption, _nvi_curve, _nclusters_curve in _forward_panels:
    _ax_nvi = axs[_position]

    # Left y-axis (red): normalised variation of information.
    color = 'tab:red'
    _ax_nvi.plot(lamdas_growing, _nvi_curve, ':', color=color, label='static norm NVI')
    _ax_nvi.set_xscale('log')
    _ax_nvi.set_xlabel(r'$\lambda$ [s]')
    _ax_nvi.set_ylabel('Norm. Var. Inf.', color=color)
    _ax_nvi.tick_params(axis='y', labelcolor=color)
    _ax_nvi.text(-0.1,1.1, _caption, transform=_ax_nvi.transAxes)

    # Right y-axis (blue): average number of clusters, on a twin sharing the x-axis.
    _ax_ncl = _ax_nvi.twinx()
    color = 'tab:blue'
    _ax_ncl.plot(lamdas_growing, _nclusters_curve, ':', color=color, label='edge-centric')
    _ax_ncl.set_xlabel(r'$\lambda$ [s]')
    _ax_ncl.set_ylabel('Avg. no. clusters', color=color)
    _ax_ncl.tick_params(axis='y', labelcolor=color)
    _twin_axes.append(_ax_ncl)

# Keep the per-panel twin axes available under their previous names.
ax1, ax2, ax3, ax4, ax5 = _twin_axes

fig.suptitle("Forward Clustering Day 1", fontsize=12)
fig.tight_layout()  # otherwise the right y-label is slightly clipped
fig.subplots_adjust(top=0.88)  # leave room for the suptitle after tight_layout
#plt.savefig('/home/b/skoove/Desktop/primary_school/forward_clustering_split.png', format='png', dpi=300)

In [None]:
# Print the average-cluster-count curve of each forward window, one per line.
for _curve in (
    avg_nclusters_forw_rw0_240,
    avg_nclusters_forw_rw240_600,
    avg_nclusters_forw_rw600_960,
    avg_nclusters_forw_rw960_1320,
    avg_nclusters_forw_rw1320_1556,
):
    print(_curve)

# BACKWARD CLUSTERING

In [None]:
# Backward clustering results for the 0-240 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_bw0_240 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot_bw0_240'
multi_res_rw_bw0_240 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_bw0_240}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw_bw0_240[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element.
avg_nclusters_bw_rw0_240 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw_bw0_240[lamda] if len(c) > 1]
    avg_nclusters_bw_rw0_240.append(np.mean(_sizes))

NVI_bw_rw0_240 = [avg_norm_var_information(multi_res_rw_bw0_240[lamda])
                  for lamda in lamdas_growing]

In [None]:
multi_res_rw_bw0_240

In [None]:
# Backward clustering results for the 240-600 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_bw240_600 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot_bw240_600'
multi_res_rw_bw240_600 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_bw240_600}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw_bw240_600[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element.
avg_nclusters_bw_rw240_600 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw_bw240_600[lamda] if len(c) > 1]
    avg_nclusters_bw_rw240_600.append(np.mean(_sizes))

NVI_bw_rw240_600 = [avg_norm_var_information(multi_res_rw_bw240_600[lamda])
                    for lamda in lamdas_growing]

In [None]:
# Backward clustering results for the 600-960 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_bw600_960 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot_bw600_960'
multi_res_rw_bw600_960 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_bw600_960}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw_bw600_960[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element.
avg_nclusters_bw_rw600_960 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw_bw600_960[lamda] if len(c) > 1]
    avg_nclusters_bw_rw600_960.append(np.mean(_sizes))

NVI_bw_rw600_960 = [avg_norm_var_information(multi_res_rw_bw600_960[lamda])
                    for lamda in lamdas_growing]

In [None]:
# Backward clustering results for the 960-1556 window: one pickle file per lambda.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_dir_bw960_1556 = '//scratch/tmp/180/skoove/primaryschoolnet_rw/clustersplot_bw960_1556'
multi_res_rw_bw960_1556 = dict()
for lamda in lamdas_growing:
    with open(f'{_dir_bw960_1556}/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw_bw960_1556[lamda] = cluster

# Per lambda: mean len() of the stored entries that have more than one element.
avg_nclusters_bw_rw960_1556 = []
for lamda in lamdas_growing:
    _sizes = [len(c) for c in multi_res_rw_bw960_1556[lamda] if len(c) > 1]
    avg_nclusters_bw_rw960_1556.append(np.mean(_sizes))

NVI_bw_rw960_1556 = [avg_norm_var_information(multi_res_rw_bw960_1556[lamda])
                     for lamda in lamdas_growing]

In [None]:
fig, axs = plt.subplots(2,2, sharex=False, figsize=(10, 5))

# One entry per panel:
# (axes position, panel caption, NVI curve, average-number-of-clusters curve).
_backward_panels = [
    ((0, 0), '(A): 08:30-10:00 (0-240)', NVI_bw_rw0_240, avg_nclusters_bw_rw0_240),
    ((0, 1), '(B): 10:00-12:00 (240-600)', NVI_bw_rw240_600, avg_nclusters_bw_rw240_600),
    ((1, 0), '(C): 12:00-14:00 (600-960)', NVI_bw_rw600_960, avg_nclusters_bw_rw600_960),
    ((1, 1), '(D):14:00-17:30 (960-1556)', NVI_bw_rw960_1556, avg_nclusters_bw_rw960_1556),
]

_twin_axes = []
for _position, _caption, _nvi_curve, _nclusters_curve in _backward_panels:
    _ax_nvi = axs[_position]

    # Left y-axis (red): normalised variation of information.
    color = 'tab:red'
    _ax_nvi.plot(lamdas_growing, _nvi_curve, ':', color=color, label='static norm NVI')
    _ax_nvi.set_xscale('log')
    _ax_nvi.set_xlabel(r'$\lambda$ [s]')
    _ax_nvi.set_ylabel('Norm. Var. Inf.', color=color)
    _ax_nvi.tick_params(axis='y', labelcolor=color)
    _ax_nvi.text(-0.1,1.1, _caption, transform=_ax_nvi.transAxes)

    # Right y-axis (blue): average number of clusters, on a twin sharing the x-axis.
    _ax_ncl = _ax_nvi.twinx()
    color = 'tab:blue'
    _ax_ncl.plot(lamdas_growing, _nclusters_curve, ':', color=color, label='edge-centric')
    _ax_ncl.set_xlabel(r'$\lambda$ [s]')
    _ax_ncl.set_ylabel('Avg. no. clusters', color=color)
    _ax_ncl.tick_params(axis='y', labelcolor=color)
    _twin_axes.append(_ax_ncl)

# Keep the per-panel twin axes available under their previous names.
ax1, ax2, ax3, ax4 = _twin_axes

fig.suptitle("Backward Clustering Day 1", fontsize=12)
fig.tight_layout()  # otherwise the right y-label is slightly clipped
fig.subplots_adjust(top=0.88)  # leave room for the suptitle after tight_layout
# The disabled export below used to point at forward_clustering_split.png,
# which would have overwritten the forward figure once re-enabled.
#plt.savefig('/home/b/skoove/Desktop/primary_school/backward_clustering_split.png', format='png', dpi=300)

In [None]:
# Reload the forward and backward clustering results of the 0-240 window.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
_results_root = '//scratch/tmp/180/skoove/primaryschoolnet_rw'

multi_res_rw0_240 = dict()
for lamda in lamdas_growing:
    with open(f'{_results_root}/clustersplot0_240/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw0_240[lamda] = cluster


multi_res_rw_bw0_240 = dict()
for lamda in lamdas_growing:
    with open(f'{_results_root}/clustersplot_bw0_240/cluster{lamda:.11f}', 'rb') as f:
        cluster = pickle.load(f)
    multi_res_rw_bw0_240[lamda] = cluster

In [None]:
def sort_clusters(cluster_list_to_sort, cluster_list_model):
    """Reorder clusters so that they line up with a reference list of clusters.

    The Jaccard similarity between every cluster to sort and every reference
    cluster is computed. The reference clusters are then visited in order, over
    as many passes as needed; on each visit the not-yet-placed cluster most
    similar to that reference cluster is appended to the result, unless it was
    already placed earlier in the same pass.

    Parameters
    ----------
    cluster_list_to_sort : sequence of set
        Clusters to reorder.
    cluster_list_model : sequence of set
        Reference clusters that define the target order.

    Returns
    -------
    list of set
        The same cluster objects as ``cluster_list_to_sort``, reordered. If
        either input is empty there is nothing to align with, and the clusters
        are returned in their original order.
    """
    num_clusts = len(cluster_list_to_sort)
    if num_clusts == 0 or len(cluster_list_model) == 0:
        # Without reference clusters the selection loop below would never
        # place anything and would spin forever.
        return list(cluster_list_to_sort)

    def _jaccard(first, second):
        # Two empty sets have an empty union; treat their similarity as 0
        # instead of dividing by zero.
        union_size = len(first.union(second))
        if union_size == 0:
            return 0.0
        return len(first.intersection(second)) / union_size

    # rows: clusters to sort, columns: reference clusters
    clust_similarity_matrix = np.array(
        [[_jaccard(clust, class_clust) for class_clust in cluster_list_model]
         for clust in cluster_list_to_sort])

    new_clust_order = []
    placed = set()
    remaining = list(range(num_clusts))

    while remaining:
        for cla in range(clust_similarity_matrix.shape[1]):
            # best remaining match for this reference cluster
            best = remaining[clust_similarity_matrix[remaining, cla].argmax()]
            if best not in placed:
                placed.add(best)
                new_clust_order.append(best)
        # candidates are pruned only between passes
        remaining = [idx for idx in remaining if idx not in placed]

    return [cluster_list_to_sort[i] for i in new_clust_order]

In [None]:
##all data
int_start = 0
int_stop = 1

#%% load forward and backward partitions

tau_w = lamdas_growing[100]

# First colour is white, followed by eleven distinct colours.
color_list =["#ffffff",
"#4ba706",
"#a2007e",
"#806dcb",
"#5eb275",
"#ca3b01",
"#01a4d6",
"#b77600",
"#a39643",
"#cc6ea9",
"#1e5e39",
"#cb5b5a"]

from matplotlib.colors import ListedColormap
cmap = ListedColormap(color_list)

# find active nodes during this time:
# events that start at or after the beginning of slice `int_start` and end
# before the end of slice `int_stop`; the mask is built once and used for both
# the source and the target column.
_events = net_rw.events_table
_in_window = np.logical_and(
    _events.starting_times >= time_slices[int_start][0],
    _events.ending_times < time_slices[int_stop][1])
_window_events = _events.loc[_in_window]

active_nodes = set(_window_events.source_nodes.tolist())
active_nodes.update(_window_events.target_nodes.tolist())

# Forward and backward results of the same 0-240 window at lambda = tau_w.
# `clustres_rev` used to be read from the *forward* 240-600 results, which
# matches neither its name nor the "forward and backward" section header.
# NOTE(review): confirm that the backward 0-240 partitions are the intended ones.
clustres = multi_res_rw0_240[tau_w]
clustres_rev = multi_res_rw_bw0_240[tau_w]

In [None]:
# #%% plot node_class vs communities plot

# sortorder = np.argsort(net_rw.node_class_array)
# sorted_class_array = net_rw.node_class_array[sortorder]


# sorted_active_nodes = [n for n in sortorder if n in active_nodes]
# sorted_active_nodes_class = net_rw.node_class_array[sorted_active_nodes]
# unique_class_array = np.unique(sorted_active_nodes_class)

# #class sizes taking into account only active nodes
# classes_size = {cla : (sorted_active_nodes_class == cla).sum() for cla in unique_class_array}
# classes_size_list = [(sorted_active_nodes_class == cla).sum() for cla in unique_class_array]
# assert sum(classes_size_list) == len(active_nodes)

# yticks = np.cumsum(classes_size_list) - classes_size_list[0]//2
# yticks_labels = np.unique(sorted_class_array)

# class_2_number ={cla : i+1 for i, cla in enumerate(unique_class_array)}

# # sort the two partition in the same way = similuraty with class partition
# class_cluster_list = []
# for clas in unique_class_array:
#     class_cluster_list.append(set([n for n in active_nodes if net_rw.node_class_array[n] == clas]))

# # partitions with only active nodes
# best_part = Partition(len(active_nodes), [clust.intersection(active_nodes) for \
#                                       clust in clustres[0]])
# best_part = Partition(len(active_nodes), sort_clusters(best_part.cluster_list, class_cluster_list))
    
# best_partrev = Partition(len(active_nodes), [clust.intersection(active_nodes) for \
#                                       clust in clustres_rev[0]])
# best_partrev = Partition(len(active_nodes), sort_clusters(best_partrev.cluster_list, class_cluster_list))



# node2comm = np.zeros((len(active_nodes), best_part.get_num_clusters()))
# for i,n in enumerate(sorted_active_nodes):
#     node2comm[i,best_part.node_to_cluster_dict[n]] = class_2_number[sorted_active_nodes_class[i]]


# node2comm_rev = np.zeros((len(active_nodes), best_partrev.get_num_clusters()))
# for  i,n in enumerate(sorted_active_nodes):
#     node2comm_rev[i,best_partrev.node_to_cluster_dict[n]] = class_2_number[sorted_active_nodes_class[i]]


# fig, (ax1,ax2) = plt.subplots(1,2, figsize=(13.5,6), gridspec_kw={'top':0.9, 'wspace':0.25})

# ax1.imshow(node2comm, aspect='auto', cmap=cmap)

# ax1.set_yticks(yticks)
# ax1.set_yticklabels(yticks_labels)
# ax1.set_ylim((len(active_nodes),0))
# ax1.set_xticks(range(best_part.get_num_clusters()))

# ax1.set_xlabel('communities')
# ax1.set_ylabel('nodes')

# ax2.imshow(node2comm_rev, aspect='auto', cmap=cmap)

# ax2.set_yticks(yticks)
# ax2.set_yticklabels(yticks_labels)
# ax2.set_ylim((len(active_nodes),0))
# ax2.set_xticks(range(best_partrev.get_num_clusters()))

# ax2.set_xlabel('communities')

In [None]:
# NOTE: despite the `index_` prefix, each variable holds the minimum NVI
# *value* of its window, not a position; the following cells compare the
# curves against these values to find the matching lambda.
index_best0_240 = np.asarray(NVI_forw_rw0_240).min()
index_best240_600 = np.asarray(NVI_forw_rw240_600).min()
index_best600_960 = np.asarray(NVI_forw_rw600_960).min()
index_best960_1320 = np.asarray(NVI_forw_rw960_1320).min()
index_best1320_1556 = np.asarray(NVI_forw_rw1320_1556).min()

In [None]:
# Boolean mask of the lambdas whose NVI equals the window minimum, then those lambdas.
_is_min0_240 = np.asarray(NVI_forw_rw0_240) == index_best0_240
print(_is_min0_240)
print(lamdas_growing[_is_min0_240])

In [None]:
print(lamdas_growing[170])

In [None]:
# Boolean mask of the lambdas whose NVI equals the window minimum, then those lambdas.
_is_min240_600 = np.asarray(NVI_forw_rw240_600) == index_best240_600
print(_is_min240_600)
print(lamdas_growing[_is_min240_600])

In [None]:
lamdas_growing[135]

In [None]:
# Boolean mask of the lambdas whose NVI equals the window minimum, then those lambdas.
_is_min600_960 = np.asarray(NVI_forw_rw600_960) == index_best600_960
print(_is_min600_960)
print(lamdas_growing[_is_min600_960])

In [None]:
lamdas_growing[150]

In [None]:
# Boolean mask of the lambdas whose NVI equals the window minimum, then those lambdas.
_is_min960_1320 = np.asarray(NVI_forw_rw960_1320) == index_best960_1320
print(_is_min960_1320)
print(lamdas_growing[_is_min960_1320])

In [None]:
lamdas_growing[125]

In [None]:
# Boolean mask of the lambdas whose NVI equals the window minimum, then those lambdas.
_is_min1320_1556 = np.asarray(NVI_forw_rw1320_1556) == index_best1320_1556
print(_is_min1320_1556)
print(lamdas_growing[_is_min1320_1556])

In [None]:
lamdas_growing[90]

# Forward 240_600 Alluvial Diagram

In [None]:

# First stored partition of the 240-600 window at four values of lambda.
# NOTE(review): the lambda indices are not monotone (79, 92, 150, 135) —
# confirm that the last two stages are in the intended order.
bestcluster0 = multi_res_rw240_600[lamdas_growing[79]][0]
bestcluster1 = multi_res_rw240_600[lamdas_growing[92]][0]
bestcluster2 = multi_res_rw240_600[lamdas_growing[150]][0]
bestcluster3 = multi_res_rw240_600[lamdas_growing[135]][0]

bestclusters = [bestcluster0, bestcluster1, bestcluster2, bestcluster3]

#%% make data for flow diagram

# Nodes of every class; identical for all stage pairs, so it is built once.
class_dict = {clas : set(net_rw.node_array[net_rw.node_class_array == clas]) for \
                  clas in np.unique(net_rw.node_class_array)}

# One flow table per pair of consecutive stages. A row counts the nodes of
# class `type` that sit in source community `source` and target community
# `target`; empty combinations are left out.
_flow_frames = []
for source_comms, target_comms in zip(bestclusters[:-1], bestclusters[1:]):
    flows = []
    for clas, clas_set in class_dict.items():
        for s, comm_s in enumerate(source_comms):
            # class members inside this source community, shared by all targets
            _clas_in_source = clas_set.intersection(comm_s)
            for t, comm_t in enumerate(target_comms):
                val = len(_clas_in_source.intersection(comm_t))
                if val > 0:
                    flows.append({'source': s, 'target': t, 'type': clas, 'value': val})
    _flow_frames.append(pd.DataFrame.from_dict(flows))

df_flows0, df_flows1, df_flows2 = _flow_frames


In [None]:
# Give the communities of every stage their own id range so the stages can be
# chained: each stage's ids start right after the largest source id of the
# previous stage.
# NOTE(review): the `source` columns are shifted in place, so running this
# cell a second time shifts them again.
_offset = int(df_flows0['source'].max()) + 1
df_flows0['target_label'] = df_flows0['target'].add(_offset)

df_flows1['source'] = df_flows1['source'].add(_offset)
_offset = int(df_flows1['source'].max()) + 1
df_flows1['target_label'] = df_flows1['target'].add(_offset)

df_flows2['source'] = df_flows2['source'].add(_offset)
_offset = int(df_flows2['source'].max()) + 1
df_flows2['target_label'] = df_flows2['target'].add(_offset)

_stage_frames = [df_flows0, df_flows1, df_flows2]
df_flows = pd.concat(_stage_frames, ignore_index=True)

In [None]:
import plotly.graph_objects as go

# Link table: shifted source ids, shifted target ids and node counts.
# NOTE(review): `df` is reused here and no longer refers to the contact CSV.
data = dict(
    source=list(df_flows['source']),
    target=list(df_flows['target_label']),
    value=list(df_flows['value']),
)
df = pd.DataFrame(data)

# Shifted id -> community index within its own stage.
label_mapping = {}
for _src, _tgt_label, _tgt in zip(df_flows['source'],
                                  df_flows['target_label'],
                                  df_flows['target']):
    label_mapping[_src] = _src
    label_mapping[_tgt_label] = _tgt

# Every id that occurs as a source or as a target, in order of first appearance.
_all_ids = pd.concat([df_flows['source'], df_flows['target_label']])
all_nodes = list(_all_ids.unique())
all_nodes_renamed = [label_mapping[node_id] for node_id in all_nodes]

# Position of each id in `all_nodes`; the Sankey links refer to these positions.
node_map = dict()
for _position, _node_id in enumerate(all_nodes):
    node_map[_node_id] = _position

df['source_id'] = df['source'].map(node_map)
df['target_id'] = df['target'].map(node_map)

# One colour per class, assigned in order of first appearance in df_flows.
color_list = [
    "#4ba706", "#a2007e", "#806dcb", "#5eb275", "#ca3b01", "#01a4d6",
    "#b77600", "#a39643", "#cc6ea9", "#1e5e39", "#cb5b5a",
]
dict_color = {df_type: color_list[i]
              for i, df_type in enumerate(df_flows['type'].unique())}
link_color = [dict_color[flow_type] for flow_type in df_flows['type']]

# Sankey figure: nodes are communities, links are class-wise node counts.
_node_style = dict(
    pad=10,
    thickness=1,
    line=dict(color="black", width=0.5),
    #label=all_nodes_renamed
)
_link_style = dict(
    source=df['source_id'],
    target=df['target_id'],
    value=df['value'],
    color=link_color,
    customdata=df_flows['type'],
    #hovertemplate='Source: %{source.label}<br>Target: %{target.label}<br>Value: %{value}<br>Label: %{customdata}<extra></extra>'
    hovertemplate='Value: %{value}<br> %{customdata}<extra></extra>',
)
fig = go.Figure(data=[go.Sankey(node=_node_style, link=_link_style)])

#fig.update_layout(title_text="Sankey Diagram", font_size=10)
fig.show()

# Forward Alluvial Diagram

In [None]:
# First stored partition of each forward window at a hand-picked lambda index.
# NOTE(review): the exploration cells above display lamdas_growing[150] right
# after the 600-960 minimum search, but index 180 is used here — confirm.
bestcluster0_240 = multi_res_rw0_240[lamdas_growing[170]][0]
bestcluster240_600 = multi_res_rw240_600[lamdas_growing[135]][0]
bestcluster600_960 = multi_res_rw600_960[lamdas_growing[180]][0]
bestcluster960_1320 = multi_res_rw960_1320[lamdas_growing[125]][0]
bestcluster1320_1556 = multi_res_rw1320_1556[lamdas_growing[90]][0]
bestclusters = [bestcluster0_240, bestcluster240_600, bestcluster600_960, bestcluster960_1320, bestcluster1320_1556]

#%% make data for flow diagram

# Nodes of every class; identical for all window pairs, so it is built once.
class_dict = {clas : set(net_rw.node_array[net_rw.node_class_array == clas]) for \
                  clas in np.unique(net_rw.node_class_array)}

# One flow table per pair of consecutive windows. A row counts the nodes of
# class `type` that sit in source community `source` and target community
# `target`; empty combinations are left out.
_flow_frames = []
for source_comms, target_comms in zip(bestclusters[:-1], bestclusters[1:]):
    flows = []
    for clas, clas_set in class_dict.items():
        for s, comm_s in enumerate(source_comms):
            # class members inside this source community, shared by all targets
            _clas_in_source = clas_set.intersection(comm_s)
            for t, comm_t in enumerate(target_comms):
                val = len(_clas_in_source.intersection(comm_t))
                if val > 0:
                    flows.append({'source': s, 'target': t, 'type': clas, 'value': val})
    _flow_frames.append(pd.DataFrame.from_dict(flows))

df_flows0_600, df_flows240_960, df_flows600_1320, df_flows960_1556 = _flow_frames


In [None]:
# Give the communities of every window their own id range so the windows can
# be chained: each window's ids start right after the largest source id of the
# previous one.
# NOTE(review): the `source` columns are shifted in place, so running this
# cell a second time shifts them again.
_offset = int(df_flows0_600['source'].max()) + 1
df_flows0_600['target_label'] = df_flows0_600['target'].add(_offset)

df_flows240_960['source'] = df_flows240_960['source'].add(_offset)
_offset = int(df_flows240_960['source'].max()) + 1
df_flows240_960['target_label'] = df_flows240_960['target'].add(_offset)

df_flows600_1320['source'] = df_flows600_1320['source'].add(_offset)
_offset = int(df_flows600_1320['source'].max()) + 1
df_flows600_1320['target_label'] = df_flows600_1320['target'].add(_offset)

df_flows960_1556['source'] = df_flows960_1556['source'].add(_offset)
_offset = int(df_flows960_1556['source'].max()) + 1
df_flows960_1556['target_label'] = df_flows960_1556['target'].add(_offset)

_window_frames = [df_flows0_600, df_flows240_960, df_flows600_1320, df_flows960_1556]
df_flows = pd.concat(_window_frames, ignore_index=True)

In [None]:
import plotly.graph_objects as go

# Link widths for the plotly Sankey: flows with value 1 are drawn with a
# width of 0.01. Cast to float *before* the masked write: putting 0.01 into
# an int64 Series relies on silent upcasting, which pandas deprecated in 2.1
# and will turn into an error.
list_value = df_flows['value'].astype(float)
list_value[df_flows['value'] == 1] = 0.01

data = {
    'source': list(df_flows['source']),
    'target': list(df_flows['target_label']),
    'value': list_value
}

# Link table handed to plotly.
# NOTE(review): `df` rebinds the name used for the raw contacts table loaded
# at the top of the notebook; kept as-is for compatibility with existing cells.
df = pd.DataFrame(data)

# Display label per node id: a source id maps to itself, a target id maps back
# to its stage-local target index. Rows are visited in order, so for an id
# that appears in both roles the last row mentioning it wins.
label_mapping = {}
for src_id, tgt_id, tgt_local in zip(df_flows['source'],
                                     df_flows['target_label'],
                                     df_flows['target']):
    label_mapping[src_id] = src_id
    label_mapping[tgt_id] = tgt_local

# Every node id that occurs as a source or as a target, in order of first appearance
all_nodes = list(pd.concat([df_flows['source'], df_flows['target_label']]).unique())

# Labels in node order (currently only referenced by the commented-out `label=` option)
all_nodes_renamed = [label_mapping[node] for node in all_nodes]

# plotly addresses nodes by position, so translate node ids to 0-based indices
node_map = {node: idx for idx, node in enumerate(all_nodes)}

df['source_id'] = df['source'].map(node_map)
df['target_id'] = df['target'].map(node_map)

# Palette for the flow types; one entry is consumed per distinct value of
# df_flows['type'], in order of first appearance.
color_list = [
    "#4ba706", "#a2007e", "#806dcb", "#5eb275", "#ca3b01",
    "#01a4d6", "#b77600", "#a39643", "#cc6ea9", "#1e5e39", "#cb5b5a",
]

# flow type -> colour
dict_color = {
    df_type: color_list[i]
    for i, df_type in enumerate(df_flows['type'].unique())
}

# One colour per link, aligned with the rows of df_flows
link_color = [dict_color[flow_type] for flow_type in df_flows['type']]

# Sankey trace: node appearance and link definition are assembled separately
# and then handed to plotly. Node labels are intentionally not set.
node_style = dict(
    pad=10,
    thickness=10,
    line=dict(color="black", width=0.5),
)

link_style = dict(
    source=df['source_id'],
    target=df['target_id'],
    value=df['value'],
    color=link_color,
    # the flow type is attached to each link so the hover text can show it
    customdata=df_flows['type'],
    hovertemplate='Value: %{value}<br> %{customdata}<extra></extra>',
)

sankey = go.Sankey(node=node_style, link=link_style)



# A Sankey trace has no legend of its own, so add one empty scatter trace per
# flow type whose only purpose is to contribute a coloured legend entry.

# Link colours with duplicates removed, first occurrence kept
unique_link_color = list(dict.fromkeys(link_color))

legend_entries = [
    go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        marker=dict(size=8, color=color),
        legendgroup=label,
        showlegend=True,
        name=label,
    )
    for color, label in zip(unique_link_color, df_flows['type'].unique())
]

# Put the Sankey trace and the legend-only traces into one figure
fig = go.Figure(data=[sankey, *legend_entries])

# Horizontal legend centred underneath the diagram
legend_layout = {
    'font': {'size': 10},
    'itemwidth': 30,
    'itemsizing': 'constant',
    'traceorder': 'normal',
    'orientation': 'h',
    'yanchor': 'bottom',
    'y': -0.2,
    'xanchor': 'center',
    'x': 0.5,
}

fig.update_layout(
    font_size=10,
    # the dummy scatter traces would otherwise bring in cartesian axes
    xaxis={'visible': False},
    yaxis={'visible': False},
    plot_bgcolor='rgba(255,255,255,1)',
    paper_bgcolor='rgba(255,255,255,1)',
    showlegend=True,
    legend=legend_layout,
    # margins leave room for the legend
    margin={'l': 10, 'r': 10, 't': 50, 'b': 50},
)

fig.show()

In [None]:
from sankeyflow import Sankey


def _level_nodes(stage_flows, id_column):
    """Build one sankeyflow node level from a flow table.

    Parameters
    ----------
    stage_flows : pandas.DataFrame
        Flow table with a 'value' column and the column named by `id_column`.
    id_column : str
        Column holding the node ids of this level ('source' or 'target_label').

    Returns
    -------
    list of tuple
        One ``(node_id, total_value, options)`` tuple per distinct id, in order
        of first appearance; `total_value` is the sum of 'value' over the rows
        carrying that id and `options` colours the node black.
    """
    return [
        (node,
         stage_flows.loc[stage_flows[id_column] == node, 'value'].sum(),
         dict(color="black"))
        for node in stage_flows[id_column].unique()
    ]


# One node level per set of source communities, plus a final level for the
# targets of the last flow table.
nodes = [
    _level_nodes(df_flows0_600, 'source'),
    _level_nodes(df_flows240_960, 'source'),
    _level_nodes(df_flows600_1320, 'source'),
    _level_nodes(df_flows960_1556, 'source'),
    _level_nodes(df_flows960_1556, 'target_label'),
]


# Palette for the flow types; one entry is consumed per distinct value of
# df_flows['type'], in order of first appearance.
color_list = [
    "#4ba706", "#a2007e", "#806dcb", "#5eb275", "#ca3b01",
    "#01a4d6", "#b77600", "#a39643", "#cc6ea9", "#1e5e39", "#cb5b5a"
]

# flow type -> colour
dict_color = {}
for i, df_type in enumerate(df_flows['type'].unique()):
    dict_color[df_type] = color_list[i]

# One colour per row of df_flows (df_flows carries a 0..n-1 index)
link_colors = [dict_color[flow_type] for flow_type in df_flows['type']]

# sankeyflow flow tuples: (source node, target node, value, drawing options)
flows = [
    (
        df_flows.at[index, 'source'],
        df_flows.at[index, 'target_label'],
        df_flows.at[index, 'value'],
        {'color': link_colors[index]},
    )
    for index in df_flows.index
]





# Render the sankeyflow diagram on a fresh 10x6 figure; node labels are
# suppressed through an empty label format.
plt.figure(figsize=(10, 6))
s = Sankey(flows=flows, nodes=nodes, node_opts=dict(label_format=''))
s.draw()

# Legend: one round marker per flow type, coloured like its links
flow_types = df_flows['type'].unique()
handles = []
for df_type in flow_types:
    handles.append(
        plt.Line2D([0], [0], marker='o', color='w',
                   markerfacecolor=dict_color[df_type], markersize=10)
    )

plt.legend(handles, flow_types, title='Flow Types', loc='center left', bbox_to_anchor=(1, 0.5))

# To export the figure, call plt.savefig(<path>, bbox_inches='tight') here,
# e.g. with "sankey_diagram_with_legend_sankeyflow.png" (or .pdf / .svg).

plt.show()