In [31]:
import sys
if ".." not in sys.path:
    sys.path.insert(0, "..")

In [32]:
import numpy as np
import pandas as pd
import os
from os.path import join
from datetime import date, timedelta

In [33]:
def calculate_network_density(adj_matrix):
    return np.sum(adj_matrix) / (adj_matrix.shape[0] * (adj_matrix.shape[0] - 1))

In [34]:
def calculate_global_cc(adj_matrix):
    number_of_variables = adj_matrix.shape[0]

    # Assume non-changing network, thus all nodes have all other nodes as possible neighbours
    nCr = number_of_variables * (number_of_variables - 1) / 2

    c_t_vector = list()
    for node_index in range(number_of_variables):
        luk = np.array(adj_matrix[node_index, :] != 1)
        neighbor_node_indices = np.arange(number_of_variables)[luk]

        ix_luk_grid = np.ix_(neighbor_node_indices, neighbor_node_indices)
        sub_matrix = adj_matrix[ix_luk_grid]

        pairs_among_neighbors = np.sum(sub_matrix) / 2
        c_t = pairs_among_neighbors / nCr

        c_t_vector.append(c_t)
    c_t_vector = np.array(c_t_vector)

    C_t = np.sum( np.multiply(nCr, c_t_vector) ) / ( nCr * len(c_t_vector) )
    return C_t

# For normal/untreated network

In [41]:
# for threshold in [0.4, 0.5, 0.6]:
threshold = 0.4
is_srd = False
start_date = date.fromisoformat("2020-01-30")
end_date   = date.fromisoformat("2022-02-03")

# netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc', 'assortativity'])
netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc'])

current_date = start_date
while current_date <= end_date:

    filepath = join('..', '..', 'data', 'processed', 'adjacency_matrices', f"threshold-{threshold}", f'{current_date}.pkl')
    if is_srd:
        filepath = join('..', '..', 'data', 'processed', 'adjacency_matrices', f"threshold-{threshold}-srd", f'{current_date}.pkl')
        
    filepath = os.path.abspath(filepath)
    print(f"{current_date=}")

    df = pd.read_pickle(filepath)

    # G = nx.from_numpy_matrix(df.to_numpy())
    # D_t = nx.function.density(G)
    D_t = calculate_network_density(df.to_numpy())
    GC_t= calculate_global_cc(df.to_numpy())

    print(end_date, D_t, GC_t)

    new_df = pd.DataFrame({
        "date": current_date,
        "density": D_t, 
        "global_cc": GC_t,
    }, index=[0])

    netstats_df = pd.concat([
        netstats_df, 
        new_df
    ],
    ignore_index=True)
    print(f"{netstats_df.shape=}")

    current_date += timedelta(days = 1)

netstats_df.to_csv(f'../../data/processed/netstats/netstats-threshold-{threshold}.csv', index = False)

current_date=datetime.date(2020, 1, 30)
2022-02-03 0.25786382255197465 0.13159742136855393
netstats_df.shape=(1, 3)
current_date=datetime.date(2020, 1, 31)
2022-02-03 0.2744802534971791 0.1329504300204875
netstats_df.shape=(2, 3)
current_date=datetime.date(2020, 2, 1)
2022-02-03 0.2883916840559549 0.1347274932442785
netstats_df.shape=(3, 3)
current_date=datetime.date(2020, 2, 2)
2022-02-03 0.2744029677718525 0.1358411873256834
netstats_df.shape=(4, 3)
current_date=datetime.date(2020, 2, 3)
2022-02-03 0.2543859649122807 0.13563390565446773
netstats_df.shape=(5, 3)
current_date=datetime.date(2020, 2, 4)
2022-02-03 0.2514877502125357 0.13494884227339352
netstats_df.shape=(6, 3)
current_date=datetime.date(2020, 2, 5)
2022-02-03 0.25740010820001546 0.13484630309834406
netstats_df.shape=(7, 3)
current_date=datetime.date(2020, 2, 6)
2022-02-03 0.26381482340211765 0.1347596956298312
netstats_df.shape=(8, 3)
current_date=datetime.date(2020, 2, 7)
2022-02-03 0.2711569673081382 0.1349291818695823

# For SRD network

In [36]:
# for threshold in [0.4, 0.5, 0.6]:
threshold = 0.5
is_srd = True
start_date = date.fromisoformat("2020-01-31")
end_date   = date.fromisoformat("2022-02-03")

# netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc', 'assortativity'])
netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc'])

current_date = start_date
while current_date <= end_date:

    filepath = join('..', '..', 'data', 'processed', 'adjacency_matrices', f"threshold-{threshold}", f'{current_date}.pkl')
    if is_srd:
        filepath = join('..', '..', 'data', 'processed', 'adjacency_matrices', f"threshold-{threshold}-srd", f'{current_date}.pkl')
        
    filepath = os.path.abspath(filepath)
    print(f"{current_date=}")

    df = pd.read_pickle(filepath)

    G = nx.from_numpy_matrix(df.to_numpy())
    D_t = nx.function.density(G)
    # C_t = nx.degree_assortativity_coefficient(G)
    # T_t = nx.transitivity(G)

    new_df = pd.DataFrame({
        "date": current_date,
        "density": D_t, 
        "global_cc": T_t,
        "assortativity": C_t,
    }, index=[0])

    netstats_df = pd.concat([
        netstats_df, 
        new_df
    ],
    ignore_index=True)
    print(f"{netstats_df.shape=}")

    current_date += timedelta(days = 1)

netstats_df.to_csv(f'../../data/processed/netstats/netstats-threshold-{threshold}-srd.csv', index = False)

current_date=datetime.date(2020, 1, 31)
netstats_df.shape=(1, 4)
current_date=datetime.date(2020, 2, 1)
netstats_df.shape=(2, 4)
current_date=datetime.date(2020, 2, 2)
netstats_df.shape=(3, 4)
current_date=datetime.date(2020, 2, 3)
netstats_df.shape=(4, 4)
current_date=datetime.date(2020, 2, 4)
netstats_df.shape=(5, 4)
current_date=datetime.date(2020, 2, 5)
netstats_df.shape=(6, 4)
current_date=datetime.date(2020, 2, 6)
netstats_df.shape=(7, 4)
current_date=datetime.date(2020, 2, 7)
netstats_df.shape=(8, 4)
current_date=datetime.date(2020, 2, 8)
netstats_df.shape=(9, 4)
current_date=datetime.date(2020, 2, 9)
netstats_df.shape=(10, 4)
current_date=datetime.date(2020, 2, 10)
netstats_df.shape=(11, 4)
current_date=datetime.date(2020, 2, 11)
netstats_df.shape=(12, 4)
current_date=datetime.date(2020, 2, 12)
netstats_df.shape=(13, 4)
current_date=datetime.date(2020, 2, 13)
netstats_df.shape=(14, 4)
current_date=datetime.date(2020, 2, 14)
netstats_df.shape=(15, 4)
current_date=datetime.date(2