In [None]:
%load_ext autoreload
%autoreload 2

import hydra
import os
import datetime
from pathlib import Path

# Initialize hydra (once per kernel) and move to the root of the repository.
# The global hydra state is checked explicitly instead of wrapping the calls in
# a bare `except:`, so genuine failures (missing config directory, invalid
# YAML, KeyboardInterrupt, ...) surface instead of being reported as
# "already initialized".
from hydra.core.global_hydra import GlobalHydra

if GlobalHydra.instance().is_initialized():
    print('Hydra already initalized!')
else:
    # Initialize before changing directory, exactly as the original cell did.
    hydra.initialize(version_base=None, config_path="../config/")
    print('Initializing hydra')
    # Only change directory on the first run; re-running the cell must not
    # climb one level higher each time.
    os.chdir('..')

# Compose on every run so CONFIG is always defined, even after a kernel state
# where hydra was already initialized.
CONFIG = hydra.compose(config_name="main.yaml")

# Create a timestamped output folder in the root of the repository.
# The timestamp avoids spaces and colons so the folder name is valid on every
# platform (colons are not allowed in Windows paths).
OUTPUT_FOLDER = Path('output') / datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S_%f')
OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
from matplotlib import pyplot as plt

In [None]:
# Load the raw node and edge tables.
NODES = pd.read_csv('data/01_raw/city_nodes_annual_all.csv')
EDGES = pd.read_csv('data/01_raw/city_edges_annual_all.csv')

# Map every PlaceID to its (XCOORD, YCOORD) pair.
POSITION = {
    place_id: (coords['XCOORD'], coords['YCOORD'])
    for place_id, coords in (
        NODES[['PlaceID', 'XCOORD', 'YCOORD']]
        .drop_duplicates()
        .set_index('PlaceID')
        .to_dict('index')
        .items()
    )
}

# Sorted distinct years present in the edge table, and distinct place ids.
YEARS = np.sort(EDGES['Year'].unique())
PLACES = NODES['PlaceID'].unique()

In [None]:
def singular_network(year, edges, places):
    """Build the undirected graph for a single year.

    Parameters
    ----------
    year
        Value matched against the ``Year`` column of ``edges``.
    edges : pandas.DataFrame
        Edge table with at least the columns ``Year``, ``from`` and ``to``.
    places : iterable
        Node identifiers; all of them are added to the graph, so places
        without an edge in ``year`` appear as isolated nodes.

    Returns
    -------
    networkx.Graph
        Graph containing every entry of ``places`` plus one edge per
        ``(from, to)`` row of the selected year.
    """
    in_year = edges['Year'] == year
    edge_list = edges.loc[in_year, ['from', 'to']].to_numpy().tolist()

    graph = nx.Graph()
    # Nodes first: every place is present even when it has no edges this year.
    graph.add_nodes_from(places)
    graph.add_edges_from(edge_list)
    return graph

def temporal_network_analysis(edges, places, years, min_community_size=10, seed=None):
    """Compute structural statistics for one network per year.

    For each entry of ``years`` a graph is built with ``singular_network`` and
    summarised; consecutive graphs are additionally compared through their
    adjacency matrices.

    Parameters
    ----------
    edges : pandas.DataFrame
        Edge table, forwarded unchanged to ``singular_network``.
    places : sequence
        Node identifiers added to every yearly graph. ``len(places)`` is the
        denominator of ``shareLargestComponent``.
    years : iterable
        Years to analyse. Each year is compared with the one preceding it in
        this iterable, so the order matters for ``hammingdistance``.
    min_community_size : int, default 10
        Minimum number of nodes a Louvain community needs in order to be
        counted in ``nMeaningfulCommunities``.
    seed : int, random state or None, default None
        Forwarded to ``nx.community.louvain_communities``. Louvain is a
        randomised algorithm; pass a fixed value to make ``communities`` and
        ``nMeaningfulCommunities`` reproducible. ``None`` keeps the previous
        (unseeded) behaviour.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``year`` with the columns

        - ``transitivity``: ``nx.transitivity`` of the yearly graph,
        - ``clustering``: ``nx.average_clustering`` of the yearly graph,
        - ``degree``: mean degree, ``2 * n_edges / n_nodes``,
        - ``nComponents``: number of connected components,
        - ``shareLargestComponent``: size of the largest component divided by
          ``len(places)``,
        - ``pathlength``: average shortest path length inside the largest
          component,
        - ``communities``: the Louvain partition returned by networkx,
        - ``hammingdistance``: sum of absolute entry-wise differences between
          this year's and the previous year's adjacency matrix (``NaN`` for
          the first year),
        - ``nMeaningfulCommunities``: number of communities with at least
          ``min_community_size`` nodes.
    """
    columns = [
        'year',
        'transitivity',
        'clustering',
        'degree',
        'nComponents',
        'shareLargestComponent',
        'pathlength',
        'communities',
        'hammingdistance',
        'nMeaningfulCommunities'
    ]

    records = []
    # Only the adjacency matrix of the previous year is needed for the
    # comparison, so it is kept instead of the whole graph and never rebuilt.
    previous_adjacency = None

    for year in years:
        # Carriage return keeps the progress indicator on a single line.
        print(year, end='\r')

        G = singular_network(year, edges, places)

        # Materialise the components once; both the count and the largest
        # component are derived from the same list.
        components = list(nx.connected_components(G))
        largest_cc = max(components, key=len)

        communities = nx.community.louvain_communities(G, seed=seed)
        n_large_communities = sum(
            len(community) >= min_community_size for community in communities
        )

        adjacency = nx.adjacency_matrix(G)
        if previous_adjacency is None:
            # There is nothing to compare the first year with.
            hamming_distance = np.nan
        else:
            hamming_distance = np.abs(adjacency - previous_adjacency).sum()
        previous_adjacency = adjacency

        records.append({
            'year': year,
            'transitivity': nx.transitivity(G),
            'clustering': nx.average_clustering(G),
            'degree': 2 * G.number_of_edges() / G.number_of_nodes(),
            'nComponents': len(components),
            'shareLargestComponent': len(largest_cc) / len(places),
            # Path length is only defined on a connected graph, hence the
            # restriction to the largest component.
            'pathlength': nx.average_shortest_path_length(G.subgraph(largest_cc)),
            'communities': communities,
            'hammingdistance': hamming_distance,
            'nMeaningfulCommunities': n_large_communities,
        })

    # Passing `columns` keeps the frame well-formed even when `years` is empty.
    return pd.DataFrame(records, columns=columns).set_index('year')

In [None]:
# Run the year-by-year analysis and flag candidate transition years.
# A year is flagged when at least one of three criteria holds:
#   transition1 - hamming distance more than one standard deviation above its mean,
#   transition2 - share of the largest component changed by more than 0.05,
#   transition3 - average clustering changed by more than 0.02.
RESULTS = temporal_network_analysis(edges=EDGES, places=PLACES, years=YEARS).assign(
    transition1=lambda df: df['hammingdistance'] > (
        df['hammingdistance'].mean() + df['hammingdistance'].std()
    ),
    transition2=lambda df: df['shareLargestComponent'].diff().abs() > 0.05,
    transition3=lambda df: df['clustering'].diff().abs() > 0.02,
    transition=lambda df: df['transition1'] | df['transition2'] | df['transition3'],
)

In [None]:
# Measures in the order in which they fill the 4x2 grid of panels.
PLOT_SEQUENCE = [
    'degree',
    'transitivity',
    'clustering',
    'hammingdistance',
    'nComponents',
    'shareLargestComponent',
    'pathlength',
    'nMeaningfulCommunities',
]

# Years flagged by any of the transition criteria.
transition_years = RESULTS.loc[RESULTS['transition']].index

f, axarr = plt.subplots(nrows=4, ncols=2, figsize=(10, 8))
axarr = axarr.flatten()

print(transition_years)

# One panel per measure.
for ax, measure in zip(axarr, PLOT_SEQUENCE):
    ax.plot(RESULTS[measure])
    ax.set_ylabel(measure)

# Mark every transition year on every panel: black when the year is of the
# form 50k + 1, red otherwise.
for year in transition_years:
    line_colour = 'k' if ((year - 1) % 50) == 0 else 'r'
    for ax in axarr:
        ax.axvline(year, c=line_colour, alpha=0.5, ls=':')

plt.tight_layout()
plt.savefig(OUTPUT_FOLDER / 'temporal_networks_yearly.png')

# List the two groups of transition years separately (red first, then black).
print([year for year in transition_years if ((year - 1) % 50) != 0])
print([year for year in transition_years if ((year - 1) % 50) == 0])