In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import os
from pathlib import Path

import hydra
from hydra.core.global_hydra import GlobalHydra

# Initialize hydra once per kernel and move to the root of the repository.
# The "already initialized" case is detected explicitly instead of catching every
# exception, so a missing or broken config fails loudly rather than being
# reported as "Hydra already initalized!".
if GlobalHydra.instance().is_initialized():
    # The cell was run before in this kernel: keep the existing CONFIG and do
    # not change directory a second time.
    print('Hydra already initalized!')
else:
    hydra.initialize(version_base=None, config_path="../config/")
    try:
        CONFIG = hydra.compose(config_name="main.yaml")
    except Exception:
        # Undo the initialization so the cell can be re-run after the config
        # problem is fixed (otherwise the chdir below would be skipped forever).
        GlobalHydra.instance().clear()
        raise
    print('Initializing hydra')
    os.chdir('..')

# Create an output folder in the root of the repository, named after the
# current timestamp so every run writes to its own folder
OUTPUT_FOLDER = Path('output/{0}'.format(datetime.datetime.now()))
OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
from matplotlib import pyplot as plt
from src.utils.styling import hide_and_move_axis

In [None]:
class TemporalNetwork():
    """Yearly snapshots of a network of places, built from two CSV files.

    The nodes file must provide the columns ``PlaceID``, ``PlaceName``,
    ``XCOORD`` and ``YCOORD``; the edges file must provide ``Year``, ``from``,
    ``to`` and ``PartyID``. One undirected ``networkx.Graph`` is built per
    distinct year found in the edges file, and every graph contains all places
    as nodes (in the same order), so adjacency matrices of different years can
    be compared element-wise.
    """

    # Aggregation modes accepted by `collapse`
    _AGGREGATIONS = ('binarize', 'always', 'majority')

    def __init__(self, nodes_file='data/01_raw/city_nodes_annual_all.csv', edges_file='data/01_raw/city_edges_annual_all.csv'):
        """Read both CSV files, build the yearly graphs and the Hamming distances.

        Parameters
        ----------
        nodes_file : path to the CSV file describing the places.
        edges_file : path to the CSV file describing the yearly edges.
        """
        self._nodes = pd.read_csv(nodes_file)
        self._edges = pd.read_csv(edges_file)
        # First character of the party identifier
        self._edges['ruling_party_category'] = self._edges.PartyID.str[0]

        # PlaceID -> (x, y), used as node positions when drawing
        self._position = self._nodes[['PlaceID', 'XCOORD', 'YCOORD']].drop_duplicates().set_index('PlaceID').to_dict('index')
        self._position = {key: (value['XCOORD'], value['YCOORD']) for key, value in self._position.items()}

        self._years = np.sort(self._edges.Year.unique())
        self._places = self._nodes.PlaceID.unique()

        self.construct_networks()
        self.compute_hamming_distance()

    def construct_networks(self):
        """Build one undirected graph per year and store them by year."""
        print('Constructing networks...')
        self._networks = {}

        for year in self._years:
            edges = self._edges[self._edges.Year == year]
            edges = edges[['from', 'to']].values.tolist()
            G = nx.Graph()
            # Nodes are added first so that every yearly graph has the same node order
            G.add_nodes_from(self._places)
            G.add_edges_from(edges)
            self._networks[year] = G

    def compute_hamming_distance(self):
        """Compute the Hamming distance between each network and its predecessor.

        The distance is the sum of absolute differences of the two adjacency
        matrices (both triangles of the symmetric matrix are summed). The first
        year has no predecessor and gets ``nan``. Each year is compared with
        the previous year that is present in the data, so a gap in the years
        no longer raises ``KeyError``.
        """
        print('Computing hamming distance...')
        # nan marks "no previous snapshot"; this also copes with an empty year list
        self._hamming_distance = np.full(len(self._years), np.nan)

        consecutive_years = zip(self._years[:-1], self._years[1:])
        for index, (previous_year, year) in enumerate(consecutive_years, start=1):
            A1 = nx.adjacency_matrix(self.G(year))
            A2 = nx.adjacency_matrix(self.G(previous_year))
            self._hamming_distance[index] = np.abs(A1 - A2).sum()

    def collapse(self, year0, year1, aggregation='binarize'):
        """Merge the yearly networks from ``year0`` to ``year1`` (inclusive) into one graph.

        Each edge is counted once for every year of the window in which it
        exists; ``aggregation`` decides which counts are kept:

        * ``'binarize'`` - the edge exists in at least one year,
        * ``'always'``   - the edge reaches the highest count in the window,
        * ``'majority'`` - the edge reaches at least half of the highest count.

        If ``year1 < year0`` the network of ``year0`` alone is used.

        Returns
        -------
        networkx.Graph whose nodes are labelled with the place identifiers.

        Raises
        ------
        ValueError : if ``aggregation`` is not one of the modes above.
        KeyError   : if a year of the window has no network.
        """
        if aggregation not in self._AGGREGATIONS:
            raise ValueError(
                f'Unknown aggregation {aggregation!r}; expected one of {self._AGGREGATIONS}'
            )

        # year0 seeds the sum, so the loop starts with the following year;
        # otherwise the edges of year0 would be counted twice.
        counts = nx.adjacency_matrix(self.G(year0))
        for year in range(year0 + 1, year1 + 1):
            counts = counts + nx.adjacency_matrix(self.G(year))

        peak = counts.max()
        if aggregation == 'binarize':
            threshold = 1
        elif aggregation == 'always':
            threshold = peak
        else:  # 'majority'
            threshold = 0.5 * peak

        # Counts are whole numbers, so flooring the threshold at 1 never drops
        # an existing edge; it only prevents a window without any edge
        # (peak == 0) from turning every pair of nodes into an edge.
        A = (counts >= max(threshold, 1)).astype(int)

        G = nx.Graph(A)
        # The matrix rows follow the node order of the yearly graphs, i.e. the places
        mapping = dict(enumerate(self._places))
        G = nx.relabel_nodes(G, mapping)

        return G

    def get_city_names(self, node_list=None):
        """Return the distinct (PlaceID, PlaceName) rows of the given place ids.

        ``node_list`` may be any list-like of place ids; ``None`` (the default)
        selects nothing and yields an empty frame.
        """
        if node_list is None:
            node_list = []

        selected = self._nodes[self._nodes.PlaceID.isin(node_list)]
        return selected[['PlaceID', 'PlaceName']].drop_duplicates().reset_index(drop=True)

    def G(self, year):
        """Return the graph of ``year`` (raises ``KeyError`` for unknown years)."""
        return self._networks[year]

    def years(self):
        """Return the sorted array of years that have a network."""
        return self._years

    def hamming_distance(self):
        """Return the Hamming distances, aligned with `years` (first entry is nan)."""
        return self._hamming_distance

    def position(self):
        """Return the mapping PlaceID -> (x, y) coordinates."""
        return self._position

    def nodes_df(self):
        """Return the nodes data frame as read from the nodes file."""
        return self._nodes

In [None]:
# Build the temporal network from the default CSV paths; the constructor also
# constructs the yearly graphs and computes the Hamming distances.
network = TemporalNetwork()

In [None]:
# Years whose network differs from the previous one by a Hamming distance above 400
jump_years = network.years()[network.hamming_distance() > 400]
# 1001, 1051, ..., 1501
period_starts = np.arange(1001, 1551, 50)

fig, ax = plt.subplots(figsize=(4.5, 3))

for jump_year in jump_years:
    if jump_year in period_starts:
        # Jumps at the start of a 50-year period are only drawn faintly
        ax.axvline(jump_year, c='r', ls=':', alpha=0.3)
        continue
    # All other jumps are reported and highlighted
    print(jump_year)
    ax.axvline(jump_year, c='r')

ax.plot(network.years(), network.hamming_distance(), c='k')
hide_and_move_axis(ax)
ax.set_xlabel('Year')
ax.set_ylabel('Hamming distance')
plt.tight_layout()
plt.savefig(OUTPUT_FOLDER / 'hamming_distance.jpg', dpi=400)

In [None]:
# Aggregation mode used to collapse each period into a single graph
aggregate = 'majority'

G1 = network.collapse(1351, 1354, aggregation=aggregate)
G2 = network.collapse(1355, 1400, aggregation=aggregate)
G3 = network.collapse(1401, 1414, aggregation=aggregate)
G4 = network.collapse(1415, 1417, aggregation=aggregate)
G5 = network.collapse(1418, 1450, aggregation=aggregate)

# (graph, first year, last year) of each period, in chronological order
networks = [(G1, 1351, 1354), (G2, 1355, 1400), (G3, 1401, 1414), (G4, 1415, 1417), (G5, 1418, 1450)]

# One row per pair of consecutive periods; the grid is derived from the number
# of periods so that changing `networks` cannot index past the last row.
# Columns: earlier period, later period, added edges, removed edges.
n_transitions = len(networks) - 1
f, axarr = plt.subplots(n_transitions, 4, figsize=(12, 3.5 * n_transitions), squeeze=False)

for row, (earlier, later) in enumerate(zip(networks[:-1], networks[1:])):

    axs = axarr[row]

    G_1, year0_1, year1_1 = earlier
    G_2, year0_2, year1_2 = later

    # Edges of the later period that are absent from the earlier one
    added_edges = G_2.copy()
    added_edges.remove_edges_from(G_1.edges())

    # Edges of the earlier period that are absent from the later one
    removed_edges = G_1.copy()
    removed_edges.remove_edges_from(G_2.edges())

    # A separate name for the column index keeps the row index intact
    for column, G in enumerate([G_1, G_2]):
        nx.draw_networkx_nodes(G, network.position(), node_size=4, node_color='k', ax=axs[column])
        nx.draw_networkx_edges(G, network.position(), alpha=0.05, ax=axs[column])

    nx.draw_networkx_nodes(added_edges, network.position(), node_size=4, node_color='k', ax=axs[2])
    nx.draw_networkx_edges(added_edges, network.position(), alpha=0.1, edge_color='b', ax=axs[2])

    nx.draw_networkx_nodes(removed_edges, network.position(), node_size=4, node_color='k', ax=axs[3])
    nx.draw_networkx_edges(removed_edges, network.position(), alpha=0.1, edge_color='r', ax=axs[3])

    axs[0].set_title(f'{year0_1} - {year1_1}')
    axs[1].set_title(f'{year0_2} - {year1_2}')
    axs[2].set_title(str(len(added_edges.edges())) + ' added edges')
    axs[3].set_title(str(len(removed_edges.edges())) + ' removed edges')

plt.tight_layout()
plt.savefig(OUTPUT_FOLDER / f'temporal_network_changes_{aggregate}.jpg', dpi=400)

In [None]:
# Cities with fewer added/removed edges than this are left out of the tables
MIN_CHANGED_EDGES = 10

for i in range(len(networks) - 1):

    G_1, year0_1, year1_1 = networks[i]
    G_2, year0_2, year1_2 = networks[i+1]

    # The transition 1400 -> 1401 is deliberately not reported
    if year1_1 == 1400:
        continue

    # Edges of the later period that are absent from the earlier one
    added_edges = G_2.copy()
    added_edges.remove_edges_from(G_1.edges())

    # Edges of the earlier period that are absent from the later one
    removed_edges = G_1.copy()
    removed_edges.remove_edges_from(G_2.edges())

    # Added edges are looked up in the later period, removed edges in the earlier one
    for edges, y0, y1, action in ((added_edges, year0_2, year1_2, 'added'), (removed_edges, year0_1, year1_1, 'removed')):

        # Build the frame straight from the (node, degree) pairs: going through
        # np.array would force both columns into one dtype and break the sum
        # below whenever the place ids are not numeric.
        most_common = pd.DataFrame(list(edges.degree), columns=['PlaceID', 'changed_edges'])
        # Every edge contributes to the degree of both of its end points
        print(f'Total number of edges {action} from {year1_1} to {year0_2}:', most_common.changed_edges.sum() / 2)

        names = network.get_city_names(most_common.PlaceID)
        most_common = pd.merge(names, most_common, on='PlaceID')
        most_common = most_common.sort_values(by='changed_edges', ascending=False)
        most_common = most_common[most_common.changed_edges >= MIN_CHANGED_EDGES]

        affected_parties_records = []

        for node_id in most_common.PlaceID:

            to = list(edges.neighbors(node_id))
            # NOTE(review): only rows with the city in the 'from' column are matched;
            # confirm the edge list contains both directions of every edge.
            edge = network._edges[(network._edges['from'] == node_id) & network._edges['to'].isin(to) & network._edges['Year'].between(y0, y1)]
            # Latest row per neighbour within the period, then count the parties
            affected_parties = edge.sort_values(['to','Year'],ascending=False).groupby('to').head(1)['PartyID'].value_counts()

            result = '; '.join(f'{party}: {count}' for party, count in affected_parties.items())

            affected_parties_records.append((node_id, result))

        affected_parties_df = pd.DataFrame(affected_parties_records, columns=['PlaceID', 'affected_parties'])
        most_common = pd.merge(most_common, affected_parties_df, on='PlaceID')

        # The wording matches the `>=` filter above
        print(f'Cities with at least {MIN_CHANGED_EDGES} edges {action} from {year1_1} to {year0_2}:', end='\n\n')
        print(most_common.to_markdown(index=False), end='\n\n')

# Sandbox

In [None]:

def temporal_network_analysis(edges, places, years, min_community_size=10, seed=None):
    """Compute structural measures of the network of every year.

    For each year a graph is obtained from ``singular_network(year, edges,
    places)``, which is not defined in this cell and must exist in the
    notebook namespace (presumably it returns the networkx graph of that
    year - confirm against its definition).

    Parameters
    ----------
    edges, places : passed through unchanged to ``singular_network``; the
        number of ``places`` is also used to normalise the largest component.
    years : iterable of years to analyse, in chronological order.
    min_community_size : minimum number of nodes for a Louvain community to
        be counted in ``nMeaningfulCommunities``.
    seed : seed forwarded to the Louvain community detection. The default
        ``None`` keeps the previous, non-reproducible behaviour; pass an
        integer to get the same communities on every run.

    Returns
    -------
    pandas.DataFrame indexed by ``year`` with the columns ``transitivity``,
    ``clustering``, ``degree`` (mean degree), ``nComponents``,
    ``shareLargestComponent``, ``pathlength`` (average shortest path length
    within the largest component), ``communities``, ``hammingdistance`` (to
    the previous year, ``nan`` for the first year) and
    ``nMeaningfulCommunities``.
    """
    records = []
    old_G = None
    hamming_distance = np.nan

    for year in years:
        # Progress indicator, overwritten in place
        print(year, end='\r')

        G = singular_network(year, edges, places)

        # Computed once and reused for both the count and the largest component
        components = list(nx.connected_components(G))
        n_components = len(components)

        largest_cc = max(components, key=len)
        size_largest_component = len(largest_cc)

        communities = nx.community.louvain_communities(G, seed=seed)
        large_communities = sum(len(community) >= min_community_size for community in communities)

        # Sum of absolute adjacency differences with respect to the previous year
        if old_G is not None:
            hamming_distance = np.abs(nx.adjacency_matrix(G) - nx.adjacency_matrix(old_G)).sum()
        old_G = G

        records.append([
            year,
            nx.transitivity(G),
            nx.average_clustering(G),
            2 * G.number_of_edges() / G.number_of_nodes(),
            n_components,
            size_largest_component / len(places),
            # Restricted to the largest component, where all paths exist
            nx.average_shortest_path_length(G.subgraph(largest_cc)),
            communities,
            hamming_distance,
            large_communities
        ])

    columns = [
        'year',
        'transitivity',
        'clustering',
        'degree',
        'nComponents',
        'shareLargestComponent',
        'pathlength',
        'communities',
        'hammingdistance',
        'nMeaningfulCommunities'
    ]

    results = pd.DataFrame(records, columns=columns).set_index('year')

    return results

In [None]:
RESULTS = temporal_network_analysis(places=PLACES, edges=EDGES, years=YEARS)

# Flag the years in which the network changes abruptly:
#   transition1 - Hamming distance more than one standard deviation above its mean
#   transition2 - share of the largest component changes by more than 0.05
#   transition3 - average clustering changes by more than 0.02
#   transition  - any of the three
RESULTS = RESULTS.assign(
    transition1=lambda df: df.hammingdistance > (df.hammingdistance.mean() + df.hammingdistance.std()),
    transition2=lambda df: df.shareLargestComponent.diff().abs() > 0.05,
    transition3=lambda df: df.clustering.diff().abs() > 0.02,
).assign(
    transition=lambda df: df.transition1 | df.transition2 | df.transition3,
)

In [None]:
# Measures to plot, one panel each (filled row by row)
PLOT_SEQUENCE = [
    'degree',
    'transitivity',
    'clustering',
    'hammingdistance',
    'nComponents',
    'shareLargestComponent',
    'pathlength',
    'nMeaningfulCommunities'
]

# Years flagged as transitions, split by whether they fall on a year of the
# form 50 * k + 1 (e.g. 1351, 1401)
transition_years = RESULTS[RESULTS.transition].index
boundary_years = [y for y in transition_years if ((y-1) % 50) == 0]
other_years = [y for y in transition_years if ((y-1) % 50) != 0]

f, axarr = plt.subplots(4, 2, figsize=(10, 8))
axarr = axarr.flatten()

print(transition_years)

for measure, ax in zip(PLOT_SEQUENCE, axarr):
    ax.plot(RESULTS[measure])
    ax.set_ylabel(measure)

# Mark every transition in every panel: black on the 50-year grid, red otherwise
for ax in axarr:
    for t in transition_years:
        colour = 'k' if t in boundary_years else 'r'
        ax.axvline(t, c=colour, alpha=0.5, ls=':')

plt.tight_layout()
plt.savefig(OUTPUT_FOLDER / 'temporal_networks_yearly.png')

print(other_years)
print(boundary_years)