In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import os
from pathlib import Path

import hydra
from hydra.core.global_hydra import GlobalHydra

# Initialize hydra and move to the root of the repository
try:
    hydra.initialize(version_base=None, config_path="../config/")
    CONFIG = hydra.compose(config_name="main.yaml")
    print('Initializing hydra')
except:
    print('Hydra already initalized!')
else:
    os.chdir('..')

# Create an output folder in the root of the repository
OUTPUT_FOLDER = Path('output/{0}'.format(datetime.datetime.now()))
Path(OUTPUT_FOLDER).mkdir(parents=True, exist_ok=True)

In [None]:
from matplotlib import pyplot as plt
import networkx as nx
import pandas as pd
from src.utils.styling import hide_and_move_axis
import numpy as np
from itertools import product, permutations

In [None]:
class WeightedNetwork():

    def __init__(self, nodes_file='data/01_raw/city_nodes_annual_all.csv', edges_file='data/01_raw/city_edges_annual_all.csv'):

        self._nodes = pd.read_csv(nodes_file)
        self._edges = pd.read_csv(edges_file)
        self._edges['ruling_party_category'] = self._edges.PartyID.str[0]

        self._position = self._nodes[['PlaceID', 'XCOORD', 'YCOORD']].drop_duplicates().set_index('PlaceID').to_dict('index')
        self._position = {key: (value['XCOORD'], value['YCOORD']) for key, value in self._position.items()}

        self.construct_network()
        self.compute_communities()
        
    
    def construct_network(self, normalize=True):

        edge_weights = self._edges.groupby(['from', 'to']).Year.count().reset_index().rename(columns={'Year': 'weight'})

        if normalize:
            edge_weights.weight /= edge_weights.weight.max()

        edge_weights = edge_weights.values.tolist()
        edge_weights = [[int(e[0]), int(e[1]), e[2]] for e in edge_weights]
    
        G = nx.Graph()
        G.add_weighted_edges_from(edge_weights)
    
        self._G = G

    
    def compute_communities(self, max_communities=10, seed=0):

        comms = nx.community.louvain_communities(self._G, seed=seed)
        T = np.sort([len(c) for c in comms])[-max_communities]
        comms = [c for c in comms if len(c) >= T]
        assert len(comms) <= max_communities

        self._comms = comms
        

    def nodes(self):

        return self._nodes

    def edges(self):

        return self._edges

    def position(self):

        return self._position

    def G(self):

        return self._G

    def communities(self):

        return self._comms


def get_standard_colors():
    return plt.rcParams['axes.prop_cycle'].by_key()['color']


def draw_community_structure(network):

    colors = get_standard_colors()

    f, ax = plt.subplots()

    nx.draw_networkx_nodes(network.G(), network.position(), node_size=12, node_color='k')
    nx.draw_networkx_edges(network.G(), network.position(), alpha=0.025)

    for i, comm in enumerate(network.communities()):
        nx.draw_networkx_nodes(network.G(), network.position(), nodelist=comm, node_size=15, node_color=colors[i])

    plt.tight_layout()
    plt.savefig(OUTPUT_FOLDER / 'communities.jpg', dpi=400)


def plot_community_characteristics(network, aggregate=True):

    if aggregate:
        fig, axarr = plt.subplots(2, 5, sharey=True, sharex=True, figsize=(8, 4))
        output_file = 'governing_parties.jpg'
    else:
        fig, axarr = plt.subplots(2, 5, sharey=True, figsize=(10, 4))
        output_file = 'governing_parties_distinct.jpg'

    flatax = axarr.flatten()
    colors = get_standard_colors()

    communities = network.communities()
    edges = network.edges()
    
    for i, community in enumerate(communities):

        in_edges = edges[edges['from'].isin(community) & edges['to'].isin(community)]
        
        if aggregate:
            ruling_parties = in_edges.PartyID.str[0]
            hist = ruling_parties.value_counts()
        else:
            hist = in_edges.PartyID.value_counts()
        
        hist.sort_index(inplace=True)

        x, y = hist.index, hist.values        
        y = y / y.sum()

        flatax[i].bar(x, y, color=colors[i])
        hide_and_move_axis(flatax[i])
        
        if not aggregate:
            flatax[i].set_xticks(x, x, rotation='vertical', size=6)
    
    axarr[1, 2].set_xlabel('Governing party')
    axarr[0, 0].set_ylabel('Relative frequency')
    axarr[1, 0].set_ylabel('Relative frequency')

    plt.tight_layout()
    
    plt.savefig(OUTPUT_FOLDER / output_file, dpi=400)


def plot_temporal_evolution_of_governance(network):

    fig, axarr = plt.subplots(2, 5, sharey=True, sharex=True, figsize=(9, 4))
    flatax = axarr.flatten()
    colors = get_standard_colors()
    
    communities = network.communities()
    edges = network.edges()
        
    for i, community in enumerate(communities):
    
        in_edges = edges[edges['from'].isin(community) & edges['to'].isin(community)]
        timeseries = in_edges.groupby('Year').ruling_party_category.value_counts().unstack().fillna(0)
    
        if 'R' not in timeseries.columns:
            timeseries['R'] = 0
        if 'S' not in timeseries.columns:
            timeseries['S'] = 0
    
        norm = timeseries.S + timeseries.R
        
        flatax[i].plot(timeseries.index, timeseries.S / norm , label='S', c=colors[i])
        flatax[i].plot(timeseries.index, timeseries.R / norm, label='R', c=colors[i], ls='--')
        hide_and_move_axis(flatax[i])
            
    flatax[i].legend()
    axarr[1, 2].set_xlabel('Year')
    axarr[0, 0].set_ylabel('Relative frequency')
    axarr[1, 0].set_ylabel('Relative frequency')
    
    plt.tight_layout()

    plt.savefig(OUTPUT_FOLDER / 'timeseries_governance.jpg', dpi=400)

In [None]:
network = WeightedNetwork()
draw_community_structure(network)
plot_community_characteristics(network)
plot_community_characteristics(network, aggregate=False)
plot_temporal_evolution_of_governance(network)

# Sandbox

In [None]:
def plot_edge_weight_distribution(network):

    weights = [network.get_edge_data(u, v)['weight'] for u, v in G.edges()]

    f, ax = plt.subplots()

    ax.hist(weights, bins=np.arange(0, 1, 0.05), width=0.04)
    ax.set_xlabel('Edge weight')
    ax.set_ylabel('Count')
    hide_and_move_axis(ax)

    plt.savefig(OUTPUT_FOLDER / 'edge_weight_histrogram.jpg')

In [None]:
plot_edge_weight_distribution(network=NETWORK)

In [None]:
network.edges().groupby('Year')['from'].count().plot()