In [None]:
%load_ext autoreload
%autoreload 2

import hydra
import os
import datetime
from pathlib import Path

# Initialize hydra exactly once per kernel and move to the root of the repository.
#
# Re-running this cell must not initialize hydra (or chdir) a second time, so the
# "already initialized" state is checked explicitly. A bare `except:` would also
# report genuine failures (wrong config path, broken YAML, a typo in this cell)
# as "already initialized" and let the notebook continue without CONFIG.
from hydra.core.global_hydra import GlobalHydra

if GlobalHydra.instance().is_initialized():
    print('Hydra already initalized!')
else:
    hydra.initialize(version_base=None, config_path="../config/")
    try:
        CONFIG = hydra.compose(config_name="main.yaml")
    except Exception:
        # Roll back the initialization so the cell can simply be re-run after the
        # configuration problem is fixed, then surface the real error.
        GlobalHydra.instance().clear()
        raise
    print('Initializing hydra')

    # Only change directory on the first successful initialization; doing it on
    # every re-run would walk further up the directory tree each time.
    os.chdir('..')

    # One output folder per kernel session, named after the current timestamp.
    # NOTE(review): str(datetime.now()) contains spaces and colons, which are not
    # valid in Windows paths -- confirm this only runs on POSIX systems.
    OUTPUT_FOLDER = Path('output/{0}'.format(datetime.datetime.now()))
    OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

In [None]:
from matplotlib import pyplot as plt
import networkx as nx
import pandas as pd
from src.utils.styling import hide_and_move_axis, get_standard_colors
import numpy as np
from itertools import product, permutations
from src.weighted_network import WeightedNetwork

In [None]:
def plot_community_characteristics(network, aggregate=True):
    """Plot, for every community, the relative frequency of PartyID values on its internal edges.

    An edge is counted for a community when both its ``from`` and ``to`` entries
    belong to that community. Each community is drawn as a bar chart in its own
    panel of a 2x5 grid and the figure is saved into ``OUTPUT_FOLDER``.

    Parameters
    ----------
    network
        Object exposing ``communities()`` (iterable of node collections) and
        ``edges()`` (DataFrame with ``from``, ``to`` and ``PartyID`` columns).
    aggregate : bool, default True
        If true, only the first character of each ``PartyID`` is counted, the
        panels share their x-axis and the file is ``governing_parties.jpg``.
        Otherwise the full ``PartyID`` values are counted, tick labels are drawn
        vertically and the file is ``governing_parties_distinct.jpg``.
    """
    # The two variants differ only in figure size, x-axis sharing and file name.
    if aggregate:
        figsize, output_file = (8, 4), 'governing_parties.jpg'
    else:
        figsize, output_file = (10, 4), 'governing_parties_distinct.jpg'

    fig, axarr = plt.subplots(2, 5, sharey=True, sharex=bool(aggregate), figsize=figsize)

    flatax = axarr.flatten()
    colors = get_standard_colors()

    communities = network.communities()
    edges = network.edges()

    for idx, members in enumerate(communities):
        # Keep only the edges that start and end inside this community.
        starts_inside = edges['from'].isin(members)
        ends_inside = edges['to'].isin(members)
        internal = edges[starts_inside & ends_inside]

        party_ids = internal.PartyID.str[0] if aggregate else internal.PartyID
        hist = party_ids.value_counts().sort_index()

        # Turn absolute counts into shares of this community's internal edges.
        labels = hist.index
        shares = hist.values / hist.values.sum()

        ax = flatax[idx]
        ax.bar(labels, shares, color=colors[idx])
        hide_and_move_axis(ax)

        if not aggregate:
            ax.set_xticks(labels, labels, rotation='vertical', size=6)

    # Label only the bottom-centre panel (x) and the first column (y).
    axarr[1, 2].set_xlabel('Governing party')
    for row in (0, 1):
        axarr[row, 0].set_ylabel('Relative frequency')

    plt.tight_layout()
    plt.savefig(OUTPUT_FOLDER / output_file, dpi=400)

In [None]:
# Build the weighted network from the raw node/edge files named in the hydra config.
RAW_DATA = Path(CONFIG.data.raw)

network = WeightedNetwork(
    nodes_file=RAW_DATA / CONFIG.data.filenames.nodes,
    edges_file=RAW_DATA / CONFIG.data.filenames.edges,
)

# Two figures: first with aggregated PartyIDs, then with the distinct PartyIDs.
plot_community_characteristics(network, aggregate=True)
plot_community_characteristics(network, aggregate=False)

In [None]:
# Yearly mean of the `Juden` column, one panel per community on a shared 2x5 grid.
f, axarr = plt.subplots(nrows=2, ncols=5, figsize=(12, 6), sharex=True, sharey=True)
flat_axes = axarr.flatten()

colors = get_standard_colors()
nodes = network.nodes()

# Yearly `Juden` total divided by the number of distinct PlaceIDs. Within this
# cell it is only referenced by the disabled overlay line inside the loop.
global_df = (
    nodes.groupby('Year').Juden.sum()
    / len(network.nodes().PlaceID.unique())
)

for i, community in enumerate(network.communities()):

    # Restrict to the places of this community, then average per year.
    df = nodes[nodes.PlaceID.isin(community)].groupby('Year').Juden.mean()  # / len(community)
    flat_axes[i].plot(df.index, df.values, c=colors[i])
    # Disabled overlay of the global series:
    #flat_axes[i].plot(global_df.index, global_df.values, zorder=0, c='k', alpha=0.5)

plt.tight_layout()

# Sandbox

In [None]:
def plot_edge_weight_distribution(network):
    """Plot a histogram of the edge weights of ``network`` and save it as a JPEG.

    Parameters
    ----------
    network
        Graph object exposing ``edges()`` (iterable of ``(u, v)`` pairs) and
        ``get_edge_data(u, v)`` (mapping with a ``'weight'`` entry).
        NOTE(review): this looks like the networkx graph API -- confirm what the
        caller passes in.

    Raises
    ------
    KeyError
        If an edge has no ``'weight'`` entry.

    The figure is written to ``OUTPUT_FOLDER / 'edge_weight_histrogram.jpg'``.
    """
    # Read the edges from the graph that was passed in. Iterating a global `G`
    # here ignores the argument and fails with NameError on a fresh kernel.
    weights = [network.get_edge_data(u, v)['weight'] for u, v in network.edges()]

    fig, ax = plt.subplots()

    # NOTE(review): np.arange(0, 1, 0.05) ends at 0.95, so weights above 0.95 fall
    # outside every bin and are not counted -- confirm that this is intended.
    ax.hist(weights, bins=np.arange(0, 1, 0.05), width=0.04)
    ax.set_xlabel('Edge weight')
    ax.set_ylabel('Count')
    hide_and_move_axis(ax)

    plt.savefig(OUTPUT_FOLDER / 'edge_weight_histrogram.jpg')

In [None]:
# NOTE(review): `NETWORK` is not defined in any of the cells visible here (the
# network loaded earlier is bound to lowercase `network`). Confirm which graph
# object is meant; the plotting function calls `get_edge_data(u, v)` on it.
plot_edge_weight_distribution(network=NETWORK)

In [None]:
# Number of edges per year (count of non-null `from` entries in each `Year` group),
# drawn with the default pandas Series plot.
network.edges().groupby('Year')['from'].count().plot()