In [None]:
%load_ext autoreload
%autoreload 2

import hydra
import os
import datetime
from pathlib import Path

# Initialize hydra and move to the root of the repository.
# This cell is meant to be re-runnable in a live kernel: the setup below must
# happen exactly once, because it changes the working directory.
from hydra.core.global_hydra import GlobalHydra

if GlobalHydra.instance().is_initialized():
    # Hydra, CONFIG, the working directory and OUTPUT_FOLDER were already set
    # up by an earlier run of this cell.
    print('Hydra already initalized!')
else:
    # No blanket `except` here: a wrong config path or a broken config must
    # surface as an error instead of being reported as "already initialized".
    hydra.initialize(version_base=None, config_path="../config/")
    try:
        CONFIG = hydra.compose(config_name="main.yaml")
    except Exception:
        # Undo the initialization so the cell can simply be re-run once the
        # configuration problem is fixed.
        GlobalHydra.instance().clear()
        raise
    print('Initializing hydra')

    os.chdir('..')
    # Create an output folder in the root of the repository, named after the
    # current timestamp (spaces replaced by underscores).
    OUTPUT_FOLDER = Path('output/{0}'.format(datetime.datetime.now()).replace(' ', '_'))
    OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from src.utils.colors import flatuicolors
from src.utils.styling import get_standard_colors
from src.weighted_network import WeightedNetwork
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import cartopy.feature as cf
from src.utils.styling import hide_and_move_axis
from src.temporal_network import TemporalNetwork
import itertools

In [None]:
def map_setup(ax):
    """Prepare a cartopy axis as base map: coastlines, dotted borders, fixed extent.

    Parameters
    ----------
    ax : axis object providing ``coastlines``, ``add_feature`` and ``set_extent``
        (the callers in this notebook pass axes created with ``projection=CRS``).
    """
    # Extent in the order expected by set_extent: (x0, x1, y0, y1);
    # presumably longitude/latitude in degrees — TODO confirm.
    extent = [4, 11.5, 44, 53.5]

    ax.coastlines()
    ax.add_feature(cf.BORDERS, ls=':')
    ax.set_extent(extent)
    # Optional land shading, currently disabled:
    # ax.add_feature(cf.LAND, color='k', alpha=0.05)

def plot_community(ax, network, comms=None, reference_network=None):
    """Draw the communities of ``network`` as coloured nodes on ``ax``.

    Parameters
    ----------
    ax : axis to draw on.
    network : object providing ``G()``, ``position()`` and ``communities()``.
    comms : iterable of node collections, optional
        Communities to draw, one colour per entry in iteration order. When
        ``None`` they are taken from ``network.communities()``.
    reference_network : object providing ``G()``, optional
        Network whose nodes are drawn as faint white background markers.
        When ``None``, ``network`` itself is used.

    Notes
    -----
    Colours come from ``get_standard_colors()`` and are indexed by community
    position, so there must be at least as many colours as communities.
    """
    # Compare against None explicitly: a truthiness test would also replace a
    # valid but falsy argument (for instance a network type defining __len__).
    if reference_network is None:
        reference_network = network

    colors = get_standard_colors()
    G = network.G()
    pos = network.position()
    G_ref = reference_network.G()

    if comms is None:
        comms = network.communities()

    # Very faint edges so the node colours stay readable
    nx.draw_networkx_edges(G, pos, alpha=0.01, ax=ax)

    # Background layer: every node of the reference network
    sc = nx.draw_networkx_nodes(G_ref, pos, node_size=8, node_color='w', alpha=0.25, ax=ax)
    sc.set_edgecolor('k')

    # Foreground layer: one scatter collection per community
    for i, comm in enumerate(comms):
        sc = nx.draw_networkx_nodes(G, pos, nodelist=comm, node_size=12, node_color=colors[i], ax=ax)
        sc.set_edgecolor('k')
        sc.set_linewidth(.15)

def load(year0=None, year1=None, edge_filter=None):
    """Build a ``WeightedNetwork`` from the node and edge files named in CONFIG.

    The file locations are ``CONFIG.data.raw`` joined with
    ``CONFIG.data.filenames.nodes`` / ``.edges``. ``year0``, ``year1`` and
    ``edge_filter`` are forwarded unchanged to the ``WeightedNetwork``
    constructor.
    """
    raw_dir = Path(CONFIG.data.raw)
    return WeightedNetwork(
        nodes_file=raw_dir / CONFIG.data.filenames.nodes,
        edges_file=raw_dir / CONFIG.data.filenames.edges,
        year0=year0,
        year1=year1,
        edge_filter=edge_filter,
    )


def load_temporal(edge_filter=None):
    """Build a ``TemporalNetwork`` from the node and edge files named in CONFIG.

    The file locations are ``CONFIG.data.raw`` joined with
    ``CONFIG.data.filenames.nodes`` / ``.edges``. ``edge_filter`` is forwarded
    unchanged to the ``TemporalNetwork`` constructor.
    """
    raw_dir = Path(CONFIG.data.raw)
    return TemporalNetwork(
        nodes_file=raw_dir / CONFIG.data.filenames.nodes,
        edges_file=raw_dir / CONFIG.data.filenames.edges,
        edge_filter=edge_filter,
    )

def finalize(filename):
    """Crop a saved PDF in place and write a JPEG rendering next to it.

    Runs ``pdfcrop --margin 5`` on ``filename`` (overwriting it) and then
    ``convert -density 400`` to create ``<filename>.jpg``.

    Parameters
    ----------
    filename : str or path-like
        Path of the PDF to post-process.

    Notes
    -----
    This stays best-effort, as before: a failing or missing tool does not
    raise. The commands are run without a shell, so paths containing spaces
    or shell metacharacters are passed through verbatim.
    """
    import subprocess  # local import: only this helper needs it

    target = str(filename)
    commands = (
        ['pdfcrop', '--margin', '5', target, target],
        ['convert', '-density', '400', target, f'{target}.jpg'],
    )

    for command in commands:
        try:
            # check=False: a non-zero exit status is ignored, like os.system did
            subprocess.run(command, check=False)
        except OSError as error:
            # Typically the executable is not installed; keep going
            print(f'Could not run {command[0]}: {error}')

def get_temporal_communities(bins, edge_filter=None):
    """Detect communities per time slice and align their order across slices.

    For every ``(year0, year1)`` pair in ``bins`` a network is loaded with
    ``load`` and its ``communities()`` are collected. The communities of each
    slice are then reordered so that position ``k`` holds the community with
    the largest node overlap with position ``k`` of the previous (already
    reordered) slice. Matching is greedy, in the order of the previous slice;
    unmatched communities are appended at the end in their original order.

    Parameters
    ----------
    bins : iterable of (year0, year1) pairs
    edge_filter : passed through to ``load``.

    Returns
    -------
    list
        One list of communities per bin; empty if ``bins`` is empty.
        Communities must support ``intersection`` (e.g. sets).
    """
    # Get list of communities for each time slice
    comm_list = [
        load(year0=year0, year1=year1, edge_filter=edge_filter).communities()
        for year0, year1 in bins
    ]

    # Nothing to align (previously an IndexError on comm_list[0])
    if not comm_list:
        return []

    # The first slice defines the reference order
    sorted_comm_list = [comm_list[0]]

    for current in comm_list[1:]:
        previous = sorted_comm_list[-1]
        remaining = list(current)  # copy: the loaded list is left untouched
        aligned = []

        for oc in previous:
            # The current slice may have fewer communities than the previous
            # one; without this guard the argmax over an empty list raised.
            if not remaining:
                break
            overlap = [len(oc.intersection(nc)) for nc in remaining]
            # First index of the maximum, same tie-breaking as np.argmax
            index = overlap.index(max(overlap))
            aligned.append(remaining.pop(index))

        # Communities without a counterpart keep their relative order
        aligned.extend(remaining)
        sorted_comm_list.append(aligned)

    return sorted_comm_list

def plot_temporal_evolution_of_governance(network, normalize=True):
    """Plot, per community, yearly counts of within-community links by ruling party category.

    One panel of a 2x5 grid is used per community of ``network``. For every
    year, the within-community edges (both endpoints in the community) are
    counted by ``ruling_party_category``; the 'S' series is drawn solid
    (labelled 'Secular') and the 'R' series dashed (labelled 'Religious').

    Parameters
    ----------
    network : object providing ``communities()`` and ``edges()``; the edge
        table needs the columns 'from', 'to', 'Year' and
        'ruling_party_category'.
    normalize : bool
        If True, both series are divided by the yearly sum S + R and the
        figure is saved as 'timeseries_governance_relative.jpg'; otherwise
        raw counts are plotted and saved as
        'timeseries_governance_absolute.jpg'. Files go to OUTPUT_FOLDER.

    Raises
    ------
    IndexError
        If the network has more than ten communities (the grid has ten axes).
    """
    fig, axarr = plt.subplots(2, 5, sharey=True, sharex=True, figsize=(8, 4))
    flatax = axarr.flatten()
    colors = get_standard_colors()

    communities = network.communities()
    edges = network.edges()

    # Label and file name depend only on `normalize`; choosing them before the
    # loop keeps them defined even when there are no communities.
    if normalize:
        label = 'Share of links\nin Community'
        filename = 'timeseries_governance_relative.jpg'
    else:
        label = 'Number of links\nin Community'
        filename = 'timeseries_governance_absolute.jpg'

    last_ax = None
    for i, community in enumerate(communities):
        ax = flatax[i]

        # Links with both endpoints inside the community
        in_edges = edges[edges['from'].isin(community) & edges['to'].isin(community)]
        timeseries = in_edges.groupby('Year').ruling_party_category.value_counts().unstack().fillna(0)

        # Make sure both categories exist even if one never occurs
        for category in ('R', 'S'):
            if category not in timeseries.columns:
                timeseries[category] = 0

        norm = (timeseries.S + timeseries.R) if normalize else 1

        ax.plot(timeseries.index, timeseries.S / norm, label='Secular', c=colors[i])
        ax.plot(timeseries.index, timeseries.R / norm, label='Religious', c=colors[i], ls='--')
        hide_and_move_axis(ax)
        last_ax = ax

    # Legend only on the last panel that was drawn (if any)
    if last_ax is not None:
        last_ax.legend()

    for ax in axarr[1, :]:
        ax.set_xlabel('Year')

    for ax in axarr[:, 0]:
        ax.set_ylabel(label)

    fig.tight_layout()

    fig.savefig(OUTPUT_FOLDER / filename, dpi=400)

In [None]:
# Edge filter applied to every network in this notebook; the value is handed
# unchanged to WeightedNetwork / TemporalNetwork through load() / load_temporal().
EDGE_FILTER = 'S238'

# G is loaded without year bounds (year0/year1 stay None); G_temp is the
# TemporalNetwork built from the same files and filter.
G = load(edge_filter=EDGE_FILTER)
G_temp = load_temporal(edge_filter=EDGE_FILTER)

# Map projection shared by all cartopy axes below; its parameters are read from
# the `grid` section of the hydra config.
# NOTE(review): `central_latitidue` looks like a misspelling of
# `central_latitude` — confirm it matches the key in the config file before renaming.
CRS = ccrs.LambertAzimuthalEqualArea(
    central_latitude=CONFIG.grid.central_latitidue, 
    central_longitude=CONFIG.grid.central_longitude, 
    false_easting=CONFIG.grid.false_easting, 
    false_northing=CONFIG.grid.false_northing
)

In [None]:
# Community map of the full network G on a single axis using the shared projection.
ax = plt.axes(projection=CRS)
map_setup(ax)

# comms/reference_network are left at their defaults, so the communities come
# from G itself and G also provides the background nodes.
plot_community(ax, G)
filename = OUTPUT_FOLDER / 'communities.pdf'
plt.savefig(filename)

# Post-process the saved PDF with the external tools invoked by finalize().
finalize(filename)

In [None]:
# Two figures for the same network: first with the default normalize=True,
# then with normalize=False. Each call saves its own file in OUTPUT_FOLDER.
plot_temporal_evolution_of_governance(G)
plot_temporal_evolution_of_governance(G, normalize=False)

In [None]:
# Work in progress. 
# TODO: Don't hardcode number of axes and extent.
def plot_temporal_communities(bins, network, temporal_communites, nrows=2, ncols=3, filename='temporal_communities.pdf', axis_order=None, edge_filter=None):
    """Draw one community map per time slice on a grid of map axes and save it.

    Parameters
    ----------
    bins : sequence of (year0, year1) pairs, one per panel.
    network : reference network; its nodes are drawn as background in every
        panel (passed as ``reference_network`` to ``plot_community``).
    temporal_communites : sequence with one community list per bin, e.g. the
        result of ``get_temporal_communities``.
    nrows, ncols : grid shape of the figure.
    filename : file name inside OUTPUT_FOLDER; the saved file is afterwards
        handed to ``finalize``.
    axis_order : sequence of int, optional
        Permutation of the flattened axes: the i-th bin is drawn on flattened
        axis ``axis_order[i]``.
    edge_filter : passed through to ``load`` for every slice.

    Notes
    -----
    Bins, axes and community lists are paired with ``zip``, so surplus entries
    of the longer sequences are ignored and unused axes stay empty.
    """
    # squeeze=False keeps `axarr` two-dimensional for every grid shape;
    # otherwise a 1x1 grid yields a single axis object without flatten().
    f, axarr = plt.subplots(nrows, ncols, squeeze=False, subplot_kw={'projection': CRS}, figsize=(2.5 * ncols * 1.25, 4 * nrows * 1.25))
    plt.subplots_adjust(wspace=.05, hspace=.1)

    axarr = axarr.flatten()
    if axis_order is not None:
        axarr = axarr[axis_order]

    # `year_bin` instead of `bin`, which shadowed the builtin
    for year_bin, ax, comm in zip(bins, axarr, temporal_communites):
        ax.set_title(f'{year_bin[0]} to {year_bin[1]}')
        sliced_network = load(year0=year_bin[0], year1=year_bin[1], edge_filter=edge_filter)
        map_setup(ax)
        plot_community(ax, sliced_network, comm, reference_network=network)

    filename = OUTPUT_FOLDER / filename
    plt.savefig(filename)
    finalize(filename)

In [None]:
# (year0, year1) pairs, one map panel each.
# NOTE(review): (1415, 1417) is listed twice, so two panels show the same slice — confirm this is intended.
BINS = [(1351, 1354), (1355, 1400), (1401, 1414), (1415, 1417), (1415, 1417), (1418, 1450)]
comm_list = get_temporal_communities(BINS, edge_filter=EDGE_FILTER)
# axis_order places the i-th bin on flattened axis axis_order[i] of the default 2x3 grid.
plot_temporal_communities(BINS, G, comm_list, filename='temporal_networks_hamming.pdf', axis_order=[0, 3, 1, 2, 4, 5], edge_filter=EDGE_FILTER) 

In [None]:
# Three coarse (year0, year1) slices drawn side by side; this rebinds BINS and comm_list from the previous cell.
# NOTE(review): 1400 is both the end of the second bin and the start of the third — confirm whether the last bin should start at 1401.
BINS = [(1251, 1300), (1301, 1400), (1400, 1520)]
comm_list = get_temporal_communities(BINS, edge_filter=EDGE_FILTER)
plot_temporal_communities(BINS, G, comm_list, nrows=1, ncols=3, filename='temporal_networks.pdf', edge_filter=EDGE_FILTER)

In [None]:
# Average degree (top) and Hamming distance (bottom) over time; years with a
# large Hamming distance are highlighted in both panels.

# First year shown in the plot
MIN_YEAR = 1200
# Hamming distances above this value are highlighted; also drawn as dotted line
HAMMING_THRESHOLD = 400

f, axarr = plt.subplots(2, 1, figsize=(5, 3.5), sharex=True)

yrs = G_temp.years()
H = G_temp.hamming_distance()
K = G_temp.average_degree_sequence()

# Restrict all three series to the plotted period
mask = yrs >= MIN_YEAR
yrs = yrs[mask]
H = H[mask]
K = K[mask]

# mask1 drops the years 1201, 1251, 1301, ... (year - 1 divisible by 50);
# presumably these are artefacts of 50-year blocks in the data — TODO confirm.
mask1 = ((yrs - 1) % 50) != 0
mask2 = H > HAMMING_THRESHOLD
highlighted_years = yrs[mask1 & mask2]
print(highlighted_years)

axarr[0].plot(yrs, K, c=flatuicolors.wetasphalt)
axarr[1].plot(yrs[mask1], H[mask1], c=flatuicolors.wetasphalt)

for ax in axarr:
    hide_and_move_axis(ax)

# Shade every highlighted year in both panels, behind the curves
for yr in highlighted_years:
    for ax in axarr:
        ax.axvline(yr, zorder=0, c=flatuicolors.pomegranate, lw=4, alpha=0.3)

axarr[1].axhline(HAMMING_THRESHOLD, ls=':', c='k')
axarr[1].set_xlabel('Year')
axarr[0].set_ylabel('Average Degree')
axarr[1].set_ylabel('Hamming Distance')
f.tight_layout()
f.savefig(OUTPUT_FOLDER / 'temporal_evolution.png', dpi=400)

# Plot expulsion statistics

In [None]:
def get_coocurrences(network, require_edge=False):
    """Build a per-year table of city pairs with the 'Juden' value of both cities.

    Parameters
    ----------
    network : object providing ``nodes()``, ``communities()`` and ``edges()``.
        The node table needs the columns 'Unnamed: 0', 'PlaceID', 'XCOORD',
        'YCOORD', 'PlaceName', 'Year' and 'Juden'.
    require_edge : bool
        If True, the pairs are the rows of ``network.edges()`` (matched to the
        'from' city in the same year). If False, the pairs are all
        2-combinations of cities inside each community, for every year in
        which the 'from' city has a node row.

    Returns
    -------
    pandas.DataFrame
        One row per pair and year in which both cities have a node row, with
        'Juden_x' ('from' city), 'Juden_y' ('to' city) and the derived columns
        'co-occur' (both values sum to 2), 'coherence' (both values equal)
        and 'fromto' ('<from>_<to>').
    """
    node_table = network.nodes()
    communities = network.communities()
    node_meta = ['Unnamed: 0', 'PlaceID', 'XCOORD', 'YCOORD', 'PlaceName']

    if not require_edge:
        # Every unordered pair of cities that share a community
        pairs = []
        for comm in communities:
            pairs.extend(itertools.combinations(comm, 2))
        pair_table = pd.DataFrame(pairs, columns=['from', 'to'])
        merged = pd.merge(pair_table, node_table, left_on=['from'], right_on=['PlaceID'])
        merged = merged.drop(columns=node_meta)
    else:
        # Observed edges, matched to the 'from' city in the same year
        edge_table = network.edges()
        merged = pd.merge(edge_table, node_table, left_on=['from', 'Year'], right_on=['PlaceID', 'Year'])
        merged = merged.drop(columns=['PartyID', 'ruling_party_category', 'Unnamed: 0_x', 'Unnamed: 0_y', 'PlaceID', 'XCOORD', 'YCOORD', 'PlaceName'])

    # Attach the 'to' city of the same year; 'Juden' now appears as Juden_x / Juden_y
    merged = pd.merge(merged, node_table, left_on=['to', 'Year'], right_on=['PlaceID', 'Year'])
    merged = merged.drop(columns=node_meta)

    merged['co-occur'] = (merged.Juden_x + merged.Juden_y) == 2
    merged['coherence'] = (merged.Juden_x - merged.Juden_y) == 0
    merged['fromto'] = merged['from'].astype(str) + '_' + merged['to'].astype(str)

    return merged

In [None]:
# require_edge=False: pairs are all within-community city combinations rather
# than rows of the edge table. The flag is read again by the histogram cell to
# choose the output file name.
require_edge = False
# NOTE: a later cell ("Bar chart of total changes") rebinds `df`, so re-run this
# cell before re-running the histogram cell in a live kernel.
df = get_coocurrences(G, require_edge=require_edge)

## Histograms of shared status across cities

In [None]:
# One panel per community: distribution of the per-pair mean of `key` for pairs
# inside the community (coloured) next to the distribution over all pairs in
# `df` (grey). `df` and `require_edge` come from the co-occurrence cell above.
f, axarr = plt.subplots(2, 5, sharex=True, sharey=True, figsize=(8, 4.5))
color = get_standard_colors()

# Column of `df` whose per-pair mean is histogrammed: 'coherence' or 'co-occur'
key = 'coherence'

n_bins = 5
bw = 1/n_bins
# A pair is only used if it has more than this many rows in `df`
n_values = 10

if key == 'co-occur':
    label = 'Probability\nco-presence of Jews'
elif key == 'coherence':
    label = 'Probability\nof coherence'
else:
    # Previously an unsupported key only failed later with a NameError on `label`
    raise ValueError(f'Unsupported key: {key!r}')

if require_edge:
    outfile = OUTPUT_FOLDER / f'prob_{key}_only_real_edges.jpg'
else:
    outfile = OUTPUT_FOLDER / f'prob_{key}_commnuity_structure.jpg'

bin_edges = np.linspace(0, 1, n_bins+1)
bin_centers = .5 * (bin_edges[1:] + bin_edges[:-1])


def binned_pair_shares(pairs):
    """Return, per bin, the share of city pairs whose mean `key` value falls into it.

    Only pairs with more than `n_values` rows are counted. If no pair
    qualifies, all shares are 0 (instead of a division by zero).
    """
    g = pairs.groupby('fromto')[key].agg(['mean', 'count'])
    g = g[g['count'] > n_values]
    count, _ = np.histogram(g['mean'], bins=bin_edges)
    total = count.sum()
    return count / total if total else count.astype(float)


# Both are identical for every panel, so compute them once
communities = G.communities()
overall_shares = binned_pair_shares(df)

# zip stops at the shorter sequence: fewer than ten communities simply leave
# the remaining panels empty
for i, (ax, c) in enumerate(zip(axarr.flatten(), communities)):

    e = df[df['from'].isin(c) & df['to'].isin(c)]

    # Community bars sit slightly left of the bin centre, reference bars right of them
    ax.bar(bin_centers - .4 * bw + 0.015, binned_pair_shares(e), width=bw * .4, color=color[i], alpha=0.95)
    ax.bar(bin_centers - 0.015, overall_shares, width=bw * .4, color='k', alpha=0.25, zorder=0)

for ax in axarr[1]:
    ax.set_xlabel(label)

for ax in axarr[:, 0]:
    ax.set_ylabel('Frequency')

plt.tight_layout()

plt.savefig(outfile)

## Timeseries of percentage point changes

In [None]:
# One panel per community: year-to-year change (in percentage points) of the
# mean 'Juden' value over the community's cities. `color` is the palette
# defined in the histogram cell above.
f, axarr = plt.subplots(2, 5, sharex=True, sharey=True, figsize=(8, 5))

nodes = G.nodes()
# Detect the communities once instead of once per panel
communities = G.communities()

# zip stops at the shorter sequence: fewer than ten communities simply leave
# the remaining panels empty
for i, (ax, c) in enumerate(zip(axarr.flatten(), communities)):

    data = nodes[nodes.PlaceID.isin(c)].groupby('Year')['Juden'].agg(['mean', 'sum', 'count'])
    # The diff below is only a year-to-year change if no year is missing
    assert (data.index.diff().fillna(1) == 1).all()

    data['diff'] = data['mean'].diff() * 100

    # Keep years in which more than a quarter of the community's cities have a row
    data = data[data['count'] > (.25 * len(c))]

    # Drop the years 1, 51, 101, ... (year - 1 divisible by 50);
    # presumably artefacts of 50-year blocks in the data — TODO confirm
    mask = ((data.index - 1 ) % 50) != 0
    data = data[mask]

    data = data['diff']
    data.plot(ax=ax, c=color[i])

    # Annotate changes beyond +/-20 percentage points with the preceding year
    for year, value in data[data < -20].items():
        ax.text(year+5, value, str(year - 1), ha='center', va='top')

    for year, value in data[data > 20].items():
        ax.text(year+5, value, str(year - 1))

    hide_and_move_axis(ax)

for ax in axarr[:, 0]:
    ax.set_ylabel('Percentage point change\nin presence of Jews')
plt.tight_layout()
plt.savefig(OUTPUT_FOLDER / 'percentage_point_changes.jpg')

## Time series for share of cities with presence of Jews per community

In [None]:
# One panel per community: yearly sum of 'Juden' over the community's cities,
# divided by the community size. `nodes` and `color` come from earlier cells.
# Note: unlike the other figures, this one is only displayed, not saved.
f, axarr = plt.subplots(2, 5, sharex=True, sharey=True, figsize=(8, 5))

# Detect the communities once instead of once per panel
communities = G.communities()

# zip stops at the shorter sequence: fewer than ten communities simply leave
# the remaining panels empty
for i, (ax, c) in enumerate(zip(axarr.flatten(), communities)):
    data = nodes[nodes.PlaceID.isin(c)].groupby('Year')['Juden'].agg(['mean', 'sum', 'count'])
    # Keep years in which more than a quarter of the community's cities have a row
    data = data[data['count'] > (.25 * len(c))]
    data = data['sum'] / len(c)

    data.plot(ax=ax, c=color[i])

    hide_and_move_axis(ax)

for ax in axarr[:, 0]:
    ax.set_ylabel('Share of cities with\npresence of Jews')
plt.tight_layout()

## Bar chart of total changes in presence of Jews

In [None]:
# Build a table with one column per community (c0, c1, ...) holding the
# year-to-year change in the number of cities with Juden == 1.

# Number of communities included; the plotting cell below expects c0 ... c9
N_COMMUNITIES = 10

# Detect the communities once. Previously this loop iterated over the axes of
# the preceding figure just to count to ten.
communities = G.communities()

data_list = []
for c in communities[:N_COMMUNITIES]:
    data = nodes[nodes.PlaceID.isin(c)].groupby('Year')['Juden'].agg(['mean', 'sum', 'count'])
    data = data['sum'].diff()
    # The diff is only a year-to-year change if no year is missing
    assert (data.index.diff().fillna(1) == 1).all()
    data_list.append(data)

df = pd.DataFrame(data_list[0]).rename(columns={'sum': 'c0'})

# Outer-merge the remaining communities on the shared 'Year' index
for i, series in enumerate(data_list[1:], start=1):
    df = pd.merge(df, series, on='Year', how='outer')
    df = df.rename(columns={'sum': f'c{i}'})

# Drop the years 1, 51, 101, ... (year - 1 divisible by 50);
# presumably artefacts of 50-year blocks in the data — TODO confirm
mask = ((df.index - 1 ) % 50) != 0
df = df[mask]

df = df.fillna(0)
# Keep only years in which at least one community changes by more than 3 cities
df = df[((df < -3) | (df > 3)).any(axis=1)]

In [None]:
# Stacked bar chart of the table built in the previous cell: losses stack
# downwards from zero, gains stack upwards, one colour per community.

# Negative part of every column (positive entries set to 0)
df_below = df.copy()
df_below[df_below > 0] = 0

# Positive part of every column (negative entries set to 0)
df_above = df.copy()
df_above[df_above < 0] = 0

f, ax = plt.subplots()

# Running bar offsets for the two stacks, one entry per remaining year
bottom_below = np.zeros(len(df))
bottom_above = np.zeros(len(df))

for i in range(10):
    data = df_below[f'c{i}'].copy()
    p = ax.bar(range(len(data)), data, bottom=bottom_below, color=color[i])
    bottom_below += data

    data = df_above[f'c{i}'].copy()
    p = ax.bar(range(len(data)), data, bottom=bottom_above, color=color[i])
    bottom_above += data

# `data` is the last column from the loop; only its length and index (the
# years) are used here. Tick labels show the year before each index value.
ax.set_xticks(range(len(data)), data.index - 1)
ax.tick_params(axis='x', labelrotation=90)
hide_and_move_axis(ax)
ax.axhline(0, c='k')
ax.set_ylabel('Total change in cities with Jews')
plt.tight_layout()
plt.savefig(OUTPUT_FOLDER / 'total_change_jews.jpg')

# Video of temporal evolution

In [None]:
import gif

@gif.frame
def plot_frame(year):
    """Draw the animation frame for ``year``.

    The frame shows the base map, all nodes of ``G`` as faint black markers,
    and a scatter of the distinct coordinates of node rows with
    ``Year == year`` and ``Juden == 1``. Uses the notebook globals ``CRS``,
    ``G`` and ``nodes``.
    """
    ax = plt.axes(projection=CRS)
    map_setup(ax)
    ax.set_title(year)

    # Background: every place of the full network
    graph = G.G()
    positions = G.position()
    nx.draw_networkx_nodes(graph, positions, node_size=4, node_color='k', alpha=0.25)

    # Foreground: places flagged with Juden == 1 in this year
    selected = nodes[(nodes.Year == year) & (nodes.Juden == 1)]
    coordinates = selected[['XCOORD', 'YCOORD']].drop_duplicates()
    X, Y = coordinates.values.T
    ax.scatter(X, Y)

# Render one frame per year from 1000 to 1520 (inclusive) and collect them.
frames = []
for year in range(1000, 1521, 1):
    # The carriage return keeps the progress output on a single line
    print(year, end='\r')
    frame = plot_frame(year)
    frames.append(frame)

# NOTE(review): unlike the other outputs this is written to "test.gif" relative
# to the working directory, not into OUTPUT_FOLDER — confirm that is intended.
gif.save(frames, "test.gif", duration=100)