In [37]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression as LinearRegression
# Add the parent directory to the Python path so the `src` package is importable

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
# Now you can import the TimeSeriesDataset class
from src.TimeSeriesDataset import TimeSeriesDataset as TSD
from src.Template import Template
from src.utils import download_data, weighted_mean, fit_logit_to_variables
from src.mappings import value_mapping, ideology_mapping, social_complexity_mapping

In [38]:
# Toggle between the plain dataset and the variant that carries residuals.
use_resid = False

# Build the path with os.path.join instead of a backslash string literal:
# '\d' and '\p' are invalid escape sequences in a normal string, and a
# hard-coded backslash path only resolves on Windows.
if use_resid:
    # Load the residuals
    dataset_filename = os.path.join('..', 'datasets', 'power_transitions_with_residuals.xlsx')
else:
    dataset_filename = os.path.join('..', 'datasets', 'power_transitions.xlsx')


def _restrict_to_group(ts_dataset, label):
    """Keep only the rows whose 'dataset' column equals `label`.

    Applies the same filter to the `raw`, `scv` and `scv_imputed` frames of
    `ts_dataset` and returns the same object for convenience. Raises KeyError
    (from `get_group`) if `label` is not present in a frame.
    """
    for attr in ('raw', 'scv', 'scv_imputed'):
        frame = getattr(ts_dataset, attr)
        # Explicit copy: later cells add columns to these frames, and they
        # should write to an independent frame rather than a slice.
        setattr(ts_dataset, attr, frame.groupby('dataset').get_group(label).copy())
    return ts_dataset


# Power-transition rows only.
dataset_PT = _restrict_to_group(TSD(file_path=dataset_filename), 'PT')

# Unfiltered copy of the whole file.
dataset_FULL = TSD(file_path=dataset_filename)

# Rows tagged '100y' in the 'dataset' column.
dataset = _restrict_to_group(TSD(file_path=dataset_filename), '100y')


Dataset loaded from ..\datasets\power_transitions.xlsx
Dataset loaded from ..\datasets\power_transitions.xlsx
Dataset loaded from ..\datasets\power_transitions.xlsx


In [39]:
# Path built with os.path.join: the original backslash literal contained
# invalid escape sequences ('\d', '\P') and only resolved on Windows.
polaris_filename = os.path.join('..', 'datasets', 'Polaris_data.xlsx')
religion_df = pd.read_excel(polaris_filename, sheet_name='Religion')

# One row per PolID (first occurrence wins, matching the previous
# `.values[0]` behaviour), indexed by PolID so it can be used as a lookup.
# Rows without a PolID are dropped so a missing PolityName can never match.
religion_lookup = (
    religion_df
    .dropna(subset=['PolID'])
    .drop_duplicates(subset='PolID', keep='first')
    .set_index('PolID')
)

# target column in the PT frames -> source column in the Polaris 'Religion' sheet
polaris_columns = {
    'religion': 'religion',
    'religion_family': 'RelFam',
    'MSP': 'MSP_imp',
}

# Attach each Polaris column to both the raw-coded and the imputed frame.
# Series.map looks every PolityName up in the lookup's index; polities that
# are absent from the Polaris sheet get NaN.
for target_col, source_col in polaris_columns.items():
    for frame in (dataset_PT.scv, dataset_PT.scv_imputed):
        frame[target_col] = frame.PolityName.map(religion_lookup[source_col])

In [40]:
# All power-transition indicator columns copied from the raw sheet.
crisis_vars = [
    'overturn',
    'predecessor_assassination',
    'intra_elite',
    'military_revolt',
    'popular_uprising',
    'separatist_rebellion',
    'external_invasion',
    'external_interference',
]
# Subset of indicators that feed the composite 'Crisis' variable.
crisis_keep = ['overturn', 'predecessor_assassination', 'intra_elite', 'military_revolt']

# Mapping handed to weighted_mean: every kept indicator gets weight 1.
PT_mapping = {'PT_types': {col: 1 for col in crisis_vars if col in crisis_keep}}

# Copy every indicator from the raw frame into both working frames.
for col in crisis_vars:
    dataset_PT.scv[col] = dataset_PT.raw[col]
    dataset_PT.scv_imputed[col] = dataset_PT.scv[col]

# Row-wise composite over the crisis_keep columns.
# NOTE(review): presumably weighted_mean averages the mapped columns and
# nan_handling='remove' skips missing entries - confirm in src.utils.
dataset_PT.scv['Crisis'] = dataset_PT.raw.apply(
    lambda row: weighted_mean(row, PT_mapping, "PT_types", nan_handling='remove'),
    axis=1,
)
dataset_PT.scv_imputed['Crisis'] = dataset_PT.scv['Crisis']

# Carry the transition duration over unchanged.
dataset_PT.scv['duration'] = dataset_PT.raw['duration']
dataset_PT.scv_imputed['duration'] = dataset_PT.scv['duration']

In [45]:
# Columns carried into the modelling frame. 'Scale_1' and 'Comp' are kept in
# Xy but are not offered as predictors in the fits below.
frame_cols = ['Pop', 'Terr', 'Cap', 'Hierarchy', 'Government', 'Infrastructure',
              'Information', 'Money', 'MSP', 'Miltech', 'Scale_1', 'Comp']
Xy = dataset_PT.scv_imputed[frame_cols + crisis_vars]

# Candidate predictors for every logit fit (defined once, outside the loop).
X_cols = ['Pop', 'Terr', 'Cap', 'Hierarchy', 'Government', 'Infrastructure',
          'Information', 'Money', 'MSP', 'Miltech']

# Fit one logit model per transition indicator.
for col in crisis_vars:
    # Pass a fresh list each time: the printed "Removing <var>" steps suggest
    # the helper prunes predictors, and it may do so in place - every outcome
    # must start from the full candidate set.
    fit_logit_to_variables(Xy, col, list(X_cols))


Optimization terminated successfully.
         Current function value: 0.633802
         Iterations 5
Removing Hierarchy with p-value 0.861111326333412
Optimization terminated successfully.
         Current function value: 0.633814
         Iterations 5
Removing Pop with p-value 0.649432990271561
Optimization terminated successfully.
         Current function value: 0.633896
         Iterations 5
Removing MSP with p-value 0.49749985316882417
Optimization terminated successfully.
         Current function value: 0.634749
         Iterations 5
Removing Cap with p-value 0.36968033560146785
Optimization terminated successfully.
         Current function value: 0.635066
         Iterations 5
Removing Money with p-value 0.35414117323568894
Optimization terminated successfully.
         Current function value: 0.635404
         Iterations 5
Removing Information with p-value 0.19419882905648578
Optimization terminated successfully.
         Current function value: 0.636076
         Iterations 