In [10]:
import pandas as pd
import numpy as np

import sklearn.linear_model as sklearn_linear_model
import sklearn.model_selection as sklearn_model_selection

In [2]:
import os
from pathlib import Path

# Root directory holding the simulation outputs read by this notebook.
# Set the EOP_RESULTS_PATH environment variable to point the notebook at a
# different checkout or machine; when it is unset, the original location is
# used, so existing runs behave exactly as before.
results_path = Path(os.environ.get('EOP_RESULTS_PATH', '/home/selker/eop/eop/run_simulations'))

In [12]:
# Load the Malawi results and derive every extra column in one chained step.
# `assign` evaluates its keyword arguments in order, so later entries can use
# columns created by earlier ones, and the new columns are appended in the
# order listed here.
df = (
    pd.read_csv(results_path / 'results_for_subnational_analysis' / 'malawi.csv')
    .assign(
        # Per-capita / rate columns multiplied by the weight-adjusted population.
        total_cost=lambda d: d['policy_cost_per_capita'] * d['weight_adjusted_population'],
        initial_poverty_count=lambda d: d['initial_poverty_rate'] * d['weight_adjusted_population'],
        post_transfer_poverty_count=lambda d: d['post_transfer_poverty_rate'] * d['weight_adjusted_population'],
        initial_total_poverty_gap=lambda d: d['initial_poverty_gap'] * d['weight_adjusted_population'],
        post_transfer_total_poverty_gap=lambda d: d['post_transfer_poverty_gap'] * d['weight_adjusted_population'],
        # Reductions: initial value minus post-transfer value.
        poverty_count_reduction=lambda d: d['initial_poverty_count'] - d['post_transfer_poverty_count'],
        total_poverty_gap_reduction=lambda d: d['initial_total_poverty_gap'] - d['post_transfer_total_poverty_gap'],
        # Total cost divided by each reduction.
        cost_per_poverty_count_reduction=lambda d: d['total_cost'] / d['poverty_count_reduction'],
        cost_per_total_poverty_gap_reduction=lambda d: d['total_cost'] / d['total_poverty_gap_reduction'],
        # The same reductions expressed on the original rate / gap columns.
        poverty_rate_reduction=lambda d: d['initial_poverty_rate'] - d['post_transfer_poverty_rate'],
        poverty_gap_reduction=lambda d: d['initial_poverty_gap'] - d['post_transfer_poverty_gap'],
    )
)

# Keep the rows labelled 'full_country' in their own frame and drop them from
# `df`, which from here on holds only the remaining rows.
full_country = df.loc[df['features'] == 'full_country']
df = df.loc[df['features'] != 'full_country']

## Simple models predicting per-improvement and per-capita costs

In [13]:
# Show the column labels of `df`: the CSV's own columns followed by the
# derived columns added when the data was loaded.
df.columns

Index(['initial_poverty_rate', 'initial_poverty_gap',
       'post_transfer_poverty_gap', 'post_transfer_poverty_rate',
       'policy_cost_per_capita', 'weight_adjusted_population', 'method',
       'unconditional_tolerance', 'conditional_tolerance', 'd', 'nclass',
       'features', 'with_weights', 'total_cost', 'initial_poverty_count',
       'post_transfer_poverty_count', 'initial_total_poverty_gap',
       'post_transfer_total_poverty_gap', 'poverty_count_reduction',
       'total_poverty_gap_reduction', 'cost_per_poverty_count_reduction',
       'cost_per_total_poverty_gap_reduction', 'poverty_rate_reduction',
       'poverty_gap_reduction'],
      dtype='object')

In [24]:
# Cross-validate a one-predictor linear regression for every
# (outcome, predictor) combination and record the mean score across folds.
regression = sklearn_linear_model.LinearRegression()

outcome_columns = [
    'cost_per_poverty_count_reduction',
    'cost_per_total_poverty_gap_reduction',
    'policy_cost_per_capita',
]
predictor_columns = [
    'initial_poverty_rate',
    'initial_poverty_gap',
    'weight_adjusted_population',
]

# Two shuffled folds with a fixed integer seed, so every model is scored on
# the same pair of splits.
two_fold_splitter = sklearn_model_selection.KFold(n_splits=2, shuffle=True, random_state=11)

# Keyed by (predictor, outcome); filled outcome-major, predictor-minor.
results = {}
for outcome in outcome_columns:
    for predictor in predictor_columns:
        # No `scoring` argument is given, so scikit-learn falls back to the
        # estimator's own `score` method.
        score = sklearn_model_selection.cross_val_score(
            regression,
            df[[predictor]],  # double brackets keep the predictor as a 2-D frame
            df[outcome],
            cv=two_fold_splitter,
        )
        results[(predictor, outcome)] = score.mean()

In [23]:
# Display the mean cross-validation score stored for each
# (predictor, outcome) pair.
# NOTE(review): run this only after the cell that builds `results`, so the
# displayed values reflect the current contents.
results

{('initial_poverty_rate',
  'cost_per_poverty_count_reduction'): -2.2185991126257574,
 ('initial_poverty_gap',
  'cost_per_poverty_count_reduction'): -1.8605002071807484,
 ('weight_adjusted_population',
  'cost_per_poverty_count_reduction'): -0.3053648087811922,
 ('initial_poverty_rate',
  'cost_per_total_poverty_gap_reduction'): -2.920140546057258,
 ('initial_poverty_gap',
  'cost_per_total_poverty_gap_reduction'): -2.3974693037811043,
 ('weight_adjusted_population',
  'cost_per_total_poverty_gap_reduction'): -0.3146007857514576,
 ('initial_poverty_rate', 'policy_cost_per_capita'): -0.3182960129293514,
 ('initial_poverty_gap', 'policy_cost_per_capita'): -0.26134340449082466,
 ('weight_adjusted_population',
  'policy_cost_per_capita'): -0.18151332223003566}