In [1]:
import pandas as pd

## Assumptions:

* Total Flood Risk = Perc_Cropland_Flooded * 0.5 + Perc_People_Flooded * 0.5
* Those ranking on the top 30% of this list = High Flood Risk Districts
* Those ranking on the bottom 70% of this list = Low Flood Risk Districts


In [2]:
## load in Perc_People_Flooded_NRT

path_input = '/Users/srilakshmi/Desktop/Thesis/Treatment-Control-Assignment/Perc-Flooded-NRT/People/'
name_input_file = 'Perc_People_Flooded.csv'

people_NRT = pd.read_csv(path_input+name_input_file, header=0)


In [3]:
## load in Perc_Cropland_Flooded_NRT

path_input = '/Users/srilakshmi/Desktop/Thesis/Treatment-Control-Assignment/Perc-Flooded-NRT/Cropland/'
name_input_file = 'Perc_Cropland_Flooded.csv'

cropland_NRT = pd.read_csv(path_input+name_input_file, header=0)
dist_to_drop = cropland_NRT[cropland_NRT['Perc_Flooded'].isna()]['Region'].tolist() # get list of districts from cropland data that have missing values
cropland_NRT = cropland_NRT.dropna()

dist_to_drop

['Lawra',
 'Nandom',
 'Ho Municipal',
 'Agotime Ziope',
 'Suaman',
 'Ekumfi',
 'Effutu',
 'Hohoe Municipal',
 'Mfantsiman',
 'La Dade Kotopon',
 'Komenda Edna Eguafo / Abirem',
 'Jasikan',
 'Ga Central Municipal',
 'Ashaiman']

In [4]:
# drop these districts from the people data

for i in people_NRT['Region']: # if row contains missing data
    if i in dist_to_drop:
        people_NRT = people_NRT[people_NRT['Region'] != i] # drop it from the study
        
people_NRT # only working with 202 districts for the treatment-control analysis

Unnamed: 0,Region,Perc Population Flooded
0,Abura / Asebu / Kwamankese,0.001296
1,Accra Metropolis,0.002028
2,Ada East,0.010217
3,Ada West,0.013472
4,Adaklu,0.000266
...,...,...
211,West Gonja,0.000662
212,West Mamprusi,0.005232
213,Yendi Municipal,0.008060
214,Yilo Krobo,0.000000


In [5]:
people_NRT = people_NRT.sort_values(by=['Region']) # sort values by district name
cropland_NRT = cropland_NRT.sort_values(by=['Region'])

assert list(people_NRT['Region']) == list(cropland_NRT['Region']) # ensure districts match up

In [19]:
import numpy as np

# calculate total flood risk by applying arbitrary 50-50 formula

df_floodrisk = cropland_NRT.merge(people_NRT, left_on='Region', right_on='Region')
df_floodrisk.columns = ['Region', 'Perc_Cropland_Flooded', 'Perc_Population_Flooded']

x = pd.DataFrame(np.array(df_floodrisk)[:, 1:2] * 0.5 + np.array(df_floodrisk)[:, 2:3] * 0.5) # apply arbitrary risk formula
x.columns = ['Total_Flood_Risk']

tot_flood_risk = pd.concat([df_floodrisk,x], axis=1).sort_values(by=['Total_Flood_Risk'], ascending=False)
tot_flood_risk = tot_flood_risk[['Region', 'Total_Flood_Risk']]
tot_flood_risk['Treatment'] = None

In [39]:
n_treatment = round(len(tot_flood_risk) * 0.3) # flood prone districts -- districts ranking in the top 30% of the total flood risk metric
n_control = len(tot_flood_risk) - n_treatment # non-flood prone districts

# generate boolean treatment-control assignment variable to use moving forward
tot_flood_risk['Treatment'].iloc[tot_flood_risk.index[0:n_treatment]] = 1
tot_flood_risk['Treatment'].iloc[tot_flood_risk.index[n_treatment:len(tot_flood_risk)]] = 0

tot_flood_risk

Unnamed: 0,Region,Total_Flood_Risk,Treatment
147,Saboba,0.0581014,0
198,West Mamprusi,0.0422144,0
94,Karaga,0.0410139,1
52,Binduri,0.0356679,1
67,Chereponi,0.0354792,0
...,...,...,...
89,Jaman South,0,0
119,Ledzokuku / Krowor,0,1
28,Asokore Mampong Municipal,0,1
50,Bia West,0,0
