In [1]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import pathlib


In [2]:
# file paths and file names
# path_input:       directory that is listed for the per-recurrence CSV files.
# path_output:      directory the merged recurrence table is written to.
# name_output_file: file name of that merged table inside path_output.
# Keep the trailing '/' on both directories: file names are appended to them
# by plain string concatenation elsewhere in this notebook.
# NOTE(review): these are absolute, machine-specific paths - adjust before
# running on another machine.
path_input = '/Users/srilakshmi/Desktop/Thesis/Data/District-level/Population/Data-Recurrence/'
path_output = '/Users/srilakshmi/Desktop/Thesis/Data/District-level/Population/'
name_output_file = 'District-level_Population_Recurrence.csv'


In [3]:
# data cleaning functions

# Name of the value column as it appears in every raw recurrence CSV.
col_name_old = 'People Impacted'

def clean_csv_recurrence(string, input_dir=None):
    """Load one recurrence CSV and prepare it for joining on 'Region'.

    The file is read, exact duplicate rows are dropped, the ``col_name_old``
    column is renamed to the recurrence label taken from the file name, and
    the rows are sorted by 'Region'.

    Parameters
    ----------
    string : str
        File name (not a full path) of the CSV, for example
        'People Impacted in Ghana Ghana_Recurrence_02Year.csv'.
    input_dir : str, optional
        Directory that contains the file. When omitted, the notebook-level
        ``path_input`` is used.

    Returns
    -------
    pandas.DataFrame
        The cleaned table, sorted by 'Region'.
    """
    if input_dir is None:
        input_dir = path_input

    # The label is whatever follows the last underscore in the file stem
    # ('..._Recurrence_02Year.csv' -> '02Year'). A fixed character slice
    # would silently produce a wrong label for any other prefix length.
    label = pathlib.Path(string).stem.rsplit('_', 1)[-1]

    # join() copes with directories given with or without a trailing slash.
    df = pd.read_csv(join(input_dir, string), header=0)
    df = df.drop_duplicates()
    df = df.rename(columns={col_name_old: label})
    df = df.sort_values(by=['Region'])

    return df

def special_merge(left,right):
    """Left-join ``right`` onto ``left`` using the 'Region' column as key.

    Both frames are indexed by 'Region', joined (every row of ``left`` is
    kept), and the key is then turned back into an ordinary 'Region' column
    on a fresh 0..n-1 index. Neither input frame is modified.
    """
    left_by_region = left.set_index('Region')
    right_by_region = right.set_index('Region')

    combined = left_by_region.join(right_by_region)
    combined.index.name = 'Region'

    return combined.reset_index()


In [4]:
# get list of file names
# Only regular files ending in '.csv' are kept, so stray directory entries
# (for example a macOS '.DS_Store') are never handed to pd.read_csv later.
# sorted() returns the names in ascending order.
files = sorted(
    f for f in listdir(path_input)
    if isfile(join(path_input, f)) and f.lower().endswith('.csv')
)
files

['People Impacted in Ghana Ghana_Recurrence_02Year.csv',
 'People Impacted in Ghana Ghana_Recurrence_05Year.csv',
 'People Impacted in Ghana Ghana_Recurrence_10Year.csv',
 'People Impacted in Ghana Ghana_Recurrence_15Year.csv',
 'People Impacted in Ghana Ghana_Recurrence_20Year.csv']

In [5]:
# merge files
# Start from the first cleaned file and left-join every remaining file onto
# it, in the sorted order of `files`. This works for any number of input
# files instead of assuming there are exactly five.
if not files:
    raise ValueError('No recurrence CSV files found in ' + path_input)

merged = clean_csv_recurrence(files[0])
for file_name in files[1:]:
    merged = special_merge(left = merged, right = clean_csv_recurrence(file_name))

merged

Unnamed: 0,Region,02Year,05Year,10Year,15Year,20Year
0,Abura / Asebu / Kwamankese,0,0,0,0,0
1,Accra Metropolis,41,356,807,1071,1276
2,Ada East,0,80,230,328,410
3,Ada West,18,384,658,801,989
4,Adaklu,0,0,0,0,0
...,...,...,...,...,...,...
211,West Gonja,0,0,0,0,0
212,West Mamprusi,10,20,40,77,87
213,Yendi Municipal,0,0,26,26,26
214,Yilo Krobo,0,0,0,0,0


In [6]:
# Write the merged recurrence table to disk (header row kept, row index omitted).
recurrence_csv = path_output + name_output_file
merged.to_csv(recurrence_csv, header=True, index=False)


In [7]:
# read in total populations data
# NOTE: path_input, path_output and name_output_file are rebound here, so the
# values set for the recurrence step no longer apply after this cell runs.
path_input = '/Users/srilakshmi/Desktop/Thesis/Data/District-level/Population/'
name_input_file = 'Impacts-Ghana_MaxFlood_20200715_20201014.csv'

path_output = '/Users/srilakshmi/Desktop/Thesis/Treatment-Control-Assignment/Flood-Risk-Classification/'
name_output_file = 'District_Avg_Perc_Pop_Impacted_Per_Year.csv'

tot_pop = pd.read_csv(path_input+name_input_file, header=0)

# The columns are picked by position: the second one becomes 'Region' and
# the third one becomes 'Total Pop'.
colnames = list(tot_pop.columns)
region_col, total_col = colnames[1], colnames[2]

# Drop rows with no value in the third column, keep just the two columns of
# interest, and give them the names used by the rest of the notebook.
tot_pop = (
    tot_pop
    .dropna(subset=[total_col])
    .loc[:, [region_col, total_col]]
    .rename(columns={region_col: "Region", total_col: "Total Pop"})
)

# merge with existing file
# Left join: every row of tot_pop is kept and the recurrence columns of
# `merged` are attached by 'Region'; the result is sorted by 'Region' and
# the key is restored as a regular column.
new_merge = (
    tot_pop.set_index('Region')
    .join(merged.set_index('Region'))
    .sort_values(by=['Region'])
    .rename_axis('Region')
    .reset_index()
)

In [8]:
# Fraction of each district's total population that is impacted, per
# recurrence interval (impacted / 'Total Pop'; not multiplied by 100).
# Every column of new_merge other than 'Region' and 'Total Pop' is treated
# as a recurrence column, so the labels are not hard-coded here.
recurrence_cols = [c for c in new_merge.columns if c not in ('Region', 'Total Pop')]

perc_imp = pd.concat(
    [
        new_merge[['Region']],
        # row-wise division: each row is divided by that row's 'Total Pop'
        new_merge[recurrence_cols].div(new_merge['Total Pop'], axis=0),
    ],
    axis=1,
).reset_index(drop=True)

perc_imp


Unnamed: 0,Region,02Year,05Year,10Year,15Year,20Year
0,Abura / Asebu / Kwamankese,0.000000,0.000000,0.000000,0.000000,0.000000
1,Accra Metropolis,0.000022,0.000194,0.000439,0.000583,0.000695
2,Ada East,0.000000,0.000937,0.002695,0.003843,0.004804
3,Ada West,0.000248,0.005295,0.009073,0.011045,0.013637
4,Adaklu,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...
211,West Gonja,0.000000,0.000000,0.000000,0.000000,0.000000
212,West Mamprusi,0.000066,0.000132,0.000263,0.000507,0.000573
213,Yendi Municipal,0.000000,0.000000,0.000181,0.000181,0.000181
214,Yilo Krobo,0.000000,0.000000,0.000000,0.000000,0.000000


In [9]:
# save file to disk
# (header row kept, row index omitted)
perc_imp_csv = path_output + name_output_file
perc_imp.to_csv(perc_imp_csv, header=True, index=False)