In [71]:
"""

In this dataset i merge all the dataframes gathered throughout the thesis and create a so called 'Mastertable' from which all subsequent analysis will be done. These datasets are:
- Corruption 
- Conservation funding
- Deforestation
- Protected area coverage

All these datasets have been pre-processed in other Jupyter notebooks which can be found in the same GitHub repository as this one. 

"""

In [72]:
# Import nessescary libraries and define directory
import pandas as pd
import pathlib
pathlib.Path()
path = pathlib.Path()
path = path.resolve()
DATA_DIRECTORY = path / "desktop" / "Gradu" / "Processed data"

PosixPath('/Users/patrick/desktop/Gradu/Processed data')

In [97]:
# Import datasets
corruption = pd.read_csv(DATA_DIRECTORY / "Corruption_tropical_2005-2015.csv", index_col = 0)
funding = pd.read_csv(DATA_DIRECTORY / "relative funding data.csv", usecols= ['ISO3', 'log_funding_2000', 'log_funding_2005', 'log_funding_2010','log_funding_2015'])
deforestation = pd.read_csv(DATA_DIRECTORY / "Deforestation_master.csv")
protectedareas = pd.read_csv(DATA_DIRECTORY / "Protected_areas.csv")

In [98]:
# Changed the ISO3 column for some datasets to string because python read them as object and it was causing problems in the data merge
corruption["ISO3"] = corruption["ISO3"].astype(str)
funding["ISO3"] = funding["ISO3"].astype(str)
protectedareas["ISO3"] = protectedareas["ISO3"].astype(str)

In [99]:
# I merged the datasets one at a time so that I can see if there is any dataloss
mastertable_tropical = corruption.merge(funding, on = "ISO3")

In [101]:
mastertable_tropical = mastertable_tropical.merge(protectedareas, on = "ISO3")

In [102]:
# Renaming some countries in both dataframes to make merging easier, the deforestation data dosen't have an ISO3 column so it's important to carefully check the names of each country that they match.
mastertable_tropical["Country"] = mastertable_tropical["Country"].replace({"Cote d'Ivoire":"Ivory Coast", "Congo, Republic of": "Republic of Congo", "Congo, Democratic Republic":"Democratic Republic of the Congo"})
deforestation["Country"] = deforestation["Country"].replace({"CostaRica":"Costa Rica"})

In [103]:
# Now I merge the last one
mastertable_tropical = mastertable_tropical.merge(deforestation, on = "Country")

Unnamed: 0,Country,ISO3,CPI 2005,CPI 2009,CPI 2015,log_funding_2000,log_funding_2005,log_funding_2010,log_funding_2015,pa_baseline,...,def_outside_pa_2011,def_fraction_in_2001,def_fraction_in_2006,def_fraction_in_2011,def_fraction_out_2001,def_fraction_out_2006,def_fraction_out_2011,First_period_dif,Second_period_dif,Third_period_dif
0,Uruguay,URY,59,67,74,2.114651,2.127819,2.545529,2.785032,4310.67,...,112358.0,0.057728,0.049566,0.078413,0.039599,0.074828,0.0868,-0.01813,0.025262,0.008388
1,Costa Rica,CRI,42,53,55,0.072826,0.350503,0.695877,0.964335,21085.4,...,25149.01,0.006961,0.008924,0.0065,0.025106,0.032418,0.017493,0.018145,0.023494,0.010993
2,Rwanda,RWA,31,33,54,-2.124974,-1.564327,-0.988663,-0.483308,2503.93,...,4630.355,0.002132,0.002227,0.002449,0.039397,0.04945,0.055182,0.037265,0.047222,0.052733
3,Malaysia,MYS,51,45,50,4.092608,4.426626,4.742555,5.115834,15584.18,...,2484975.0,0.003811,0.00886,0.011015,0.056315,0.079863,0.095991,0.052504,0.071003,0.084976
4,Ghana,GHA,35,39,47,1.131915,1.486448,1.941583,2.432136,36598.97,...,204894.1,0.012533,0.014988,0.027267,0.053674,0.037916,0.07283,0.041141,0.022928,0.045563
5,Panama,PAN,35,34,39,0.557774,0.862093,1.380259,1.917523,16651.99,...,68258.64,0.002765,0.00531,0.006537,0.019154,0.036247,0.019431,0.016389,0.030936,0.012894
6,India,IND,29,34,38,2.717399,3.168404,3.657446,4.111216,4497.4,...,455800.5,0.003914,0.011806,0.006451,0.009132,0.01073,0.015225,0.005218,-0.001076,0.008774
7,Thailand,THA,0,34,38,1.680376,2.062662,2.328168,2.538033,96214.83,...,440264.7,0.003689,0.006771,0.005831,0.02782,0.045455,0.048298,0.02413,0.038684,0.042466
8,Zambia,ZMB,26,30,38,-0.907092,-0.473069,0.129066,0.491775,286694.48,...,156970.6,0.008387,0.013582,0.015656,0.013055,0.031191,0.028484,0.004669,0.01761,0.012828
9,Benin,BEN,29,29,37,0.831246,1.107564,1.379473,1.703944,27249.44,...,531.356,1.063407,0.32275,0.081133,0.958881,0.548519,0.145776,-0.104526,0.225769,0.064644


In [108]:
# Clening up the data a bit
mastertable_tropical = mastertable_tropical.sort_values(by="Country")
mastertable_tropical = mastertable_tropical.reset_index()

In [111]:
mastertable_tropical.columns

Index(['index', 'Country', 'ISO3', 'CPI 2005', 'CPI 2009', 'CPI 2015',
       'log_funding_2000', 'log_funding_2005', 'log_funding_2010',
       'log_funding_2015', 'pa_baseline', '2001_increase', '2006_increase',
       '2011_increase', 'LandArea', 'PA_2001_share', 'PA_2006_share',
       'PA_2011_share', 'forest_area_inside_2001', 'forest_area_outside_2001',
       'forest_area_inside_2006', 'forest_area_outside_2006',
       'forest_area_inside_2011', 'forest_area_outside_2011', '2001-2005',
       'Total 2006-2010', 'Total 2011-2015', '2001-2005_ref', '2006-2010_ref',
       '2011-2015_ref', 'def_outside_pa_2001', 'def_outside_pa_2006',
       'def_outside_pa_2011', 'def_fraction_in_2001', 'def_fraction_in_2006',
       'def_fraction_in_2011', 'def_fraction_out_2001',
       'def_fraction_out_2006', 'def_fraction_out_2011', 'First_period_dif',
       'Second_period_dif', 'Third_period_dif'],
      dtype='object')

In [114]:
# After checking the columns I now only choose the ones I need for my analysis
mastertable_tropical = mastertable_tropical[['Country', 'ISO3', 'log_funding_2000', 'log_funding_2005', 'log_funding_2010','log_funding_2015', 'pa_baseline', '2001_increase', '2006_increase',
                                             '2011_increase', 'PA_2001_share', 'PA_2006_share','PA_2011_share', 'def_fraction_in_2001', 'def_fraction_in_2006','def_fraction_in_2011',
                                             'def_fraction_out_2001', 'def_fraction_out_2006', 'def_fraction_out_2011', 'First_period_dif','Second_period_dif', 'Third_period_dif']]

In [120]:
mastertable_tropical.to_csv(DATA_DIRECTORY / "Mastertable.csv", index=False)