In [4]:
import pandas as pd
import numpy as np

# SSP Dataset
- <b>Goal</b>: Compile the SSP2 dataset for the random forest analysis
    - uses the SSP2_v9_130219 scenario; units are billion 2005 US dollars per year for GDP|PPP and million people for Population
    - converted USD 2005 to USD 2017 using the BLS inflation calculator (https://data.bls.gov/cgi-bin/cpicalc.pl?cost1=1&year1=200501&year2=201701): 1 USD 2005 = 1.27 USD 2017

In [2]:
# Country-level SSP database export, read with pandas' default CSV settings.
ssp_csv_path = "../../data/SSP/SspDb_country_data_2013-06-12/SspDb_country_data_2013-06-12.csv"
ssp_data = pd.read_csv(ssp_csv_path)
ssp_data

Unnamed: 0,MODEL,SCENARIO,REGION,VARIABLE,UNIT,1950,1955,1960,1965,1970,...,2105,2110,2115,2120,2125,2130,2135,2140,2145,2150
0,IIASA-WiC POP,SSP3_v9_130115,BHS,Population,million,,,,,,...,,,,,,,,,,
1,IIASA-WiC POP,SSP3_v9_130115,BHS,Population|Female,million,,,,,,...,,,,,,,,,,
2,IIASA-WiC POP,SSP3_v9_130115,BHS,Population|Female|Aged0-4,million,,,,,,...,,,,,,,,,,
3,IIASA-WiC POP,SSP3_v9_130115,BHS,Population|Female|Aged0-4|No Education,million,,,,,,...,,,,,,,,,,
4,IIASA-WiC POP,SSP3_v9_130115,BHS,Population|Female|Aged10-14,million,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193505,IIASA-WiC POP,SSP4d_v9_130115,ABW,Population|Male|Aged95-99,million,,,,,,...,,,,,,,,,,
193506,IIASA-WiC POP,SSP4d_v9_130115,ABW,Population|Male|Aged95-99|No Education,million,,,,,,...,,,,,,,,,,
193507,IIASA-WiC POP,SSP4d_v9_130115,ABW,Population|Male|Aged95-99|Primary Education,million,,,,,,...,,,,,,,,,,
193508,IIASA-WiC POP,SSP4d_v9_130115,ABW,Population|Male|Aged95-99|Secondary Education,million,,,,,,...,,,,,,,,,,


In [8]:
# Scenario and projection-year columns pulled from the SSP country table.
SSP2_SCENARIO = "SSP2_v9_130219"
SSP2_YEARS = ['2025', '2030', '2035', '2040', '2045', '2050']
# CPI inflation factor from the BLS calculator: 1 USD (2005) = 1.27 USD (2017).
USD_2005_TO_2017 = 1.27


def melt_ssp_variable(scenario_data, varname, years=SSP2_YEARS):
    """Return one SSP variable in long form, indexed by (Country Code, year).

    Parameters
    ----------
    scenario_data : pd.DataFrame
        Wide SSP table with REGION and VARIABLE columns and one column per
        year label listed in `years`.
    varname : str
        Value of VARIABLE to keep; also used as the name of the value column.
    years : sequence of str
        Year column labels to unpivot.

    Returns
    -------
    pd.DataFrame
        Frame with a single column named `varname`, indexed by
        ("Country Code", "year") with the year cast to int.
    """
    long_form = pd.melt(
        scenario_data.query("VARIABLE==@varname"),
        id_vars=['REGION'],
        value_vars=list(years),
        var_name='year',
        value_name=varname,
    )
    long_form["year"] = long_form["year"].astype(int)
    long_form = long_form.rename(columns={"REGION": "Country Code"})
    return long_form.set_index(["Country Code", "year"])


ssp2_data = ssp_data.query("SCENARIO==@SSP2_SCENARIO")

# GDP|PPP defines the row set; Population is aligned onto it through the
# shared (Country Code, year) index, then incomplete rows are discarded.
ssp2_data_combined = melt_ssp_variable(ssp2_data, "GDP|PPP")
ssp2_data_melt = melt_ssp_variable(ssp2_data, "Population")
ssp2_data_combined["Population"] = ssp2_data_melt["Population"]
ssp2_data_combined = ssp2_data_combined.dropna()

# GDP is in billion USD and Population in million, so the ratio times 1000
# is USD per person.
ssp2_data_combined["GDP|PPP_2017"] = ssp2_data_combined["GDP|PPP"] * USD_2005_TO_2017
ssp2_data_combined["GDPpc_2017$"] = (
    ssp2_data_combined["GDP|PPP_2017"] * 1000 / ssp2_data_combined["Population"]
)
ssp2_data_combined["logGDPpc"] = np.log(ssp2_data_combined["GDPpc_2017$"])
ssp2_data_combined.to_csv("ssp2_GDPpc.csv")
ssp2_data_combined

Unnamed: 0_level_0,Unnamed: 1_level_0,GDP|PPP,Population,GDP|PPP_2017,GDPpc_2017$,logGDPpc
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BHS,2025,14.220783,0.407386,18.060394,44332.352597,10.699470
BIH,2025,66.164981,3.680831,84.029526,22828.956812,10.035785
BLR,2025,221.851158,9.098162,281.750971,30967.898748,10.340706
BLZ,2025,4.312751,0.385562,5.477194,14205.746012,9.561402
BOL,2025,59.337327,12.001403,75.358406,6279.132767,8.744987
...,...,...,...,...,...,...
BEN,2050,48.804162,19.213739,61.981286,3225.883622,8.078962
BFA,2050,56.538251,38.601849,71.803579,1860.107215,7.528389
BGD,2050,749.487675,195.776605,951.849348,4861.915691,8.489188
BGR,2050,161.450789,6.308475,205.042502,32502.703439,10.389079
