# Preparation
Importing functions and commonly used data

In [1]:
# Importing the necessary functions
# Required packages are imported from the functions module

# NOTE(review): the cells below use `pd`, `np`, `join` and `display` without an
# explicit import in this notebook; presumably they arrive through this star
# import -- confirm against config_imports.
from zz_structured_code.code.config.config_imports import *
# Root folder from which the 'data/input' paths in later cells are built.
from zz_structured_code.code.config.config_project_path import main_project_directory

import os
from zz_structured_code.code.local_functions.local_functions import remove_border, f_read_df

import zz_structured_code.code.parameterization.parameterization as prmt
# Lookup table reused by the GDP cells: it is merged on 'UN Code' and passed to
# prmt.f_absolute_projections together with the 'Country code' merge column.
df_country_codes = prmt.f_df_continents()

# GDP data - World Bank (historical) and SEDAC (projections)

## Reading data - historical

In [2]:
'''
Historical GDP data is obtained from the following source.
GDP, World Bank national accounts data, and OECD National Accounts data files. Accessed 25 Oct 2021
https://data.worldbank.org/indicator/NY.GDP.MKTP.CD
'''
# Path creation
directory_path = os.path.join(main_project_directory, 'data', 'input')
file_name = 'GDP-Historical.csv'

# Reading file; the first four lines of the download are skipped (skiprows=4).
# os.path.join is used for the file path as well, so this cell does not depend
# on a bare `join` being provided by the star import.
df = pd.read_csv(os.path.join(directory_path, file_name),
                 sep=',', skip_blank_lines=True, skiprows=4)

# Index the table by country code, using the same spelling as the projections table.
df = df.rename(columns={'Country Code': 'Country code'}).set_index('Country code', drop=True)

# Drop the descriptive columns plus any 'Unnamed: N' column produced by the parser.
# Matching on the prefix instead of the literal 'Unnamed: 65' keeps the cell working
# when the download contains a different number of year columns.
l_unnamed = [column for column in df.columns if str(column).startswith('Unnamed')]
df = df.drop(columns=['Country Name', 'Indicator Name', 'Indicator Code'] + l_unnamed)

# Year labels are read as strings; cast them to int so they can be compared with
# the projection years. Only conversion errors are tolerated, and they are reported
# instead of being silently swallowed.
try:
    df.columns = df.columns.values.astype(int)
except (ValueError, TypeError) as err:
    print('Warning: historical GDP column labels could not be cast to int '
          '({}); labels left unchanged'.format(err))

df.columns.name = 'Year'
# Missing observations are stored as 0.
df = df.fillna(0)
# df = df * 0.6
df_gdp_historical = df

## Reading data - projections

In [3]:
'''
GDP data is obtained from the following source for SRES A2.
Gaffin, S.R., X. Xing, and G. Yetman. 2002. Country-Level GDP and Downscaled Projections Based on the SRES A1, A2, B1, and B2 Marker Scenarios, 1990-2100. Palisades, NY: NASA Socioeconomic Data and Applications Center (SEDAC). https://doi.org/10.7927/H4XW4GQ1. Accessed 27 Sep 2021
'''
# Input location of the tab-separated projections table
file_name = 'GDP-Projections-nasa.csv'
directory_path = os.path.join(main_project_directory, 'data', 'input')

# Load the raw table; `df` and `df_gdp_projections` refer to the same frame,
# which the next cell cleans up.
df_gdp_projections = pd.read_csv(join(directory_path, file_name), sep='\t')
df = df_gdp_projections

In [4]:
# Clean-up of the projections table.
#
# The cell always starts from `df_gdp_projections` rather than from whatever `df`
# happens to be bound to, and every step checks for the columns it needs. This keeps
# the cell safe to re-run (already-applied steps are skipped) without hiding real
# errors behind bare `except: pass` blocks.
df = df_gdp_projections

# Removing unnecessary columns
df = df.drop(columns=['Name'], errors='ignore')

# Join the country lookup table on the UN code. The 'Country', 'Continent' and
# 'Country code' columns handled below are presumably supplied by df_country_codes
# -- confirm against prmt.f_df_continents. Skipped once 'UN Code' has been removed
# by an earlier run of this cell.
if 'UN Code' in df.columns:
    df = pd.merge(left=df, right=df_country_codes, on='UN Code')

# Keep only the year columns and the country code.
df = df.drop(columns=['Country', 'Continent', 'UN Code'], errors='ignore')

if 'Country code' in df.columns:
    df = df.set_index('Country code')

# Year labels as int, so they can be compared with the historical table.
# Only conversion errors are tolerated, and they are reported.
try:
    df.columns = df.columns.values.astype(int)
except (ValueError, TypeError) as err:
    print('Warning: projected GDP column labels could not be cast to int '
          '({}); labels left unchanged'.format(err))

# Missing observations are stored as 0.
df = df.fillna(0)
df.columns.name = 'Year'
df_gdp_projections = df

## Joining historical data and projections

In [5]:
# Historical years that are not already covered by the projections table
cols_to_use = list(df_gdp_historical.columns.difference(df_gdp_projections.columns))

# Keep only the countries present in both tables (inner join on the country-code index)
df_gdp_countries = pd.merge(df_gdp_historical[cols_to_use], df_gdp_projections,
                            left_index=True, right_index=True,
                            how='inner')

# Cast the labels of the *merged* frame. The previous version read the labels from
# `df` (the projections frame), whose length differs from the merged frame whenever
# historical years are added, so the assignment raised and was silently swallowed.
try:
    df_gdp_countries.columns = df_gdp_countries.columns.values.astype(int)
except (ValueError, TypeError) as err:
    print('Warning: merged GDP column labels could not be cast to int '
          '({}); labels left unchanged'.format(err))

# Order the years chronologically. Sorting on the column labels directly does not
# depend on the columns still being named 'Year' after the merge; the name is set
# explicitly afterwards.
df_gdp_countries = df_gdp_countries.sort_index(axis=1)
df_gdp_countries.columns.name = 'Year'
df = df_gdp_countries.copy()

## Extrapolating

In [6]:
# Column name handed to f_absolute_projections as the merge key
merge_column = 'Country code'

# Fill the year range of the joined GDP table (behaviour defined in prmt)
df_extrapolated = prmt.f_extrapolate_backward_forward_df(df)
# df_gdp_countries_continents = prmt.f_distribution(df_extrapolated, df_country_codes, merge_column)

df_local = prmt.f_absolute_projections(df_extrapolated, df_country_codes, merge_column)
df_local.columns.name = 'Year'

# Transpose so that the former column labels become the index; `df_local` keeps the
# 'World' column, `df_absolute_gdp` is the same table without it.
df_local = df_local.T
df_absolute_gdp = df_local.drop(columns='World')

  return op.get_result()


# Population data
<a id='subsec:population'></a>

In [7]:
'''
The population data is obtained from the following sources: 

upto 2020: 
        United Nations, Department of Economic and Social Affairs, Population Division (2019). 
        Probabilistic Population Projections Rev. 1 based on the World Population Prospects 2019 Rev. 1: http://population.un.org/wpp/  
        accessed on 30 Sep 2021

2020 onwards: 
        United Nations, Department of Economic and Social Affairs, Population Division (2019). 
        World Population Prospects 2019, Online Edition. Rev. 1.  
        accessed on 30 Sep 2021
'''

# Input location of the tab-separated population table
file_name = 'UN-Population-distribution.csv'
directory_path = os.path.join(main_project_directory, 'data', 'input')
df = pd.read_csv(join(directory_path, file_name), sep='\t')

# Years become the index; two region labels are renamed to the continent names
# used in the rest of the notebook.
region_renames = {'Latin America and the Caribbean': 'South America',
                  'Northern America': 'North America'}
df = df.set_index('Year').rename(columns=region_renames)

# Name both axes, then transpose: regions as rows, years as columns
df.index.name = 'Year'
df.columns.name = 'Country code'
df = df.T

# Share of each region in the 'World' row, in percent
# (computed before the 'World' row is removed)
world_row = df.loc['World', :]
df_population_projections_contribution = df.divide(world_row, axis=1) * 100

df = df.drop(index='World')
df_extrapolated = prmt.f_extrapolate_backward_forward_df(df)
# Source values are in thousands; scale to individual counts
df_absolute_population = df_extrapolated.T * 1000

# GDP per capita

In [8]:
# Work on copies with the axis names cleared, so the source frames stay untouched
# and the division in the next cell aligns on plain labels.
df1 = (
    df_absolute_gdp.copy()
    .rename_axis(None, axis='columns')
    .rename_axis(None, axis='index')
)
df2 = (
    df_absolute_population.copy()
    .rename_axis(None, axis='columns')
    .rename_axis(None, axis='index')
)

# Restrict the population table to the six continents
l_continents = ['Africa', 'Asia', 'Europe', 'South America', 'North America',
                'Oceania']
df2 = df2[l_continents]

In [9]:
# GDP divided by population, aligned on the row and column labels of both frames.
# +inf results are turned into NaN so that they do not enter the mean.
df = df1.div(df2).replace(np.inf, np.nan)

# NOTE(review): the historical source cited in the loading cell is indicator
# NY.GDP.MKTP.CD -- confirm that the 'Constant 2010 USD' label below matches the
# units of the underlying data.
display('Constant 2010 USD GDP per capita over the entire time horizon for each continent')

# Average over the rows for each continent, truncated to whole numbers
df = df.mean(axis=0).astype('int')
df

'Constant 2010 USD GDP per capita over the entire time horizon for each continent'

Africa            1733
Asia              4100
Europe           27367
North America    46318
Oceania          23841
South America     5425
dtype: int64

# Availability per capita per day

In [10]:
# Denominator: population, with continents as rows after the transpose
# --- --- ---  --- --- ---  --- --- ---  --- --- ---  --- --- ---  --- --- --- #
l_continents = ['Africa', 'Asia',  'Europe', 'North America', 'Oceania', 'South America']
df1 = (
    df_absolute_population.copy()
    .rename_axis(None, axis='columns')
    .rename_axis(None, axis='index')
)[l_continents].T

# Numerator: one availability value per continent, repeated for every column
# --- --- ---  --- --- ---  --- --- ---  --- --- ---  --- --- ---  --- --- --- #
# fraction of global water available for:
# 'Africa', 'Asia',  'Europe', 'North America', 'Oceania', 'South America'
# (the order must match l_continents above)
l_fractions = [0.09, 0.284, 0.152, 0.170, 0.0210, 0.283]

# NOTE(review): 135e12 and the division by 7 are undocumented in this notebook;
# presumably a global total converted to a per-day figure -- confirm value and units.
l_availability = [fraction * 135e12 / 7 for fraction in l_fractions]  # Availability per day

df2 = pd.DataFrame(
    np.tile(np.array(l_availability, dtype=float)[:, None], (1, df1.shape[1])),
    index=df1.index,
    columns=df1.columns,
)

# Division
# --- --- ---  --- --- ---  --- --- ---  --- --- ---  --- --- ---  --- --- --- #
df_out = df2 / df1

# Leave out the first n_years_skip years, counted from 1950
n_years_skip = 15
l_years_skip = list(range(1950, 1950 + n_years_skip))
l_columns = [year for year in df_out.columns if year not in l_years_skip]

# +inf results are turned into NaN so that they do not enter the mean
df_out = df_out[l_columns].replace(np.inf, np.nan)

display('Water availability liters per capita over the entire time horizon for each continent')

# Average over the remaining years for each continent, truncated to whole numbers
df = df_out.mean(axis=1).astype('int')
df


'Water availability liters per capita over the entire time horizon for each continent'

Africa           1450
Asia             1172
Europe           3774
North America    8487
Oceania          9462
South America    8348
dtype: int64