In [1]:
import pandas as pd
import numpy as np
import h3.api.numpy_int as h3

import matplotlib.pyplot as plt
import cartopy.io.shapereader as shpreader
import geopandas as gpd

from tqdm import tqdm
import seaborn as sns
import re

tqdm.pandas()

import stc_unicef_cpi.data.get_satellite_data as ge
import stc_unicef_cpi.data.make_dataset as md
import stc_unicef_cpi.utils.general as g
import stc_unicef_cpi.utils.constants as c

import stc_unicef_cpi.utils.geospatial as geo
import stc_unicef_cpi.utils.clean_text as ct



# Join countries: combine the per-country hexagon files

In [None]:
# Folder holding the processed hexagon CSVs.
# NOTE(review): machine-specific absolute path with no trailing separator, so
# any file name appended to it needs its own leading '/' to land inside it.
read_path = "/mnt/c/Users/vicin/Desktop/DSSG/Project/stc_continuing/data/processed"

In [None]:
# Column dtypes enforced when each per-country CSV is read.
dtypes = dict(
    hex_code=int,
    name_commuting=str,
    geometry=str,  # TODO: unsure how geometry should be stored; read as text for now
    built=str,
)

# Source column name -> column name used in the combined dataset.
dic_rename = {
    'gdp_ppp_1990': 'GDP_PPP_1990',
    'gdp_ppp_2000': 'GDP_PPP_2000',
    'gdp_ppp_2015': 'GDP_PPP_2015',
    'NDVI_mean': 'ndvi',
    'NDWI_mean': 'ndwi',
    'precipitation_mean': 'precimean',
    'precipitation_stdDev': 'precistd',
    'pr_mean': 'precipiacc',
    'aet_mean_x': 'evapotrans',
}


In [None]:
# Build the combined file for the first 24 countries in c.countries_ssf.
# Each country's frame is collected in a list and concatenated once at the
# end; calling pd.concat inside the loop re-copies every previously loaded
# row on each iteration.
# Earlier revisions read some countries from the '/final', '/final/NO_DHS'
# or '/joined' subfolders; every input is now read directly from read_path.
country_frames = []
for country in c.countries_ssf[0:24]:
    country_code = ct.get_alpha3_code(country)
    print(country_code)

    data = pd.read_csv(
        read_path + f'/hexes_{country_code.upper()}_res7_thres30_all.csv',
        dtype=dtypes,
    )

    # Harmonise column names and tag every row with its country.
    data = data.rename(columns=dic_rename)
    data['country_code'] = country_code
    data['country'] = country

    country_frames.append(data)

# An empty slice still yields an empty frame, so `joined` is always defined.
joined = pd.concat(country_frames) if country_frames else pd.DataFrame()

# NOTE(review): there is no '/' between read_path and the file name, so the
# file is written next to the `processed` folder rather than inside it. The
# path is left unchanged because a later cell reads it back from exactly this
# location.
joined.to_csv(read_path + 'hexes_0_24_res7_thres30_all.csv', index=False)

In [None]:
# Build the combined file for the remaining countries in c.countries_ssf
# (index 24 onward).
# Each country's frame is collected in a list and concatenated once at the
# end; calling pd.concat inside the loop re-copies every previously loaded
# row on each iteration.
# Earlier revisions read some countries from the '/final', '/final/NO_DHS'
# or '/joined' subfolders; every input is now read directly from read_path.
country_frames = []
for country in c.countries_ssf[24:]:
    country_code = ct.get_alpha3_code(country)
    print(country_code)

    data = pd.read_csv(
        read_path + f'/hexes_{country_code.upper()}_res7_thres30_all.csv',
        dtype=dtypes,
    )

    # Harmonise column names and tag every row with its country.
    data = data.rename(columns=dic_rename)
    data['country_code'] = country_code
    data['country'] = country

    country_frames.append(data)

# An empty slice still yields an empty frame, so `joined` is always defined.
joined = pd.concat(country_frames) if country_frames else pd.DataFrame()

# NOTE(review): there is no '/' between read_path and the file name, so the
# file is written next to the `processed` folder rather than inside it. The
# path is left unchanged because a later cell reads it back from exactly this
# location.
joined.to_csv(read_path + 'hexes_24_48_res7_thres30_all.csv', index=False)
    


# Join DF: merge the two partial files and derive features

In [None]:
# Reload the two partial files. The names are appended to read_path without a
# separator, which matches the location they were written to.
path_0_24 = read_path + 'hexes_0_24_res7_thres30_all.csv'
path_24_48 = read_path + 'hexes_24_48_res7_thres30_all.csv'
joined_0_24 = pd.read_csv(path_0_24)
joined_24_48 = pd.read_csv(path_24_48)

In [None]:
# Stack both halves into a single frame and report its (rows, columns) size.
partial_frames = [joined_0_24, joined_24_48]
join = pd.concat(partial_frames)
print(join.shape)

In [None]:
# Child population per hexagon: the M_/F_ 0, 1, 5 and 10 columns count in
# full, while the M_15/F_15 columns are weighted by 0.6.
# NOTE(review): presumably these are age bands and 0.6 keeps the under-18
# share of a 15-19 band (3 of 5 years) - confirm against the data source.
full_weight_cols = ['M_0', 'M_1', 'M_5', 'M_10', 'F_0', 'F_1', 'F_5', 'F_10']
partial_weight_cols = ['M_15', 'F_15']
children_full = join[full_weight_cols].sum(axis=1)
children_partial = join[partial_weight_cols].sum(axis=1)
join['child_pop'] = children_full + 0.6 * children_partial

In [None]:
# Keep only hexagons whose child population is strictly positive; the copy
# detaches the result from the unfiltered frame.
has_children = join['child_pop'] > 0
join = join.loc[has_children].copy()
print(join.shape)

In [None]:
# Derive per-hexagon fields from the H3 index in `hex_code`, one pass (with a
# progress bar) per field: centroid, string form of the index, and boundary.
# The `geometry` column is (re)computed here from the boundary call.
join['hex_centroid'] = join['hex_code'].progress_apply(h3.h3_to_geo)
join['hex_code_str'] = join['hex_code'].progress_apply(h3.h3_to_string)
join['geometry'] = join['hex_code'].progress_apply(
    lambda cell: h3.h3_to_geo_boundary(cell, geo_json=True)
)


In [None]:
# Global human settlement built-up flags.
# Each new column is 1 when the text form of `built` contains the given class
# digit and 0 otherwise.
built_flags = {
    'water_surface': '1',     # water surface
    'no_built': '2',          # land not built-up in any epoch
    'build_2000_2014': '3',   # built-up from 2000 to 2014 epochs
    'build_1990_2000': '4',   # built-up from 1990 to 2000 epochs
    'build_1975_1990': '5',   # built-up from 1975 to 1990 epochs
    'build_prior_1975': '6',  # built-up up to 1975 epoch
}
for flag_column, class_digit in built_flags.items():
    join[flag_column] = join['built'].apply(
        # Bind the digit as a default so each pass searches for its own class.
        lambda value, digit=class_digit: int(re.search(digit, str(value)) is not None)
    )

In [None]:
# Discard columns that are not wanted in the final table, including the raw
# `built` text now that its flag columns exist.
unused_columns = ['copland', 'built', 'aet_mean_y', 'lat', 'long']  # 'NR', 'CDMA',
join = join.drop(columns=unused_columns)

In [None]:
# Sanity check on the size of the final table.
print(join.shape)
# (4493068, 154)  <- presumably the shape printed by an earlier run; TODO confirm

In [None]:
# Persist the fully processed table. The frame to save is `join`, which
# carries child_pop, the H3-derived fields and the built-up flags; `joined`
# only holds the raw rows of the last batch of countries loaded earlier.
join.to_csv(read_path + '/20221021_hexes_ssf.csv', index=False)