# Water Security

### Downloading all data

In [1]:
# Make sure you've run aws configure before hand
!aws s3 sync s3://s3groupegypt data

## Preprocessing

In [2]:
import pandas as pd
import numpy as np
from shapely.geometry import Point, box
import matplotlib.pyplot as plt
import geopandas
import geoplot
import h5py
import warnings
warnings.filterwarnings('ignore')

# Merge 0.5x0.5 degree cells into 1x1 degree cells.
# We overlay 1x1 degree cells on the world and intersect with co-ordinates
# and aggregate using average.
# This is pretty generic and infact works with any x degree cell.
def overlay_grid_cells(degrees=1):
    grid = []
    for lat in np.arange(-180, 180, degrees):
        for lon in np.arange(-360, 360, degrees):
            grid.append(box(lat, lon, lat+degrees, lon+degrees))
    return grid

def geodataframe_merge_adjacent_cells(df, degrees=1, aggfunc='mean'):
    columns = df.columns[:-1]
    grid = pd.DataFrame(data={'id': np.arange(0, 360*720)})
    crs = {'init': 'epsg:4326'}
    gf = geopandas.GeoDataFrame(grid, crs=crs, geometry=overlay_grid_cells())
    gf_intersect = geopandas.sjoin(gf, df, how="inner", op='intersects')
    b = gf_intersect.dissolve(by='id', aggfunc = 'mean')
    return geopandas.GeoDataFrame(b[columns], geometry=b.geometry.centroid)

# Converts dataframe to geodataframe
# Dataframe must contain lon/lat fields!
def dataframe_to_geodataframe(df):
    crs = {'init': 'epsg:4326'}
    geo = [Point(row['lon'], row['lat']) for (_, row) in df[['lon', 'lat']].iterrows()]
    gdf = geopandas.GeoDataFrame(df, crs=crs, geometry=geo)
    return gdf

frames = []

### Loading SSP2 dataset

Here we load the dataset and visualise the information with a head()/google map

In [3]:
# This dataset has population estimates every decade. We linearly impute 
# the intermediate years.
def linear_impute(df, prefix):
    df[["%c2_%s" % (prefix, i) for i in np.arange(1980, 2017) if i % 10]] = np.nan
    df_t = df[["%c2_%s" % (prefix, i) for i in np.arange(1980, 2017)]].transpose()
    df_it = df_t.interpolate(method='linear', direction = 'forward').transpose()
    df_it[["lon", "lat"]] = df[["px", "py"]]
    return df_it

In [4]:
# Read, Impute, GeoDataFrame, Center on grid
population = pd.read_csv("data/SSP2/pop_ssp2.csv")
population = linear_impute(population, prefix="p")
population = dataframe_to_geodataframe(population)
population = geodataframe_merge_adjacent_cells(population)

gdp = pd.read_csv("data/SSP2/gdp_ssp2.csv")
gdp = linear_impute(gdp, prefix="g")
gdp = dataframe_to_geodataframe(gdp)
gdp = geodataframe_merge_adjacent_cells(gdp)

frames.append(population)
frames.append(gdp)

In [5]:
gdp.head()

Unnamed: 0_level_0,g2_1980,g2_1981,g2_1982,g2_1983,g2_1984,g2_1985,g2_1986,g2_1987,g2_1988,g2_1989,...,g2_2010,g2_2011,g2_2012,g2_2013,g2_2014,g2_2015,g2_2016,lon,lat,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
340,0.011155,0.011242,0.011329,0.011416,0.011503,0.01159,0.011677,0.011764,0.011851,0.011938,...,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,-179.86174,-19.0043,POINT (-179.50000 -19.50000)
341,0.01371,0.013812,0.013914,0.014015,0.014117,0.014219,0.014321,0.014422,0.014524,0.014626,...,5.2e-05,5.2e-05,5.2e-05,5.2e-05,5.2e-05,5.2e-05,5.2e-05,-179.52819,-18.6327,POINT (-179.50000 -18.50000)
342,0.013705,0.013801,0.013898,0.013994,0.01409,0.014186,0.014283,0.014379,0.014475,0.014572,...,4.3e-05,4.3e-05,4.3e-05,4.3e-05,4.3e-05,4.3e-05,4.3e-05,-179.298957,-17.479223,POINT (-179.50000 -17.50000)
343,0.013764,0.013868,0.013972,0.014076,0.01418,0.014284,0.014388,0.014492,0.014596,0.0147,...,5e-05,5e-05,5e-05,5e-05,5e-05,5e-05,5e-05,-179.67352,-16.41892,POINT (-179.50000 -16.50000)
344,0.01059,0.010685,0.01078,0.010876,0.010971,0.011066,0.011162,0.011257,0.011352,0.011448,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-179.702275,-15.82973,POINT (-179.50000 -15.50000)


In [6]:
population.head()

Unnamed: 0_level_0,p2_1980,p2_1981,p2_1982,p2_1983,p2_1984,p2_1985,p2_1986,p2_1987,p2_1988,p2_1989,...,p2_2010,p2_2011,p2_2012,p2_2013,p2_2014,p2_2015,p2_2016,lon,lat,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
340,0.003656,0.003699,0.003742,0.003785,0.003828,0.003871,0.003914,0.003957,0.003999,0.004042,...,7.017828e-07,7.017828e-07,7.017828e-07,7.017828e-07,7.017828e-07,7.017828e-07,7.017828e-07,-179.86174,-19.0043,POINT (-179.50000 -19.50000)
341,0.004463,0.004515,0.004566,0.004617,0.004669,0.00472,0.004771,0.004823,0.004874,0.004925,...,0.0001911859,0.0001911859,0.0001911859,0.0001911859,0.0001911859,0.0001911859,0.0001911859,-179.52819,-18.6327,POINT (-179.50000 -18.50000)
342,0.004357,0.004408,0.004458,0.004509,0.00456,0.00461,0.004661,0.004712,0.004762,0.004813,...,0.0001659954,0.0001659954,0.0001659954,0.0001659954,0.0001659954,0.0001659954,0.0001659954,-179.298957,-17.479223,POINT (-179.50000 -17.50000)
343,0.00448,0.004532,0.004584,0.004636,0.004688,0.004741,0.004793,0.004845,0.004897,0.004949,...,0.0001949389,0.0001949389,0.0001949389,0.0001949389,0.0001949389,0.0001949389,0.0001949389,-179.67352,-16.41892,POINT (-179.50000 -16.50000)
344,0.003656,0.003699,0.003742,0.003785,0.003828,0.003871,0.003914,0.003957,0.003999,0.004042,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-179.702275,-15.82973,POINT (-179.50000 -15.50000)


### Loading Water GAP dataset

Here we load the dataset and visualise the information with a head()/google map

### Loading GLDAS dataset

Here we load the dataset and visualise the information with a head()/google map

### Merging the dataframe

Here we load the dataset and visualise the information with a head()/google map

In [None]:
# merge(frames)