In [1]:
# import cell
import xarray as xr # to work with multi-dimensional arrays
import numpy as np # to work with multi-dimensional arrays
import glob # to find file pathways
import linecache as lc # to store header info of ASCII files
import time # used to note current time
import xesmf as xe # to regrid data
import pandas as pd
import pickle


import matplotlib
import matplotlib.pyplot as plt # for plotting
import matplotlib.colors as colors # for custom colourbars
import matplotlib.gridspec as gridspec # for more custom subplot positioning
import matplotlib.ticker as ticker # for custom tick label formatting
import cartopy # use for geographic map projections
import cartopy.crs as ccrs # use for geographic map projections
import regionmask # to work with IPCC sixth assessment regions

print(f'Import cell run @ {time.ctime()}')

Import cell run @ Sun Sep 20 18:21:06 2020


### Spatial Mean Population Variables

Both population projection datasets, the HYDE 3.2 and the NCAR-CIDR ones, have multiple projections across the periods used in this study; the pre-industrial, current, and future (1851-1900, 1981-2010, 2071-2100 respectively). Must time-average the available projections across each period using a suitable weighting.

Metacode:

1. Define file pathways and load Dataset objects.
2. Concatenate a period's available projections into a single Dataset object along a new dimension.
3. Determine suitable weighting to apply to each spatial population projection for averaging. (i.e. if projections are at equal intervals, use an equal weighting.)
4. Multiply each projection with weighting.
5. Sum up projection along the recently added dimension to obtain average. (Summing as opposed to averaging as have multiplied through by a weigthing in previous step.)
6. Save Dataset object.

Repeat above steps for all population variables; population count, population density, and both rural and urban population counts.

In [None]:
# define the HYDE 3.2 variable short and long names
pop_vars = {'popc': 'population count', 'popd': 'population density',
            'rurc': 'rural population count', 'urbc': 'urban population count'}

# create Dataset object to store the spatial means of the HYDE 3.2 variables
hyde_ens = xr.Dataset()

#################### pre-industrial #######################

# compute the pre-industrial period's spatial mean population variables
for var in list(pop_vars.keys()):
    
    # define the pathways of the pre-industrial population variables
    preind_pws = glob.glob('/home/ucfagtj/DATA/Dissertation/Data/Population/HYDE_3.2/Baseline/' + \
                           f'18[5,6,7,8,9]0/{var}*') + \
                 glob.glob('/home/ucfagtj/DATA/Dissertation/Data/Population/HYDE_3.2/Baseline/' + \
                           f'1900/{var}*')
    
    # store the geographic information contained within the ASCII file
    lon_ll, lat_ll = float(lc.getline(preind_pws[0], 3)[10:]), float(lc.getline(preind_pws[0], 4)[10:])
    lon_points, lat_points = float(lc.getline(preind_pws[0], 1)[5:]), float(lc.getline(preind_pws[0], 2)[5:])
    res = 360. / lon_points # same resolution in latitude and longitude
    
    # define latitude and longitude values
    # raster cell position given in ASCII file is the bottom left corner of a cell
    # this work will use the centre point as position of raster cell
    lat = np.arange(start = lat_ll + (res / 2), stop = lat_ll + (res / 2) + (lat_points * res), step = res)
    lon = np.arange(start = lon_ll + (res / 2), stop = lon_ll + (res / 2) + (lon_points * res), step = res)
    
    # load all projections; ASCII file type - text file with geographic info in first 6 rows
    arr_list = []
    for pw in preind_pws:
        arr_list.append(np.loadtxt(pw, skiprows = 6, dtype = 'float'))
    arr = np.stack(arr_list, axis = 2) # concatenate numpy arrays along new dimension
    
    # compute mean spatial projection; as equal intervals, equal weighting can be used
    arr[arr == -9999.0] = 0. # set all no data values to 0 before averaging
    arr = np.mean(arr, axis = 2)
    
    # convert to a DataArray object; negate latitude coordinates as array starts at +90 not -90
    da = xr.DataArray(data = arr, coords = {'lat': -lat, 'lon': lon}, dims = ['lat', 'lon'])
    
    # add attribute information to DataArray object
    da.attrs['title'] = f'preind_mean_{var}'
    da.attrs['long_title'] = f'pre-industrial spatial mean {pop_vars.get(var)}'
    da.attrs['resolution'] = f'lonxlat: {round(res, 3)} x {round(res, 3)} degrees'
    da.attrs['period'] = '1851-1900'
    da.attrs['source'] = 'HYDE3.2'
    
    # add DataArray object to Dataset object
    hyde_ens = hyde_ens.assign({f'{da.title}': da})
    
    # remove objects used for pre-industrial computation
    da.close()
    del(da, lat, lon, arr, arr_list, lon_ll, lat_ll, lon_points, lat_points, res)
    
#################### current and current anomaly #######################
    
# compute the current period's spatial population variables; both absolute and anomaly
for var in list(pop_vars.keys()):
    
    # define the pathways of the current period's population variables
    current_pws = glob.glob('/home/ucfagtj/DATA/Dissertation/Data/Population/HYDE_3.2/Baseline/' + \
                            f'19[8,9]0/{var}*') + \
                  glob.glob('/home/ucfagtj/DATA/Dissertation/Data/Population/HYDE_3.2/Baseline/' + \
                            f'20[0, 1]0/{var}*')
        
    # store the geographic information contained within the ASCII file; each file same info
    lon_ll, lat_ll = float(lc.getline(current_pws[0], 3)[10:]), float(lc.getline(current_pws[0], 4)[10:])
    lon_points, lat_points = float(lc.getline(current_pws[0], 1)[5:]), float(lc.getline(current_pws[0], 2)[5:])
    res = 360. / lon_points # same resolution in latitude and longitude
        
    # define latitude and longitude values
    # raster cell position given in ASCII file is the bottom left corner of a cell
    # this work will use the centre point as position of raster cell
    lat = np.arange(start = lat_ll + (res / 2), stop = lat_ll + (res / 2) + (lat_points * res), step = res)
    lon = np.arange(start = lon_ll + (res / 2), stop = lon_ll + (res / 2) + (lon_points * res), step = res)
    
    # load all projections; ASCII file type - text file with geographic info in first 6 rows
    arr_list = []
    for pw in current_pws:
        arr_list.append(np.loadtxt(pw, skiprows = 6, dtype = 'float'))
    arr = np.stack(arr_list, axis = 2) # concatenate numpy arrays along new dimension
    
    # replace no data values to 0
    arr[arr == -9999.0] = 0.
    
    # define an anomaly array with the pre-indsutrial baseline negated from each projection
    anom_arr = np.empty(arr.shape)
    for i in range(0, 4):
        anom_arr[:, :, i] = arr[:, :, i] - hyde_ens[f'preind_mean_{var}'].values

    # compute mean spatial projection; as equal intervals, equal weighting can be used
    arr = np.mean(arr, axis = 2)
    anom_arr = np.mean(anom_arr, axis = 2)
    
    # convert to a DataArray object; negate latitude coordinates as array starts at +90 not -90
    da = xr.DataArray(data = arr, coords = {'lat': -lat, 'lon': lon}, dims = ['lat', 'lon'])
    anom_da = xr.DataArray(data = anom_arr, coords = {'lat': -lat, 'lon': lon}, dims = ['lat', 'lon'])
    
    # add attribute information to DataArray object
    da.attrs['title'], anom_da.attrs['title'] = f'current_mean_{var}', f'current_mean_{var}_anom'
    da.attrs['long_title'] = f'current spatial mean {pop_vars.get(var)}'
    anom_da.attrs['long_title'] = f'current spatial mean {pop_vars.get(var)} anomaly'
    da.attrs['resolution'] = f'lonxlat: {round(res, 3)} x {round(res, 3)} degrees'
    anom_da.attrs['resolution'] = f'lonxlat: {round(res, 3)} x {round(res, 3)} degrees'
    da.attrs['period'], anom_da.attrs['period'] = '1981-2010', '1981-2010'
    da.attrs['source'], anom_da.attrs['source'] = 'HYDE3.2', 'HYDE3.2'
    anom_da.attrs['baseline'] = '1851-1900'
    
    # add DataArray object to Dataset object
    hyde_ens = hyde_ens.assign({f'{da.title}': da})
    hyde_ens = hyde_ens.assign({f'{anom_da.title}': anom_da})
    
    # remove objects used for current computation
    da.close(), anom_da.close()
    del(da, anom_da, lat, lon, arr, anom_arr, arr_list, lon_ll, lat_ll, lon_points, lat_points, res)

# add attribute information to Dataset object
hyde_ens.attrs['source'] = 'HYDE 3.2'
hyde_ens.attrs['created_on'] = time.ctime()
hyde_ens.attrs['resolution'] = hyde_ens.preind_mean_popc.resolution
hyde_ens.attrs['density_unit'] = 'count / km^2'

# save Dataset object
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/hyde3_2_pop_variables_original_res.nc'
hyde_ens.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# close Dataset object
hyde_ens.close()

In [None]:
# define the NCAR-CIDR population variable short and long names; no density count in original
pop_vars = {'total': 'population count', 'rural': 'rural population count',
            'urban': 'urban population count'}

# create Dataset object to store the spatial means of the NCAR-CIDR variables
ncar_ens = xr.Dataset()

#################### ssp projections #######################
# will work with data as numpy objects due to xarray issue could not solve

# compute the future period's spatial mean population variables
for ssp in ['ssp1', 'ssp2', 'ssp3', 'ssp5']: # loop over each scenario
    for var in list(pop_vars.keys()): # loop over each population count type

        
        # define the pathways of the ssp population variable
        ssp_pws = []
        ssp_pws += glob.glob(f'/home/ucfagtj/DATA/Dissertation/Data/Population/IAM/{ssp}/{var}/' + \
                             f'NetCDF/{ssp}{var[:3]}2[0, 1][0, 7, 8, 9]*') + \
                   glob.glob(f'/home/ucfagtj/DATA/Dissertation/Data/Population/IAM/{ssp}/{var}/' + \
                             f'NetCDF/{ssp}_2[0, 1][0, 7, 8, 9]*')
      
        # load a single population projection; remaining projections will be concatenated to this
        x = xr.open_dataset(ssp_pws[0], use_cftime = True)
        pop_data = x.to_array(dim = 'dummy').squeeze(drop = True) # convert Dataset object to DataArray object
        x.close()
        
        # concatenate remaining projections along a new dimension
        for pw in ssp_pws[1:]:
            x = xr.open_dataset(pw, use_cftime = True)
            data = x.to_array(dim = 'dummy').squeeze(drop = True) # convert Dataset object to DataArray object
            pop_data = xr.concat([pop_data, data], dim = 'projection')
            x.close(), data.close()
            
        # compute mean spatial projection; as equal intervals, equal weighting can be used
        abs_mean = pop_data.mean(dim = 'projection', skipna = True, keep_attrs = False)
        
        # set NaN values as 0
        abs_mean = abs_mean.fillna(0.)
        
        # update attribute information to DataArray object
        abs_mean.attrs['title'] = f'{ssp}_mean_{var}c'
        abs_mean.attrs['long_title'] = f'{ssp} spatial mean {pop_vars.get(var)}'
        res = round(abs_mean.lat[19].values - abs_mean.lat[20].values, 3)
        abs_mean.attrs['resolution'] = f'lonxlat: {res}x{res} degrees'
        abs_mean.attrs['period'] = '2071-2100'
        abs_mean.attrs['source'] = 'NCAR-CIDR'
        
        # add DataArray object to Dataset object
        ncar_ens[f'{abs_mean.title}'] = abs_mean
       
        # remove and close unrequired objects
        pop_data.close(), abs_mean.close()
        
# work around for odd bug where adding ssp5 rural DataArray to Dataset gives all NaN values    
ncar_ens['ssp5_mean_ruralc'] = ncar_ens.ssp5_mean_totalc - ncar_ens.ssp5_mean_urbanc
ncar_ens.ssp5_mean_ruralc.attrs['title'] = 'ssp5_mean_ruralc'
ncar_ens.ssp5_mean_ruralc.attrs['long title'] = 'ssp5 spatial mean rural population count'
ncar_ens.ssp5_mean_ruralc.attrs['resolution'] = f'lonxlat: {res}x{res} degrees'
ncar_ens.ssp5_mean_ruralc.attrs['period'] = '2071-2100'
ncar_ens.ssp5_mean_ruralc.attrs['source'] = 'NCAR-CIDR'

# add attribute information to Dataset object
ncar_ens.attrs['source'] = 'NCAR-CIDR'
ncar_ens.attrs['created_on'] = time.ctime()
ncar_ens.attrs['resolution'] = ncar_ens.ssp5_mean_totalc.resolution

# save Dataset object
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/ncar_pop_variables_original_res.nc'
ncar_ens.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# close Dataset object
ncar_ens.close()

### Rectilinear Regridding

Must be careful when regridding population projections as do not need to use interpolation when regridding population counts to a coarser resolution. Instead simply sum the counts of the finer cells that fall within a given coarser cell.

#### Metacode:
1. Create a grid cell network of desired resolution, include both cell centre points and cell bounds.
2. Iterate through each cell of desired resoltuion grid summing up the finer cell values of those which fall within bounds.
3. Create a DataArray object of the aggregated finer cell values.
4. Add DataArray object to Dataset object.
5. Repreat for each population count variable.

#### HYDE 3.2 Data

In [None]:
# create an empty Dataset object of the desired grid resolution; 1.0 x 1.0 in this case
des_res = 1.
des_lon = np.arange(-180 + (des_res / 2), 180 + (des_res / 2), des_res)
des_lon_b = np.arange(des_lon[0] - (des_res / 2), des_lon[-1] + des_res, des_res)
des_lat = np.arange(90 - (des_res / 2), -90 - (des_res / 2), -des_res) # avoid centre points on poles
des_lat_b = np.arange(des_lat[0] + (des_res / 2), des_lat[-1] - des_res, -des_res)
des_grd = xr.Dataset({'lon': (['lon'], des_lon),
                      'lat': (['lat'], des_lat),
                      'lon_b': (['lon_b'], des_lon_b),
                      'lat_b': (['lat_b'], des_lat_b)})

# load in source data to be regridded and drop density variables
hyde_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/hyde3_2_pop_variables_original_res.nc'
hyde_data = xr.open_dataset(hyde_pw)
del(hyde_data['preind_mean_popd'], hyde_data['current_mean_popd'], hyde_data['current_mean_popd_anom'])

# summation regridding for each HYDE 3.2 variable
for variable in list(hyde_data.keys()):
    
    # load data of given variable
    data = hyde_data[f'{variable}']

    # create an empty numpy object to store destination grid population counts
    pop_c = np.zeros((len(des_lat), len(des_lon)), dtype = float)
    
    # define the longitude and latitude bounds of the desintation grid cell
    for col, lon_bnd in enumerate(des_grd.lon_b.values[0 :-1]):
        lon_upp_bnd, lon_low_bnd = lon_bnd + 1., lon_bnd
    
        for row, lat_bnd in enumerate(des_grd.lat_b.values[0 : -1]):
            lat_upp_bnd, lat_low_bnd = lat_bnd, lat_bnd - 1.
        
            # slice DataArray object by the longitude and latitude bounds so that only cells within remain
            x = data.sel(lon = slice(lon_low_bnd, lon_upp_bnd), lat = slice(lat_upp_bnd, lat_low_bnd))
            
            # add summed source cell data to corresponding destination cell in numpy array
            pop_c[row, col] = x.sum()
            
            # clean up objects for next iteration
            del(x, lat_upp_bnd, lat_low_bnd)
            
    # clean up objects for next iteration
    del(lon_upp_bnd, lon_low_bnd)
            
    # create DataArray object from summation array
    da = xr.DataArray(data = pop_c, coords = {'lat': des_lat, 'lon': des_lon}, dims = ['lat', 'lon'])        
            
    # add final summation numpy object to destination grid Dataset object
    des_grd = des_grd.assign({f'{variable}': da})
    
    # clean up objects used for next iteration
    da.close(), data.close()
    del(pop_c)

# add attribute information to Dataset object
des_grd.attrs['title'] = 'spatial mean population counts'
des_grd.attrs['resolution'] = f'lonxlat: {des_res}x{des_res} degrees'
des_grd.attrs['source'], des_grd.attrs['regridded'] = 'HYDE 3.2', 'True'
des_grd.attrs['created_on'] = time.ctime()

# save Dataset object
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/' + \
          f'hyde3_2_pop_variables_{int(des_res)}x{int(des_res)}_res.nc'
des_grd.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# close Dataset object
des_grd.close(), hyde_data.close()

#### NCAR-CIDR Data

In [None]:
# create an empty Dataset object of the desired grid resolution; 1.0 x 1.0 here
des_res = 1.
des_lon = np.arange(-180 + (des_res / 2), 180 + (des_res / 2), des_res)
des_lon_b = np.arange(des_lon[0] - (des_res / 2), des_lon[-1] + des_res, des_res)
des_lat = np.arange(90 - (des_res / 2), -90 - (des_res / 2), -des_res) # avoid centre points on poles
des_lat_b = np.arange(des_lat[0] + (des_res / 2), des_lat[-1] - des_res, -des_res)
des_grd = xr.Dataset({'lon': (['lon'], des_lon),
                      'lat': (['lat'], des_lat),
                      'lon_b': (['lon_b'], des_lon_b),
                      'lat_b': (['lat_b'], des_lat_b)})

# load in source data to be regridded and drop density variables
ncar_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/ncar_pop_variables_original_res.nc'
ncar_data = xr.open_dataset(ncar_pw)

# summation regridding for each NCAR-CIDR variable
for variable in list(ncar_data.keys()):
    
    # load data of given variable
    data = ncar_data[f'{variable}']

    # create an empty numpy object to store destination grid population counts
    pop_c = np.zeros((len(des_lat), len(des_lon)), dtype = float)
    
    # define the longitude and latitude bounds of the desintation grid cell
    for col, lon_bnd in enumerate(des_grd.lon_b.values[0 :-1]):
        lon_upp_bnd, lon_low_bnd = lon_bnd + 1., lon_bnd
    
        for row, lat_bnd in enumerate(des_grd.lat_b.values[0 : -1]):
            lat_upp_bnd, lat_low_bnd = lat_bnd, lat_bnd - 1.
        
            # slice DataArray object by the longitude and latitude bounds so that only cells within remain
            x = data.sel(lon = slice(lon_low_bnd, lon_upp_bnd), lat = slice(lat_upp_bnd, lat_low_bnd))
            
            # add summed source cell data to corresponding destination cell in numpy array
            pop_c[row, col] = x.sum()
            
            # clean up objects for next iteration
            del(x, lat_upp_bnd, lat_low_bnd)
            
    # clean up objects for next iteration
    del(lon_upp_bnd, lon_low_bnd)
            
    # create DataArray object from summation array
    da = xr.DataArray(data = pop_c, coords = {'lat': des_lat, 'lon': des_lon}, dims = ['lat', 'lon'])  
            
    # add final summation numpy object to destination grid Dataset object
    des_grd = des_grd.assign({f'{variable}': da})
    
    # clean up objects used for next iteration
    da.close(), data.close()
    del(pop_c)

# add attribute information to Dataset object
des_grd.attrs['title'] = 'spatial mean population counts'
des_grd.attrs['resolution'] = f'lonxlat: {des_res}x{des_res} degrees'
des_grd.attrs['source'], des_grd.attrs['regridded'] = 'NCAR-CIDR', 'True'
des_grd.attrs['regridding_method'], des_grd.attrs['created_on'] = 'summation of cells', time.ctime()

# save Dataset object
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/' + \
          f'ncar_pop_variables_{int(des_res)}x{int(des_res)}_res.nc'
des_grd.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# close Dataset object
des_grd.close(), ncar_data.close()

### NCAR-CIDR Population Projection Anomalies

As the NCAR-CIDR population count projections do not match the resoltuion of the source HYDE 3.2 projections, computing the anomalies relative to both the pre-industrial (1851-1900), and current periods (1981-2010) will be computed with both datasets in a 1.0 degree longitude-latitude grid.

#### Metacode:

1. Load NCAR-CIDR and HYDE 3.2 population projections in the 1.0 degree grid resolution.
2. Compute the HYDE 3.2 baselines
3. Compute the SSP anomalies by negating the pre-industrial and current baselines from the NCAR-CIDR data.
4. Save resulting Dataset as a new file; do not overwrite the original file 1 degree resolution file.

In [32]:
# load in NCAR-CIDR and HYDE 3.2 1.0 x 1.0 resolution population projections
ncar_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/ncar_pop_variables_1x1_res.nc'
hyde_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/hyde3_2_pop_variables_1x1_res.nc'
ncar_data, hyde_data = xr.open_dataset(ncar_pw), xr.open_dataset(hyde_pw)

# create a copy of the NCAR Dataset object to add anomaly DataArray objects to
new_ds = ncar_data.copy()
ncar_data.close()

# compute the pre-industrial and current anomalies for each ssp scenario
scenarios = ['ssp1', 'ssp2', 'ssp3', 'ssp5']
for ssp in scenarios:
    x = ssp + '_mean_'
    new_ds[f'{x}totalc_anom_cur'] = new_ds[f'{x}totalc'] - hyde_data.current_mean_popc
    new_ds[f'{x}urbanc_anom_cur'] = new_ds[f'{x}urbanc'] - hyde_data.current_mean_urbc
    new_ds[f'{x}ruralc_anom_cur'] = new_ds[f'{x}ruralc'] - hyde_data.current_mean_rurc
    new_ds[f'{x}totalc_anom_preind'] = new_ds[f'{x}totalc'] - hyde_data.preind_mean_popc
    new_ds[f'{x}urbanc_anom_preind'] = new_ds[f'{x}urbanc'] - hyde_data.preind_mean_urbc
    new_ds[f'{x}ruralc_anom_preind'] = new_ds[f'{x}ruralc'] - hyde_data.preind_mean_rurc 

# update Dataset attributed information
new_ds.attrs['preind_baseline'], new_ds.attrs['current_baseline'] = '1851-1900', '1981-2010'
new_ds.attrs['baseline_source'] = 'HYDE 3.2'
    
# save resulting Dataset object
save_pw = ncar_pw[0: -3] + '_with_anom.nc'
new_ds.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# close Dataset object
new_ds.close(), hyde_data.close()

File saved: ncar_pop_variables_1x1_res_with_anom.nc


(None, None)

### Absolute Population Count and Pre-Industrial : Regional Aggregated Results

Important to note this is a summation method and not a weighted mean.

In [25]:
# load in spatial mean population counts of both absolute and pre-industiral anomaly
ncar_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/ncar_pop_variables_1x1_res_with_anom.nc'
hyde_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/hyde3_2_pop_variables_1x1_res.nc'
ncar_ds, hyde_ds = xr.open_dataset(ncar_pw), xr.open_dataset(hyde_pw)

# extract only absolute and pre-industrial anomaly DataArray objects and store in a combined DataArray
pop_ds = xr.Dataset() # create Dataset object to store both datasets in
for ssp in ['ssp1', 'ssp2', 'ssp3', 'ssp5']:
    pop_ds[f'{ssp}_mean_totalc'] = ncar_ds[f'{ssp}_mean_totalc']
    pop_ds[f'{ssp}_mean_totalc_preind_anom'] = ncar_ds[f'{ssp}_mean_totalc_anom_preind']
pop_ds['preind_mean_totalc'] = hyde_ds['preind_mean_popc']
pop_ds['current_mean_totalc'] = hyde_ds['current_mean_popc']
pop_ds['current_mean_totalc_preind_anom'] = hyde_ds['current_mean_popc_anom']
ncar_ds.close(), hyde_ds.close()

# define an AR6 land region mask for common 1x1 degree resolution data has been regridded to 
lon, lat = np.arange(-179.5, 180), np.arange(-89.5, 90)
mask = regionmask.defined_regions.ar6.land.mask(lon, lat) # grid cells encoded with AR6 region number

# define AR6 land regions to sum over; must be regionmask objects
regions = regionmask.defined_regions.ar6.land

# create DataFrame object to store regional aggregated variables
reg_df = pd.DataFrame()

# define column headers
col_names = ['land']
col_names = col_names + regions.abbrevs[: -2]

# add column headers to DataDrame object
reg_df = reg_df.reindex(columns = col_names)

# create a list of the various DataArray objects
data_vars = pop_ds.data_vars.values()

# loop over each DataArray object; the population count variables
for data_arr in data_vars:
          
    # add a row to DataFrame object for given heat exposure variable
    row_name = data_arr.name
    reg_df = reg_df.append(pd.Series(name = row_name, dtype = 'float64'))
    
    # loop over each AR6 land region; last two are Antartica land regions so dissmiss 
    for region_id, region_abbrev in zip(regions.numbers[: -2], regions.abbrevs[: -2]):

        # apply region mask; sets all cells outside regions to nan
        region_data = data_arr.where(mask == region_id)
            
        # sum up all values within region; populate DataFrame object
        col_name = region_abbrev 
        reg_df[col_name][row_name] = region_data.sum(skipna = True)
        
    # compute the global sum of the population count variable
    col_name = 'land'
    global_land_data = data_arr.where(mask.fillna(-999) != -999) # sets all non-land cells to nan
    reg_df[col_name][row_name] =  global_land_data.sum(skipna = True)
    
    # close open Dataset and/or DataArray objects
    data_arr.close(), global_land_data.close()
    
# save DataFrame object using pickle; deconstructs and reconstucts data to save space
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/regional_total_pop_counts.pickle'
with open(save_pw, 'wb') as f:
    pickle.dump(reg_df, f) 
print(f'File saved: {save_pw.split("/")[-1]}')

# close open Dataset and/or DataArray objects
pop_ds.close()

reg_df

File saved: regional_total_pop_counts.pickle


Unnamed: 0,land,GIC,NWN,NEN,WNA,CNA,ENA,NCA,SCA,CAR,...,TIB,EAS,ARP,SAS,SEA,NAU,CAU,EAU,SAU,NZ
ssp1_mean_totalc,7597624000.0,633178.25,8193486.0,1241266.0,78842020.0,83561240.0,308571500.0,112792600.0,83870960.0,31479040.0,...,135548400.0,963586400.0,130306200.0,1466203000.0,531088700.0,2065800.0,256812.0,19764160.0,21039540.0,6570432.0
ssp1_mean_totalc_preind_anom,6186003000.0,546802.398898,7980712.0,1137629.0,77377500.0,73962960.0,269167900.0,104962500.0,77604460.0,26440140.0,...,105767100.0,540327400.0,124479100.0,1194688000.0,469747900.0,1891095.0,215223.269703,19008670.0,19747190.0,6052179.0
ssp2_mean_totalc,9247088000.0,618557.25,8000941.0,1205151.0,76768760.0,81169610.0,300286900.0,134265500.0,112556600.0,37818920.0,...,181053200.0,1044284000.0,173571400.0,1883793000.0,641483500.0,2281752.0,257881.25,19806340.0,21086600.0,6479892.0
ssp2_mean_totalc_preind_anom,7835467000.0,532181.398898,7788166.0,1101514.0,75304240.0,71571330.0,260883300.0,126435400.0,106290100.0,32780030.0,...,151271900.0,621025200.0,167744300.0,1612278000.0,580142700.0,2107047.0,216292.519703,19050840.0,19794250.0,5961638.0
ssp3_mean_totalc,11860840000.0,425971.75,4638194.0,632319.5,49671590.0,51303560.0,191161500.0,170971100.0,178875400.0,57110080.0,...,273130000.0,1188758000.0,233645300.0,2692948000.0,830522000.0,2130870.0,151859.5,12027630.0,12670280.0,4308614.0
ssp3_mean_totalc_preind_anom,10449220000.0,339595.898898,4425420.0,528682.4,48207070.0,41705280.0,151758000.0,163141000.0,172608900.0,52071190.0,...,243348700.0,765498900.0,227818200.0,2421433000.0,769181200.0,1956166.0,110270.769703,11272140.0,11377930.0,3790361.0
ssp5_mean_totalc,7952031000.0,899636.75,12230700.0,1985265.0,109978600.0,119238000.0,436469400.0,118717300.0,73824780.0,26891080.0,...,133949400.0,1002685000.0,139615000.0,1448254000.0,522100100.0,2622708.0,412364.25,28942030.0,31063380.0,9116246.0
ssp5_mean_totalc_preind_anom,6540409000.0,813260.898898,12017930.0,1881628.0,108514100.0,109639700.0,397065900.0,110887200.0,67558280.0,21852190.0,...,104168100.0,579425700.0,133787900.0,1176738000.0,460759300.0,2448003.0,370775.519703,28186530.0,29771030.0,8597992.0
preind_mean_totalc,1411621000.0,86375.851102,212774.7,103637.1,1464524.0,9598287.0,39403540.0,7830114.0,6266506.0,5038894.0,...,29781340.0,423259000.0,5827080.0,271515100.0,61340810.0,174704.8,41588.730297,755496.8,1292348.0,518253.8
current_mean_totalc,5660680000.0,325887.736405,4491506.0,796397.3,42086330.0,49879620.0,180517300.0,82204800.0,61111910.0,34376430.0,...,103466500.0,1331027000.0,41635530.0,1064937000.0,441949800.0,1191031.0,199719.232511,8792398.0,9051850.0,3692203.0


### Table View of Regional Population Count

In [3]:
# load data weighted regional means data
pc_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/regional_total_pop_counts.pickle'
unpickle = open(pc_pw, 'rb')
pc_df = pickle.load(unpickle)

# create a DataFrame object with Regions and periods column
col_names = ['region', 'pre_ind', 'current', 'ssp1', 'ssp2', 'ssp3', 'ssp5', 
             'current_anom', 'ssp1_anom', 'ssp2_anom', 'ssp3_anom', 'ssp5_anom',
             'current_pct', 'ssp1_pct', 'ssp2_pct', 'ssp3_pct', 'ssp5_pct']
df = pd.DataFrame()
df = df.reindex(columns = col_names)

# define region names; excluding the two Antartica land regions
region_names = ['land'] + regionmask.defined_regions.ar6.land.abbrevs[: -2]

# extract absolute and percentage increase for each region
for i, region in enumerate(region_names):
    
    # restrict population DataFrame object to given region
    reg_data = pc_df[f'{region}']
    
    # extract absolute values
    pre_ind = reg_data['preind_mean_totalc']
    current = reg_data['current_mean_totalc']
    ssp1 = reg_data['ssp1_mean_totalc']
    ssp2 = reg_data['ssp2_mean_totalc']
    ssp3 = reg_data['ssp3_mean_totalc']
    ssp5 = reg_data['ssp5_mean_totalc']
    
    # extract pre-industrial anomaly values
    current_anom = reg_data['current_mean_totalc_preind_anom']
    ssp1_anom = reg_data['ssp1_mean_totalc_preind_anom']
    ssp2_anom = reg_data['ssp2_mean_totalc_preind_anom']
    ssp3_anom = reg_data['ssp3_mean_totalc_preind_anom']
    ssp5_anom = reg_data['ssp5_mean_totalc_preind_anom']
    
    # calculate percentage change of anomaly relative to pre-industrial value
    current_pct = (current_anom / pre_ind) * 100
    ssp1_pct = (ssp1_anom / pre_ind) * 100 
    ssp2_pct = (ssp2_anom / pre_ind) * 100
    ssp3_pct = (ssp3_anom / pre_ind) * 100
    ssp5_pct = (ssp5_anom / pre_ind) * 100
        
    # from Dictionary object holding a given region's data; quote values as multiples of a million
    latex_bit = '\multirow{2}{*}{'
    data = {'region': latex_bit + f'{region}' + '}',
            'pre_ind': latex_bit + f'{round(pre_ind / 1e6, 2)}' + '}',
            'current': latex_bit + f'{round(current / 1e6, 2)}' + '}',
            'current_anom': round(current_anom / 1e6, 2),
            'ssp1': latex_bit + f'{round(ssp1 / 1e6, 2)}' + '}', 
            'ssp1_anom': round(ssp1_anom / 1e6, 2),
            'ssp2': latex_bit + f'{round(ssp2 / 1e6, 2)}' + '}',
            'ssp2_anom': round(ssp2_anom / 1e6, 2),
            'ssp3': latex_bit + f'{round(ssp3 / 1e6, 2)}' +'}',
            'ssp3_anom': round(ssp3_anom / 1e6, 2),
            'ssp5': latex_bit + f'{round(ssp5 / 1e6, 2)}' + '}',
            'ssp5_anom': round(ssp5_anom / 1e6, 2),
            'current_pct': f'({round(current_pct, 1)}\%)',
            'ssp1_pct': f'({round(ssp1_pct, 1)}\%)', 'ssp2_pct': f'({round(ssp2_pct, 1)}\%)',
            'ssp3_pct': f'({round(ssp3_pct, 1)}\%)', 'ssp5_pct': f'({round(ssp5_pct, 1)}\%)'}
    
    # add data as new entry to DataFrame object
    df = df.append(data, ignore_index = True, sort = False)

df

Unnamed: 0,region,pre_ind,current,ssp1,ssp2,ssp3,ssp5,current_anom,ssp1_anom,ssp2_anom,ssp3_anom,ssp5_anom,current_pct,ssp1_pct,ssp2_pct,ssp3_pct,ssp5_pct
0,\multirow{2}{*}{land},\multirow{2}{*}{1411.62},\multirow{2}{*}{5660.68},\multirow{2}{*}{7597.62},\multirow{2}{*}{9247.09},\multirow{2}{*}{11860.84},\multirow{2}{*}{7952.03},4249.06,6186.0,7835.47,10449.22,6540.41,(301.0\%),(438.2\%),(555.1\%),(740.2\%),(463.3\%)
1,\multirow{2}{*}{GIC},\multirow{2}{*}{0.09},\multirow{2}{*}{0.33},\multirow{2}{*}{0.63},\multirow{2}{*}{0.62},\multirow{2}{*}{0.43},\multirow{2}{*}{0.9},0.24,0.55,0.53,0.34,0.81,(277.3\%),(633.1\%),(616.1\%),(393.2\%),(941.5\%)
2,\multirow{2}{*}{NWN},\multirow{2}{*}{0.21},\multirow{2}{*}{4.49},\multirow{2}{*}{8.19},\multirow{2}{*}{8.0},\multirow{2}{*}{4.64},\multirow{2}{*}{12.23},4.28,7.98,7.79,4.43,12.02,(2010.9\%),(3750.8\%),(3660.3\%),(2079.9\%),(5648.2\%)
3,\multirow{2}{*}{NEN},\multirow{2}{*}{0.1},\multirow{2}{*}{0.8},\multirow{2}{*}{1.24},\multirow{2}{*}{1.21},\multirow{2}{*}{0.63},\multirow{2}{*}{1.99},0.69,1.14,1.1,0.53,1.88,(668.4\%),(1097.7\%),(1062.9\%),(510.1\%),(1815.6\%)
4,\multirow{2}{*}{WNA},\multirow{2}{*}{1.46},\multirow{2}{*}{42.09},\multirow{2}{*}{78.84},\multirow{2}{*}{76.77},\multirow{2}{*}{49.67},\multirow{2}{*}{109.98},40.62,77.38,75.3,48.21,108.51,(2773.7\%),(5283.5\%),(5141.9\%),(3291.7\%),(7409.5\%)
5,\multirow{2}{*}{CNA},\multirow{2}{*}{9.6},\multirow{2}{*}{49.88},\multirow{2}{*}{83.56},\multirow{2}{*}{81.17},\multirow{2}{*}{51.3},\multirow{2}{*}{119.24},40.28,73.96,71.57,41.71,109.64,(419.7\%),(770.6\%),(745.7\%),(434.5\%),(1142.3\%)
6,\multirow{2}{*}{ENA},\multirow{2}{*}{39.4},\multirow{2}{*}{180.52},\multirow{2}{*}{308.57},\multirow{2}{*}{300.29},\multirow{2}{*}{191.16},\multirow{2}{*}{436.47},141.11,269.17,260.88,151.76,397.07,(358.1\%),(683.1\%),(662.1\%),(385.1\%),(1007.7\%)
7,\multirow{2}{*}{NCA},\multirow{2}{*}{7.83},\multirow{2}{*}{82.2},\multirow{2}{*}{112.79},\multirow{2}{*}{134.27},\multirow{2}{*}{170.97},\multirow{2}{*}{118.72},74.37,104.96,126.44,163.14,110.89,(949.9\%),(1340.5\%),(1614.7\%),(2083.5\%),(1416.2\%)
8,\multirow{2}{*}{SCA},\multirow{2}{*}{6.27},\multirow{2}{*}{61.11},\multirow{2}{*}{83.87},\multirow{2}{*}{112.56},\multirow{2}{*}{178.88},\multirow{2}{*}{73.82},54.85,77.6,106.29,172.61,67.56,(875.2\%),(1238.4\%),(1696.2\%),(2754.5\%),(1078.1\%)
9,\multirow{2}{*}{CAR},\multirow{2}{*}{5.04},\multirow{2}{*}{34.38},\multirow{2}{*}{31.48},\multirow{2}{*}{37.82},\multirow{2}{*}{57.11},\multirow{2}{*}{26.89},29.34,26.44,32.78,52.07,21.85,(582.2\%),(524.7\%),(650.5\%),(1033.4\%),(433.7\%)


In [4]:
df.iloc[[41, 16]]

Unnamed: 0,region,pre_ind,current,ssp1,ssp2,ssp3,ssp5,current_anom,ssp1_anom,ssp2_anom,ssp3_anom,ssp5_anom,current_pct,ssp1_pct,ssp2_pct,ssp3_pct,ssp5_pct
41,\multirow{2}{*}{CAU},\multirow{2}{*}{0.04},\multirow{2}{*}{0.2},\multirow{2}{*}{0.26},\multirow{2}{*}{0.26},\multirow{2}{*}{0.15},\multirow{2}{*}{0.41},0.16,0.22,0.22,0.11,0.37,(380.2\%),(517.5\%),(520.1\%),(265.1\%),(891.5\%)
16,\multirow{2}{*}{SSA},\multirow{2}{*}{0.02},\multirow{2}{*}{0.91},\multirow{2}{*}{0.55},\multirow{2}{*}{0.69},\multirow{2}{*}{0.97},\multirow{2}{*}{0.54},0.89,0.53,0.67,0.96,0.52,(5690.3\%),(3398.2\%),(4290.9\%),(6105.2\%),(3338.2\%)


In [74]:
regions = regionmask.defined_regions.ar6.land
regions['CNA']

Region: C.North-America (CNA / 4)
center: [-96.96601942  39.5433657 ]

### NCAR-CIDR Population Density

As the NCAR-CIDR population projections do not contain a direct density projection, this will be computed indirectly using the total population count projection and the area (km$^2$) of each grid cell. Both these variables will be in a 1.0 x 1.0 degree resolution.

Metacode:

1. Obtain the area in km$^2$ of each grid cell for a 1.0 x 1.0 degree resolution global grid.
3. Load the 1.0 x 1.0 resolution NCAR-CIDR population data.
4. Divide the total population count for each cell by its area.
8. Repeat above for each SSP scenario in the NCAR-CIDR data.
9. Save modified Dataset as a new file; do not overwrite original file. 

#### Surface Area of Cell

In [9]:
# compute cell size data for 1.0 x 1.0 degree resolution grid; interpolate the 1.975 x 0.625 UKESM1 grid
ukesm1_pw = '/data/CMIP/CMIP6/CMIP/MOHC/UKESM1-0-LL/piControl/r1i1p1f2/fx/areacella/gn/v20190705' + \
            '/areacella_fx_UKESM1-0-LL_piControl_r1i1p1f2_gn.nc'
x = xr.open_dataset(ukesm1_pw, decode_times = False)
grd_in = x.areacella * 0.000001
x.close()

# create an empty Dataset object of the desired grid resolution; 1.0 x 1.0 longitude-latitude in this case
des_res = 1.0
grd_out_lon = np.arange(-180 + (des_res / 2), 180 + (des_res / 2), des_res)
grd_out_lat = np.arange(-90 + (des_res / 2), 90 + (des_res / 2), des_res) # avoid centre points on poles

# define grid_out Dataset object
grd_out = xr.Dataset({'lon': (['lon'], grd_out_lon),
                      'lat': (['lat'], grd_out_lat)})

# compute the regridder file containing the weights to apply
regridder = xe.Regridder(grd_in, grd_out,
                         method = 'bilinear', # interpolation method
                         periodic = True) # required for global girds; prevents blank data on meridian

# apply the weighting matrix to transform the data to the new resolution
rg_data = regridder(grd_in)
    
# clear regridder file from being saved
regridder.clean_weight_file()

# define some atrributes for the new resolution Dataset object
rg_data.attrs['long_title'] = 'spatial cell surface area'
rg_data.attrs['original_data_from'], rg_data.attrs['created_on'] = 'UKESM1 grid cell area', time.ctime()
rg_data.attrs['resolution'], rg_data.attrs['regridded'] = f'lonxlat: {des_res}x{des_res} degrees', 'True'
   
# save regridded Dataset object
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/area_cell_1x1_res.nc'
rg_data.to_netcdf(save_pw, mode = 'w')
print(f'File saved: {save_pw.rsplit("/")[-1]}')
    
# close remaining open Datasets
rg_data.close(), grd_in.close(), grd_out.close()

Create weight file: bilinear_144x192_180x360_peri.nc
Remove file bilinear_144x192_180x360_peri.nc
File saved: area_cell_1x1_res.nc


(None, None, None)

#### NCAR-CIDR Population Density Computation

In [44]:
# load in NCAR-CIDR projection and cell surface area data in 1x1 degree longitude-latitude resolution
ncar_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/ncar_pop_variables_1x1_res_with_anom.nc'
area_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/area_cell_1x1_res.nc'
ncar_ds, area_ds = xr.open_dataset(ncar_pw), xr.open_dataset(area_pw).areacella

# create a Dataset object to populated with population density projections
pd_ds = xr.Dataset(coords = {'lon': ncar_ds.lon, 'lat': ncar_ds.lat})
pd_ds.attrs['long_title'], pd_ds.attrs['source'] = 'spatial mean population density', 'NCAR-CIDR'
pd_ds.attrs['resolution'], pd_ds.attrs['created_on'] = 'lon-lat: 1.0x1.0 degrees', time.ctime()

# compute the total, rural, and urban population density for each SSP scenario
scenarios = ['ssp1', 'ssp2', 'ssp3', 'ssp5']
for ssp in scenarios:
    x = ssp + '_mean_'
    pd_ds[f'{x}total_pd'] = ncar_ds[f'{x}totalc'] / area_ds
    pd_ds[f'{x}urbanc_pd'] = ncar_ds[f'{x}urbanc'] / area_ds
    pd_ds[f'{x}ruralc_pd'] = ncar_ds[f'{x}ruralc'] / area_ds
    
# save resulting Dataset object
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/Population/processed/ncar_pop_density_1x1_res.nc'
pd_ds.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# close Dataset object
pd_ds.close(), ncar_ds.close(), area_ds.close()

File saved: ncar_pop_density_1x1_res.nc


(None, None, None)