#### **Find the emission pathways that caused the change in air quality over 2015-2017 in China**
- Per gridcell (emulator).  
- Remove the inputs that don't predict the correct change.  

##### **Steps**

*Calculated on HPC using `find_emissions_that_caused_air_quality_change.py` and `find_emissions_that_caused_air_quality_change.bash`.*

1. Load observations for 1 station.  
2. Find the change in measured PM2.5 (annual-mean) and O3 (6mDM8h) concentrations for this location over 2015-2017.  
3. Filter through predictions of all emission configurations for this location. 
4. Keep emission configurations where the prediction matches (within 1%) the measured change in PM2.5/O3 concentrations.  

*Calculated in this notebook.*

5. Split by region.  
6. Compare to bottom-up estimates.  

___

In [None]:
import glob
import tabula
import joblib
from itertools import islice
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
import re
import itertools
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import gridspec
# Global matplotlib style applied to every figure drawn after this cell.
params = {
    # Given as a single string rather than a list of strings: list-valued
    # LaTeX preambles were deprecated by Matplotlib, while a plain string is
    # accepted by both older and newer releases.
    'text.latex.preamble': '\\usepackage{gensymb}',
    'axes.grid': False,
    'savefig.dpi': 700,
    'font.size': 12,
    'text.usetex': True,
    'figure.figsize': [5, 5],
    'font.family': 'serif',
}
matplotlib.rcParams.update(params)

___

In [None]:
### bottom up emissions 2015-2017 - Zheng et al., 2018 ACP
# Read the table on page 7 of the PDF, drop the spurious columns produced by
# the extraction and give the remaining columns short names.
df = tabula.read_pdf('/nfs/b0122/Users/earlacoa/paper_aia_china/emulator_annual/zheng2018.pdf', pages=7)
df.drop(columns=['Unnamed: 2', 'Unnamed: 4', 'Unnamed: 9', 'c'], inplace=True)
df.columns = ['sector', 'so2', 'nox', 'nmvoc', 'nh3', 'co', 'tsp', 'pm10', 'pm25', 'bc', 'oc', 'co2']

# Each year occupies 7 consecutive rows of the table, starting with 2010.
rows_per_year = 7
tables_by_year = {}
for position, year in enumerate(range(2010, 2018)):
    first_row = position * rows_per_year
    yearly_table = df.iloc[first_row:first_row + rows_per_year].copy()
    yearly_table.set_index('sector', inplace=True)
    tables_by_year[year] = yearly_table.astype('float32').copy()

df_2010 = tables_by_year[2010]
df_2011 = tables_by_year[2011]
df_2012 = tables_by_year[2012]
df_2013 = tables_by_year[2013]
df_2014 = tables_by_year[2014]
df_2015 = tables_by_year[2015]
df_2016 = tables_by_year[2016]
df_2017 = tables_by_year[2017]

# Percentage change in emissions from 2015 to 2017, per sector and species.
df_diff = ((100 * df_2017 / df_2015) - 100).copy()
# Remove the rows labelled with the years themselves.
df_diff.drop(['2015', '2017'], inplace=True)
df_diff

___

In [None]:
# NetCDF observation files (presumably one per station; the station id is
# derived from each file further down).
obs_files = glob.glob('/nfs/a68/earlacoa/china_measurements_corrected/*.nc')

# Observation table, indexed by its parsed 'datetime' column.
df_obs = pd.read_csv(
    '/nfs/a68/earlacoa/china_measurements_corrected/df_obs_o3_6mDM8h_ppb_PM2_5_DRY.csv',
    parse_dates=True,
    index_col='datetime',
)
df_obs

In [None]:
outputs = ['o3_6mDM8h_ppb', 'PM2_5_DRY']

path_predictions = '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator_annual/predictions'

# Scaling factors tried for each of the five emission sectors.
#   np.linspace(0.2, 1.3, 12) for 10% intervals
#   np.linspace(0.3, 1.3, 6) for 20% intervals
#   removing edges of parameter space 0.0, 0.1, 1.4, 1.5
sector_levels = [np.linspace(0.3, 1.3, 6) for _ in range(5)]

# Every combination of the five factors, one combination per row (n_rows x 5).
matrix_stacked = np.array(np.meshgrid(*sector_levels)).T.reshape(-1, 5)

# Result containers, keyed by '<station_id>_<output>' once filled.
obs_change_abs, obs_change_per = {}, {}
baselines, targets = {}, {}
station_diffs_abs, station_diffs_per = {}, {}

In [None]:
import os

# For every output and station: measure the observed 2015 -> 2017 change,
# shift the emulator baseline by that change to get a target concentration,
# then keep every emission configuration whose prediction is within 1% of it.

# The configuration labels depend only on the emission matrix, so they are
# formatted once instead of once per station and output.
configuration_names = [
    f'RES{res:.1f}_IND{ind:.1f}_TRA{tra:.1f}_AGR{agr:.1f}_ENE{ene:.1f}'
    for res, ind, tra, agr, ene in matrix_stacked
]

for output in outputs:
    # Prediction files use 'o3_6mDM8h' where the observation table uses 'o3_6mDM8h_ppb'.
    emulator_output = 'o3_6mDM8h' if output == 'o3_6mDM8h_ppb' else output
    prediction_dir = f'{path_predictions}/{emulator_output}'

    for obs_file in obs_files:
        # Station id = file name without directory and extension. Derived from
        # the path itself rather than a fixed character offset, so it keeps
        # working if the observation directory changes.
        station_id = os.path.splitext(os.path.basename(obs_file))[0]
        station_key = f'{station_id}_{output}'

        # Select this station's rows once and reuse them below.
        station_obs = df_obs.loc[df_obs.station_id == station_id]
        lat = station_obs.station_lat.unique()[0]
        lon = station_obs.station_lon.unique()[0]

        # First value found in each calendar year (partial-string indexing on the datetime index).
        value_2015 = station_obs[output].loc['2015'].values[0]
        value_2017 = station_obs[output].loc['2017'].values[0]

        change_per = 100 * ((value_2017 / value_2015) - 1)
        change_abs = value_2017 - value_2015

        obs_change_abs[station_key] = change_abs
        obs_change_per[station_key] = change_per

        # Baseline = prediction with every sector at a scaling of 1.0, at the grid cell nearest the station.
        with xr.open_dataset(
            f'{prediction_dir}/ds_RES1.0_IND1.0_TRA1.0_AGR1.0_ENE1.0_{emulator_output}_popgrid_0.25deg.nc'
        )[emulator_output] as ds:
            baseline = ds.sel(lat=lat, method='nearest').sel(lon=lon, method='nearest').values

        baselines[station_key] = baseline

        # The target is the average of applying the observed change absolutely and relatively.
        target_abs = baseline + change_abs
        target_per = baseline * (1 + (change_per / 100))
        target = np.mean([target_abs, target_per])
        targets[station_key] = target

        target_diffs_abs = {}
        target_diffs_per = {}

        for filename in configuration_names:
            with xr.open_dataset(
                f'{prediction_dir}/ds_{filename}_{emulator_output}_popgrid_0.25deg.nc'
            )[emulator_output] as ds:
                prediction = ds.sel(lat=lat, method='nearest').sel(lon=lon, method='nearest').values

            target_diff_abs = target - prediction
            target_diff_per = (100 * (prediction / target)) - 100

            if abs(target_diff_per) < 1: # +/- 1% of target
                target_diffs_abs[filename] = target_diff_abs
                target_diffs_per[filename] = target_diff_per

        station_diffs_abs[station_key] = target_diffs_abs
        station_diffs_per[station_key] = target_diffs_per

In [None]:
# for key in [key for key in targets.keys()]:
#     if np.isnan(targets[key]):
#         del targets[key]

In [None]:
# Change needed at each station/output: target concentration minus baseline.
target_diffs = {
    station_key: target - baselines[station_key]
    for station_key, target in targets.items()
}

In [None]:
# for key in list(set([key for key in emulators.keys()]) - set([key for key in targets.keys()])):
#     del emulators[key]

In [None]:
# Count, for every emission configuration, how many station/output entries it matched.
keys = [list(matched) for matched in station_diffs_per.values()]
keys_flatten = list(itertools.chain.from_iterable(keys))
keys_unique = {}
for key in keys_flatten:
    keys_unique[key] = keys_unique.get(key, 0) + 1

In [None]:
path = '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator_annual/find_emissions_that_match_change_air_quality/2015-2017_20percentintervals'

# File stem -> object to persist; written in this order.
objects_to_save = {
    'targets': targets,
    'baselines': baselines,
    'target_diffs': target_diffs,
    'obs_change_abs': obs_change_abs,
    'obs_change_per': obs_change_per,
    'keys_unique': keys_unique,
    'station_diffs_per_1percent': station_diffs_per,
    'station_diffs_abs_1percent': station_diffs_abs,
}
for file_stem, saved_object in objects_to_save.items():
    joblib.dump(saved_object, f'{path}/{file_stem}.joblib')

___

In [None]:
path = '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator_annual/find_emissions_that_match_change_air_quality'

# One alphabetically sorted file list per kind of result.
(
    files_targets,
    files_baselines,
    files_target_diffs,
    files_station_diffs_per,
    files_station_diffs_abs,
) = (
    sorted(glob.glob(f'{path}/2015-2017_20percentintervals/{file_prefix}*.joblib'))
    for file_prefix in ('targets', 'baselines', 'target_diffs', 'station_diffs_per', 'station_diffs_abs')
)

In [None]:
# Merge the per-station result files into one dictionary per kind of result,
# keyed by '<station_id>_<output>'.
targets = {}
baselines = {}
target_diffs = {}
station_diffs_per = {}
station_diffs_abs = {}


def _per_station_files(files):
    """Keep only the files whose path names one of the two pollutants.

    The globs that build the file lists also match the merged files written
    into the same directory (e.g. 'targets.joblib'); those carry no pollutant
    name and must not be treated as a station.
    """
    return [name for name in files if 'PM2_5_DRY' in name or 'o3_6mDM8h' in name]


def _first_value(single_entry):
    """Return the value stored under the first key of a loaded dictionary."""
    return single_entry[next(iter(single_entry))]


file_groups = [
    _per_station_files(files)
    for files in (files_targets, files_baselines, files_target_diffs, files_station_diffs_per, files_station_diffs_abs)
]

# The lists are paired position by position, which is only valid when they have equal length.
if len({len(group) for group in file_groups}) != 1:
    raise ValueError(
        'Per-station file lists have different lengths: '
        f'{[len(group) for group in file_groups]}'
    )

for file_target, file_baseline, file_target_diff, file_diff_per, file_diff_abs in zip(*file_groups):
    # After filtering, every file names exactly one of the two pollutants.
    output = 'PM2_5_DRY' if 'PM2_5_DRY' in file_target_diff else 'o3_6mDM8h'

    # The five characters in front of '.joblib' are taken as the station id.
    station_id = file_target_diff[-12:-7]
    station_key = f'{station_id}_{output}'

    targets[station_key] = _first_value(joblib.load(file_target))
    baselines[station_key] = _first_value(joblib.load(file_baseline))
    target_diffs[station_key] = _first_value(joblib.load(file_target_diff))
    station_diffs_per[station_key] = _first_value(joblib.load(file_diff_per))
    station_diffs_abs[station_key] = _first_value(joblib.load(file_diff_abs))

In [None]:
#targets
#baselines
#target_diffs
#station_diffs_per
#station_diffs_abs

In [None]:
# Persist the merged dictionaries next to the per-station files.
for file_stem, merged in (
    ('targets', targets),
    ('baselines', baselines),
    ('target_diffs', target_diffs),
    ('station_diffs_per', station_diffs_per),
    ('station_diffs_abs', station_diffs_abs),
):
    joblib.dump(merged, f'{path}/2015-2017_20percentintervals/{file_stem}.joblib')

___

In [None]:
path = '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator_annual/find_emissions_that_match_change_air_quality'

# Reload the merged dictionaries saved for the 2015-2017, 20% interval run.
(
    targets,
    baselines,
    target_diffs,
    station_diffs_per,
    station_diffs_abs,
) = (
    joblib.load(f'{path}/2015-2017_20percentintervals/{file_stem}.joblib')
    for file_stem in ('targets', 'baselines', 'target_diffs', 'station_diffs_per', 'station_diffs_abs')
)

In [None]:
# Provinces grouped by region; inverted below into a province -> region lookup.
_provinces_by_region = {
    'North China': ['Beijing', 'Tianjin', 'Hebei', 'Shanxi', 'Nei Mongol'],
    'North East China': ['Liaoning', 'Jilin', 'Heilongjiang'],
    'East China': ['Shanghai', 'Jiangsu', 'Zhejiang', 'Anhui', 'Fujian', 'Jiangxi', 'Shandong', 'Taiwan'],
    'South Central China': ['Henan', 'Hubei', 'Hunan', 'Guangdong', 'Guangxi', 'Hainan', 'Hong Kong', 'Macao'],
    'South West China': ['Chongqing', 'Sichuan', 'Guizhou', 'Yunnan', 'Xizang'],
    'North West China': ['Shaanxi', 'Gansu', 'Qinghai', 'Ningxia Hui', 'Xinjiang Uygur'],
}

regional_provinces = {}
for _region_name, _province_names in _provinces_by_region.items():
    for _province_name in _province_names:
        regional_provinces[_province_name] = _region_name

# Prefectures (plus Hong Kong and Macao) treated as the GBA region.
gba_prefectures = [
    'Dongguan',
    'Foshan',
    'Guangzhou',
    'Huizhou',
    'Jiangmen',
    'Shenzhen',
    'Zhaoqing',
    'Zhongshan',
    'Zhuhai',
    'Hong Kong',
    'Macao',
]

In [None]:
gdf_prefectures_china = gpd.read_file('/nfs/a68/earlacoa/shapefiles/china/gadm36_CHN_3.shp')
gdf_prefectures_hongkong = gpd.read_file('/nfs/a68/earlacoa/shapefiles/hongkong/gadm36_HKG_1.shp')
gdf_prefectures_macao = gpd.read_file('/nfs/a68/earlacoa/shapefiles/macao/gadm36_MAC_2.shp')

# Rows of the China shapefile whose NAME_2 matches each GBA prefecture,
# followed by the complete Hong Kong and Macao shapefiles.
list_prefectures_gba = [
    gdf_prefectures_china.loc[gdf_prefectures_china.NAME_2 == prefecture_name]
    for prefecture_name in gba_prefectures
]
list_prefectures_gba += [gdf_prefectures_hongkong, gdf_prefectures_macao]

gdf_prefectures_gba = pd.concat(list_prefectures_gba)

In [None]:
# Region -> names of the prefectures (NAME_2) it contains.
prefectures = {
    region_name: []
    for region_name in (
        'North China',
        'North East China',
        'East China',
        'South Central China',
        'South West China',
        'North West China',
    )
}
for prefecture in gdf_prefectures_china.NAME_2.unique():
    # A prefecture takes the region of the first province (NAME_1) listed for it.
    prefecture_rows = gdf_prefectures_china.loc[gdf_prefectures_china.NAME_2 == prefecture]
    province = prefecture_rows.NAME_1.unique()[0]
    prefectures[regional_provinces[province]].append(prefecture)

# Drop duplicate names within each region (the resulting order is arbitrary).
prefectures = {
    region_name: list(set(prefecture_names))
    for region_name, prefecture_names in prefectures.items()
}

In [None]:
# Inverse lookup: prefecture name -> region. A name present in several regions
# keeps the region that is processed last.
regional_prefectures = {}
for region_name, prefecture_names in prefectures.items():
    regional_prefectures.update(dict.fromkeys(set(prefecture_names), region_name))

In [None]:
# One shapefile per region of China.
(
    gdf_china_north,
    gdf_china_north_east,
    gdf_china_east,
    gdf_china_south_central,
    gdf_china_south_west,
    gdf_china_north_west,
) = (
    gpd.read_file(f'/nfs/a68/earlacoa/shapefiles/china/CHN_{region_suffix}.shp')
    for region_suffix in ('north', 'north_east', 'east', 'south_central', 'south_west', 'north_west')
)

In [None]:
from shapely.geometry import Point

In [None]:
import warnings

# Station metadata read from each observation file, keyed by station id.
obs_lats = {}
obs_lons = {}
obs_prefectures = {}
obs_regions = {}
obs_gba = {}

# Region shapes are tested in this order; the first one containing the station wins.
region_shapes = [
    ('North China', gdf_china_north),
    ('North East China', gdf_china_north_east),
    ('East China', gdf_china_east),
    ('South Central China', gdf_china_south_central),
    ('South West China', gdf_china_south_west),
    ('North West China', gdf_china_north_west),
]

for obs_file in obs_files:
    with xr.open_dataset(obs_file) as ds:
        station_id = ds.station
        if '.nc' in station_id:
            station_id = station_id[:-3]

        obs_lats[station_id] = ds.lat
        obs_lons[station_id] = ds.lon
        obs_prefectures[station_id] = ds.city
        obs_gba[station_id] = (ds.city in gba_prefectures) or (ds.city in ('Hong Kong', 'Macao'))

        # Build the station point once and test it against each region.
        station_point = gpd.GeoSeries(Point(ds.lon, ds.lat))
        region = None
        for region_name, region_shape in region_shapes:
            if station_point.within(region_shape).values[0]:
                region = region_name
                break

        if region is None:
            # Without this guard the station would silently inherit the region
            # of the previously processed station.
            warnings.warn(f'Station {station_id} lies outside every region shape; it is left out of obs_regions.')
            continue

        obs_regions[station_id] = region

In [None]:
# obs_lats = {}
# obs_lons = {}
# obs_prefectures = {}
# obs_regions = {}
# obs_gba = {}
  
# for obs_file in obs_files:
#     with xr.open_dataset(obs_file) as ds:
#         station_id = ds.station
#         if '.nc' in station_id:
#             station_id = station_id[:-3]
            
#         obs_lats.update({station_id: ds.lat})
#         obs_lons.update({station_id: ds.lon})
#         obs_prefectures.update({station_id: ds.city})
#         if ds.city == 'Lhasa Googut':
#             region = regional_prefectures['Lhasa']
#         elif ds.city == 'Huaian':
#             region = regional_prefectures["Huai'an"]
#         elif ds.city == 'Urumqi':
#             region = 'North West China'
#         elif ds.city == 'Ordos City':
#             region = regional_prefectures["Ordos"]
#         elif ds.city == 'Yingkou':
#             region = 'North East China'
#         elif ds.city == 'Sunlight':
#             region = 'East China'
#         elif ds.city == 'Iris':
#             region = 'South Central China'
#         elif ds.city == 'chicken':
#             region = 'South Central China'
#         elif ds.city == 'Mudan River':
#             region = 'North West China'
#         elif ds.city == 'Maanshan':
#             region = 'East China'
#         elif ds.city == 'North Sea':
#             region = 'South Central China'
#         elif ds.city == 'Yanan':
#             region = 'North China'
#         elif ds.city == 'Korla':
#             region = 'North West China'
#         elif ds.city == 'Shouguang':
#             region = 'East China'
#         elif ds.city == 'Zhang Qiu':
#             region = 'East China'
#         elif ds.city == 'Jimo':
#             region = 'East China'
#         elif ds.city == 'Jiaonan':
#             region = 'East China'
#         elif ds.city == 'Jiaozhou':
#             region = 'East China'
#         elif ds.city == 'Laixi':
#             region = 'East China'
#         elif ds.city == 'Pingdu':
#             region = 'East China'
#         elif ds.city == 'Penglai':
#             region = 'East China'
#         else:
#             region = regional_prefectures[ds.city]
            
#         obs_regions.update({station_id: region})
#         if (ds.city in gba_prefectures) or (ds.city == 'Hong Kong') or (ds.city == 'Macao'):
#             obs_gba.update({station_id: True})
#         else:
#             obs_gba.update({station_id: False})

In [None]:
regions = ['China', 'GBA', 'North China', 'North East China', 'East China', 'South Central China', 'South West China', 'North West China']

# Region -> ids of the stations it contains. Every station belongs to 'China',
# to its own region and, when flagged in obs_gba, to 'GBA'.
region_stations = {region_name: [] for region_name in regions}

for station_id, station_region in obs_regions.items():
    memberships = ['China', station_region]
    if obs_gba[station_id] == True:
        memberships.append('GBA')

    for membership in memberships:
        region_stations[membership].append(station_id)

In [None]:
def _restrict_to_stations(per_station, station_ids):
    """Keep the entries whose key starts with one of the given 5-character station ids."""
    return {key: value for key, value in per_station.items() if key[0:5] in station_ids}


# Region -> subset of each station-level dictionary for the stations in that region.
regional_targets = {
    region_name: _restrict_to_stations(targets, region_stations[region_name]) for region_name in regions
}
regional_baselines = {
    region_name: _restrict_to_stations(baselines, region_stations[region_name]) for region_name in regions
}
regional_target_diffs = {
    region_name: _restrict_to_stations(target_diffs, region_stations[region_name]) for region_name in regions
}
regional_station_diffs_per = {
    region_name: _restrict_to_stations(station_diffs_per, region_stations[region_name]) for region_name in regions
}
regional_station_diffs_abs = {
    region_name: _restrict_to_stations(station_diffs_abs, region_stations[region_name]) for region_name in regions
}

In [None]:
# For each region and pollutant ('combined' = both pollutants together):
#   * rank the emission configurations by how many station entries they matched;
#   * collect the station entries whose required change is smaller than 0.5.
outputs = ['PM2_5_DRY', 'o3_6mDM8h', 'combined']

regional_keys_unique_sorted = {}
regional_target_diffs_under0p5 = {}

for output in outputs:
    for region in regions:
        # 'combined' keeps every station entry; otherwise only those of this pollutant.
        keys = [
            list(matched)
            for stationid_output, matched in regional_station_diffs_per[region].items()
            if output == 'combined' or output in stationid_output
        ]
        keys_flatten = [item for sublist in keys for item in sublist]

        keys_unique = {}
        for key in keys_flatten:
            keys_unique[key] = keys_unique.get(key, 0) + 1

        # Most frequently matched configuration first; ties keep their first-seen order.
        keys_unique_sorted = dict(sorted(keys_unique.items(), key=lambda item: item[1], reverse=True))
        regional_keys_unique_sorted[f'{region}_{output}'] = keys_unique_sorted

        # Apply the same pollutant filter as above, so a per-pollutant entry
        # no longer includes station entries of the other pollutant.
        target_diffs_under0p5 = {
            key: value
            for key, value in regional_target_diffs[region].items()
            if (output == 'combined' or output in key) and abs(value) < 0.5
        }
        regional_target_diffs_under0p5[f'{region}_{output}'] = target_diffs_under0p5

In [None]:
# Summary columns for the regional table, each keyed by '<region>_<output>'.
number_of_stations = {}
number_of_emission_configurations = {}
first_emission_configuration_keys = {}
second_emission_configuration_keys = {}
third_emission_configuration_keys = {}
first_emission_configuration_values = {}
second_emission_configuration_values = {}
third_emission_configuration_values = {}
number_of_stations_with_target_diff_under_0p5 = {}

for output in outputs:
    for region in regional_targets:
        label = f'{region}_{output}'
        ranked_configurations = regional_keys_unique_sorted[label]

        # NOTE(review): this counts the station/output entries of every
        # pollutant in the region, not only those of `output` - confirm intended.
        number_of_stations[label] = len(regional_targets[region])
        number_of_emission_configurations[label] = len(ranked_configurations)

        top3_emission_configurations = list(islice(ranked_configurations.items(), 3))
        # A region may match fewer than three configurations (or none); pad
        # with empty entries instead of failing with an IndexError.
        top3_emission_configurations += [(None, None)] * (3 - len(top3_emission_configurations))
        (first_key, first_value), (second_key, second_value), (third_key, third_value) = top3_emission_configurations

        first_emission_configuration_keys[label] = first_key
        second_emission_configuration_keys[label] = second_key
        third_emission_configuration_keys[label] = third_key
        first_emission_configuration_values[label] = first_value
        second_emission_configuration_values[label] = second_value
        third_emission_configuration_values[label] = third_value
        number_of_stations_with_target_diff_under_0p5[label] = len(regional_target_diffs_under0p5[label])

In [None]:
# Regional summary table: one row per '<region>_<output>', one column per statistic.
summary_columns = [
    ('Stations', number_of_stations),
    ('Possible Emission Configurations', number_of_emission_configurations),
    ('First Emission Configuration - Key', first_emission_configuration_keys),
    ('First Emission Configuration - Value', first_emission_configuration_values),
    ('Second Emission Configuration - Key', second_emission_configuration_keys),
    ('Second Emission Configuration - Value', second_emission_configuration_values),
    ('Third Emission Configuration - Key', third_emission_configuration_keys),
    ('Third Emission Configuration - Value', third_emission_configuration_values),
    ('Stations with trend size under 0.5 ugm-3', number_of_stations_with_target_diff_under_0p5),
]
df_regions = pd.concat(
    [pd.Series(column_values, name=column_name) for column_name, column_values in summary_columns],
    axis=1,
)
df_regions

In [None]:
# Persist the summary table (CSV) and the ranked configurations (joblib).
df_regions.to_csv(path_or_buf=f'{path}/df_regions.csv')
joblib.dump(value=regional_keys_unique_sorted, filename=f'{path}/regional_keys_unique_sorted.joblib')

In [None]:
# Reload the summary table and the ranked configurations.
# index_col=0 turns the first CSV column (the row labels written by to_csv)
# back into the index instead of an 'Unnamed: 0' data column.
df_regions = pd.read_csv(f'{path}/df_regions.csv', index_col=0)
regional_keys_unique_sorted = joblib.load(f'{path}/regional_keys_unique_sorted.joblib')

In [None]:
def make_boxplot(index, values, sector, bottomup):
    """Draw one panel: a box plot of scaling factors plus a star marker.

    Uses the module-level ``fig`` and ``gs``, which must exist before the call.

    Args:
        index: Cell of ``gs`` to draw in. Also selects the panel letter
            (0 -> 'a', 1 -> 'b', ...); only panel 0 shows its y axis.
        values: Data passed unchanged to ``plt.boxplot``.
        sector: Title of the panel.
        bottomup: y value of the star marker drawn at x = 1.
    """
    ax = fig.add_subplot(gs[index])
    ax.set_facecolor('whitesmoke')
    # Axis spans scaling factors 0.0-1.5, labelled as percentages 0-150.
    plt.ylim([0.0, 1.5])
    plt.yticks(np.arange(0, 1.75, 0.25))
    ax.set_yticklabels(np.arange(0, 175, 25))
    plt.yticks(fontsize=14)
    ax.axes.get_xaxis().set_visible(False)
    if index == 0:
        # Raw string: '\%' is not a valid Python escape sequence, so the
        # non-raw form triggers an invalid-escape warning. The text is unchanged.
        plt.ylabel(r'Emission change ({\%})', fontsize=14)
    else:
        ax.axes.get_yaxis().set_visible(False)

    plt.title(sector)
    color1 = '#c7eae5'
    color2 = '#01665e'
    plt.boxplot(
        values,
        patch_artist=True,
        boxprops={'facecolor': color1, 'color': color2, 'linewidth': 1.5},
        capprops={'color': color2, 'linewidth': 1.5},
        whiskerprops={'color': color2, 'linewidth': 1.5},
        flierprops={'color': color2, 'markeredgecolor': color2, 'linewidth': 1.5},
        medianprops={'color': color2, 'linewidth': 1.5},
        showmeans=True,
        meanprops={'markeredgecolor': color2, 'color': color2},
        showfliers=False,
        whis=(5, 95),  # whiskers at the 5th and 95th percentiles
        zorder=1
    )
    # Star marker drawn above the box (zorder=2).
    plt.scatter(1, bottomup, color='#8c510a', zorder=2, marker='*')
    # Panel letter placed just above the top-left corner of the axes.
    plt.annotate(r'\textbf{(' + chr(97 + index) + ')}', xy=(0, 1.05), xycoords='axes fraction', fontsize=14, weight='bold')

In [None]:
# Species averaged to obtain a single bottom-up scaling factor per sector.
_bottomup_species = ['so2', 'nox', 'nmvoc', 'nh3', 'co', 'pm25', 'bc', 'oc', 'pm10']


def _bottomup_scaling(sector_row):
    """Turn the mean percentage change over the species of one df_diff row into a scaling factor."""
    return 1 + (df_diff.loc[sector_row][_bottomup_species].mean() / 100)


zhang2018_bottomup20152017_allspecies_res = _bottomup_scaling('Residential')
zhang2018_bottomup20152017_allspecies_ind = _bottomup_scaling('Industry')
zhang2018_bottomup20152017_allspecies_tra = _bottomup_scaling('Transportation')
zhang2018_bottomup20152017_allspecies_agr = _bottomup_scaling('Agriculture')
zhang2018_bottomup20152017_allspecies_ene = _bottomup_scaling('Power')

In [None]:
# Split the 100 most frequently matched configurations of every region into
# their five per-sector scaling factors.
output = 'combined'

factors_all = {}

# Raw string: '\d' and '\.' are not valid Python escapes in a normal string.
# Compiled once because the pattern is reused for every configuration.
_factor_pattern = re.compile(r'\d+\.\d+')

for region in regions:
    factors_res = {}
    factors_ind = {}
    factors_tra = {}
    factors_agr = {}
    factors_ene = {}
    top_configurations = itertools.islice(regional_keys_unique_sorted[f'{region}_{output}'].items(), 100)
    for index, (configuration, _matches) in enumerate(top_configurations):
        # Labels look like 'RES0.9_IND1.0_TRA0.9_AGR0.9_ENE0.5': exactly five
        # decimal numbers, in sector order (anything else raises on unpacking).
        factor_res, factor_ind, factor_tra, factor_agr, factor_ene = (
            float(number) for number in _factor_pattern.findall(configuration)
        )
        factors_res[index] = factor_res
        factors_ind[index] = factor_ind
        factors_tra[index] = factor_tra
        factors_agr[index] = factor_agr
        factors_ene[index] = factor_ene

    factors_all[f'{region}_RES'] = factors_res
    factors_all[f'{region}_IND'] = factors_ind
    factors_all[f'{region}_TRA'] = factors_tra
    factors_all[f'{region}_AGR'] = factors_agr
    factors_all[f'{region}_ENE'] = factors_ene

In [None]:
# Five-panel figure (one panel per sector) for a single region.
fig = plt.figure(1, figsize=(15, 3))
gs = gridspec.GridSpec(1, 5)

region = 'China'

# Sector code -> bottom-up scaling factor, in panel order.
panels = [
    ('RES', zhang2018_bottomup20152017_allspecies_res),
    ('IND', zhang2018_bottomup20152017_allspecies_ind),
    ('TRA', zhang2018_bottomup20152017_allspecies_tra),
    ('AGR', zhang2018_bottomup20152017_allspecies_agr),
    ('ENE', zhang2018_bottomup20152017_allspecies_ene),
]
for panel_index, (sector_code, bottom_up_factor) in enumerate(panels):
    make_boxplot(panel_index, factors_all[f'{region}_{sector_code}'].values(), sector_code, bottom_up_factor)

gs.tight_layout(fig, rect=[0, 0, 0.65, 0.85])

# Legend text placed below the panels.
plt.annotate(r'\textbf{$\Delta$: Top-down from emulators}', xy=(-4.0, -0.15), xycoords='axes fraction', fontsize=14, color='#01665e')
plt.annotate(r'\textbf{$\star$: Bottom-up from Zheng et al., (2018)}', xy=(-2.0, -0.15), xycoords='axes fraction', fontsize=14, color='#8c510a')

#plt.savefig(f'/nfs/b0122/Users/earlacoa/png/paper_aia_emulator_annual/emission_factors_boxplot_top100_{region}_{output}.png', dpi=700, alpha=True, bbox_inches='tight')
#plt.savefig(f'/nfs/b0122/Users/earlacoa/png/paper_aia_emulator_annual/emission_factors_boxplot_top100_{region}_{output}.eps', format='eps', dpi=700, alpha=True, bbox_inches='tight')
plt.show()

In [None]:
def _regional_nanmean(per_region, region, output):
    """Return the NaN-ignoring mean of the entries of per_region[region] whose key contains output.

    NaN entries are left to np.nanmean. An identity test against np.nan would
    not remove NaNs that result from a computation, so no such test is used.
    """
    values = np.array([value for key, value in per_region[region].items() if output in key])
    return np.nanmean(values)


# Regional means of baseline, target and required change, keyed by '<region>_<output>'.
baselines_mean = {}
targets_mean = {}
target_diffs_mean = {}

for output in ['PM2_5_DRY', 'o3_6mDM8h']:
    for region in regions:
        label = f'{region}_{output}'
        baselines_mean[label] = _regional_nanmean(regional_baselines, region, output)
        targets_mean[label] = _regional_nanmean(regional_targets, region, output)
        target_diffs_mean[label] = _regional_nanmean(regional_target_diffs, region, output)

In [None]:
# Print the mean baseline, the mean target and the implied percentage change
# for every pollutant and region.
for output in ['PM2_5_DRY', 'o3_6mDM8h']:
    for region in regions:
        print(region, output)
        label = f'{region}_{output}'
        baseline_mean = baselines_mean[label]
        percent_change = (100 * (baseline_mean + target_diffs_mean[label]) / baseline_mean) - 100
        print(f"Baseline = {round(baseline_mean, 1)}")
        print(f"Target = {round(targets_mean[label], 1)}")
        #print(f"Absolute change = {round(target_diffs_mean[f'{region}_{output}'], 1)}")
        print(f"Percentage change = {round(percent_change, 1)} %")
        print()

In [None]:
# Bottom-up scaling factor of each sector, rounded to two decimals.
for sector_code, bottom_up_factor in (
    ('RES', zhang2018_bottomup20152017_allspecies_res),
    ('IND', zhang2018_bottomup20152017_allspecies_ind),
    ('TRA', zhang2018_bottomup20152017_allspecies_tra),
    ('AGR', zhang2018_bottomup20152017_allspecies_agr),
    ('ENE', zhang2018_bottomup20152017_allspecies_ene),
):
    print(sector_code, round(bottom_up_factor, 2))

In [None]:
sectors = ['RES', 'IND', 'TRA', 'AGR', 'ENE']
color1 = '#c7eae5'
color2 = '#01665e'


def _boxplot_style():
    """Return fresh keyword arguments for plt.boxplot (new dictionaries on every call)."""
    return dict(
        patch_artist=True,
        boxprops={'facecolor': color1, 'color': color2, 'linewidth': 1.5},
        capprops={'color': color2, 'linewidth': 1.5},
        whiskerprops={'color': color2, 'linewidth': 1.5},
        flierprops={'color': color2, 'markeredgecolor': color2, 'linewidth': 1.5},
        medianprops={'color': color2, 'linewidth': 1.5},
        showmeans=True,
        meanprops={'markeredgecolor': color2, 'color': color2},
        showfliers=False,
        whis=(5, 95),
        zorder=1,
    )


# Print, per region and sector, the mean marker value of the box plot of the
# selected scaling factors.
for region in regions:
    print(region)
    for sector in sectors:
        bp = plt.boxplot(factors_all[f'{region}_{sector}'].values(), **_boxplot_style())
        mean_marker = bp['means'][0]
        print(sector, round(mean_marker.get_ydata()[0], 2))
    print()

In [None]:
# health impact assessment for both top-down and bottom-up
# rounded to the nearest 10% change in emissions

def _scenario(**scalings):
    """Build a simulation label; sectors that are not given keep a scaling of 1.0."""
    factors = {'RES': 1.0, 'IND': 1.0, 'TRA': 1.0, 'AGR': 1.0, 'ENE': 1.0, **scalings}
    return '_'.join(f'{sector}{factor:.1f}' for sector, factor in factors.items())


# Control run: every sector unchanged.
ctl = _scenario()

# Bottom-up: the overall change, then each changed sector on its own.
bottom_up_overall = _scenario(RES=0.8, IND=0.8, ENE=0.9)
bottom_up_contribution_res = _scenario(RES=0.8)
bottom_up_contribution_ind = _scenario(IND=0.8)
bottom_up_contribution_ene = _scenario(ENE=0.9)

# Top-down: the overall change, then each changed sector on its own.
top_down_overall = _scenario(RES=0.9, TRA=0.9, AGR=0.9, ENE=0.5)
top_down_contribution_res = _scenario(RES=0.9)
top_down_contribution_tra = _scenario(TRA=0.9)
top_down_contribution_agr = _scenario(AGR=0.9)
top_down_contribution_ene = _scenario(ENE=0.5)

sims = [
    ctl,
    bottom_up_overall,
    bottom_up_contribution_res,
    bottom_up_contribution_ind,
    bottom_up_contribution_ene,
    top_down_overall,
    top_down_contribution_res,
    top_down_contribution_tra,
    top_down_contribution_agr,
    top_down_contribution_ene,
]

In [None]:
path = '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator_annual/health_impact_assessments'
outputs = ['PM2_5_DRY', 'o3_6mDM8h']

# Health impact assessment tables, keyed by '<output>_<simulation>'.
dfs = {}

for sim in sims:
    for output in outputs:
        hia_file = f'{path}/{output}/df_country_hia_{output}_{sim}.csv'
        try:
            df = pd.read_csv(hia_file)
        except FileNotFoundError:
            # Only a missing file is skipped. Any other problem (for example a
            # malformed CSV) is raised instead of being silently ignored.
            print(f'Skipping missing health impact assessment file: {hia_file}')
            continue

        dfs[f'{output}_{sim}'] = df

In [None]:
# Display the keys of the health impact assessment tables held in `dfs`.
dfs.keys()

In [None]:
# PM2.5 (PM2_5_DRY): change in `mort_ncdlri_mean_total` for each simulation
# relative to the control, then the percentage of the overall change
# attributed to each single-sector simulation.
def _pm25_mortality_change(sim):
    """Return first-row `mort_ncdlri_mean_total` of `sim` minus that of `ctl`."""
    scenario = dfs[f'PM2_5_DRY_{sim}'].mort_ncdlri_mean_total.values[0]
    control = dfs[f'PM2_5_DRY_{ctl}'].mort_ncdlri_mean_total.values[0]
    return scenario - control


# NOTE(review): the commented-out entries below were already disabled;
# presumably those results are not present in `dfs` - confirm before enabling.

# Bottom-up simulations.
# diff_bottom_up_overall = _pm25_mortality_change(bottom_up_overall)
diff_bottom_up_res = _pm25_mortality_change(bottom_up_contribution_res)
diff_bottom_up_ind = _pm25_mortality_change(bottom_up_contribution_ind)
diff_bottom_up_ene = _pm25_mortality_change(bottom_up_contribution_ene)

# Top-down simulations.
diff_top_down_overall = _pm25_mortality_change(top_down_overall)
# diff_top_down_res = _pm25_mortality_change(top_down_contribution_res)
diff_top_down_tra = _pm25_mortality_change(top_down_contribution_tra)
# diff_top_down_agr = _pm25_mortality_change(top_down_contribution_agr)
# diff_top_down_ene = _pm25_mortality_change(top_down_contribution_ene)

# Bottom-up shares need diff_bottom_up_overall, which is disabled above.
#print(f'Bottom up, contribution RES = {round((100 * diff_bottom_up_res / diff_bottom_up_overall), 2)} %')
#print(f'Bottom up, contribution IND = {round((100 * diff_bottom_up_ind / diff_bottom_up_overall), 2)} %')
#print(f'Bottom up, contribution ENE = {round((100 * diff_bottom_up_ene / diff_bottom_up_overall), 2)} %')
print()
# Top-down shares, as a percentage of the overall top-down change.
#print(f'Top down, contribution RES = {round((100 * diff_top_down_res / diff_top_down_overall), 2)} %')
print(f'Top down, contribution TRA = {round((100 * diff_top_down_tra / diff_top_down_overall), 2)} %')
#print(f'Top down, contribution AGR = {round((100 * diff_top_down_agr / diff_top_down_overall), 2)} %')
#print(f'Top down, contribution ENE = {round((100 * diff_top_down_ene / diff_top_down_overall), 2)} %')

In [None]:
# O3 (o3_6mDM8h): change in `mort_copd_mean_total` for each simulation
# relative to the control, then the percentage of the overall change
# attributed to each single-sector simulation.
def _o3_mortality_change(sim):
    """Return first-row `mort_copd_mean_total` of `sim` minus that of `ctl`."""
    scenario = dfs[f'o3_6mDM8h_{sim}'].mort_copd_mean_total.values[0]
    control = dfs[f'o3_6mDM8h_{ctl}'].mort_copd_mean_total.values[0]
    return scenario - control


# NOTE(review): the commented-out entries below were already disabled;
# presumably those results are not present in `dfs` - confirm before enabling.

# Bottom-up simulations.
# diff_bottom_up_overall = _o3_mortality_change(bottom_up_overall)
diff_bottom_up_res = _o3_mortality_change(bottom_up_contribution_res)
diff_bottom_up_ind = _o3_mortality_change(bottom_up_contribution_ind)
diff_bottom_up_ene = _o3_mortality_change(bottom_up_contribution_ene)

# Top-down simulations.
diff_top_down_overall = _o3_mortality_change(top_down_overall)
diff_top_down_res = _o3_mortality_change(top_down_contribution_res)
diff_top_down_tra = _o3_mortality_change(top_down_contribution_tra)
# diff_top_down_agr = _o3_mortality_change(top_down_contribution_agr)
diff_top_down_ene = _o3_mortality_change(top_down_contribution_ene)

# Bottom-up shares need diff_bottom_up_overall, which is disabled above.
#print(f'Bottom up, contribution RES = {round((100 * diff_bottom_up_res / diff_bottom_up_overall), 2)} %')
#print(f'Bottom up, contribution IND = {round((100 * diff_bottom_up_ind / diff_bottom_up_overall), 2)} %')
#print(f'Bottom up, contribution ENE = {round((100 * diff_bottom_up_ene / diff_bottom_up_overall), 2)} %')
print()
# Top-down shares, as a percentage of the overall top-down change.
print(f'Top down, contribution RES = {round((100 * diff_top_down_res / diff_top_down_overall), 2)} %')
print(f'Top down, contribution TRA = {round((100 * diff_top_down_tra / diff_top_down_overall), 2)} %')
#print(f'Top down, contribution AGR = {round((100 * diff_top_down_agr / diff_top_down_overall), 2)} %')
print(f'Top down, contribution ENE = {round((100 * diff_top_down_ene / diff_top_down_overall), 2)} %')