#### Find the emission pathways that caused the change in PM2.5 over 2015-2018 in China
- Overall change was -6.0% per year (-3.0 µg m⁻³ per year)
- Per gridcell (emulator).  
- Remove the inputs that don't predict the correct change.  

##### Example for January
1. Load observations for 1 station.  
2. Find the change in PM2.5 concentrations for this location over (January) 2015-2018.  
3. Load nearest emulator to this station.  
4. See which inputs the emulator for this location needs to match this change in PM2.5 concentrations.

##### Eventually, swap out the January emulators for the whole-year emulators

In [2]:
import glob
import joblib
from itertools import islice
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd

In [26]:
def obs_ds_to_df(filepath):
    """Load one station's observation file into a time-indexed DataFrame.

    The dataset is converted with ``Dataset.to_dataframe()`` and the station
    metadata held as dataset attributes (``station_id``, ``station_lat``,
    ``station_lon``, ``province``, ``city_en``) is broadcast onto every row.

    Parameters
    ----------
    filepath : str
        Path to a file readable by ``xarray.open_dataset``.

    Returns
    -------
    pandas.DataFrame
        The observations indexed by their ``times`` column, with the extra
        columns ``station_id``, ``station_lat``, ``station_lon``,
        ``province`` and ``prefecture`` (the latter from ``city_en``).
    """
    # The context manager closes the file even when the conversion or one of
    # the attribute lookups raises, so a bad file cannot leak a handle while
    # looping over many stations.
    with xr.open_dataset(filepath) as ds:
        df = ds.to_dataframe()
        df['station_id'] = ds.station_id
        df['station_lat'] = ds.station_lat
        df['station_lon'] = ds.station_lon
        df['province'] = ds.province
        df['prefecture'] = ds.city_en

    return df.set_index('times')

def obs_change_over_2015_2018(df, month, variable):
    """Change in the monthly mean of ``variable`` across 2015-2018.

    Rows are restricted to 2015-2018 and to the requested calendar month,
    averaged per year, and the last available yearly mean is compared with
    the first available one. If a station has no data for 2015 or 2018, the
    comparison therefore uses the earliest/latest year that is present.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations with a ``DatetimeIndex``.
    month : int
        Calendar month to keep (1 = January).
    variable : str
        Name of the column to evaluate, e.g. ``'PM2.5'``.

    Returns
    -------
    tuple
        ``(change_per, change_abs)``: the percentage change and the absolute
        change between the first and last yearly means.

    Raises
    ------
    IndexError
        If there are no rows for ``month`` within 2015-2018.
    KeyError
        If ``variable`` is not a column of ``df``.
    """
    df_2015_2018 = df.loc['2015':'2018']
    in_month = df_2015_2018.index.month == month

    # Select the variable *before* averaging: the frame also carries string
    # metadata columns (station id, province, ...) and averaging those raises
    # a TypeError on pandas >= 2.0.
    series_month = df_2015_2018.loc[in_month, variable]
    yearly_means = series_month.groupby(series_month.index.year).mean()

    last_year_mean = yearly_means.values[-1]
    first_year_mean = yearly_means.values[0]

    change_per = 100 * ((last_year_mean / first_year_mean) - 1)
    change_abs = last_year_mean - first_year_mean
    return change_per, change_abs

def load_nearest_emulator(df, variable):
    """Load the saved emulator closest to the station described by ``df``.

    The station coordinates are read from the first unique value of the
    ``station_lat`` / ``station_lon`` columns and rounded to the nearest
    quarter (multiply by 4, round, divide by 4); presumably this matches the
    emulator grid spacing -- confirm against how the emulators were saved.

    Parameters
    ----------
    df : pandas.DataFrame
        Station frame with ``station_lat`` and ``station_lon`` columns.
    variable : str
        Emulated variable name; used for both the directory and file name.

    Returns
    -------
    object
        Whatever ``joblib.load`` returns for the matching emulator file.
    """
    station_lat = df.station_lat.unique()[0]
    station_lon = df.station_lon.unique()[0]

    # Snap to the nearest multiple of 0.25.
    lat = round(station_lat * 4) / 4
    lon = round(station_lon * 4) / 4

    return joblib.load(f'/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/{variable}/emulator_{variable}_{lat}_{lon}.joblib')

In [28]:
# Month analysed (1 = January).
month = 1

obs_dir = '/nfs/a68/earlacoa/china_measurements/'
obs_files = glob.glob(obs_dir + '*.nc')

# Per-station results, keyed by station id (file name without '.nc').
obs_change_abs = {}
obs_change_per = {}
emulators = {}
baselines = {}
targets = {}
# Stations that could not be processed, with the reason, so that failures
# are visible instead of being silently discarded.
skipped_stations = {}

for obs_file in obs_files:
    # Strip the directory prefix and the '.nc' suffix.
    station_id = obs_file[len(obs_dir):-3]

    df = obs_ds_to_df(obs_file)
    try:
        change_per, change_abs = obs_change_over_2015_2018(df, month, 'PM2.5')

        obs_change_abs[station_id] = change_abs
        obs_change_per[station_id] = change_per

        emulator = load_nearest_emulator(df, 'PM2_5_DRY')
        emulators[station_id] = emulator

        # Emulator prediction with all five inputs at 1.0, used as the
        # baseline the observed change is applied to.
        baseline = emulator.predict(np.array([[1.0, 1.0, 1.0, 1.0, 1.0]]))
        baselines[station_id] = baseline

        # Target = mean of the baseline shifted by the absolute change and
        # the baseline scaled by the percentage change.
        target_abs = baseline + change_abs
        target_per = baseline * (1 + (change_per / 100))
        target = np.mean([target_abs, target_per])
        targets[station_id] = target
    except Exception as error:
        # Best effort: keep going with the remaining stations (e.g. no data
        # for this month, or no emulator file), but record why this one was
        # skipped. Unlike a bare ``except``, this does not trap
        # KeyboardInterrupt / SystemExit.
        skipped_stations[station_id] = repr(error)



In [29]:
# Discard stations whose target is NaN. The ids are collected first so the
# dictionary is not modified while it is being iterated.
nan_target_keys = [key for key in targets if np.isnan(targets[key])]
for key in nan_target_keys:
    targets.pop(key)

In [42]:
# Difference between each station's target and its emulator baseline.
target_diffs = {key: targets[key] - baselines[key] for key in targets}

In [40]:
# Cache the per-station targets, baselines and their differences on disk.
for cached_object, cached_path in (
    (targets, '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/targets.joblib'),
    (baselines, '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/baselines.joblib'),
    (target_diffs, '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/target_diffs.joblib'),
):
    joblib.dump(cached_object, cached_path)

In [30]:
# Keep only the emulators of stations that still have a target.
stations_without_target = set(emulators) - set(targets)
for key in stations_without_target:
    emulators.pop(key)

In [381]:
# Scaling factors tried for each of the five emulator inputs: 0.3 to 1.2 in
# steps of 0.1, i.e. the edges of the parameter space (0.0, 0.1, 1.4, 1.5)
# are left out. Alternative grids noted previously: up to 1.5 with 16 points
# for a 0.1 step, 6 points for a 0.3 step, or up to 1.4 with 8 points for a
# 0.2 step.
# NOTE(review): an earlier comment described the excluded range as
# reductions beyond -40% or increases beyond +30%, which does not match the
# 0.3-1.2 bounds used here -- confirm the intended limits.
scaling_factors = np.linspace(0.3, 1.2, 10)

# Every combination of the five inputs, one row per combination (10**5 rows).
matrix_stacked = np.array(np.meshgrid(
    scaling_factors,
    scaling_factors,
    scaling_factors,
    scaling_factors,
    scaling_factors
)).T.reshape(-1, 5)

# The label of a combination does not depend on the station, so it is built
# once here instead of once per station inside the loop.
filenames = [
    f'RES{matrix[0]:.1f}_IND{matrix[1]:.1f}_TRA{matrix[2]:.1f}_AGR{matrix[3]:.1f}_ENE{matrix[4]:.1f}'
    for matrix in matrix_stacked
]

# station id -> {configuration label -> difference to the station target}
station_diffs_abs = {}
station_diffs_per = {}

for station_id, emulator in emulators.items():
    target = targets[station_id]
    target_diffs_abs = {}
    target_diffs_per = {}
    for filename, matrix in zip(filenames, matrix_stacked):
        inputs = matrix.reshape(-1, 5)
        # Predict once and reuse the value for both differences.
        prediction = emulator.predict(inputs)[0]
        target_diff_abs = target - prediction
        target_diff_per = (100 * (prediction / target)) - 100
        if abs(target_diff_per) < 0.01: # +/- 0.01% of target
            target_diffs_abs[filename] = target_diff_abs
            target_diffs_per[filename] = target_diff_per

    station_diffs_abs[station_id] = target_diffs_abs
    station_diffs_per[station_id] = target_diffs_per

In [383]:
# Count, for every emission configuration, how many stations it matches.
keys_unique = {}
for station_configurations in station_diffs_per.values():
    for key in station_configurations:
        keys_unique[key] = keys_unique.get(key, 0) + 1

In [386]:
# Cache the per-station matching configurations (percentage and absolute
# differences to the target) on disk. The second call is the cell's last
# expression, so its return value is what the notebook displays.
joblib.dump(station_diffs_per, '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/station_diffs_per.joblib')
joblib.dump(station_diffs_abs, '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/station_diffs_abs.joblib')

['/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/station_diffs_abs.joblib']

___

In [3]:
# Reload the cached results, in the same order as the names on the left.
targets, baselines, target_diffs, station_diffs_per, station_diffs_abs = (
    joblib.load(cached_path)
    for cached_path in (
        '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/targets.joblib',
        '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/baselines.joblib',
        '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/target_diffs.joblib',
        '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/station_diffs_per.joblib',
        '/nfs/b0122/Users/earlacoa/paper_aia_china/emulator/find_emissions_that_match_pm25_decline/station_diffs_abs.joblib',
    )
)

In [199]:
# Provinces grouped by the region they are assigned to. The order of the
# regions and of the provinces within them fixes the insertion order of the
# lookup built below.
provinces_by_region = {
    'North China': ['Beijing', 'Tianjin', 'Hebei', 'Shanxi', 'Nei Mongol'],
    'North East China': ['Liaoning', 'Jilin', 'Heilongjiang'],
    'East China': ['Shanghai', 'Jiangsu', 'Zhejiang', 'Anhui', 'Fujian', 'Jiangxi', 'Shandong', 'Taiwan'],
    'South Central China': ['Henan', 'Hubei', 'Hunan', 'Guangdong', 'Guangxi', 'Hainan', 'Hong Kong', 'Macao'],
    'South West China': ['Chongqing', 'Sichuan', 'Guizhou', 'Yunnan', 'Xizang'],
    'North West China': ['Shaanxi', 'Gansu', 'Qinghai', 'Ningxia Hui', 'Xinjiang Uygur'],
}

# Province name -> region name.
regional_provinces = {
    province: region
    for region, provinces in provinces_by_region.items()
    for province in provinces
}

In [200]:
# gdf_prefectures_china = gpd.read_file('/nfs/a68/earlacoa/shapefiles/china/gadm36_CHN_3.shp')
# gdf_prefectures_hongkong = gpd.read_file('/nfs/a68/earlacoa/shapefiles/hongkong/gadm36_HKG_1.shp')
# gdf_prefectures_macao = gpd.read_file('/nfs/a68/earlacoa/shapefiles/macao/gadm36_MAC_2.shp')

# gba_prefectures = ['Dongguan', 'Foshan', 'Guangzhou', 'Huizhou', 'Jiangmen', 'Shenzhen', 'Zhaoqing', 'Zhongshan', 'Zhuhai', 'Hong Kong', 'Macao']

# list_prefectures_gba = []
# for gba_prefecture in gba_prefectures:
#     list_prefectures_gba.append(gdf.loc[gdf.NAME_2 == gba_prefecture])


# list_prefectures_gba.append(gdf_prefectures_hongkong)
# list_prefectures_gba.append(gdf_prefectures_macao)
    
# gdf_prefectures_gba = pd.concat(list_prefectures_gba)

In [242]:
# Prefectures counted as part of the Greater Bay Area (GBA). Defined here
# because the station loop below needs it and it is otherwise only present
# in commented-out code.
gba_prefectures = ['Dongguan', 'Foshan', 'Guangzhou', 'Huizhou', 'Jiangmen', 'Shenzhen', 'Zhaoqing', 'Zhongshan', 'Zhuhai', 'Hong Kong', 'Macao']

# Re-list the observation files so this part of the notebook can be run from
# the cached results without first re-running the emulator search.
obs_files = glob.glob('/nfs/a68/earlacoa/china_measurements/*.nc')

# Station metadata, keyed by station id, for stations that have a target.
obs_lats = {}
obs_lons = {}
obs_provinces = {}
obs_prefectures = {}
obs_regions = {}
obs_gba = {}

for obs_file in obs_files:
    with xr.open_dataset(obs_file) as ds:
        key = ds.station_id
        # Some station ids carry the file extension; strip it only when it
        # really is a suffix.
        if key.endswith('.nc'):
            key = key[:-3]

        if key not in targets:
            continue

        obs_lats[key] = ds.station_lat
        obs_lons[key] = ds.station_lon
        obs_provinces[key] = ds.province
        obs_prefectures[key] = ds.city_en
        obs_regions[key] = regional_provinces[ds.province]
        # 'Hong Kong' and 'Macao' are members of gba_prefectures, so a single
        # membership test covers them too.
        obs_gba[key] = ds.city_en in gba_prefectures

In [288]:
regions = ['China', 'GBA', 'North China', 'North East China', 'East China', 'South Central China', 'South West China', 'North West China']

# Region name -> list of station ids. Every station is listed under 'China'
# and under its own region; GBA stations are additionally listed under 'GBA'.
region_stations = {region: [] for region in regions}

for station_id, station_region in obs_regions.items():
    memberships = ['China', station_region]
    if obs_gba[station_id] == True:
        memberships.append('GBA')

    for membership in memberships:
        region_stations[membership].append(station_id)

In [314]:
# Region name -> the subset of each per-station dictionary for that region.
regional_targets = {}
regional_baselines = {}
regional_target_diffs = {}
regional_station_diffs_per = {}
regional_station_diffs_abs = {}

for region in regions:
    members = region_stations[region]

    regional_targets[region] = {key: value for key, value in targets.items() if key in members}
    regional_baselines[region] = {key: value for key, value in baselines.items() if key in members}
    regional_target_diffs[region] = {key: value for key, value in target_diffs.items() if key in members}
    regional_station_diffs_per[region] = {key: value for key, value in station_diffs_per.items() if key in members}
    regional_station_diffs_abs[region] = {key: value for key, value in station_diffs_abs.items() if key in members}

In [357]:
# Region -> {configuration label -> number of stations it matches}, ordered
# from the most to the least frequently matching configuration.
regional_keys_unique_sorted = {}
# Region -> {station id -> target difference} for stations whose absolute
# target difference is below 0.5.
regional_target_diffs_under0p5 = {}

for region in regions:
    keys_unique = {}
    for station_configurations in regional_station_diffs_per[region].values():
        for key in station_configurations:
            keys_unique[key] = keys_unique.get(key, 0) + 1

    keys_unique_sorted = dict(sorted(keys_unique.items(), key=lambda item: item[1], reverse=True))
    regional_keys_unique_sorted[region] = keys_unique_sorted

    target_diffs_under0p5 = {
        key: value
        for key, value in regional_target_diffs[region].items()
        if abs(value) < 0.5
    }
    regional_target_diffs_under0p5[region] = target_diffs_under0p5

In [387]:
# Per-region summary values, each keyed by region name.
number_of_stations = {}
number_of_emission_configurations = {}
first_emission_configuration_keys = {}
second_emission_configuration_keys = {}
third_emission_configuration_keys = {}
first_emission_configuration_values = {}
second_emission_configuration_values = {}
third_emission_configuration_values = {}
number_of_stations_with_target_diff_under_0p5 = {}

for region in regional_targets.keys():
    number_of_stations[region] = len(regional_targets[region])
    number_of_emission_configurations[region] = len(regional_keys_unique_sorted[region])

    # The three most frequently matching configurations as (label, count)
    # pairs. A region with fewer than three matching configurations is padded
    # with (None, 0) so the unpacking below cannot raise an IndexError.
    top3_emission_configurations = list(islice(regional_keys_unique_sorted[region].items(), 3))
    top3_emission_configurations += [(None, 0)] * (3 - len(top3_emission_configurations))
    (first_key, first_value), (second_key, second_value), (third_key, third_value) = top3_emission_configurations

    first_emission_configuration_keys[region] = first_key
    second_emission_configuration_keys[region] = second_key
    third_emission_configuration_keys[region] = third_key
    first_emission_configuration_values[region] = first_value
    second_emission_configuration_values[region] = second_value
    third_emission_configuration_values[region] = third_value

    number_of_stations_with_target_diff_under_0p5[region] = len(regional_target_diffs_under0p5[region])

In [389]:
# Column title -> per-region values, in the order the columns are displayed.
summary_columns = {
    'Stations': number_of_stations,
    'Possible Emission Configurations': number_of_emission_configurations,
    'First Emission Configuration - Key': first_emission_configuration_keys,
    'First Emission Configuration - Value': first_emission_configuration_values,
    'Second Emission Configuration - Key': second_emission_configuration_keys,
    'Second Emission Configuration - Value': second_emission_configuration_values,
    'Third Emission Configuration - Key': third_emission_configuration_keys,
    'Third Emission Configuration - Value': third_emission_configuration_values,
    'Stations with trend size under 0.5 ugm-3': number_of_stations_with_target_diff_under_0p5,
}

# One row per region, one column per summary value.
df_regions = pd.concat(
    [pd.Series(values, name=title) for title, values in summary_columns.items()],
    axis=1,
)

df_regions

Unnamed: 0,Stations,Possible Emission Configurations,First Emission Configuration - Key,First Emission Configuration - Value,Second Emission Configuration - Key,Second Emission Configuration - Value,Third Emission Configuration - Key,Third Emission Configuration - Value,Stations with trend size under 0.5 ugm-3
China,1453,24452,RES1.0_IND1.0_TRA1.0_AGR1.0_ENE1.0,97,RES1.2_IND0.5_TRA0.4_AGR1.0_ENE0.9,7,RES1.0_IND1.2_TRA1.0_AGR0.7_ENE0.5,7,112
GBA,55,1423,RES1.0_IND0.5_TRA1.1_AGR0.7_ENE0.9,2,RES0.3_IND0.8_TRA1.0_AGR1.1_ENE1.2,2,RES0.9_IND0.7_TRA0.5_AGR0.3_ENE0.4,2,2
North China,141,2399,RES1.0_IND1.0_TRA1.0_AGR1.0_ENE1.0,18,RES1.0_IND1.0_TRA1.0_AGR1.1_ENE0.8,4,RES1.0_IND1.1_TRA0.7_AGR0.5_ENE1.0,4,19
North East China,156,2579,RES1.0_IND1.0_TRA1.0_AGR1.0_ENE1.0,3,RES0.6_IND0.6_TRA0.5_AGR0.4_ENE1.2,2,RES0.7_IND0.3_TRA0.3_AGR0.6_ENE0.7,2,4
East China,449,8045,RES1.0_IND1.0_TRA1.0_AGR1.0_ENE1.0,29,RES1.1_IND1.1_TRA0.4_AGR0.7_ENE0.5,4,RES1.1_IND1.0_TRA0.6_AGR1.0_ENE1.1,4,32
South Central China,353,8417,RES1.0_IND1.0_TRA1.0_AGR1.0_ENE1.0,15,RES1.1_IND0.8_TRA0.9_AGR0.8_ENE0.3,4,RES1.0_IND0.4_TRA0.8_AGR0.5_ENE0.8,3,18
South West China,201,4175,RES1.0_IND1.0_TRA1.0_AGR1.0_ENE1.0,23,RES1.2_IND0.5_TRA0.4_AGR1.0_ENE0.9,5,RES1.2_IND0.6_TRA0.5_AGR0.6_ENE1.1,5,27
North West China,153,1807,RES1.0_IND1.0_TRA1.0_AGR1.0_ENE1.0,9,RES0.9_IND1.2_TRA0.4_AGR0.9_ENE1.0,3,RES1.1_IND1.0_TRA0.6_AGR0.7_ENE1.0,2,12
