In [3]:
%matplotlib inline

%matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np

import xarray as xr

import regionmask

import pandas as pd

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import cartopy.feature as cfeat
import matplotlib.patches as mpatches

import feather

import glob


import sys
sys.path.append('../')
import utils
import plotting

np.seterr(invalid='ignore'); # disable a warning from matplotlib and cartopy

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Concentration Data

In [2]:
ds_normal_model = utils.import_GC_runs_general('../../GCrundirs/type_NA-out_conc-yr_2016-gridmod_final-validation/OutputDir/','GEOSChem.SpeciesConc.2016*_0000z.nc4','GEOSChem.AerosolMass.2016*_0000z.nc4','normal_model' )
ds_nonuc_model = utils.import_GC_runs_general('../../GCrundirs/type_NA-out_conc-yr_2016-gridmod_nonuclear/OutputDir/','GEOSChem.SpeciesConc.2016*_0000z.nc4','GEOSChem.AerosolMass.2016*_0000z.nc4','nonuc_model' )

datasets = [
    ds_normal_model,
    ds_nonuc_model
]

In [91]:
poll_ds = utils.combine_and_convert_ds(utils.gas_species_list, utils.aerosol_species_dict.keys(), datasets,['normal_model', 'nonuc_model'], ['nonuc_model'], 'normal_model', 'normal and nonuclear' )



In [93]:
poll_ds.to_zarr('nonuc_normal_GC.zarr', mode = 'w') #save the dataset 

<xarray.backends.zarr.ZarrStore at 0x7f26ca3eb620>

# HEMCO Data

In [2]:
ds_normal = xr.open_mfdataset('../../GCrundirs/type_NA-out_conc-yr_2016-gridmod_final-validation/OutputDir/HEMCO_diagnostics.2016*.nc', combine = 'by_coords')
ds_nonuc = xr.open_mfdataset('../../GCrundirs/type_NA-out_conc-yr_2016-gridmod_nonuclear/OutputDir/HEMCO_diagnostics.2016*.nc', combine = 'by_coords')

ds_normal = ds_normal.isel(lev = 0)
ds_nonuc = ds_nonuc.isel(lev = 0)

ds_emis = xr.concat([ds_nonuc, ds_normal], pd.Index(['nonuc', 'normal'], name='model_name'))

In [3]:
emis_list = ['EmisNO2_Anthro', 'EmisNO_Anthro', 'EmisSO2_Anthro',  'EmisNH3_Total']

In [10]:
emis_ds = ds_emis.drop_vars([species for species in ds_emis.data_vars if species not in emis_list])
emis_ds['EmisNOx_Anthro'] = (
        emis_ds['EmisNO2_Anthro'] + emis_ds['EmisNO_Anthro'] #sum our NO2 and NO to get NOx
                   )
emis_ds['EmisNOx_Anthro'].attrs['units'] = 'kg/m2/s'

In [11]:
emis_ds.to_zarr('emissions_HEMCO', mode = 'w') #save the dataset 

<xarray.backends.zarr.ZarrStore at 0x7f647a315f10>

# US-EGO Emissions Data

In [4]:
##### import files and change hydro #####

###get ORIS, plant type, and Region Name from our modified generation file
oris_nonuc_df = pd.read_csv('../optimization_model/good_model_inputs/inputs_gen_no-nuclear_all-generators_20k-new_name.csv',usecols=[1,2,4,5,8,22,23,24,25])
oris_normal_df = pd.read_csv('../optimization_model/good_model_inputs/inputs_gen_normal.csv',usecols=[1,2,4,5,8,22,23,24,25])
oris_nonuc_df.loc[oris_nonuc_df['FuelType'] == 'Pumps', 'FuelType'] = 'Hydro' #change pumps to hydro label
oris_normal_df.loc[oris_normal_df['FuelType'] == 'Pumps', 'FuelType'] = 'Hydro' #change pumps to hydro label

###import egrid generation data and modify the index to be ORISCode
egrid_df_raw = pd.read_excel('../raw_data/egrid2016_data.xlsx',sheet_name='GEN16', usecols='D, J, L')
egrid = egrid_df_raw.drop(egrid_df_raw.index[0]).rename(columns={'DOE/EIA ORIS plant or facility code':'ORISCode'})
#change those with no capacity to 1 (so that our capacity factor calculation doesn't fail)
egrid.loc[egrid['Generator nameplate capacity (MW)'] == 0, 'Generator nameplate capacity (MW)'] = 1
#calculate the capacity factor of generation in egrid
egrid['egrid_capafactor'] = egrid['Generator annual net generation (MWh)'] / (8760 * egrid['Generator nameplate capacity (MW)']) 

###import the generation data from our optimization
gen_normal_df=feather.read_dataframe('../optimization_model/outputs/gen_normal.feather')

###import the generation data from our optimization
gen_nonuc_df=feather.read_dataframe('../optimization_model/outputs/gen_no-nuclear_modified-all-generators.feather')



In [32]:
##### change index and convert to xarray #####

#group by ORIS code and make a date and ORIS code mult index
gen_nonuc_df = pd.concat([gen_nonuc_df,oris_nonuc_df['ORISCode']], axis = 1).groupby(['ORISCode']).sum()
gen_normal_df = pd.concat([gen_normal_df,oris_normal_df['ORISCode']], axis = 1).groupby(['ORISCode']).sum()
gen_normal_df = gen_normal_df.stack()
gen_nonuc_df = gen_nonuc_df.stack()
gen_nonuc_df.index.names = (['ORISCode','date'])
gen_normal_df.index.names = (['ORISCode','date'])

#convert to xarray datasets
oris_nonuc_ds = oris_nonuc_df.to_xarray()
oris_normal_ds = oris_normal_df.to_xarray()
egrid_ds = egrid.to_xarray()
gen_normal_ds = gen_normal_df.to_xarray()
gen_nonuc_ds = gen_nonuc_df.to_xarray()


In [33]:
###### setup for our datasets #######

###create dicts of our datasets/names
gen_names_dict = {'normal':gen_normal_ds,'nonuc':gen_nonuc_ds}
oris_names_dict = {'normal':oris_normal_ds,'nonuc':oris_nonuc_ds}
models = ['normal','nonuc']

### group egrid by ORIS code, sum, and drop the capacity factor (because it wouldn't be the sum)
egrid = egrid_ds.groupby('ORISCode').sum().drop('egrid_capafactor')

ValueError: One or more of the specified variables cannot be found in this dataset

In [None]:
####### concatenate our different dataset components together #######

### define dicts for output
capacity = {}
fueltype = {}
regionname = {}
gmodel_oris = {}
gmodel_egrid = {}

for name in models:
    ###set up inputs
    #create a capacity, fueltype, and regionname dict grouped by ORISCode
    capacity[name] = oris_names_dict[name].groupby('ORISCode').sum()['Capacity']
    fueltype[name]= oris_names_dict[name].to_dataframe().groupby('ORISCode').first()['FuelType']
    regionname[name]= oris_names_dict[name].to_dataframe().groupby('ORISCode').first()['RegionName']
    #group ORIS dicts by ORIS code and take the mean of everything but capacity
    oris_names_dict[name] = oris_names_dict[name].groupby('ORISCode').mean().drop('Capacity')
    #rename our generation variable
    gen_names_dict[name] = gen_names_dict[name].rename({'__xarray_dataarray_variable__':'modelgeneration'})

    ###concatenate the generation and ORIS files
    gmodel_oris[name] = xr.merge([gen_names_dict[name], oris_names_dict[name]])
    #add in the capacity
    gmodel_oris[name]['Capacity'] = capacity[name]
    #create a column for capacity factors
    gmodel_oris[name]['model_capafactor'] = 100 * gmodel_oris[name]['modelgeneration'] / (gmodel_oris[name]['Capacity'] * 8760) # % generation for each year's total capacity
    
    ###concatenate our model/oris and egrid emissions dataframes into one, grouped by ORIS code
    gmodel_egrid[name] = xr.merge([gmodel_oris[name],egrid])
    #turn all zeroes (just in the modelgeneration) to NAN
    gmodel_egrid[name].where('modelgeneration' == 0)['modelgeneration'] = np.nan
    #rename the egrid data column for ease
    gmodel_egrid[name] = gmodel_egrid[name].rename({'Generator annual net generation (MWh)':'egridgeneration'})
    #add in fueltype
    gmodel_egrid[name]['fueltype'] = fueltype[name]
    gmodel_egrid[name] = gmodel_egrid[name].set_coords('fueltype')
    #add in region name
    gmodel_egrid[name]['regionname'] = regionname[name]
    gmodel_egrid[name] = gmodel_egrid[name].set_coords('regionname')
    

In [None]:
####### fix datetime and add dec 31st at the 23rd hour ######

###create a dict for dec 31st 23rd hour (just copy the dec 31 22nd hour data over)
ds_dec23 = {}
for name in models:
    ds_dec23[name] = gmodel_egrid[name].isel(date = [-1])
    #change date to datetime for the 23rd hour
    ds_dec23[name]['date'] = [datetime.datetime(2017,12,31,23,0)]

###change all dates to datetime
#make a list of dates for the year
base = datetime.datetime(2017, 1, 1) #base date
date_list = [base + datetime.timedelta(hours=x) for x in range(8759)] #loop through all hours but the final one (we will add it in above)
date_list
for name in models:
    gmodel_egrid[name]['date'] = date_list
    gmodel_egrid[name] = xr.merge([gmodel_egrid[name],ds_dec23[name]])

Note:

To convert our generation in MWh to get the emissions in kg/sec, we use the following (and making sure our emissions rates are in kg/sec)

$MWH/3600sec -> (MW/s) * kg/MW -> kg/s$


In [None]:
###### add NOx, SO2, CO2, CH4 emissions to gmodel_egrid dataset ######

no_mult = 0.8544304 # NO/NOx as estimated from NEI2011 inventory
no2_mult = 1 - 0.8544304 # NO2/NOx as estimated from NEI2011 inventory
### process our emissions by multiplying generation* emissions factors
for name in models:
    gmodel_egrid[name]['NO']  = no_mult * gmodel_egrid[name]['modelgeneration']/3600 * gmodel_egrid[name]['PLNOXRTA']
    gmodel_egrid[name]['NO2']  = no2_mult * gmodel_egrid[name]['modelgeneration']/3600 * gmodel_egrid[name]['PLNOXRTA']
    gmodel_egrid[name]['SO2']  =  gmodel_egrid[name]['modelgeneration']/3600 * gmodel_egrid[name]['PLSO2RTA']
    gmodel_egrid[name]['CO2']  =  gmodel_egrid[name]['modelgeneration']/3600 * gmodel_egrid[name]['PLCO2RTA']
    gmodel_egrid[name]['CH4']  =  gmodel_egrid[name]['modelgeneration']/3600 * gmodel_egrid[name]['PLCH4RTA']

##### annual generation in original dataset ######
    gmodel_egrid[name]['annual_modelgeneration'] = gmodel_egrid[name]['modelgeneration'].groupby('ORISCode').sum(dim = 'date')

In [None]:
####### create new datasets for region and fuel type grouped data, add annual generation to normal ds ######
###dicts for datasets
fueltype_grouped_ds = {}
regionname_grouped_ds = {}

###list of pollutants
pollutants = ['NO','NO2','SO2','CO2','CH4']

for name in models:
    
###new datasets    
    #annual generation by fuel type
    fueltype_grouped_ds[name] = gmodel_egrid[name].groupby('fueltype').sum(dim = 'ORISCode')
    fueltype_grouped_ds[name].drop(['model_capafactor','PLCH4RTA','PLCO2RTA','PLNOXRTA','PLSO2RTA','PLN2ORTA'])
    
    #annual generation by region
    regionname_grouped_ds[name] = gmodel_egrid[name].groupby('regionname').sum(dim = 'ORISCode')
    regionname_grouped_ds[name].drop(['model_capafactor','PLCH4RTA','PLCO2RTA','PLNOXRTA','PLSO2RTA','PLN2ORTA'])

    #add annual emissions of each pollutant
    for pollutant in pollutants:
        regionname_grouped_ds[name][f'annual_{pollutant}'] = regionname_grouped_ds[name][f'{pollutant}'].sum(dim = 'date')
        fueltype_grouped_ds[name][f'annual_{pollutant}'] = fueltype_grouped_ds[name][f'{pollutant}'].sum(dim = 'date')
#add ratio of difference in emissions normal-nonuc/ difference in generation normal-nonuc to the nonuc dataset
for pollutant in pollutants:
    fueltype_grouped_ds['nonuc'][f'normal-nonuc_{pollutant}-gen_ratio'] = (fueltype_grouped_ds['normal'][f'annual_{pollutant}']-
                                                                           fueltype_grouped_ds['nonuc'][f'annual_{pollutant}'])/(fueltype_grouped_ds['normal']['annual_modelgeneration']-
                                                                                                                                 fueltype_grouped_ds['nonuc']['annual_modelgeneration'])
    regionname_grouped_ds['nonuc'][f'normal-nonuc_{pollutant}-gen_ratio'] = (regionname_grouped_ds['normal'][f'annual_{pollutant}']-
                                                                             regionname_grouped_ds['nonuc'][f'annual_{pollutant}'])/(regionname_grouped_ds['normal']['annual_modelgeneration']-
                                                                                                                                     regionname_grouped_ds['nonuc']['annual_modelgeneration'])
    
    

In [None]:
##### save datasets #####

### Final datasets to save are: gmodel_egrid, fueltype_grouped_ds, regionname_grouped_ds
for name in models:
    regionname_grouped_ds[name].to_netcdf(f'./regionname_grouped_emissions_{name}_ds.nc', 'w')
    fueltype_grouped_ds[name].to_netcdf(f'./fueltype_grouped_emissions_{name}_ds.nc', 'w')
    gmodel_egrid[name].to_netcdf(f'./gmodel_egrid_emissions_{name}_ds.nc', 'w')

### datasets partway to save are:

oris_nonuc_ds.to_netcdf('../optimization_model/outputs/oris_nonuc_ds.nc', 'w')
oris_normal_ds.to_netcdf('../optimization_model/outputs/oris_normal_ds.nc', 'w')
egrid_ds.to_netcdf('../optimization_model/outputs/egrid_ds.nc', 'w')
gen_normal_ds.to_netcdf('../optimization_model/outputs/gen_normal_ds.nc', 'w')
gen_nonuc_ds.to_netcdf('../optimization_model/outputs/gen_nonuc_ds.nc', 'w')

# MERRA2 RH and T

In [None]:
##### import MERRA2 datasets for RH and T #####

#import 
T_ds = xr.open_mfdataset('../../MERRA2/2016/GEOS_0.5x0.625_NA/MERRA2/2016/*/MERRA2.2016*.A1.05x0625.NA.nc4', combine = 'by_coords')
RH_ds = xr.open_mfdataset('../../MERRA2/2016/GEOS_0.5x0.625_NA/MERRA2/2016/*/MERRA2.2016*.A3dyn.05x0625.NA.nc4', combine = 'by_coords')

#reduce datasets just to T and RH
T_ds = T_ds['TS']
RH_ds = RH_ds['RH']

In [None]:
##### Select T and RH in the bounds of our US lat and lon #####
T = {}
RH = {}
for region in utils.lat_lon_dict.keys():
    T[region] = {}
    T[region] = T_ds.sel(
        lon = slice(utils.lat_lon_dict[region][0], utils.lat_lon_dict[region][1]), 
        lat = slice(utils.lat_lon_dict[region][2],utils.lat_lon_dict[region][3])
    ).groupby('time.season').mean()
    RH[region] = {}
    RH[region] = RH_ds.sel(
        lon = slice(utils.lat_lon_dict[region][0], utils.lat_lon_dict[region][1]), 
        lat = slice(utils.lat_lon_dict[region][2],utils.lat_lon_dict[region][3])
    ).groupby('time.season').mean()

In [None]:
##### Calculate seasonal mean RH and T by region #####
T_seasonal_mean = {}
RH_seasonal_mean = {}
for region in utils.lat_lon_dict.keys():
    T_seasonal_mean[region] = {}
    RH_seasonal_mean[region] = {}
    for seasons in ['DJF','JJA','MAM','SON']:
        T_seasonal_mean[region][seasons] = T[region].sel(season = seasons).mean().values
        RH_seasonal_mean[region][seasons] = RH[region].sel(season = seasons).mean().values

In [None]:
##### convert to dataframes ##### 

T_df = pd.DataFrame.from_dict({(i,j): T_seasonal_mean[i][j]
                            for i in T_seasonal_mean.keys() 
                            for j in T_seasonal_mean[i].keys()},
                            orient='index', columns = ['T'])
T_df.index = pd.MultiIndex.from_tuples(T_df.index)

RH_df = pd.DataFrame.from_dict({(i,j): RH_seasonal_mean[i][j]
                            for i in RH_seasonal_mean.keys() 
                            for j in RH_seasonal_mean[i].keys()},
                            orient='index', columns = ['RH'])
RH_df.index = pd.MultiIndex.from_tuples(RH_df.index)



In [None]:
##### merge dataframes #####
RH_T_df = pd.merge(
    T_df, RH_df, 
    left_index=True, right_index=True)

In [None]:
##### save dataframe #####
pd.DataFrame.to_csv(RH_T_df, './RH_T.csv')