# Regridding NO2, CO via XESMF:
    
This notebook generates the results in the last two rows of Table 1, Figures 6, 7, 9 and 10, and the data needed to obtain the results in the second rows of Tables 4 and 5.


First, we import all relevant libraries.

In [None]:
import sys
import os
import netCDF4 as ntf
from pyhdf.SD import SD, SDC
import numpy as np
import math
from netCDF4 import Dataset 
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import cartopy.crs as ccrs
import cartopy
import xesmf as xmf
import xarray as xr
import glob

Provide the output file paths, the path to our land mask, and the target grid / time-range settings.

In [None]:
# Output NetCDF paths, one per (species, interpolation scheme) pair.
# Each path must be unique: the writer cell creates one file per path, so a
# duplicated path silently overwrites another product.
binary_path_patch_co = '/data0/rm3873/co_binary_patch_regridded_v0.nc'
binary_path_bilinear_co = '/data0/rm3873/co_binary_bilinear_regridded_v0.nc'
binary_path_patch_no2 = '/data0/rm3873/no2_binary_patch_regridded_v0.nc'
binary_path_bilinear_no2 = '/data0/rm3873/no2_binary_bilinear_regridded_v0.nc'
# Guard against copy-paste mistakes in the four paths above.
assert len({binary_path_patch_co, binary_path_bilinear_co,
            binary_path_patch_no2, binary_path_bilinear_no2}) == 4, \
    "output paths must be distinct"

land_mask_path = '/data0/zzheng/GEOS-Chem-grid/land_mask.nc'

# Number of latitude / longitude points on the target grid.
target_grid_lats = 121
target_grid_lons = 96

# Day-of-year range, half-open [day_start, day_end): 365 days.
day_start = 1
day_end = 366
year = 2015

# Target-grid axes as np.arange(start, end, step) arguments (end is exclusive).
lat_st = 6
lat_end = 36.25
lat_siz = 0.25
lon_st = 68.125
lon_end = 97.8126  # slightly above 97.8125 so the last grid point is included
lon_siz = 0.3125

Next, we load in the raw NO2 dataset, where all the files that were downloaded are kept in one directory, and then extract the desired data into an array.

In [None]:
# Open all downloaded daily NO2 files as one dataset, concatenated along
# the 'TIMERANGE' dimension in the order the files are matched.
no2_data = xr.open_mfdataset(
    "/data0/rm3873/daily_no2_data/*.nc4",
    combine='nested',
    concat_dim='TIMERANGE',
)

Now, we can use the XESMF library for regridding NO2 under a bilinear scheme provided the desired boundaries!

In [None]:
# Target grid built from the config-cell constants so that the regridded
# values sit on exactly the lat/lon coordinates that the NetCDF writer cells
# store alongside them. The previous hard-coded longitude axis,
# np.arange(68, 98, .3125), started at 68.0 while the files are labelled
# starting at lon_st (68.125), i.e. the data were shifted by 0.125 degrees
# relative to their labels.
ds_out = xr.Dataset(
    {
        "lat": (["lat"], np.arange(lat_st, lat_end, lat_siz)),
        "lon": (["lon"], np.arange(lon_st, lon_end, lon_siz)),
    }
)
# Build the bilinear interpolation weights from the NO2 source grid to the
# target grid and apply them to every variable in the dataset.
regridder = xmf.Regridder(no2_data, ds_out, "bilinear")
# NOTE: this rebinds `no2_data` to the regridded dataset, so the cell is not
# idempotent; re-run the loading cell before re-running this one.
no2_data = regridder(no2_data)

Next, we load in the raw CO dataset, where all the files that were downloaded are kept in one directory, and then extract the desired data into an array.

In [None]:
# Open all downloaded CO files as one dataset (concatenated along
# 'TIMERANGE') and rename the coordinate variables to the 'lon'/'lat'
# names used by the regridding cells.
co_data = xr.open_mfdataset(
    "/data0/rm3873/co_data/*.nc4",
    combine='nested',
    concat_dim='TIMERANGE',
).rename({'Longitude': 'lon', 'Latitude': 'lat'})

Now, we can use the XESMF library for regridding CO under a bilinear scheme provided the desired boundaries!

In [None]:
# Target grid built from the config-cell constants so that the regridded
# values sit on exactly the lat/lon coordinates that the NetCDF writer cells
# store alongside them. The previous hard-coded longitude axis,
# np.arange(68, 98, .3125), started at 68.0 while the files are labelled
# starting at lon_st (68.125), i.e. the data were shifted by 0.125 degrees
# relative to their labels.
ds_out = xr.Dataset(
    {
        "lat": (["lat"], np.arange(lat_st, lat_end, lat_siz)),
        "lon": (["lon"], np.arange(lon_st, lon_end, lon_siz)),
    }
)
# Build the bilinear interpolation weights from the CO source grid to the
# target grid and apply them to every variable in the dataset.
regridder = xmf.Regridder(co_data, ds_out, "bilinear")
# NOTE: this rebinds `co_data` to the regridded dataset, so the cell is not
# idempotent; re-run the loading cell before re-running this one.
co_data = regridder(co_data)

We extract the specific data we want.

In [None]:
# Pull the "ColumnAmountNO2" variable out of the (bilinear-regridded)
# dataset and materialise it as a plain NumPy array.
no2_grid = np.array(
    no2_data["ColumnAmountNO2"]
)

In [None]:
# Pull the "CO_dof_A" variable out of the (bilinear-regridded) dataset
# and materialise it as a plain NumPy array.
co_grid = np.array(
    co_data["CO_dof_A"]
)

We repeat the above, this time to generate a regridding under the patch scheme.

In [None]:
# Reload the raw (un-regridded) datasets: the bilinear cells above rebound
# `co_data` / `no2_data` to their regridded versions.
co_data = xr.open_mfdataset("/data0/rm3873/co_data/*.nc4",concat_dim='TIMERANGE',combine='nested')
co_data = co_data.rename({'Longitude': 'lon','Latitude': 'lat'})
no2_data = xr.open_mfdataset("/data0/rm3873/daily_no2_data/*.nc4",concat_dim='TIMERANGE',combine='nested')

# Target grid built from the config-cell constants so that the regridded
# values sit on exactly the lat/lon coordinates that the NetCDF writer cells
# store alongside them. The previous hard-coded longitude axis,
# np.arange(68, 98, .3125), started at 68.0 while the files are labelled
# starting at lon_st (68.125), i.e. the data were shifted by 0.125 degrees
# relative to their labels.
ds_out = xr.Dataset(
    {
        "lat": (["lat"], np.arange(lat_st, lat_end, lat_siz)),
        "lon": (["lon"], np.arange(lon_st, lon_end, lon_siz)),
    }
)

# Patch-scheme regridding of CO.
regridder_co = xmf.Regridder(co_data, ds_out, "patch")
co_data_patch = regridder_co(co_data)

# Patch-scheme regridding of NO2.
regridder_no2 = xmf.Regridder(no2_data, ds_out, "patch")
no2_data_patch = regridder_no2(no2_data)

# Extract the variables of interest as plain NumPy arrays.
patch_co_grid = np.array(co_data_patch["CO_dof_A"])
patch_no2_grid = np.array(no2_data_patch["ColumnAmountNO2"])

Let's setup the regridded NetCDF file with our desired dimensions.

In [None]:
# Open the land mask once (read-only). It is not used while writing the
# regridded products; the handle is kept under its original name.
# Previously it was re-opened on every loop iteration and never closed.
land_mask = Dataset(land_mask_path,mode='r',format='NETCDF4_CLASSIC')

# Explicit (output path, NetCDF variable name, data array) triples. Pairing
# each array with its path directly avoids dispatching on path equality,
# which picks the wrong array whenever two configured paths coincide.
regridded_products = [
    (binary_path_patch_co, 'co', patch_co_grid),
    (binary_path_bilinear_co, 'co', co_grid),
    (binary_path_patch_no2, 'no2', patch_no2_grid),
    (binary_path_bilinear_no2, 'no2', no2_grid),
]

for out_path, var_name, grid in regridded_products:
    # The context manager closes the file even if a write fails; the
    # previous version never closed the file it had just written.
    with Dataset(out_path, mode='w', format='NETCDF4_CLASSIC') as ncfile:
        # Fixed-size dimensions taken from the config cell.
        ncfile.createDimension('lat', target_grid_lats)
        ncfile.createDimension('lon', target_grid_lons)
        ncfile.createDimension('time', day_end - day_start)

        # Coordinate variables and their metadata.
        lat = ncfile.createVariable('lat', np.float32, ('lat',))
        lat.units = 'degrees_north'
        lat.long_name = 'latitude'
        lon = ncfile.createVariable('lon', np.float32, ('lon',))
        lon.units = 'degrees_east'
        lon.long_name = 'longitude'
        time = ncfile.createVariable('time', np.float64, ('time',))
        time.units = 'days of 2015'
        time.long_name = 'days_of_the_year'

        # 3D (time, lat, lon) variable holding the regridded field.
        data_var = ncfile.createVariable(var_name, np.float64, ('time', 'lat', 'lon'))
        data_var[:] = grid

        # Coordinate values.
        lat[:] = np.arange(lat_st, lat_end, lat_siz)
        lon[:] = np.arange(lon_st, lon_end, lon_siz)
        time[:] = np.arange(day_start, day_end)

Let's read in the simulated data (already matched for the GEOS-Chem target grid), and copy them into a dictionary of the species

In [None]:
# Latitude/longitude window applied to every simulated input file.
india_box = {"lat": slice(6, 36), "lon": slice(68, 98)}

raw_emission = xr.open_dataset("/data0/rm3873/dsi_india/daily_emission.nc").sel(**india_box)
raw_gas = xr.open_dataset("/data0/rm3873/dsi_india/daily_gas_column.nc").sel(**india_box)
raw_pm = xr.open_dataset("/data0/rm3873/dsi_india/daily_surface_pm25_RH50.nc").sel(**india_box)
raw_met = xr.open_dataset("/data0/rm3873/dsi_india/daily_meteo.nc").sel(**india_box)
raw_aod = xr.open_dataset("/data0/rm3873/dsi_india/daily_aod.nc").sel(**india_box)

# Combined natural-dust emission: sum of the four EmisDST*_Natural variables.
raw_emission["EmisDST_Natural"] = (
    raw_emission["EmisDST1_Natural"]
    + raw_emission["EmisDST2_Natural"]
    + raw_emission["EmisDST3_Natural"]
    + raw_emission["EmisDST4_Natural"]
)

# One dict per source dataset, in the same order as `sets` below; each key is
# a variable name to extract and each value starts out as an empty list.
feature_ml = [
    {"PM25": []},
    {'CO_trop': [], 'SO2_trop': [], 'NO2_trop': [], 'CH2O_trop': [], 'NH3_trop': []},
    {'AOT_C': [], 'AOT_DUST_C': []},
    {'T2M': [], 'PBLH': [], 'U10M': [], 'V10M': [], 'PRECTOT': [], 'RH': []},
    {
        'EmisDST_Natural': [],
        'EmisNO_Fert': [],
        'EmisNO_Lightning': [],
        'EmisNO_Ship': [],
        'EmisNO_Soil': [],
    },
]
sets = [raw_pm, raw_gas, raw_aod, raw_met, raw_emission]

# Replace every placeholder in `feature_ml` with the corresponding variable
# from its source dataset, materialised as an independent NumPy array.
# A single np.array(...) conversion yields the same array the previous
# element-by-element nested loops assembled, without the Python-level copying
# and without printing one counter line per time step.
for group, dataset in zip(feature_ml, sets):
    for spec in group:
        print(spec)
        # np.array copies, so later edits to the array do not touch `dataset`.
        group[spec] = np.array(dataset[spec])

Now we can simply apply our four regridded missing masks onto all these datasets!

In [None]:
# Regridded satellite products whose NaN pattern is used as a missing-data mask.
# The fourth key was previously a second "bilinear_regridded_co", which
# silently replaced the CO entry with the NO2 grid and meant no
# "bilinear_regridded_no2" files were ever written.
grid_dict = {
    "patch_regridded_co": patch_co_grid,
    "bilinear_regridded_co": co_grid,
    "patch_regridded_no2": patch_no2_grid,
    "bilinear_regridded_no2": no2_grid,
}

for grid_name, grid in grid_dict.items():
    # Index tuple (one index array per axis) of every NaN cell in this grid.
    missing_idx = np.nonzero(np.isnan(grid))

    for group in feature_ml:
        for spec, values in group.items():
            print(spec)

            # Mask a copy, so that `feature_ml` stays unmasked and each
            # grid's mask is applied independently. Masking in place made
            # every later grid's output also carry all earlier grids' NaNs.
            masked = values.copy()
            masked[missing_idx] = np.nan

            fname = '/data0/rm3873/' + grid_name + '_' + str(spec) + '.nc'
            # The context manager closes the file even if a write fails.
            with Dataset(fname, mode='w', format='NETCDF4_CLASSIC') as ncfile:
                # Fixed-size dimensions taken from the config cell.
                ncfile.createDimension('lat', target_grid_lats)
                ncfile.createDimension('lon', target_grid_lons)
                ncfile.createDimension('time', day_end - day_start)

                # Coordinate variables and their metadata.
                lat = ncfile.createVariable('lat', np.float32, ('lat',))
                lat.units = 'degrees_north'
                lat.long_name = 'latitude'
                lon = ncfile.createVariable('lon', np.float32, ('lon',))
                lon.units = 'degrees_east'
                lon.long_name = 'longitude'
                time = ncfile.createVariable('time', np.float64, ('time',))
                time.units = 'days of 2015'
                time.long_name = 'days_of_the_year'

                # 3D (time, lat, lon) variable holding the masked feature.
                key_var = ncfile.createVariable(spec, np.float64, ('time', 'lat', 'lon'))

                lat[:] = np.arange(lat_st, lat_end, lat_siz)
                lon[:] = np.arange(lon_st, lon_end, lon_siz)
                time[:] = np.arange(day_start, day_end)
                key_var[:] = masked