In [1]:
import os
import sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import xarray as xr
from sklearn.preprocessing import QuantileTransformer
from scipy.ndimage import gaussian_filter

# Put the parent of the current working directory on the import path so that
# the `src` package can be imported below.
# NOTE(review): assumes the notebook is launched from its own directory — confirm.
base_dir = os.path.join(os.getcwd(), os.pardir)
sys.path.append(base_dir)

import src.preprocessing as preproc

In [None]:
# Locations of the four NetCDF inputs, relative to the working directory.
height_dataset_path = '../data/all_aod_global_updated_ds_06_06_2008.nc'
aod_dataset_path = '../data/emmisions_test_6.3_ACCMIP_200806.06_rad.nc'
st_dataset_path = '../data/emmisions_test_6.3_ACCMIP_200806.06_timelatlonlev_echam.nc'
dataset_path = '../data/emmisions_test_6.3_ACCMIP_200806.06_echam.nc'

# Numeric variable codes in the third file, mapped to readable names.
st_variable_names = {
    'var131': 'u',
    'var132': 'v',
    'var135': 'omega',
    'var151': 'slp',
    'var156': 'geopoth',
}

# Load every input through the project loader, in the same order as before.
height_dataset = preproc.load_dataset(height_dataset_path)
aod_dataset = preproc.load_dataset(aod_dataset_path)
st_dataset = preproc.load_dataset(st_dataset_path).rename(st_variable_names)
dataset = preproc.load_dataset(dataset_path)

def _on_dataset_time(field):
    """Return `field` with its `time` coordinate replaced by `dataset.time`.

    `dataset` is looked up at call time, so each call sees the current
    (possibly re-assigned) main dataset.
    """
    return field.assign_coords(time=dataset.time)


# Height field from the separate height file.
height = _on_dataset_time(height_dataset.height)
dataset = dataset.assign(height=height)

# Pressure built from `hyam`, `hybm` and `aps`
# (presumably hybrid level coefficients and surface pressure — TODO confirm),
# then re-indexed from `nhym` to `lev` and put in (time, lev, lat, lon) order.
pressure = dataset.hyam + dataset.hybm * dataset.aps
pressure_on_levels = pressure.swap_dims({'nhym': 'lev'})
pressure_on_levels = pressure_on_levels.transpose('time', 'lev', 'lat', 'lon')
dataset = dataset.assign(pressure=pressure_on_levels)

# `st` and `omega` both come from the renamed `st_dataset`.
st = _on_dataset_time(st_dataset.st)
dataset = dataset.assign(st=st)

omega = _on_dataset_time(st_dataset.omega)
dataset = dataset.assign(omega=omega)

# 550nm optical depth, plus a Gaussian-smoothed copy stored with the same dims.
aod = _on_dataset_time(aod_dataset.TAU_2D_550nm)
dataset = dataset.assign(TAU_2D_550nm=aod)

# A sigma of 0 on the first axis leaves that axis unsmoothed
# (assumes `aod` is 3D with time first — TODO confirm).
aod_smoothing_sigma = (0, 0.6, 0.6)
smooth_aod = gaussian_filter(aod, sigma=aod_smoothing_sigma, truncate=4)
dataset['SMOOTH_TAU_2D_550nm'] = (aod.dims, smooth_aod)

# 533nm `ALFA_AER` field from the same file as the optical depth.
bext = _on_dataset_time(aod_dataset.ALFA_AER_533nm)
dataset = dataset.assign(ALFA_AER_533nm=bext)

# Gaussianizing the 3D covariates
#
# Each covariate is flattened to 1D so the four fields can be stacked as the
# columns of one (n_samples, 4) matrix, which is what QuantileTransformer
# expects. The transformed columns are reshaped back and stored on `dataset`
# under `<name>_as_normal`.
pressure = dataset.pressure.values.flatten()
relhum = dataset.relhum.values.flatten()
temperature = dataset.st.values.flatten()
vertical_velocity = dataset.omega.values.flatten()

covariates_3d = np.stack([pressure, relhum, temperature, vertical_velocity], axis=1)

# Fixed seed so that the jitter and the transformer's internal subsampling,
# and therefore the file written at the end of the cell, are reproducible.
gaussianize_seed = 0
jitter_rng = np.random.default_rng(gaussianize_seed)

# Add a tiny random perturbation to every value
# (presumably to break ties before estimating quantiles — TODO confirm).
covariates_3d += np.finfo(np.float32).eps * jitter_rng.standard_normal(covariates_3d.shape)

# Map each column to a standard normal distribution through its empirical quantiles.
quantile_transformer = QuantileTransformer(output_distribution="normal",
                                           random_state=gaussianize_seed)
transformed_covariates_3d = quantile_transformer.fit_transform(covariates_3d)

# Column order must match the np.stack call above. Every output takes the dims
# AND shape of its own source variable; previously `omega_as_normal` was
# labelled with the dims of `st`.
gaussianized_targets = [('pressure', 'pressure_as_normal'),
                        ('relhum', 'relhum_as_normal'),
                        ('st', 'st_as_normal'),
                        ('omega', 'omega_as_normal')]
for column_index, (source_name, target_name) in enumerate(gaussianized_targets):
    source_field = dataset[source_name]
    dataset[target_name] = (source_field.dims,
                            transformed_covariates_3d[:, column_index].reshape(source_field.shape))

# Shortlist columns
# Only the variables listed here are kept in the file written below.
selected_columns = [
    'height',
    'pressure', 'pressure_as_normal',
    'st', 'st_as_normal',
    'relhum', 'relhum_as_normal',
    'omega', 'omega_as_normal',
    'TAU_2D_550nm', 'SMOOTH_TAU_2D_550nm',
    'ALFA_AER_533nm',
]
output_path = '../data/aodisaggregation_emmisions_test_6.3_ACCMIP_200806.06_echam.nc'

# Restrict the dataset to the shortlist and write it out as NetCDF.
dataset = dataset[selected_columns]
dataset.to_netcdf(output_path)