# Input processing

Hydroclimatic variables typically exhibit a degree of temporal correlation. To enable the CNN to learn the temporal correlation between each input variable and its antecedent conditions, we stack the input image at time t on top of the images from the preceding time steps to form a 3-D volume. We set the number of lags to 2 (i.e., t − 1 and t − 2) after preliminary experiments; thus, each input volume has dimensions 128 × 128 × 3 (the image at time t plus its two lagged images).

In [1]:
#!pip install rioxarray xarray numpy

import rioxarray
import xarray as xr
import numpy as np

#### 1. Stack TWSA<sub>GLDAS</sub>, precipitation and temperature

In [2]:
def _standardise(da):
    """Return ``da`` centred on its own mean and scaled by its own std.

    The statistics come from ``da.mean()`` / ``da.std()`` called without
    arguments, exactly as in the inline formula this helper replaces.
    """
    return (da - da.mean()) / da.std()


# Open the processed GLDAS file and order its dimensions as (lat, lon, time)
# so that the last axis indexes time in the cells below.
gldas_ds = xr.open_mfdataset(
    './data/GLDAS/processed/TWSA_M_2002_2016_India_03125.nc4'
).transpose("lat", "lon", "time")

# Normalise each input variable independently (z-score).
# NOTE: dividing each variable by its maximum was tried as an alternative
# scaling and is intentionally not used here.
twsa_gldas_da = _standardise(gldas_ds['twsa_gldas'])
temp_da = _standardise(gldas_ds['tair_f_inst'])
precip_da = _standardise(gldas_ds['rainf_f_tavg'])

In [3]:
# Swap every NaN for the sentinel 1e-7 in all three inputs; the sentinel is
# meant to be picked up as the mask value in Keras downstream.
twsa_gldas_da, temp_da, precip_da = (
    da.fillna(1e-7) for da in (twsa_gldas_da, temp_da, precip_da)
)

In [4]:
import os

# Number of antecedent time steps stacked behind each image (t-1, t-2).
N_LAGS = 2

# Length of the time axis (arrays are indexed as [lat, lon, time]).
T = twsa_gldas_da.shape[2]


def _stack_with_lags(values, t, n_lags=N_LAGS):
    """Stack the slices at t, t-1, ..., t-n_lags along a new last axis.

    Parameters
    ----------
    values : array indexed as ``values[:, :, t]``.
    t : index of the most recent slice; must be at least ``n_lags``.
    n_lags : number of antecedent slices to include.

    Returns
    -------
    numpy.ndarray of shape ``values.shape[:2] + (n_lags + 1,)`` with the
    slice at ``t`` first, followed by ``t-1``, ``t-2``, ...
    """
    return np.stack([values[:, :, t - lag] for lag in range(n_lags + 1)], axis=-1)


# Convert each variable to an in-memory NumPy array once. If the DataArrays
# are lazily evaluated, slicing them inside the loop would re-run the whole
# computation for every single slice.
twsa_values = np.asarray(twsa_gldas_da)
precip_values = np.asarray(precip_da)
temp_values = np.asarray(temp_da)

# np.save does not create missing directories, so make sure they exist.
for _subdir in ('twsa', 'precip', 'temp'):
    os.makedirs(f'./data/processed_input/{_subdir}', exist_ok=True)

# start with t=3 as t=0 is an empty entry: the earliest usable lag is t=1,
# so the first complete (t, t-1, t-2) stack is at t = N_LAGS + 1.
for t in range(N_LAGS + 1, T):
    twsa_image = _stack_with_lags(twsa_values, t)
    np.save(f'./data/processed_input/twsa/image_{t}.npy', twsa_image)

    precip_image = _stack_with_lags(precip_values, t)
    np.save(f'./data/processed_input/precip/image_{t}.npy', precip_image)

    temp_image = _stack_with_lags(temp_values, t)
    np.save(f'./data/processed_input/temp/image_{t}.npy', temp_image)