## Preprocess HSAF-HRES Data (part 2)

This script is used to 

- Fill in the missing hours (gaps) in `HSAF_pr.nc`
- Align `HSAF_pr.nc` with `HRES_pr.nc`
- Fix HSAF-HRES coordinates
- Remove negative values in both HRES and HSAF

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
from py_env_hpc import *

# Step 1: Fill data gaps in HSAF dataset
# Put HSAF on a gap-free hourly time axis. Hours that are absent from the
# file become time steps filled with NaN.
print("Step 1: Fill data gaps in HSAF dataset")
hsaf_path = ATMOS_DATA + "/HSAF_pr.nc"
variable = "pr"

hsaf_dataset = xr.open_dataset(hsaf_path)
hsaf_dataset_time_index = pd.to_datetime(hsaf_dataset.time.values)

# Build the complete hourly axis between the earliest and latest timestamp.
# min()/max() are used instead of [0]/[-1] so the range does not depend on
# the time axis being stored in sorted order, and the timestamps are passed
# directly instead of being round-tripped through str().
# "h" is the hourly alias; the uppercase "H" spelling is deprecated since
# pandas 2.2.
expected_time_range = pd.date_range(
    start=hsaf_dataset_time_index.min(),
    end=hsaf_dataset_time_index.max(),
    freq="h",
)

# Hours that are expected but not present in the file. Not used further in
# this step; kept as a diagnostic for interactive inspection.
missing_hours = expected_time_range[
    ~expected_time_range.isin(hsaf_dataset_time_index)
]

# reindex() inserts NaN for every newly created time step on its own, so no
# additional masking pass over the data is needed afterwards.
hsaf_dataset_filled = hsaf_dataset.reindex(time=expected_time_range)
HSAF = hsaf_dataset_filled
print("Data gaps are filled in.")

# Step 2: Align and save HSAF and HRES datasets
# Restrict both datasets to the same period, then force them onto identical
# indexes.
print("Step 2: Align HSAF and HRES datasets")

variable = "pr"
date_start, date_end = "2020-07-01T13", "2023-04-26T23"
hres_path = ATMOS_DATA + "/HRES_pr.nc"

# One label-based slice shared by both selections.
common_period = slice(date_start, date_end)
HRES = xr.open_dataset(hres_path).sel(time=common_period)
HSAF = HSAF.sel(time=common_period)

# join="override" rewrites the indexes of HSAF with those of HRES (the first
# argument); xarray raises if an index differs in size between the two.
HRES, HSAF = xr.align(HRES, HSAF, join="override")

# Encoding passed to to_netcdf() for the `qind` variable when HSAF is saved.
# NOTE(review): these look like placeholder values -- confirm that False is
# the intended _FillValue / missing_value for `qind`.
encoding = {"qind": {"_FillValue": False, "missing_value": False}}
print("Datasets aligned")

# Step 3: Clean up the lat/lon coordinates
# NOTE: the progress messages below say "integers", but the coordinates are
# rounded to two decimals, not cast to int. The message text is left
# unchanged so the printed output stays the same.
print("Step 3: Convert lat/lon to integers")


def _tidy_grid_coords(dataset):
    """Return *dataset* with rounded lat/lon and no negative-zero longitude.

    Latitude and longitude are rounded to two decimals. Longitude values
    equal to zero (which includes -0.0, since -0.0 == 0) are replaced by 0 so
    both datasets use the same representation of the zero meridian.

    The cleaned longitude is assigned as a ``(dims, data, attrs)`` tuple so
    the coordinate keeps its attributes; a bare NumPy array would carry none.
    """
    lat = dataset.latitude.round(2)
    lon = dataset.longitude.round(2)
    lon_values = np.where(lon.values == 0, 0, lon.values)
    return dataset.assign_coords(
        latitude=lat,
        longitude=(lon.dims, lon_values, lon.attrs),
    )


HRES = _tidy_grid_coords(HRES)
HSAF = _tidy_grid_coords(HSAF)
print("Latitude and longitude converted to integers.")

# Step 4: Replace negative values with zero
print("Step 4: Replace negative values with zero")

# clip(min=0) sets negative values to zero and leaves NaN (the filled gap
# hours) untouched. Unlike xr.where(cond, 0, data), it keeps the variable's
# attributes by default, so they are still present in the files written below.
HRES[variable] = HRES[variable].clip(min=0)
HSAF[variable] = HSAF[variable].clip(min=0)

# Write the results next to the inputs with a ".pp2" suffix; the input files
# themselves are not overwritten.
HSAF.to_netcdf(hsaf_path + ".pp2", mode="w", encoding=encoding)
HRES.to_netcdf(hres_path + ".pp2", mode="w")

Step 1: Fill data gaps in HSAF dataset
Data gaps are filled in.
Step 2: Align HSAF and HRES datasets
Datasets aligned
Step 3: Convert lat/lon to integers
Latitude and longitude converted to integers.
