# CWatM input data

In [15]:
from pathlib import Path
from functools import reduce
import pandas as pd
import xarray as xr
from datetime import datetime as dt

from isimip_client.client import ISIMIPClient

In [None]:
# Root folder for the raw CWatM input data; the download function below stores
# each climate variable in its own sub-folder of this path (relative path).
RAW_CWATM_INPUT_FOLDER = Path("../data/raw/CWatM_input")

## Define function

In [3]:
def download_process_merge_ISIMIP_input(climate_variable,
                                        flux_conversion_factor=None):
    """Download ISIMIP3a input files for one climate variable and store their sum over time.

    The function
    1. queries the ISIMIP repository for ``obsclim`` / ``gswp3-w5e5`` input
       files of ``climate_variable``,
    2. downloads the files whose name contains one of the periods
       1971_1980, 1981_1990, 1991_2000 or 2001_2010 into
       ``RAW_CWATM_INPUT_FOLDER/<climate_variable>``,
    3. selects the 1975-01-01 .. 2004-12-31 time slice of every
       ``gswp3-w5e5*.nc`` file in that folder, optionally multiplies it by
       ``flux_conversion_factor`` and sums it over the ``time`` dimension,
    4. adds up the per-file sums and writes the result to
       ``sum_1975_2004.nc`` in the same folder.

    Parameters
    ----------
    climate_variable : str
        ISIMIP ``climate_variable`` specifier (e.g. ``"pr"``); also used as
        the name of the sub-folder the files are stored in.
    flux_conversion_factor : float, optional
        Factor every value is multiplied with before summing. ``None``
        (default) leaves the values unchanged.

    Raises
    ------
    FileNotFoundError
        If the variable folder contains no ``gswp3-w5e5*.nc`` file to process.
    """
    client = ISIMIPClient()
    variable_folder = RAW_CWATM_INPUT_FOLDER.joinpath(climate_variable)

    # Search the ISIMIP repository using specifiers.
    # Max's TIP: in data.isimip.org, use the "Facets" "Sidebar view" to get the specifiers key and values
    response = client.files(simulation_round="ISIMIP3a",
                            product="InputData",
                            climate_scenario="obsclim",
                            climate_forcing="gswp3-w5e5",
                            climate_variable=climate_variable
                            )

    # Only the files whose name contains one of these periods are needed to
    # cover the 1975-2004 window.
    wanted_periods = ("1971_1980", "1981_1990", "1991_2000", "2001_2010")

    # Download files
    # NOTE(review): only response["results"] is examined; if the API paginates
    # its results, files on later pages would be missed -- confirm.
    for result in response["results"]:
        file_name = result["name"]
        if any(period in file_name for period in wanted_periods):
            print(f"Downloading file {file_name}")
            client.download(result["file_url"], path=variable_folder)
        else:
            print(f"Skipping {file_name} due to none matching years")

    ## Process and merge files
    # Sorted so the files are always processed (and summed) in the same order.
    files = sorted(variable_folder.glob("gswp3-w5e5*.nc"))
    if not files:
        raise FileNotFoundError(
            f"No 'gswp3-w5e5*.nc' files found in {variable_folder}"
        )

    # NOTE(review): the end bound is 2004-12-31 00:00, so time stamps later on
    # that day would be excluded -- confirm against the files' time axis.
    time_slice = slice(dt(1975, 1, 1), dt(2004, 12, 31))

    data_sum_list = []
    for file in files:

        print(f"Processing file {file.name}")

        # The context manager closes the file handle once its sum is computed.
        with xr.open_dataset(file) as data:
            data_sliced = data.sel(time=time_slice)
            if flux_conversion_factor is not None:
                data_sliced = data_sliced * flux_conversion_factor
            # load() makes sure the result is in memory before the file closes.
            data_sum_list.append(data_sliced.sum("time").load())

    print("Merging files")
    data_sum = reduce(lambda x, y: x + y, data_sum_list)

    print("Storing 'sum_1975_2004.nc' file")
    data_sum.to_netcdf(variable_folder.joinpath("sum_1975_2004.nc"))

## Precipitation

In [None]:
# Precipitation: convert from kg/m²/s to mm/day before summing.
CLIMATE_VARIABLE = "pr"
download_process_merge_ISIMIP_input(
    climate_variable=CLIMATE_VARIABLE,
    flux_conversion_factor=86400,
)

## Temperature

In [None]:
# Temperature: summed as provided, no conversion factor is passed.
CLIMATE_VARIABLE = "tas"
download_process_merge_ISIMIP_input(climate_variable=CLIMATE_VARIABLE)

## Temperature max

In [None]:
# Maximum temperature: summed as provided, no conversion factor is passed.
CLIMATE_VARIABLE = "tasmax"
download_process_merge_ISIMIP_input(climate_variable=CLIMATE_VARIABLE)

## Temperature min

In [None]:
# Minimum temperature: summed as provided, no conversion factor is passed.
CLIMATE_VARIABLE = "tasmin"
download_process_merge_ISIMIP_input(climate_variable=CLIMATE_VARIABLE)

## Surface air pressure

In [None]:
# Surface air pressure: summed as provided, no conversion factor is passed.
CLIMATE_VARIABLE = "ps"
download_process_merge_ISIMIP_input(climate_variable=CLIMATE_VARIABLE)

## Longwave radiation

In [None]:
# Longwave radiation: convert from W/m² to mm/day before summing.
CLIMATE_VARIABLE = "rlds"
download_process_merge_ISIMIP_input(
    climate_variable=CLIMATE_VARIABLE,
    flux_conversion_factor=0.0864/2.45,
)

## Shortwave radiation

In [None]:
# Shortwave radiation: convert from W/m² to mm/day before summing.
CLIMATE_VARIABLE = "rsds"
download_process_merge_ISIMIP_input(
    climate_variable=CLIMATE_VARIABLE,
    flux_conversion_factor=0.0864/2.45,
)

## Wind

In [None]:
# Wind: summed as provided, no conversion factor is passed.
CLIMATE_VARIABLE = "sfcwind"
download_process_merge_ISIMIP_input(climate_variable=CLIMATE_VARIABLE)

## Relative humidity

In [None]:
# Relative humidity: summed as provided, no conversion factor is passed.
CLIMATE_VARIABLE = "hurs"
download_process_merge_ISIMIP_input(climate_variable=CLIMATE_VARIABLE)

## Specific humidity

In [None]:
# Specific humidity: summed as provided, no conversion factor is passed.
CLIMATE_VARIABLE = "huss"
download_process_merge_ISIMIP_input(climate_variable=CLIMATE_VARIABLE)

# Process and aggregate

In [17]:
# Length of the 1975-2004 window, counted in days and in year-ends.
time_slice_days = len(pd.date_range(start=dt(1975, 1, 1), end=dt(2004, 12, 31), freq="d"))
time_slice_years = len(pd.date_range(start=dt(1975, 1, 1), end=dt(2004, 12, 31), freq="YE"))

# load_dataset reads each per-variable sum into memory and closes the file
# right away, so no handle on 'sum_1975_2004.nc' stays open (an open handle
# could get in the way of re-running the download cells, which rewrite them).
pr_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("pr", "sum_1975_2004.nc"))
tas_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("tas", "sum_1975_2004.nc"))
tasmax_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("tasmax", "sum_1975_2004.nc"))
tasmin_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("tasmin", "sum_1975_2004.nc"))
ps_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("ps", "sum_1975_2004.nc"))
rlds_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("rlds", "sum_1975_2004.nc"))
rsds_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("rsds", "sum_1975_2004.nc"))
sfcwind_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("sfcwind", "sum_1975_2004.nc"))
hurs_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("hurs", "sum_1975_2004.nc"))
huss_sum = xr.load_dataset(RAW_CWATM_INPUT_FOLDER.joinpath("huss", "sum_1975_2004.nc"))

# Variables that were converted to mm/day before summing are averaged per year ...
mean_per_year = xr.merge([pr_sum,
                          rlds_sum,
                          rsds_sum]) / time_slice_years
# ... all other variables are averaged per day.
mean_per_day = xr.merge([tas_sum,
                         tasmax_sum,
                         tasmin_sum,
                         ps_sum,
                         sfcwind_sum,
                         hurs_sum,
                         huss_sum]) / time_slice_days

data_mean = xr.merge([mean_per_year, mean_per_day])

# Make sure the output folder exists before writing into it.
PROCESSED_CWATM_FOLDER = Path("../data/processed/CWatM_data")
PROCESSED_CWATM_FOLDER.mkdir(parents=True, exist_ok=True)

data_mean.to_netcdf(PROCESSED_CWATM_FOLDER.joinpath("forcings.nc"))

data_mean.to_dataframe().to_parquet(PROCESSED_CWATM_FOLDER.joinpath("forcings.parquet"))