# CWatM input data

In [1]:
from pathlib import Path
from functools import reduce
import pandas as pd
import xarray as xr
from datetime import datetime as dt

from isimip_client.client import ISIMIPClient

In [2]:
# Root folder for the raw ISIMIP climate inputs. Each climate variable gets its
# own sub-folder below it. The path is relative, so it resolves against the
# working directory the notebook is run from.
RAW_CWATM_INPUT_FOLDER = Path("../data/raw/CWatM_input")

## Define function

In [3]:
def download_process_merge_ISIMIP_input(climate_variable,
                                        flux_conversion_factor=None,
                                        averaging_time=None):
    """Download ISIMIP3a GSWP3-W5E5 input files and aggregate them over 1975-2004.

    The function searches the ISIMIP repository for the requested climate
    variable, downloads the files whose names contain one of the decades
    overlapping 1975-2004, sums every ``gswp3-w5e5*.nc`` file found in the
    variable's sub-folder of ``RAW_CWATM_INPUT_FOLDER`` over the time axis and
    stores the aggregated result as a NetCDF file in that same sub-folder.

    Args:
        climate_variable: ISIMIP ``climate_variable`` specifier (for example
            ``"pr"``); also used as the name of the download sub-folder.
        flux_conversion_factor: Optional factor the summed data is multiplied
            by. ``None`` leaves the data unscaled.
        averaging_time: ``"day"`` divides the total by the number of days in
            the period, ``"year"`` divides it by the number of years; both
            store ``mean_1975_2004.nc``. ``None`` stores the plain total as
            ``sum_1975_2004.nc``.

    Raises:
        ValueError: If ``averaging_time`` is not ``None``, ``"day"`` or
            ``"year"``.
        FileNotFoundError: If no ``gswp3-w5e5*.nc`` file is available for the
            variable after the download step.
    """
    # Validate before any download starts, so a typo does not cost a full
    # download and then silently produce the "sum" file instead of a mean.
    if averaging_time not in (None, "day", "year"):
        raise ValueError(
            f"averaging_time must be None, 'day' or 'year', got {averaging_time!r}"
        )

    period_start = dt(1975, 1, 1)
    period_end = dt(2004, 12, 31)
    # Decade tags (as they appear in the file names) that overlap the period.
    decade_tags = ("1971_1980", "1981_1990", "1991_2000", "2001_2010")
    variable_folder = RAW_CWATM_INPUT_FOLDER.joinpath(climate_variable)

    client = ISIMIPClient()

    # Search the ISIMIP repository using specifiers.
    # Max's TIP: in data.isimip.org, use the "Facets" "Sidebar view" to get the specifier keys and values
    response = client.files(simulation_round="ISIMIP3a",
                            product="InputData",
                            climate_scenario="obsclim",
                            climate_forcing="gswp3-w5e5",
                            climate_variable=climate_variable
                            )

    # Download files
    listed_tags = set()
    for result in response["results"]:
        file_name = result["name"]
        matching_tags = [tag for tag in decade_tags if tag in file_name]

        if matching_tags:
            listed_tags.update(matching_tags)
            print(f"Downloading file {file_name}")
            client.download(result["file_url"], path=variable_folder)
        else:
            print(f"Skipping {file_name} due to none matching years")

    # NOTE(review): only the first response is inspected here. If the API
    # paginates its listing, a decade can be absent from it - TODO confirm.
    # Report it so an incomplete aggregate does not go unnoticed.
    missing_tags = [tag for tag in decade_tags if tag not in listed_tags]
    if missing_tags:
        print(f"Warning: no file listed for decade(s) {missing_tags}; "
              "the aggregate only covers files already present on disk")

    ## Process and merge files
    # Sorted so that the accumulation order (and hence floating-point
    # rounding) does not depend on the file system's listing order.
    files = sorted(variable_folder.glob("gswp3-w5e5*.nc"))
    if not files:
        raise FileNotFoundError(
            f"No 'gswp3-w5e5*.nc' files found in {variable_folder}"
        )

    time_slice = slice(period_start, period_end)
    data_sum = None

    for file in files:

        print(f"Processing file {file.name}")

        # The context manager closes the file handle; load() makes sure the
        # per-file total is held in memory before that happens.
        with xr.open_dataset(file) as data:
            file_sum = data.sel(time=time_slice).sum("time").load()

        data_sum = file_sum if data_sum is None else data_sum + file_sum

    print("Merging files")
    if flux_conversion_factor is not None:
        # Scaling the total once equals scaling every time step before
        # summing (up to floating-point rounding) and avoids a full-size copy.
        data_sum = data_sum * flux_conversion_factor

    if averaging_time is None:
        print("Storing 'sum_1975_2004.nc' file")
        data_sum.to_netcdf(variable_folder.joinpath("sum_1975_2004.nc"))
        return

    if averaging_time == "day":
        # Inclusive day count of the period.
        divisor = (period_end - period_start).days + 1
    else:
        # Number of calendar years covered by the period.
        divisor = period_end.year - period_start.year + 1

    data_mean = data_sum / divisor

    print("Storing 'mean_1975_2004.nc' file")
    data_mean.to_netcdf(variable_folder.joinpath("mean_1975_2004.nc"))

## Precipitation

In [None]:
CLIMATE_VARIABLE = "pr"

# The factor 86400 converts from kg/m²/s to mm/day; the total is then
# averaged per year.
download_process_merge_ISIMIP_input(
    CLIMATE_VARIABLE,
    flux_conversion_factor=86400,
    averaging_time="year",
)

## Temperature

In [None]:
CLIMATE_VARIABLE = "tas"

# No unit conversion; the total is averaged per day.
download_process_merge_ISIMIP_input(CLIMATE_VARIABLE, averaging_time="day")

## Temperature max

In [None]:
CLIMATE_VARIABLE = "tasmax"

# No unit conversion; the total is averaged per day.
download_process_merge_ISIMIP_input(CLIMATE_VARIABLE, averaging_time="day")

## Temperature min

In [None]:
CLIMATE_VARIABLE = "tasmin"

# No unit conversion; the total is averaged per day.
download_process_merge_ISIMIP_input(CLIMATE_VARIABLE, averaging_time="day")

## Surface air pressure

In [None]:
CLIMATE_VARIABLE = "ps"

# No unit conversion; the total is averaged per day.
download_process_merge_ISIMIP_input(CLIMATE_VARIABLE, averaging_time="day")

## Longwave radiation

In [None]:
CLIMATE_VARIABLE = "rlds"

# The factor 0.0864/2.45 converts from W/m² to mm/day; the total is then
# averaged per year.
download_process_merge_ISIMIP_input(
    CLIMATE_VARIABLE,
    flux_conversion_factor=0.0864 / 2.45,
    averaging_time="year",
)

## Shortwave radiation

In [None]:
CLIMATE_VARIABLE = "rsds"

# The factor 0.0864/2.45 converts from W/m² to mm/day; the total is then
# averaged per year.
download_process_merge_ISIMIP_input(
    CLIMATE_VARIABLE,
    flux_conversion_factor=0.0864 / 2.45,
    averaging_time="year",
)

## Wind

In [None]:
CLIMATE_VARIABLE = "sfcwind"

# No unit conversion; the total is averaged per day.
download_process_merge_ISIMIP_input(CLIMATE_VARIABLE, averaging_time="day")

## Relative humidity

In [None]:
CLIMATE_VARIABLE = "hurs"

# No unit conversion; the total is averaged per day.
download_process_merge_ISIMIP_input(CLIMATE_VARIABLE, averaging_time="day")

## Specific humidity

In [None]:
CLIMATE_VARIABLE = "huss"

# No unit conversion; the total is averaged per day.
download_process_merge_ISIMIP_input(CLIMATE_VARIABLE, averaging_time="day")

# Merge forcings

In [17]:
def _load_mean(climate_variable):
    """Read one variable's 'mean_1975_2004.nc' into memory and close the file."""
    mean_path = RAW_CWATM_INPUT_FOLDER.joinpath(climate_variable, "mean_1975_2004.nc")
    # load() pulls the data into memory so the dataset stays usable after the
    # context manager has released the file handle.
    with xr.open_dataset(mean_path) as dataset:
        return dataset.load()


# One dataset per forcing variable; the individual names are kept so that
# other cells can still refer to them.
pr_mean = _load_mean("pr")
tas_mean = _load_mean("tas")
tasmax_mean = _load_mean("tasmax")
tasmin_mean = _load_mean("tasmin")
ps_mean = _load_mean("ps")
rlds_mean = _load_mean("rlds")
rsds_mean = _load_mean("rsds")
sfcwind_mean = _load_mean("sfcwind")
hurs_mean = _load_mean("hurs")
huss_mean = _load_mean("huss")

# Combine all per-variable means into a single dataset.
data_mean = xr.merge([pr_mean,
                      tas_mean,
                      tasmax_mean,
                      tasmin_mean,
                      ps_mean,
                      rlds_mean,
                      rsds_mean,
                      sfcwind_mean,
                      hurs_mean,
                      huss_mean,
                      ])

# Create the output folder first so that writing does not fail when it does
# not exist yet.
forcings_path = Path("../data/processed/CWatM_data/forcings.nc")
forcings_path.parent.mkdir(parents=True, exist_ok=True)
data_mean.to_netcdf(forcings_path)