In [None]:
import os
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gpd
import glob 

Before reading the `*.nc` files, use `cdo` to merge all the `.nc` files for each year; the merged files are then read below:

In [None]:
# FIXME: Put the `cdo` code here as an example

In [None]:
# Glob pattern (not a bare folder) selecting the .nc files to load;
# it is handed to glob.glob() when the file list is built.
folder_path = '/home/paulc600/scratch/HYPE_easymore/merged_easymore/*.nc'

In [None]:
# Directory that receives the generated text files (Pobs.txt and Tobs.txt)
output_folder_path = '/home/paulc600/local/'

In [None]:
# Collect every file matching the glob pattern.
# glob returns matches in arbitrary order, so sort them to keep runs reproducible,
# and fail right here with a clear message if nothing matched instead of
# letting the error surface later when the files are opened.
file_list = sorted(glob.glob(folder_path))
if not file_list:
    raise FileNotFoundError('No files match the pattern: {}'.format(folder_path))

In [None]:
# Open all listed files as a single xarray dataset, combined by their coordinate values
dataset = xr.open_mfdataset(paths=file_list, combine='by_coords')


In [None]:
# Names used to look things up in the easymore dataset.
# time_id is also used as the dimension that the daily resampling reduces over.
time_id = 'time'
# Identifier variable; its values become the column labels when the data is pivoted.
easymore_id = 'ID'
# Names of the precipitation and temperature data variables.
prec_esmr_name = 'precipitation'
temp_esmr_name = 'temperature'

In [None]:
# Pull the time, precipitation, temperature and ID entries out of the dataset,
# each looked up by its configured easymore name
time, precipitation, temperature, ID = (
    dataset[name]
    for name in (time_id, prec_esmr_name, temp_esmr_name, easymore_id)
)

In [None]:
# Path to the catchment shapefile (the modified version, if modifications were necessary);
# it is read with geopandas.
cat_path = '/home/paulc600/SMM/SMM HYPE files/Modified_SMMcat.shp'

In [None]:
# Load the catchment shapefile, which carries an extra river ID for catchments without rivers
New_ID = gpd.read_file(filename=cat_path)

In [None]:
# Sanity check: display the catchment table read from the shapefile
New_ID

In [None]:
# Aggregate precipitation to daily totals.
# The resample dimension comes from `time_id`, so it follows the configured
# easymore time name instead of a hard-coded `time=` keyword.
# NOTE(review): xarray's sum skips NaN by default, so a day whose values are all
# missing comes out as 0 rather than NaN — confirm that is the intended result.
precipitation_daily = precipitation.resample({time_id: 'D'}).sum(dim=time_id)

In [None]:
# Aggregate temperature to daily means.
# The resample dimension comes from `time_id`, so it follows the configured
# easymore time name instead of a hard-coded `time=` keyword.
temperature_daily = temperature.resample({time_id: 'D'}).mean(dim=time_id)

In [None]:
# FIXME: put your easymore and datatool configuration and scripts in a folder inside this repository!

In [None]:
# Turn each daily DataArray into a DataFrame whose value column is named
# after the corresponding easymore variable
precipitation_df, temperature_df = (
    daily.to_dataframe(name=label)
    for daily, label in (
        (precipitation_daily, prec_esmr_name),
        (temperature_daily, temp_esmr_name),
    )
)

In [None]:
# Move the index levels back into regular columns for both DataFrames
precipitation_df, temperature_df = (
    frame.reset_index() for frame in (precipitation_df, temperature_df)
)

In [None]:
# Assumptions about the column names in the catchment shapefile.
# riv_seg_str: column whose values replace the IDs as output column labels.
riv_seg_str = 'seg_nhm'
# cat_str: column used as the lookup index that the easymore IDs are matched against.
cat_str = 'hru_nhm'

In [None]:
# Cast the identifier columns to integers: the easymore ID in both data
# tables and the river-segment ID in the shapefile table
for frame, column in (
    (precipitation_df, easymore_id),
    (temperature_df, easymore_id),
    (New_ID, riv_seg_str),
):
    frame[column] = frame[column].astype(int)

In [None]:
def _format_3sig(value):
    """Render a number with 3 significant figures; NaN becomes an empty string."""
    return '' if np.isnan(value) else '{:.3g}'.format(value)


# Format precipitation and temperature values to 3 significant figures.
# After this step both columns hold strings, not numbers.
precipitation_df[prec_esmr_name] = precipitation_df[prec_esmr_name].apply(_format_3sig)
temperature_df[temp_esmr_name] = temperature_df[temp_esmr_name].apply(_format_3sig)

In [None]:
# Reshape from long to wide: one row per time step, one column per easymore ID
_wide_layout = {'index': time_id, 'columns': easymore_id}
precipitation_pivoted = precipitation_df.pivot(values=prec_esmr_name, **_wide_layout)
temperature_pivoted = temperature_df.pivot(values=temp_esmr_name, **_wide_layout)

In [None]:
# Preview the river-segment ID that each pivoted column label maps to
New_ID.set_index(cat_str)[riv_seg_str].loc[precipitation_pivoted.columns]

In [None]:
# Relabel the columns with the river ID from the modified shapefile
# (including imaginary rivers), looked up through the catchment ID
segment_by_catchment = New_ID.set_index(cat_str)[riv_seg_str]
precipitation_pivoted.columns = segment_by_catchment.loc[precipitation_pivoted.columns]
temperature_pivoted.columns = segment_by_catchment.loc[temperature_pivoted.columns]

In [None]:
# Convert precipitation from meters to millimeters.
# The values were turned into text when they were formatted to 3 significant
# figures, so `*= 1000` would repeat each string 1000 times instead of scaling
# it. Parse the cells back to numbers (blank cells become NaN), scale, then
# re-apply the same 3-significant-figure formatting (NaN -> '').
precipitation_mm = precipitation_pivoted.apply(pd.to_numeric, errors='coerce') * 1000
precipitation_pivoted = precipitation_mm.apply(
    lambda column: column.map(lambda x: '{:.3g}'.format(x) if not np.isnan(x) else '')
)

In [None]:
# Sanity check: display the pivoted precipitation table
precipitation_pivoted

In [None]:
# Sanity check: display the pivoted temperature table
temperature_pivoted

In [None]:
# Write each pivoted table as a tab-separated text file in the output folder:
# a leading 'time' column followed by one column per ID, blanks for missing values
for table, file_name in (
    (precipitation_pivoted, 'Pobs.txt'),
    (temperature_pivoted, 'Tobs.txt'),
):
    table.to_csv(os.path.join(output_folder_path, file_name), sep='\t', na_rep='', index_label='time')

Disclaimer: this workflow will eventually be fully automated and moved into a new Python package.