In [None]:
import os
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gpd
import glob 

In [None]:
# Glob pattern matching the .nc files to load (it is expanded with glob.glob
# further down).
# NOTE(review): despite its name this holds a pattern, not a folder, and it is
# an absolute, machine-specific path — adjust before running elsewhere.
folder_path = '/home/paulc600/scratch/HYPE_easymore/merged_easymore/*.nc'

In [None]:
# Directory that receives the exported text files (Pobs.txt and Tobs.txt).
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
output_folder_path = '/home/paulc600/local/'

In [None]:
# Expand the glob pattern into the list of matching .nc file paths
file_list = glob.glob(pathname=folder_path)

In [None]:
# Open all listed files as a single xarray dataset, combined according to
# their coordinate values
dataset = xr.open_mfdataset(
    paths=file_list,
    combine='by_coords',
)


In [None]:
# Pull the time, precipitation, temperature and ID variables out of the
# dataset, one name per variable
time, precipitation, temperature, ID = (
    dataset[variable_name]
    for variable_name in ('time', 'precipitation', 'temperature', 'ID')
)

In [None]:
# Load the catchment shapefile in which catchments without a river have been
# given an extra river ID
New_ID = gpd.read_file(
    filename='/home/paulc600/SMM/SMM HYPE files/Modified_SMMcat.shp'
)

In [None]:
# Display the shapefile contents for a visual check (no effect on the data)
New_ID

In [None]:
# Aggregate precipitation into daily totals along the time dimension.
# NOTE(review): confirm how days containing only missing values should be
# treated by this sum.
precipitation_daily = precipitation.resample({'time': 'D'}).sum('time')

In [None]:
# Aggregate temperature into daily averages along the time dimension
temperature_daily = temperature.resample({'time': 'D'}).mean('time')

In [None]:
# Turn the daily arrays into DataFrames whose value columns are named
# 'precipitation' and 'temperature' respectively
precipitation_df, temperature_df = (
    daily_values.to_dataframe(name=column_name)
    for daily_values, column_name in (
        (precipitation_daily, 'precipitation'),
        (temperature_daily, 'temperature'),
    )
)

In [None]:
# Move the index levels of both DataFrames into ordinary columns
precipitation_df, temperature_df = (
    frame.reset_index() for frame in (precipitation_df, temperature_df)
)

In [None]:
# Cast the 'ID' column of both data tables, and the 'seg_nhm' column of the
# shapefile, to integers.
# NOTE(review): 'hru_nhm', which is later used as the lookup key for these
# IDs, is not cast here — confirm its dtype matches.
for _frame, _column in (
    (precipitation_df, 'ID'),
    (temperature_df, 'ID'),
    (New_ID, 'seg_nhm'),
):
    _frame[_column] = _frame[_column].astype(int)

In [None]:
# Format precipitation and temperature data to 3 significant figures
precipitation_df['precipitation'] = precipitation_df['precipitation'].apply(lambda x: '{:.3g}'.format(x) if not np.isnan(x) else '')
temperature_df['temperature'] = temperature_df['temperature'].apply(lambda x: '{:.3g}'.format(x) if not np.isnan(x) else '')

In [None]:
# Pivot the DataFrames to have each ID as a separate column
precipitation_pivoted = precipitation_df.pivot(index='time', columns='ID', values='precipitation')
temperature_pivoted = temperature_df.pivot(index='time', columns='ID', values='temperature')

In [None]:
New_ID.set_index('hru_nhm').loc[precipitation_pivoted.columns, 'seg_nhm']

In [None]:
# replace ID with river ID from modified shapefile (including imaginary rivers)
precipitation_pivoted.columns=New_ID.set_index('hru_nhm').loc[precipitation_pivoted.columns, 'seg_nhm']
temperature_pivoted.columns=New_ID.set_index('hru_nhm').loc[temperature_pivoted.columns, 'seg_nhm']

In [None]:
# Convert precipitation from meters to millimeters
precipitation_pivoted *= 1000

In [None]:
# Display the precipitation table that is written to Pobs.txt below
precipitation_pivoted

In [None]:
# Display the temperature table that is written to Tobs.txt below
temperature_pivoted

In [None]:
# Save the DataFrames to tab-separated text files: a 'time' column followed by
# a separate column for each ID; missing values are written as empty fields.
# The output directory is created first so the export does not fail on a
# missing folder after all the processing above has already run.
os.makedirs(output_folder_path, exist_ok=True)
precipitation_pivoted.to_csv(os.path.join(output_folder_path, 'Pobs.txt'), sep='\t', na_rep='', index_label='time')
temperature_pivoted.to_csv(os.path.join(output_folder_path, 'Tobs.txt'), sep='\t', na_rep='', index_label='time')