In [1]:
import os
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gpd
import glob 

In [2]:
# Glob pattern (not just a folder) matching the .nc input files; it is
# expanded with glob.glob when the file list is built.
folder_path = '/home/paulc600/scratch/Easymore_SMMCatchment/*.nc'

In [3]:
# Folder that receives the tab-separated Pobs.txt and Tobs.txt output files.
# The name contains a space, so it is always combined with os.path.join
# rather than pasted into a shell command.
output_folder_path = '/home/paulc600/local/HYPE Inputs/'

In [4]:
# Expand the glob pattern into a sorted list of .nc files.
# Sorting makes the file order deterministic (glob.glob returns matches in
# arbitrary order), and failing here gives a clearer message than an error
# raised later while the files are being opened.
file_list = sorted(glob.glob(folder_path))
if not file_list:
    raise FileNotFoundError('No .nc files matched: {}'.format(folder_path))

In [5]:
# Open all matched .nc files as a single dataset with xarray's open_mfdataset.
# combine='by_coords' asks xarray to combine the files based on their
# coordinate values.
dataset = xr.open_mfdataset(file_list, combine='by_coords')


In [6]:
# Pull the variables used by the rest of the notebook out of the combined
# dataset, in one pass over their names.
time, precipitation, temperature, ID = (
    dataset[variable_name]
    for variable_name in ('time', 'precipitation', 'temperature', 'ID')
)

In [7]:
# Read the modified catchment shapefile, in which catchments without rivers
# were given an extra river ID. Its hru_nhm and seg_nhm columns are used
# further down to relabel the data columns.
New_ID=gpd.read_file('/home/paulc600/SMM/SMM HYPE files/Modified_SMMcat.shp')

In [8]:
# Display the shapefile attribute table to check the hru_nhm / seg_nhm columns.
New_ID

Unnamed: 0,hru_nhm,seg_nhm,POI_ID,hru_id,hru_segmen,hru_id_tb,hru_segm_1,Type_NCA,HUC04,Coastal_HR,Shape_Leng,Shape_Area,geometry
0,113602,58183,6.500040e+13,113,3,3651,1723,0,0904,0,28637.472967,3.652090e+07,"POLYGON ((-112.87990 49.58915, -112.87988 49.5..."
1,113598,58184,6.500040e+13,109,4,3647,1724,0,0904,0,22917.257393,1.750700e+07,"POLYGON ((-112.84816 49.57293, -112.84776 49.5..."
2,113588,58185,6.500040e+13,99,5,3637,1725,0,0904,0,10941.686207,5.667200e+06,"POLYGON ((-112.83280 49.56569, -112.83283 49.5..."
3,113584,58186,6.500040e+13,95,6,3633,1726,0,0904,0,47581.126463,3.803350e+07,"POLYGON ((-112.83256 49.56479, -112.83243 49.5..."
4,113577,58188,6.500040e+13,88,8,3626,1728,0,0904,0,11865.104926,2.801800e+06,"POLYGON ((-112.93812 49.51740, -112.93809 49.5..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
468,114309,58671,2.300280e+13,699,0,4358,0,1,1005,0,202460.006602,3.860121e+08,"MULTIPOLYGON (((-108.52850 49.15148, -108.5286..."
469,114313,58672,2.300280e+13,703,0,4362,0,1,1005,0,295200.002203,9.586554e+08,"MULTIPOLYGON (((-108.67155 49.02478, -108.6714..."
470,114322,58673,2.300280e+13,712,0,4371,0,1,1005,0,188640.001102,2.876408e+08,"MULTIPOLYGON (((-108.96500 49.40234, -108.9650..."
471,114388,58674,2.300280e+13,778,0,4437,0,1,1005,0,105620.001701,2.727878e+08,"POLYGON ((-110.30901 48.72789, -110.30899 48.7..."


In [9]:
# Resample the precipitation array to daily totals.
# min_count=1 keeps a day as NaN when every value in it is missing. Without it
# the NaN-skipping sum reports such a day as 0, which would be written out as
# a genuine dry day instead of the blank that the formatting step uses for
# missing data (and that the daily temperature mean already yields).
# NOTE(review): the rendered output ends with a 1982-01-01 row; if the input
# holds only a few timestamps on that day its total is partial -- confirm.
precipitation_daily = precipitation.resample(time='D').sum(dim='time', min_count=1)

In [10]:
# Resample the temperature array to daily means: values are grouped into
# daily ('D') bins along 'time' and averaged within each bin.
temperature_daily = temperature.resample(time='D').mean(dim='time')

In [11]:
# Turn each daily DataArray into a DataFrame whose value column is named
# after the variable it holds.
precipitation_df, temperature_df = (
    daily_array.to_dataframe(name=column_name)
    for daily_array, column_name in (
        (precipitation_daily, 'precipitation'),
        (temperature_daily, 'temperature'),
    )
)

In [12]:
# Move the index levels of both DataFrames into ordinary columns so they can
# be referenced by name in the following steps.
# NOTE: run once per kernel session -- calling reset_index again on the result
# would insert a spurious 'index' column.
precipitation_df, temperature_df = (
    frame.reset_index() for frame in (precipitation_df, temperature_df)
)

In [13]:
# Cast the identifier columns to plain integers: 'ID' in both data tables and
# 'seg_nhm' in the shapefile table.
for _frame, _id_column in (
    (precipitation_df, 'ID'),
    (temperature_df, 'ID'),
    (New_ID, 'seg_nhm'),
):
    _frame[_id_column] = _frame[_id_column].astype(int)

In [14]:
def _to_3_sig_figs(value):
    """Return value as text with 3 significant figures, or '' when it is NaN."""
    if np.isnan(value):
        return ''
    return '{:.3g}'.format(value)


# Replace the numeric values with their 3-significant-figure text form.
# After this cell both value columns hold strings, so it must not be re-run
# on its own output (np.isnan does not accept strings).
precipitation_df['precipitation'] = precipitation_df['precipitation'].map(_to_3_sig_figs)
temperature_df['temperature'] = temperature_df['temperature'].map(_to_3_sig_figs)

In [15]:
# Reshape from long to wide: one row per time step, one column per ID.
# set_index + unstack is the long-hand form of DataFrame.pivot and, like it,
# fails if a (time, ID) pair occurs more than once.
precipitation_pivoted = (
    precipitation_df
    .set_index(['time', 'ID'])['precipitation']
    .unstack('ID')
)
temperature_pivoted = (
    temperature_df
    .set_index(['time', 'ID'])['temperature']
    .unstack('ID')
)

In [16]:
# Preview the lookup used to relabel the columns: for each ID column of the
# pivoted table, the seg_nhm of the shapefile row whose hru_nhm equals that ID.
New_ID.set_index('hru_nhm').loc[precipitation_pivoted.columns, 'seg_nhm']

ID
113490    58219
113492    58241
113494    58237
113496    58232
113497    58214
          ...  
114447    58610
114450    58389
114455    58570
114458    58579
114460    58465
Name: seg_nhm, Length: 473, dtype: int64

In [17]:
# Replace the catchment IDs (hru_nhm) in the column headers with the river IDs
# (seg_nhm) from the modified shapefile (including imaginary rivers).
# The lookup table is built once and every lookup is validated, so a catchment
# that is missing from the shapefile or listed more than once is reported
# explicitly instead of surfacing as a bare KeyError or a length mismatch.
hru_to_seg = New_ID.set_index('hru_nhm')['seg_nhm']
for pivoted in (precipitation_pivoted, temperature_pivoted):
    missing_ids = pivoted.columns.difference(hru_to_seg.index)
    if len(missing_ids) > 0:
        raise KeyError(
            'Column IDs not found in shapefile hru_nhm: {}'.format(list(missing_ids))
        )
    river_ids = hru_to_seg.loc[pivoted.columns]
    if len(river_ids) != len(pivoted.columns):
        raise ValueError(
            'hru_nhm is not unique for the data IDs; columns cannot be mapped one-to-one'
        )
    # Assigning the Series keeps its name, so the column axis is labelled 'seg_nhm'.
    pivoted.columns = river_ids

In [18]:
# Inspect the precipitation table after relabelling: rows are days, columns
# are seg_nhm river IDs, and the cells are the formatted strings, not floats.
precipitation_pivoted

seg_nhm,58219,58241,58237,58232,58214,58233,58213,58223,58221,58216,...,58448,58380,58342,58582,58405,58610,58389,58570,58579,58465
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1980-01-01,0.000248,0.00049,0.000477,0.000311,4.4e-05,0.000489,4.89e-05,0.000253,5.08e-05,0.000135,...,2.11e-05,1.92e-05,2.28e-05,3.37e-05,0,0,0,0,0,0
1980-01-02,0.000995,0.00119,0.00113,0.000397,0.00021,0.00101,0.000108,0.000345,7.29e-05,0.000212,...,0.000838,0.000904,0.000816,0.000667,0.000136,1.18e-05,8.14e-06,6.38e-05,6.14e-05,2.65e-05
1980-01-03,0.00443,0.00508,0.00467,0.000979,0.000361,0.00304,8.36e-05,0.00112,5.88e-05,0.000671,...,0.00148,0.0014,0.00147,0.00184,0.000359,0.000152,6.05e-05,9.33e-05,0.000161,0.000248
1980-01-04,0.00051,0.000564,0.000539,0.000253,0.000111,0.000505,4.73e-05,0.000215,2.97e-05,0.000118,...,0.00135,0.00168,0.00149,0.00214,0.000406,0.000297,0.000129,5.95e-05,3.99e-05,0.000104
1980-01-05,0.0151,0.0161,0.0159,0.0129,0.0103,0.0153,0.00997,0.0128,0.00995,0.0116,...,0.00707,0.00755,0.00709,0.00486,0.00433,0.00338,0.00242,0.00255,0.00285,0.00355
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1981-12-29,0.000204,0.00024,0.000228,0.000107,5.15e-05,0.00018,5.12e-05,0.000111,4.79e-05,8.52e-05,...,6.97e-05,5.81e-05,7.94e-05,0.000123,6.5e-05,3.34e-05,9.78e-05,5.64e-05,3.54e-05,3.65e-05
1981-12-30,0.000182,0.000279,0.000243,1.47e-05,9.12e-06,7.37e-05,3.79e-08,2.48e-05,7.5e-07,3.4e-06,...,2.26e-05,1.97e-05,2.45e-05,2.91e-05,4.43e-05,7.84e-06,5.72e-06,1.84e-05,2.16e-05,4e-05
1981-12-31,0.00181,0.0021,0.00205,0.00148,0.000715,0.00193,0.000745,0.00138,0.00089,0.00108,...,6.51e-05,4.87e-05,5.56e-05,5.26e-05,0.000207,3.52e-05,2.96e-05,0.000131,0.000206,0.000398
1982-01-01,0.0011,0.00132,0.00121,0.000315,0.000206,0.000767,0.000129,0.000334,0.00011,0.000221,...,0.00292,0.00289,0.00273,0.0024,0.000531,5.85e-05,4.73e-05,0.000177,0.000245,0.000336


In [19]:
# Inspect the temperature table after relabelling: rows are days, columns
# are seg_nhm river IDs, and the cells are the formatted strings, not floats.
temperature_pivoted

seg_nhm,58219,58241,58237,58232,58214,58233,58213,58223,58221,58216,...,58448,58380,58342,58582,58405,58610,58389,58570,58579,58465
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1980-01-01,-2.97,-3.42,-3.38,-2.91,-1.85,-3.13,-1.61,-2.94,-1.23,-2.27,...,-6.73,-6.68,-6.87,-5.87,-3.38,-7.18,-7.43,-7.43,-5.7,-4.81
1980-01-02,-3.88,-3.79,-3.87,-5.74,-6.07,-3.85,-6.65,-5.6,-6.42,-5.8,...,-7.58,-7.7,-7.57,-8.39,-8.53,-9.38,-9.51,-10.1,-10.4,-9.92
1980-01-03,-5.12,-5.57,-5.56,-5.5,-4.07,-5.54,-3.86,-5.38,-3.85,-4.63,...,-5.41,-5.43,-5.32,-5.87,-4.93,-5.85,-5.91,-6.12,-6.86,-6.6
1980-01-04,-9.05,-8.82,-8.89,-9.62,-10.9,-8.91,-11.4,-9.84,-9.79,-10.2,...,-8.45,-8.52,-8.11,-8.78,-11,-12.9,-13.6,-14.2,-12.3,-10.8
1980-01-05,-12.4,-12.6,-12.7,-13.5,-13,-13.1,-13.2,-13.3,-13.4,-13.3,...,-11.5,-11.5,-11.3,-12.8,-10.2,-11.2,-11.4,-13.2,-14.2,-14.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1981-12-29,-17.9,-17.6,-17.7,-18.9,-20.9,-18.2,-21.7,-18.9,-22,-20.3,...,-31,-30.7,-31.5,-30.7,-29.2,-31.4,-31.9,-32.3,-32.4,-29.2
1981-12-30,-17.7,-17.2,-17.4,-20.3,-19.4,-17.6,-19.7,-20.7,-19.6,-20.3,...,-28.5,-28.4,-29.4,-28.3,-24.1,-25,-24.7,-27.6,-28.4,-26.6
1981-12-31,-14.8,-14.5,-14.6,-15.8,-17.7,-14.5,-18.7,-16.3,-19.9,-17.9,...,-21.8,-21.5,-21.5,-21.2,-22.3,-22.4,-22.8,-23.9,-24.3,-23
1982-01-01,-15.4,-15.3,-15.4,-17.3,-17.5,-15.6,-18,-17.4,-19.1,-17.9,...,-30.3,-30.6,-30.4,-28.9,-25.4,-26.6,-27.2,-28,-28.9,-27.5


In [20]:
# Save the tables as tab-separated text files: a 'time' column followed by one
# column per river ID, with missing values left blank.
# The output folder is created first so the write does not fail when it does
# not exist yet.
os.makedirs(output_folder_path, exist_ok=True)
for pivoted_table, file_name in (
    (precipitation_pivoted, 'Pobs.txt'),
    (temperature_pivoted, 'Tobs.txt'),
):
    pivoted_table.to_csv(
        os.path.join(output_folder_path, file_name),
        sep='\t',
        na_rep='',
        index_label='time',
    )