In [80]:
import os
import h5py
import pandas as pd
import numpy as np

from glob import glob

In [None]:
# Input list of RAS Folders

# Add as many folders as needed
ras_folder_list = []

# Give a save file name
save_file = 

In [None]:
class FlowExtraction:
    def __init__(self, folder_list, save_file) -> None:
        self.ras_folder_list = folder_list
        self.save_file = save_file

        self.plan_files = self.get_plan_files()
        self.plan_files = self.remove_hdfs()
        self.flow_list = self.hdf_attrb()[0]
        self.ref_line_names = self.hdf_attrb()[1]
        self.flow_extract()

    def get_plan_files(self):
        # Iterate over each RAS Folder and get all .p**.hdf files
        plan_files = []
        # This gives a list of all plan files in the selected RAS Folder
        plan_files = [
            file_path
            for folder in self.ras_folder_list
            for file_path in glob(os.path.join(folder, "*.p*.hdf"), recursive=True)
        ]
        return plan_files
    
    # Removes found HDF files which does not
    # contain any /Reference Lines path in it
    def remove_hdfs(self):
        """
        This function do the following:

        1. Read the HDF and removes all HDF files not containing a Reference Line node in it. 
        2. Updates the plan_files list only retains HDF files with Reference Lines
        
        """
        for plan in self.plan_files:
            with h5py.File(plan, 'r') as file:
                ref_line_path = '/Geometry/Reference Lines/Attributes'
                if ref_line_path not in file:
                    self.plan_files.remove(plan)
        
        return self.plan_files

    def hdf_attrb(self):
        """
        This function do the following:

        1. Reads the filtered HDF files
        2. Reads the Flow Title node and appends it, if it does not exist, in flow_list
        3. Reads the Reference Line Name node and appends unique values in the ref_list
        """
        flow_list = []
        for plan in self.plan_files:
            with h5py.File(plan, 'r') as file:
                plan_info = file['/Plan Data/Plan Information']
                ref_line_path = file['/Geometry/Reference Lines/Attributes']

                # Build a list of Flow Title
                flow_name = plan_info.attrs['Flow Title'].decode('utf-8')
                if flow_name not in flow_list:
                    flow_list.append(flow_name)
                
                # Build a list of Reference Line names
                ref_line_names = np.array(ref_line_path)
                ref_line_names = [x[0].decode('utf-8') for x in ref_line_names]
                ref_line_names = list(set(ref_line_names))

            
        return flow_list, ref_line_names
    
    def flow_extract(self):
        """
        This function 
        1. Extracts flow in each filtered HDF. 
        2. Creates an Excel file each sheet name corresponds to the Flow Title.
        3. Skips an HDF file if its error. 
        """
        hdf_error_list = []
        for plan in self.plan_files:
            try:
                    with h5py.File(plan, 'r') as file:
                            timestamp = [x.decode() for x in np.array(file['/Results/Unsteady/Output/Output Blocks/DSS Hydrograph Output/Unsteady Time Series/Time Date Stamp'])]
                            ts = np.abs(file[
                                    '/Results/Unsteady/Output/Output Blocks/'
                                    'DSS Hydrograph Output/Unsteady Time Series/Reference Lines/Flow'
                                    ])
                            
                            # Reference Line Names
                            ref_line_path = file['/Geometry/Reference Lines/Attributes']
                            ref_line_names = np.array(ref_line_path)
                            ref_line_names = [x[0].decode('utf-8') for x in ref_line_names]

                            # Flow Title
                            plan_info = file['/Plan Data/Plan Information']
                            flow_name = plan_info.attrs['Flow Title'].decode('utf-8')

                            df = pd.DataFrame(ts, index=timestamp, columns=ref_line_names)
                            df.index = pd.to_datetime(df.index, 
                                                    format='%d%b%Y %H:%M:%S'
                                                    )
                            if not os.path.exists(self.save_file):
                                    df.to_excel(save_file, sheet_name=flow_name)
                                    
                            else:
                                    with pd.ExcelWriter(self.save_file,
                                                            mode='a',
                                                            engine= 'openpyxl',
                                                            if_sheet_exists='replace') as writer:
                                            df.to_excel(writer, sheet_name=flow_name)
            except Exception:
                    print(f'Skipping {os.path.basename(plan)} due to error.')
                    continue



In [None]:
FlowExtraction(folder_list=ras_folder_list,save_file=save_file)

Skipping lagnas.p04.hdf due to error.


<__main__.NlexFlows at 0x1ff401d2990>