# Introduction

In this notebook, I test whether `Ostrich`'s `TPL` files can be created using `MESHFlow`'s Jinja2 templating engine.

This may require some changes in the `Jinja2` templating files to be able to accept strings as values instead of numbers.

In [12]:
import json
import re
from os import PathLike # Python 3.8+
from pathlib import Path
from typing import (
    Dict,
    List,
    Tuple,
    Union,
)

import meshflow as mf
import pandas as pd

We go ahead and call the templating functions directly. Before that, however, the `CLASS` file must be parsed to build the three distinct dictionaries that the templating functions expect.

In [2]:
# Path (relative to the notebook's working directory) of the CLASS
# parameter file that the parsing cells below read.
class_file = './wolf-creek-research-basin/mesh/MESH_parameters_CLASS.ini'

In [3]:
def _remove_comments(string) -> str:
    '''remove comment strings in the CLASS file strings'''
    #return re.sub(r'\s+\d{2}\s.*$', '', sections[0], flags=re.MULTILINE)
    return re.sub(r'\s+\d{2}\s(?:[^\n ]| (?! ))*$', '', string, flags=re.MULTILINE)

In [4]:
# split one CLASS block into its named sub-sections (plain strings)
def class_section_divide(section: str, **read_csv_kwargs) -> Dict[str, str]:
    """Split one CLASS computational-unit block into named sub-sections.

    The line positions are hard-coded because the CLASS block layout is
    positional; there is no marker to search for.

    Parameters
    ----------
    section : str
        Text of a single CLASS block, expected to hold at least 15 lines.
    **read_csv_kwargs
        Accepted for backward compatibility only; currently unused.

    Returns
    -------
    Dict[str, str]
        Keys, in order: ``veg1`` (lines 1-4), ``veg2`` (lines 5-7),
        ``hyd1`` (line 8), ``hyd2`` (line 9), ``soil`` (lines 10-12),
        ``prog1``, ``prog2`` and ``prog3`` (lines 13, 14 and 15).
        Multi-line entries are joined with a newline character.

    Raises
    ------
    ValueError
        If `section` has fewer than 15 lines.
    """
    lines = section.splitlines()

    # fail early with a readable message instead of a bare IndexError
    expected_lines = 15
    if len(lines) < expected_lines:
        raise ValueError(
            f"A CLASS block must have at least {expected_lines} lines; "
            f"got {len(lines)}."
        )

    return {
        # vegetation parameters
        'veg1': "\n".join(lines[:4]),
        'veg2': "\n".join(lines[4:7]),
        # surface/hydraulic parameters
        'hyd1': lines[7],
        'hyd2': lines[8],
        # soil parameters
        'soil': "\n".join(lines[9:12]),
        # prognostic parameters (an empty line simply yields "")
        'prog1': lines[12],
        'prog2': lines[13],
        'prog3': lines[14],
    }

In [5]:
def _parse_class_meta_data(
    case_section : str,
) -> Tuple[Dict, Dict]:
    """Parse the CLASS file's meta-data section to extract
    `info_entry` and `case_entry` dictionaries, necessary
    to run MESHFlow's `meshflow.utility.render_class_template`
    function.

    Parameters
    ----------
    case_section : str
        The section of the CLASS file that contains the meta-data.
        It should be a string containing the first four lines of the
        CLASS file, which are:
        - Title (not used here)
        - Author
        - Place
        - Case information (centroid latitude, longitude, reference heights, etc.)

    Returns
    -------
    info_entry : Dict
        ``author`` and ``location``, taken from the stripped second and
        third lines.
    case_entry : Dict
        Values taken from the whitespace-separated tokens of the fourth line:
        - ``centroid_lat``, ``centroid_lon``: tokens 1 and 2 (float)
        - ``reference_height_wndspd``: token 3 (float)
        - ``reference_height_spechum_airtemp``: token 4 (float)
        - ``reference_height_surface_roughness``: token 5 (float)
        - ``NL``, ``NM``: the last two tokens (int). Presumably the number
          of sub-basins and of GRU blocks respectively -- TODO confirm
          against the CLASS file format documentation.

    Raises
    ------
    ValueError
        If the section has fewer than four lines, or a token cannot be
        converted to the expected numeric type.
    IndexError
        If the case-information line has fewer than five tokens.
    """
    # drop trailing comment fields before any positional parsing
    lines = _remove_comments(case_section).splitlines()

    if len(lines) < 4:
        raise ValueError(
            "The CLASS meta-data section must have at least 4 lines "
            f"(title, author, place, case information); got {len(lines)}."
        )

    # hard-coded positions: line 0 is the title and is not needed
    author_line, place_line = lines[1], lines[2]
    case_tokens = lines[3].strip().split()

    # keys are hard-coded to match `MESHFlow`'s requirements
    info_entry = {
        "author": author_line.strip(),
        "location": place_line.strip(),
    }

    case_entry = {
        "centroid_lat": float(case_tokens[0]),
        "centroid_lon": float(case_tokens[1]),
        "reference_height_wndspd": float(case_tokens[2]),
        "reference_height_spechum_airtemp": float(case_tokens[3]),
        "reference_height_surface_roughness": float(case_tokens[4]),
        # NL and NM are read from the end of the line because other
        # tokens may sit between them and the reference heights
        "NL": int(case_tokens[-2]),
        "NM": int(case_tokens[-1]),
    }

    return info_entry, case_entry

In [6]:
def _determine_gru_type(
    line : str
) -> int:
    """
    Return the 1-based column index of the first numeric token that is exactly '1.000'
    (or numerically equal to 1.0 with three decimal places) in a line of mixed data.

    The line may contain:
      - Multiple spaces between columns
      - Trailing non-numeric descriptor fields (tokens containing any letter)
      - Other numeric fields (including things like 05)

    Parsing stops once a token containing any alphabetic character appears, assuming
    the remainder are descriptors rather than data columns.

    Args:
        line: A string containing whitespace-separated columns.

    Returns:
        The 1-based column number where the first 1.000 occurs, or None if not found.
    """
    tokens = line.strip().split()
    slice_len = min(5, len(tokens))
    
    # to track mixed GRU types and also 
    gru_type_sum = 0
    
    # iterate over the first line of the vegetation parameter section
    for i in range(slice_len):
        # if a distinct GRU, look for 1.000 value
        if tokens[i] == "1.000":
            return i + 1  # 1-based

        # Calculate the sum until this for loop breaks
        # or ends
        gru_type_sum += float(tokens[i])

    # FIXME: if sum equals to 1, then that means we deal with a mixed GRU
    #        type, and we will have to add the relevant feature to both
    #        MESHFlow and MESHFIAT;
    #        For now, find the first column without non-zero value
    if gru_type_sum == 1:
        for i in range(slice_len):
            if float(tokens[i]) > 0:
                return i + 1

    # Raise an error if it is not a valid CLASS field
    if gru_type_sum == 0:
        raise ValueError("Invalid CLASS GRU type")


In [7]:
def _parse_class_veg1(
    veg_section : str,
    gru_idx : int,
) -> Dict[str, float]:
    """
    """
    # the `veg_section` must only be 4 lines
    veg_lines = veg_section.splitlines()
    
    if len(veg_lines) != 4:
        raise ValueError("The vegetation section must have exactly 4 lines.")

    # gru index is the 1-based index of the GRU type
    # so the index of the first column is gru_idx - 1
    # for the 5th type, the second section of each block
    # will have a value of `0`
    idx = gru_idx - 1

    # please note that the parameters are hard-coded and match the inputs
    # of MESHFlow's `meshflow.utility.render_class_template` function.
    if 1 <= gru_idx <= 4: # non-barren-land types
        veg_params = {
            # first-line parameters of the block
            'fcan': float(veg_lines[0].strip().split()[idx]),
            'lamx': float(veg_lines[0].strip().split()[idx + 5]),
            # second-line parameters
            'lnz0': float(veg_lines[1].strip().split()[idx]),
            'lamn': float(veg_lines[1].strip().split()[idx + 5]),
            # third-line parameters
            'alvc': float(veg_lines[2].strip().split()[idx]),
            'cmas': float(veg_lines[2].strip().split()[idx + 5]),
            # fourth-line parameters
            'alic': float(veg_lines[3].strip().split()[idx]),
            'root': float(veg_lines[3].strip().split()[idx + 5]),
        }

    elif gru_idx == 5:
        veg_params = {
            # first-line parameters of the block
            'fcan': float(veg_lines[0].strip().split()[idx]),
            'lamx': 0.0,
            # second-line parameters
            'lnz0': float(veg_lines[1].strip().split()[idx]),
            'lamn': 0.0,
            # third-line parameters
            'alvc': float(veg_lines[2].strip().split()[idx]),
            'cmas': 0.0,
            # fourth-line parameters
            'alic': float(veg_lines[3].strip().split()[idx]),
            'root': 0.0,
        }

    else:
        raise ValueError("Invalid GRU index. Must be between 1 and 5.")

    return veg_params

def _parse_class_veg2(
    veg_section : str,
    gru_idx : int,
) -> Dict[str, float]:
    """
    """
    # the `veg_section` must only be 3 lines
    veg_lines = veg_section.splitlines()
    
    if len(veg_lines) != 3:
        raise ValueError("The vegetation section must have exactly 4 lines.")

    # gru index is the 1-based index of the GRU type
    # so the index of the first column is gru_idx - 1
    # for the 5th type, the second section of each block
    # will have a value of `0`
    idx = gru_idx - 1

    # please note that the parameters are hard-coded and match the inputs
    # of MESHFlow's `meshflow.utility.render_class_template` function.
    if 1 <= gru_idx <= 4: # non-barren-land types
        veg_params = {
            # first-line parameters of the block
            'rsmn': float(veg_lines[0].strip().split()[idx]),
            'qa50': float(veg_lines[0].strip().split()[idx + 5]),
            # second-line parameters
            'vpda': float(veg_lines[1].strip().split()[idx]),
            'vpdb': float(veg_lines[1].strip().split()[idx + 5]),
            # third-line parameters
            'psga': float(veg_lines[2].strip().split()[idx]),
            'psgb': float(veg_lines[2].strip().split()[idx + 5]),
        }

    elif gru_idx == 5:
        param_names = ['rsmn', 'qa50', 'vpda', 'vpdb', 'psga', 'psgb']
        veg_params = {k: 0.0 for k in param_names}

    else:
        raise ValueError("Invalid GRU index. Must be between 1 and 5.")

    return veg_params

def _parse_class_hyd1(
    hyd_line : str,
) -> Dict[str, float]:
    """Parse the first surface/hydraulic line of a CLASS block.

    Trailing comments are removed, the line is split on whitespace, and
    the first four tokens are converted to floats.

    Parameters
    ----------
    hyd_line : str
        The first hydraulic parameter line.

    Returns
    -------
    Dict[str, float]
        Keys, in order: ``drn``, ``sdep``, ``fare``, ``dd``. The names are
        hard-coded to match the inputs of
        `meshflow.utility.render_class_template`.
    """
    tokens = _remove_comments(hyd_line).strip().split()

    # parameter names in the order they appear on the line
    names = ('drn', 'sdep', 'fare', 'dd')

    return {name: float(tokens[pos]) for pos, name in enumerate(names)}

def _parse_class_hyd2(
    hyd_line : str,
) -> Dict[str, Union[float, str]]:
    """Parse the second surface/hydraulic line of a CLASS block.

    Trailing comments are removed and the line is split on whitespace.
    The first four tokens are converted to floats; every token from the
    sixth onwards is re-joined with single spaces into the ``mid`` text.

    Parameters
    ----------
    hyd_line : str
        The second hydraulic parameter line.

    Returns
    -------
    Dict[str, Union[float, str]]
        Keys, in order: ``xslp``, ``xdrainh``, ``mann``, ``ksat`` (floats)
        and ``mid`` (str, possibly empty). The names are hard-coded to
        match the inputs of `meshflow.utility.render_class_template`.
    """
    tokens = _remove_comments(hyd_line).strip().split()

    # numeric parameters in the order they appear on the line
    numeric_names = ('xslp', 'xdrainh', 'mann', 'ksat')
    hyd_params = {
        name: float(tokens[pos]) for pos, name in enumerate(numeric_names)
    }

    # NOTE(review): the fifth token (index 4) is deliberately skipped;
    # presumably it is a numeric identifier preceding the description --
    # confirm against the CLASS file format.
    hyd_params['mid'] = " ".join(tokens[5:])

    return hyd_params

def _parse_class_soil(
    soil_section : str,
) -> Dict[str, float]:
    """Parse the three-line soil section of a CLASS block.

    Trailing comments are removed, then the first three tokens of each of
    the first three lines are converted to floats.

    Parameters
    ----------
    soil_section : str
        The soil parameter section.

    Returns
    -------
    Dict[str, float]
        Keys, in order: ``sand1``-``sand3`` (first line), ``clay1``-``clay3``
        (second line), ``orgm1``-``orgm3`` (third line). The names are
        hard-coded to match the inputs of
        `meshflow.utility.render_class_template`.
    """
    cleaned = _remove_comments(soil_section)

    # one token list per line of the section
    rows = [row.strip().split() for row in cleaned.splitlines()]

    soil_params = {}
    # each line holds one property, with one value per layer
    for row_no, prefix in enumerate(('sand', 'clay', 'orgm')):
        for layer in range(3):
            soil_params[f'{prefix}{layer + 1}'] = float(rows[row_no][layer])

    return soil_params

def _parse_class_prog1(
    prog_line : str,
) -> Dict[str, float]:
    """Parse the first prognostic line of a CLASS block.

    Trailing comments are removed, the line is split on whitespace, and
    the first six tokens are converted to floats.

    Parameters
    ----------
    prog_line : str
        The first prognostic parameter line.

    Returns
    -------
    Dict[str, float]
        Keys, in order: ``tbar1``, ``tbar2``, ``tbar3``, ``tcan``,
        ``tsno``, ``tpnd``. The names are hard-coded to match the inputs
        of `meshflow.utility.render_class_template`.
    """
    tokens = _remove_comments(prog_line).strip().split()

    # parameter names in the order they appear on the line
    names = ('tbar1', 'tbar2', 'tbar3', 'tcan', 'tsno', 'tpnd')

    return {name: float(tokens[pos]) for pos, name in enumerate(names)}

def _parse_class_prog2(
    prog_line : str,
) -> Dict[str, float]:
    """Parse the second prognostic line of a CLASS block.

    Trailing comments are removed, the line is split on whitespace, and
    the first seven tokens are converted to floats.

    Parameters
    ----------
    prog_line : str
        The second prognostic parameter line.

    Returns
    -------
    Dict[str, float]
        Keys, in order: ``thlq1``, ``thlq2``, ``thlq3``, ``thic1``,
        ``thic2``, ``thic3``, ``zpnd``. The names are hard-coded to match
        the inputs of `meshflow.utility.render_class_template`.
    """
    cleaned = _remove_comments(prog_line)
    tokens = cleaned.strip().split()

    # parameter names in the order they appear on the line
    names = ('thlq1', 'thlq2', 'thlq3', 'thic1', 'thic2', 'thic3', 'zpnd')

    prog_params = {}
    for pos, name in enumerate(names):
        prog_params[name] = float(tokens[pos])

    return prog_params

def _parse_class_prog3(
    prog_line : str,
) -> Dict[str, float]:
    """Parse the third prognostic line of a CLASS block.

    Trailing comments are removed, the line is split on whitespace, and
    the first six tokens are converted to floats.

    Parameters
    ----------
    prog_line : str
        The third prognostic parameter line.

    Returns
    -------
    Dict[str, float]
        Keys, in order: ``rcan``, ``scan``, ``sno``, ``albs``, ``rhos``,
        ``gro``. The names are hard-coded to match the inputs of
        `meshflow.utility.render_class_template`.
    """
    tokens = _remove_comments(prog_line).strip().split()

    # parameter names in the order they appear on the line
    names = ('rcan', 'scan', 'sno', 'albs', 'rhos', 'gro')

    prog_params = {}
    for pos in range(len(names)):
        prog_params[names[pos]] = float(tokens[pos])

    return prog_params

In [8]:
def _analyze_class_file(
    class_file: Union[PathLike, str],
) -> Tuple[Dict, Dict, Dict]:
    """
    Analyze the CLASS file and return the three dictionaries needed by
    MESHFlow's `meshflow.utility.render_class_template` function.

    The file is split into sections at blank lines. The first section is
    parsed as meta-data, the last section is ignored (it holds the dates
    and does not matter for the analysis), and every section in between
    is parsed as one CLASS computational unit (GRU) block.

    Parameters
    ----------
    class_file : Union[PathLike, str]
        The path to the CLASS file to be analyzed (read as UTF-8).

    Returns
    -------
    case_entry : Dict
        Case information parsed from the meta-data section.
    info_entry : Dict
        Author and location parsed from the meta-data section.
    gru_entry : Dict
        One entry per GRU block, keyed by its 1-based position in the
        file. Each entry holds a ``class`` name followed by all parsed
        vegetation, hydraulic, soil, and prognostic parameters.
    """
    # read the text file
    text = Path(class_file).read_text(encoding="utf-8")

    # Split where there is at least one completely blank line (possibly with spaces)
    sections = re.split(r'\r?\n\s*\r?\n', text.strip())

    info_entry, case_entry = \
        _parse_class_meta_data(sections[0])

    # GRU type names, based on CLASS assumptions:
    #    1. needleleaf forest
    #    2. broadleaf forest
    #    3. cropland
    #    4. grassland
    #    5. urban, barren land, or impervious area
    class_name_dict = {
        1: "needleleaf",
        2: "broadleaf",
        3: "crop",
        4: "grassland",
        5: "urban",
    }

    # descriptions containing any of these are treated as water.
    # NOTE(review): this is a plain substring test, so e.g. 'ice' also
    # matches inside longer words -- confirm that this is acceptable.
    water_like_keywords = ('water', 'snow', 'ice')

    gru_entry = {}

    # iterating over the sections, skipping the first and the last one
    for idx, section in enumerate(sections[1:-1], start=1):
        # divide the section into a dictionary of sub-sections
        class_section = class_section_divide(section=section)

        # the first vegetation line tells which land-cover column is used
        section_landcover_type = _determine_gru_type(
            line=class_section['veg1'].splitlines()[0],
        )

        # parse the sub-sections -- hard-coded as there are no
        # other alternatives
        veg1_params = _parse_class_veg1(
            veg_section=class_section['veg1'],
            gru_idx=section_landcover_type,
        )
        veg2_params = _parse_class_veg2(
            veg_section=class_section['veg2'],
            gru_idx=section_landcover_type,
        )
        hyd1_params = _parse_class_hyd1(
            hyd_line=class_section['hyd1'],
        )
        hyd2_params = _parse_class_hyd2(
            hyd_line=class_section['hyd2'],
        )
        soil_params = _parse_class_soil(
            soil_section=class_section['soil'],
        )
        prog1_params = _parse_class_prog1(
            prog_line=class_section['prog1'],
        )
        prog2_params = _parse_class_prog2(
            prog_line=class_section['prog2'],
        )
        prog3_params = _parse_class_prog3(
            prog_line=class_section['prog3'],
        )

        # make sure to make an exception for water-like land covers
        description = hyd2_params['mid'].lower()
        if any(keyword in description for keyword in water_like_keywords):
            class_type = 'water'
        else:
            class_type = class_name_dict[section_landcover_type]

        # class type first, then the parameters in file order
        entry = {'class': class_type}
        for params in (
            veg1_params,
            veg2_params,
            hyd1_params,
            hyd2_params,
            soil_params,
            prog1_params,
            prog2_params,
            prog3_params,
        ):
            entry.update(params)

        gru_entry[idx] = entry

    return case_entry, info_entry, gru_entry

In [9]:
# Parse the CLASS file once; the tuple order is (case, info, gru).
case_entry, info_entry, gru_entry = _analyze_class_file(class_file=class_file)

Now that we have the necessary dictionaries, we can reproduce the CLASS file:

In [10]:
# Render the CLASS file from the three parsed dictionaries using
# MESHFlow's template renderer; the result is kept in `c`.
c = mf.utility.render_class_template(
    class_case=case_entry,
    class_info=info_entry,
    class_grus=gru_entry,
)

In [20]:
# Pretty-print the parsed GRU dictionary for inspection; JSON turns the
# integer GRU keys into strings.
print(json.dumps(gru_entry, indent=2,))

{
  "1": {
    "class": "needleleaf",
    "fcan": 1.0,
    "lamx": 1.27,
    "lnz0": 0.166,
    "lamn": 0.975,
    "alvc": 0.044,
    "cmas": 4.718,
    "alic": 0.169,
    "root": 1.21,
    "rsmn": 212.33,
    "qa50": 0.0,
    "vpda": 0.46,
    "vpdb": 0.0,
    "psga": 100.0,
    "psgb": 0.0,
    "drn": 1.0,
    "sdep": 2.0,
    "fare": 1.0,
    "dd": 50.0,
    "xslp": 0.03,
    "xdrainh": 0.37,
    "mann": 0.11,
    "ksat": 0.446,
    "mid": "Needleleaf Forest (temperate)",
    "sand1": 81.73,
    "sand2": 81.73,
    "sand3": 81.73,
    "clay1": 10.53,
    "clay2": 10.53,
    "clay3": 10.53,
    "orgm1": 0.0,
    "orgm2": 0.0,
    "orgm3": 0.0,
    "tbar1": 4.0,
    "tbar2": 2.0,
    "tbar3": 1.0,
    "tcan": 4.0,
    "tsno": 0.0,
    "tpnd": 4.0,
    "thlq1": 0.25,
    "thlq2": 0.15,
    "thlq3": 0.04,
    "thic1": 0.0,
    "thic2": 0.0,
    "thic3": 0.0,
    "zpnd": 0.0,
    "rcan": 0.0,
    "scan": 0.0,
    "sno": 0.0,
    "albs": 0.0,
    "rhos": 0.0,
    "gro": 1.0
  },
  "2": {


Let's also analyze the `hydrology` file and build that as well:

In [None]:
def hydrology_section_divide(
    hydrology_file: Union[PathLike, str],
) -> List[str]:
    """Divide the hydrology file into its sections (not implemented yet).

    Parameters
    ----------
    hydrology_file : Union[PathLike, str]
        The path to the hydrology file.

    Returns
    -------
    List[str]
        Intended to hold the sections of the file. The function is still
        a placeholder and currently returns ``None``.
    """
    # TODO: implement the section splitting

    return

In [None]:
def _analyze_hydrology(
    hydrology_file : Union[os.PathLike, str],
) -> Dict[str, Union[int, float]]:
    """
    """
    # the first block is the routing block

    return