# Introduction

In this notebook, I test whether `Ostrich`'s `TPL` files can be created using `MESHFlow`'s Jinja2 templating engine.

This may require some changes to the `Jinja2` template files so that they accept strings as values instead of numbers.

In [1]:
import math
from typing import (
    Dict,
    Tuple,
    Union,
)

import meshflow as mf

We go straight ahead and call the templating functions. Before that, however, the `CLASS` file must be parsed and the three distinct dictionaries that the templating functions need must be built from it.

In [2]:
# path to the CLASS parameter file that is read and split into sections below
class_file = './wolf-creek-research-basin/mesh/MESH_parameters_CLASS.ini'

In [3]:
# import libraries
import re

from pathlib import Path
from io import StringIO

import pandas as pd

# read the whole CLASS file into memory as one string
text = Path(class_file).read_text(encoding="utf-8")

# Split on every run of one or more blank (or whitespace-only) lines, so each
# element of `sections` is a group of consecutive non-blank lines.
# The text is stripped first so that no empty element appears at either end.
sections = re.split(r'\r?\n\s*\r?\n', text.strip())

# print the size of each section as a quick sanity check of the split
for i, sec in enumerate(sections, start=1):
    print(f"Section {i} has {len(sec)} characters")

Section 1 has 377 characters
Section 2 has 1438 characters
Section 3 has 1438 characters
Section 4 has 1438 characters
Section 5 has 1438 characters
Section 6 has 1438 characters
Section 7 has 1409 characters
Section 8 has 1409 characters
Section 9 has 1438 characters
Section 10 has 1438 characters
Section 11 has 1438 characters
Section 12 has 1438 characters
Section 13 has 1438 characters
Section 14 has 357 characters


In [4]:
def _remove_comments(string) -> str:
    '''remove comment strings in the CLASS file strings'''
    #return re.sub(r'\s+\d{2}\s.*$', '', sections[0], flags=re.MULTILINE)
    return re.sub(r'\s+\d{2}\s(?:[^\n ]| (?! ))*$', '', string, flags=re.MULTILINE)

In [5]:
# function to slice one CLASS block into a dictionary of named sub-sections
def class_section_divide(section: str, **read_csv_kwargs):
    """Divide the text of one CLASS block into its named parts.

    The line positions are hard-coded: the block is expected to hold at
    least 15 lines, otherwise an ``IndexError`` is raised.

    Parameters
    ----------
    section : str
        Text of a single CLASS block (lines separated by newlines).
    **read_csv_kwargs
        Accepted but not used by this function.

    Returns
    -------
    dict
        ``veg1`` (lines 1-4), ``veg2`` (lines 5-7) and ``soil``
        (lines 10-12) as newline-joined strings; ``hyd1``, ``hyd2``,
        ``prog1``, ``prog2`` and ``prog3`` (lines 8, 9, 13, 14, 15) as
        single-line strings.
    """
    rows = section.splitlines()

    def _joined(start, stop):
        # re-assemble a multi-line part of the block
        return "\n".join(rows[start:stop])

    return {
        # vegetation parameters
        'veg1': _joined(0, 4),
        'veg2': _joined(4, 7),
        # surface/hydraulic parameters
        'hyd1': rows[7],
        'hyd2': rows[8],
        # soil parameters
        'soil': _joined(9, 12),
        # prognostic parameters
        'prog1': rows[12],
        'prog2': rows[13],
        'prog3': rows[14],
    }

In [6]:
def _parse_class_meta_data(
    case_section : str,
) -> Tuple[Dict[str, str], Dict[str, Union[float, int]]]:
    """Parse the CLASS file's meta-data section to extract the
    `info_entry` and `case_entry` dictionaries, necessary
    to run MESHFlow's `meshflow.utility.render_class_template`
    function.

    Parameters
    ----------
    case_section : str
        The section of the CLASS file that contains the meta-data.
        It should be a string containing the first four lines of the
        CLASS file, which are:
        - Title
        - Author
        - Place
        - Case information (centroid latitude, longitude, reference heights, etc.)

    Returns
    -------
    info_entry : Dict
        ``author`` and ``location``: the stripped second and third lines.
    case_entry : Dict
        Values read from the whitespace-separated fourth line:
        - ``centroid_lat``, ``centroid_lon``: tokens 1 and 2, as float
        - ``reference_height_wndspd``, ``reference_height_spechum_airtemp``,
          ``reference_height_surface_roughness``: tokens 3 to 5, as float
        - ``NL``: second-to-last token, as int (presumably the number of
          sub-basins)
        - ``NM``: last token, as int (presumably the number of GRU blocks)

    Raises
    ------
    IndexError
        If the section has fewer than four lines or the case line has
        fewer than five tokens.
    ValueError
        If a token of the case line cannot be converted to a number.
    """
    # remove comments from the section and split it into lines only once
    header_lines = _remove_comments(case_section).splitlines()

    # hard-coded line positions within the section:
    # 0 = title (not needed), 1 = author, 2 = place, 3 = case information
    author_line = header_lines[1]
    place_line = header_lines[2]
    case_tokens = header_lines[3].split()

    # now building dictionaries that MESHFlow needs just here for
    # simplicity
    info_entry = {
        "author": author_line.strip(),
        "location": place_line.strip(),
    }

    # build `case_entry` key-value pairs, note that the keys are hard-coded
    # to match `MESHFlow`'s requirements
    case_entry = {
        "centroid_lat": float(case_tokens[0]),
        "centroid_lon": float(case_tokens[1]),
        "reference_height_wndspd": float(case_tokens[2]),
        "reference_height_spechum_airtemp": float(case_tokens[3]),
        "reference_height_surface_roughness": float(case_tokens[4]),
        # the two counters are read from the end of the line, so any
        # tokens between the fifth one and them are ignored
        "NL": int(case_tokens[-2]),
        "NM": int(case_tokens[-1]),
    }

    return info_entry, case_entry

In [7]:
def _determine_gru_type(
    line : str
) -> int:
    """
    Return the 1-based column index of the first numeric token that is exactly '1.000'
    (or numerically equal to 1.0 with three decimal places) in a line of mixed data.

    The line may contain:
      - Multiple spaces between columns
      - Trailing non-numeric descriptor fields (tokens containing any letter)
      - Other numeric fields (including things like 05)

    Parsing stops once a token containing any alphabetic character appears, assuming
    the remainder are descriptors rather than data columns.

    Args:
        line: A string containing whitespace-separated columns.

    Returns:
        The 1-based column number where the first 1.000 occurs, or None if not found.
    """
    tokens = line.strip().split()
    slice_len = min(5, len(tokens))
    
    # to track mixed GRU types and also 
    gru_type_sum = 0
    
    # iterate over the first line of the vegetation parameter section
    for i in range(slice_len):
        # if a distinct GRU, look for 1.000 value
        if tokens[i] == "1.000":
            return i + 1  # 1-based

        # Calculate the sum until this for loop breaks
        # or ends
        gru_type_sum += float(tokens[i])

    # FIXME: if sum equals to 1, then that means we deal with a mixed GRU
    #        type, and we will have to add the relevant feature to both
    #        MESHFlow and MESHFIAT;
    #        For now, find the first column without non-zero value
    if gru_type_sum == 1:
        for i in range(slice_len):
            if float(tokens[i]) > 0:
                return i + 1

    # Raise an error if it is not a valid CLASS field
    if gru_type_sum == 0:
        raise ValueError("Invalid CLASS GRU type")


In [8]:
def _parse_class_veg1(
    veg_section : str,
    gru_idx : int,
) -> Dict[str, float]:
    """
    """
    # the `veg_section` must only be 4 lines
    veg_lines = veg_section.splitlines()
    
    if len(veg_lines) != 4:
        raise ValueError("The vegetation section must have exactly 4 lines.")

    # gru index is the 1-based index of the GRU type
    # so the index of the first column is gru_idx - 1
    # for the 5th type, the second section of each block
    # will have a value of `0`
    idx = gru_idx - 1

    # please note that the parameters are hard-coded and match the inputs
    # of MESHFlow's `meshflow.utility.render_class_template` function.
    if 1 <= gru_idx <= 4: # non-barren-land types
        veg_params = {
            # first-line parameters of the block
            'fcan': float(veg_lines[0].strip().split()[idx]),
            'lamx': float(veg_lines[0].strip().split()[idx + 5]),
            # second-line parameters
            'lnz0': float(veg_lines[1].strip().split()[idx]),
            'lamn': float(veg_lines[1].strip().split()[idx + 5]),
            # third-line parameters
            'alvc': float(veg_lines[2].strip().split()[idx]),
            'cmas': float(veg_lines[2].strip().split()[idx + 5]),
            # fourth-line parameters
            'alic': float(veg_lines[3].strip().split()[idx]),
            'root': float(veg_lines[3].strip().split()[idx + 5]),
        }

    elif gru_index == 5:
        veg_params = {
            # first-line parameters of the block
            'fcan': float(veg_lines[0].strip().split()[idx]),
            'lamx': 0.0,
            # second-line parameters
            'lnz0': float(veg_lines[1].strip().split()[idx]),
            'lamn': 0.0,
            # third-line parameters
            'alvc': float(veg_lines[2].strip().split()[idx]),
            'cmas': 0.0,
            # fourth-line parameters
            'alic': float(veg_lines[3].strip().split()[idx]),
            'root': 0.0,
        }

    else:
        raise ValueError("Invalid GRU index. Must be between 1 and 5.")

    return veg_params

In [9]:
def _parse_class_veg2(
    veg_section : str,
    gru_idx : int,
) -> Dict[str, float]:
    """
    """
    # the `veg_section` must only be 3 lines
    veg_lines = veg_section.splitlines()
    
    if len(veg_lines) != 3:
        raise ValueError("The vegetation section must have exactly 4 lines.")

    # gru index is the 1-based index of the GRU type
    # so the index of the first column is gru_idx - 1
    # for the 5th type, the second section of each block
    # will have a value of `0`
    idx = gru_idx - 1

    # please note that the parameters are hard-coded and match the inputs
    # of MESHFlow's `meshflow.utility.render_class_template` function.
    if 1 <= gru_idx <= 4: # non-barren-land types
        veg_params = {
            # first-line parameters of the block
            'rsmn': float(veg_lines[0].strip().split()[idx]),
            'qa50': float(veg_lines[0].strip().split()[idx + 5]),
            # second-line parameters
            'vpda': float(veg_lines[1].strip().split()[idx]),
            'vpdb': float(veg_lines[1].strip().split()[idx + 5]),
            # third-line parameters
            'psga': float(veg_lines[2].strip().split()[idx]),
            'psgb': float(veg_lines[2].strip().split()[idx + 5]),
        }

    elif gru_index == 5:
        param_names = ['rsmn', 'qa50', 'vpda', 'vpdb', 'psga', 'psgb']
        veg_params = {k: 0.0 for k in param_names}

    else:
        raise ValueError("Invalid GRU index. Must be between 1 and 5.")

    return veg_params

In [10]:
def _parse_class_hyd1(
    hyd_line : str,
) -> Dict[str, float]:
    """Read the first surface/hydraulic line of a CLASS block.

    The trailing comment is removed, the line is split on whitespace and
    its first four tokens are converted to float.

    Parameters
    ----------
    hyd_line : str
        The line holding the DRN/SDEP/FARE/DD values.

    Returns
    -------
    Dict[str, float]
        Keys ``drn``, ``sdep``, ``fare`` and ``dd``, in that order; the
        names are hard-coded to match the inputs of MESHFlow's
        `meshflow.utility.render_class_template` function.
    """
    # drop the comment column, then tokenise on whitespace
    tokens = _remove_comments(hyd_line).strip().split()

    # token position -> parameter name
    names = ('drn', 'sdep', 'fare', 'dd')

    return {name: float(tokens[pos]) for pos, name in enumerate(names)}

def _parse_class_hyd2(
    hyd_line : str,
) -> Dict[str, Union[float, str]]:
    """Read the second surface/hydraulic line of a CLASS block.

    The trailing comment is removed and the line is split on whitespace.
    The first four tokens are converted to float; every token from the
    sixth one onwards is joined with single spaces into the ``mid`` text.
    The fifth token (``6`` in a line such as
    ``0.030 0.370 0.110 0.446 6 Mixed Forest``) is not returned.

    Parameters
    ----------
    hyd_line : str
        The line holding the XSLP/XDRAINH/MANN/KSAT/MID values.

    Returns
    -------
    Dict[str, Union[float, str]]
        ``xslp``, ``xdrainh``, ``mann`` and ``ksat`` as float, and ``mid``
        as a string (empty when the line has no sixth token); the names are
        hard-coded to match the inputs of MESHFlow's
        `meshflow.utility.render_class_template` function.

    Raises
    ------
    IndexError
        If the line has fewer than four tokens.
    ValueError
        If one of the first four tokens is not numeric.
    """
    # drop the comment column, then tokenise on whitespace
    tokens = _remove_comments(hyd_line).split()

    hyd_params = {
        'xslp': float(tokens[0]),
        'xdrainh': float(tokens[1]),
        'mann': float(tokens[2]),
        'ksat': float(tokens[3]),
        # tokens[4] is deliberately skipped; the rest is free text
        'mid': " ".join(tokens[5:]),
    }

    return hyd_params

In [None]:
def _parse_class_soil(
    soil_section : str,
) -> Dict[str, float]:
    """Unfinished placeholder for parsing the soil section of a CLASS block.

    FIXME(review): the body is an unmodified copy of `_parse_class_hyd2`.
    It never uses `soil_section` and reads the local name `hyd_line` before
    assigning it, so every call fails with ``UnboundLocalError``. The keys
    below are the hydraulic ones, not soil parameters. Presumably this
    should parse the three-line ``soil`` entry built by
    `class_section_divide` -- the parameter names expected by MESHFlow
    still need to be confirmed before implementing it.
    """

    # remove comments
    # FIXME(review): `hyd_line` is undefined here; `soil_section` is the input
    hyd_line = _remove_comments(hyd_line)

    # strip and split based on whitespace
    hyd_line = hyd_line.strip().split()

    # please note that the parameters are hard-coded and match the inputs
    # of MESHFlow's `meshflow.utility.render_class_template` function.
    # FIXME(review): these are the keys of the second hydraulic line
    veg_params = {
        'xslp': float(hyd_line[0]),
        'xdrainh': float(hyd_line[1]),
        'mann': float(hyd_line[2]),
        'ksat': float(hyd_line[3]),
        'mid': " ".join(hyd_line[5:])
    }

    return veg_params

In [11]:
sec = class_section_divide(section=sections[3])

In [12]:
sec['veg1']

'   0.500   0.400   0.000   0.100   0.000   1.270   4.148   0.000   1.964     05 5xFCAN/4xLAMX\n   0.166   0.475   0.000  -2.303   0.000   0.975   0.512   0.000   0.863     06 5xLNZ0/4xLAMN\n   0.044   0.049   0.000   0.042   0.000   4.718  23.583   0.000   2.240     07 5xALVC/4xCMAS\n   0.169   0.257   0.000   0.265   0.000   1.210   1.249   0.000   1.750     08 5xALIC/4xROOT'

In [13]:
_determine_gru_type(sec['veg1'].splitlines()[0])

1

In [14]:
_parse_class_meta_data(sections[0])

({'author': 'Kasra Keshavarz', 'location': 'University of Calgary'},
 {'centroid_lat': 51.59,
  'centroid_lon': -116.41,
  'reference_height_wndspd': 40.0,
  'reference_height_spechum_airtemp': 40.0,
  'reference_height_surface_roughness': 50.0,
  'NL': 41,
  'NM': 12})

In [15]:
_parse_class_veg1(
    veg_section=sec['veg1'],
    gru_idx=_determine_gru_type(sec['veg1'].splitlines()[0]),
)

{'fcan': 0.5,
 'lamx': 1.27,
 'lnz0': 0.166,
 'lamn': 0.975,
 'alvc': 0.044,
 'cmas': 4.718,
 'alic': 0.169,
 'root': 1.21}

In [16]:
_parse_class_veg2(
    veg_section=sec['veg2'],
    gru_idx=_determine_gru_type(sec['veg1'].splitlines()[0]),
)

{'rsmn': 212.33,
 'qa50': 37.506,
 'vpda': 0.46,
 'vpdb': 0.765,
 'psga': 100.0,
 'psgb': 5.0}

In [17]:
_parse_class_hyd1(
    hyd_line=sec['hyd1'],
)

   1.000   2.000   1.000  50.000                                             12 DRN/SDEP/FARE/DD


{'drn': 1.0, 'sdep': 2.0, 'fare': 1.0, 'dd': 50.0}

In [18]:
_parse_class_hyd2(
    hyd_line=sec['hyd2'],
)

   0.030   0.370   0.110   0.446  6  Mixed Forest                            13 XSLP/XDRAINH/MANN/KSAT/MID


{'xslp': 0.03,
 'xdrainh': 0.37,
 'mann': 0.11,
 'ksat': 0.446,
 'mid': 'Mixed Forest'}