# Introduction

In this notebook, I test whether `Ostrich` `TPL` files can be created with `MESHFlow`'s Jinja2 templating engine.

This may require changing the `Jinja2` template files so that they accept strings, rather than only numbers, as values.

In [1]:
import meshflow as mf

from typing import (
    Dict,
    Union,
)

We call the templating functions directly. Before doing so, however, we need to parse the `CLASS` file and build from it the three distinct dictionaries that the templating functions need.

In [2]:
# path to the CLASS parameter file that is read and split into sections below
class_file = './wolf-creek-research-basin/mesh/MESH_parameters_CLASS.ini'

In [3]:
# import libraries
import re

from pathlib import Path
from io import StringIO

import pandas as pd

# load the whole CLASS parameter file as a single string
text = Path(class_file).read_text(encoding="utf-8")

# sections are delimited by at least one blank (possibly whitespace-only) line
section_break = re.compile(r'\r?\n\s*\r?\n')
sections = section_break.split(text.strip())

# report the size of every section, numbering them from 1
for i, sec in enumerate(sections, start=1):
    print(f"Section {i} has {len(sec)} characters")

Section 1 has 377 characters
Section 2 has 1438 characters
Section 3 has 1438 characters
Section 4 has 1438 characters
Section 5 has 1438 characters
Section 6 has 1438 characters
Section 7 has 1409 characters
Section 8 has 1409 characters
Section 9 has 1438 characters
Section 10 has 1438 characters
Section 11 has 1438 characters
Section 12 has 1438 characters
Section 13 has 1438 characters
Section 14 has 357 characters


In [4]:
# function to split a CLASS section into its named sub-blocks (as strings)
def class_section_divide(section: str, **read_csv_kwargs):
    '''Split one CLASS section into its parameter sub-blocks.

    The split is purely positional: the line offsets of each sub-block
    are hard-coded, as there is no other way to tell them apart.

    Parameters
    ----------
    section : str
        Text of a single CLASS section.
    **read_csv_kwargs
        Accepted for backward compatibility; currently unused.

    Returns
    -------
    dict
        Keys ``veg1`` (lines 0-3), ``veg2`` (lines 4-6), ``hyd1``
        (line 7), ``hyd2`` (line 8), ``soil`` (lines 9-11) and
        ``prog1``/``prog2``/``prog3`` (lines 12, 13 and 14). Multi-line
        sub-blocks are joined with newlines. A prognostic line that is
        absent from `section` is returned as an empty string.

    Raises
    ------
    IndexError
        If `section` has fewer than 9 lines, i.e. the surface/hydraulic
        lines are missing.
    '''
    # split lines
    lines = section.splitlines()

    def _line_or_empty(index: int) -> str:
        # prognostic lines may be missing in short sections; the check
        # must be on the number of lines, not on the line's own length
        return lines[index] if index < len(lines) else ""

    # build a dictionary out of CLASS sections
    return {
        # vegetation parameters
        'veg1': "\n".join(lines[:4]),
        'veg2': "\n".join(lines[4:7]),
        # surface/hydraulic parameters
        'hyd1': lines[7],
        'hyd2': lines[8],
        # soil parameters
        'soil': "\n".join(lines[9:12]),
        # prognostic parameters
        'prog1': _line_or_empty(12),
        'prog2': _line_or_empty(13),
        'prog3': _line_or_empty(14),
    }

In [5]:
def _remove_comments(line) -> str:
    '''Remove trailing comment strings from CLASS file text.

    A comment is taken to start at a run of whitespace followed by a
    two-digit number and one more whitespace character; everything from
    that whitespace run up to the end of the line is dropped. The pattern
    is applied in multi-line mode, so `line` may contain several
    newline-separated lines.

    Parameters
    ----------
    line : str
        One or more lines of CLASS file text.

    Returns
    -------
    str
        `line` with the matched comment parts removed.
    '''
    # operate on the argument itself, not on the notebook-level `sections`
    return re.sub(r'\s+\d{2}\s.*$', '', line, flags=re.MULTILINE)

In [6]:
def _determine_gru_type(
    line : str
) -> Union[int, None]:
    """
    Return the 1-based column index of the GRU type encoded in a line of
    mixed data.

    Only the first five whitespace-separated tokens are examined. The line
    may contain:
      - Multiple spaces between columns
      - Trailing non-numeric descriptor fields (tokens containing any letter)
      - Other numeric fields (including things like 05)

    Parsing stops once a token containing any alphabetic character appears,
    assuming the remainder are descriptors rather than data columns.

    Args:
        line: A string containing whitespace-separated columns.

    Returns:
        The 1-based column number of the first token that is exactly
        '1.000'. If there is none but the examined values sum to 1 (a mixed
        GRU), the column of the first value greater than zero. None if the
        examined values sum to something other than 0 or 1.

    Raises:
        ValueError: If the examined values sum to 0 (including when there
            are no numeric tokens at all), or if a token without letters
            cannot be converted to a float.
    """
    # fractions seen so far; used to detect mixed GRU types
    fractions = []

    # iterate over the first five columns of the line
    for column, token in enumerate(line.split()[:5], start=1):
        # descriptors start here; nothing after this is a data column
        if any(ch.isalpha() for ch in token):
            break

        # if a distinct GRU, look for 1.000 value
        if token == "1.000":
            return column

        fractions.append(float(token))

    total = sum(fractions)

    # Raise an error if it is not a valid CLASS field
    if total == 0:
        raise ValueError("Invalid CLASS GRU type")

    # FIXME: if sum equals to 1, then that means we deal with a mixed GRU
    #        type, and we will have to add the relevant feature to both
    #        MESHFlow and MESHFIAT;
    #        For now, find the first column with a non-zero value
    # a tolerance is used because summed decimal fractions are not
    # guaranteed to be exactly 1.0 in floating point
    if abs(total - 1.0) <= 1e-6:
        for column, value in enumerate(fractions, start=1):
            if value > 0:
                return column

    # fractions that sum to neither 0 nor 1: no GRU type can be determined
    return None


In [7]:
def _parse_class_meta_data(
    case_section : str,
) -> Dict:
    """Extract the author and location entries from a CLASS section.

    Comments are first stripped with `_remove_comments`; the second and
    third lines of the result are then taken as the author and location.

    Args:
        case_section: Text of the section; at least three lines are needed.

    Returns:
        A dictionary with the keys "author" and "location", each holding
        the corresponding line with surrounding whitespace stripped.

    Raises:
        IndexError: If fewer than three lines are available.
    """
    # remove comments from the section and split it only once
    section_lines = _remove_comments(case_section).splitlines()

    # hard-coded values based on different lines of the CLASS file
    # the indices refer to line numbers in the section; the first line
    # (index 0) and anything after the third line are not needed here
    author_line = section_lines[1]
    place_line = section_lines[2]

    # now building dictionaries that MESHFlow needs just here for
    # simplicity
    info_entry = {
        "author": author_line.strip(),
        "location": place_line.strip(),
    }

    return info_entry

In [8]:
def _parse_class_veg1(
    veg_section : str,
    gru_type : int,
    cols_to_read : int = 9,
    **kwargs
):
    """Placeholder parser for the first block of vegetation parameters.

    The first vegetation block has 9 columns of data per line, and the
    rest of each line should be dropped. The GRU type is meant to be
    identified from the first "1.000" value in the first line.

    Parsing is not implemented yet: all arguments are ignored and
    ``None`` is returned.
    """
    # TODO: go over each line in this section
    return None

In [9]:
# split the fourth section (index 3) into its named sub-blocks
sec = class_section_divide(section=sections[3])

In [10]:
# determine the GRU type from the first line of the `veg1` sub-block
_determine_gru_type(sec['veg1'].splitlines()[0])

1

In [11]:
# build the author/location entry from the first section of the file
_parse_class_meta_data(sections[0])

{'author': 'Kasra Keshavarz', 'location': 'University of Calgary'}