In [1]:
import re

def parse_inc_file(filepath: str) -> dict:
    # Define regular expressions for different parts of the content
    regex_class = re.compile(r'.*?CLASS$')
    regex_name = re.compile(r'.*?NAME.*?\"(.*?)\"')
    regex_group = re.compile(r'.*?GROUP.*?\"(.*?)\"')
    regex_expression = re.compile(r'.*?EXPRESSION.*?\((.*?)\)')
    regex_style = re.compile(r'.*?STYLE$')
    regex_color = re.compile(r'.*?COLOR\s*?(.*?)$')
    regex_end = re.compile(r'.*?END')

    # Read the file line by line
    with open(filepath, 'r') as file:
        lines = file.readlines()

    # Parse the file content
    parsed_data = []
    current_class = None
    end_count = 0
    for line in lines:
        line = line.strip()
        # pprint(line)

        # Match CLASS
        if regex_class.match(line):
            current_class = {}
            # pprint(current_class)
        # Match NAME
        elif (match := regex_name.match(line)):
            current_class['name'] = match.group(1)
            # pprint(current_class)

        # Match GROUP
        elif (match := regex_group.match(line)):
            current_class['group'] = match.group(1)
            # pprint(current_class)

        # Match EXPRESSION
        elif (match := regex_expression.match(line)):
            pattern = r"[-+]?\d*\.\d+|[-+]?\d+"
            floats = [float(num) for num in re.findall(pattern, match.group(1))]
            current_class['expression'] = floats
            # pprint(current_class)

        # Match STYLE
        elif regex_style.match(line):
            current_class['style'] = {}
            # pprint(current_class)

        # Match COLOR
        elif (match := regex_color.match(line)):
            ints = [int(num) for num in match.group(1).split()]
            web_color = f"#{ints[0]:02x}{ints[1]:02x}{ints[2]:02x}"
            current_class['style']['color'] = web_color
        # Match END
        elif regex_end.match(line):
            # print(current_class)
            if end_count == 0:
                end_count = 1
            else:
                end_count = 0
                parsed_data.append(current_class)    

    my_cmap = {}
    my_cmap['name'] = parsed_data[0]['group']
    my_cmap['values'] = [item['expression'][0] for item in parsed_data]
    my_cmap['color'] = [item['style']['color'] for item in parsed_data]
    my_cmap['values'].append(parsed_data[-1]['expression'][-1])
    return my_cmap
# display(parsed_data)
# len(parsed_data) 
my_file = '/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/ABSOLUTEVORTICITY.inc'
cmap_data = parse_inc_file(my_file)
cmap_data

{'name': 'ABSOLUTEVORTICITY',
 'values': [-0.0004,
  -0.00035,
  -0.0003,
  -0.00025,
  -0.0002,
  -0.00015,
  -0.0001,
  -5e-05,
  0.0,
  5e-05,
  0.0001,
  0.00015,
  0.0002,
  0.00025,
  0.0003,
  0.00035,
  0.0004,
  0.00045,
  0.0005,
  0.00055],
 'color': ['#808080',
  '#989898',
  '#a9a9a9',
  '#b8b8b8',
  '#c8c8c8',
  '#d3d3d3',
  '#e0e0e0',
  '#f5f5f5',
  '#ccffcc',
  '#66ff66',
  '#00cc00',
  '#99cc00',
  '#cccc33',
  '#ffff00',
  '#ffcc00',
  '#ff9933',
  '#ff3300',
  '#cc0000',
  '#800000']}

In [47]:
from thefuzz import fuzz, process

query = "ABSOLUTEVORTICITY"
search_results = ["description absolute vorticity with much more", "potential Vorticity", "Potential", "Temperature"]

matches = process.extractOne(query, search_results)
print(matches)

('description absolute vorticity with much more', 85)


In [40]:
<span style="color: #808080">This is an example of an HTML color in JupyterLab</span>
<span style="color: #989898">This is an example of an HTML color in JupyterLab</span>
<span style="color: #a9a9a9">This is an example of an HTML color in JupyterLab</span>
<span style="color: #b8b8b8">This is an example of an HTML color in JupyterLab</span>
<span style="color: #c8c8c8">This is an example of an HTML color in JupyterLab</span>
<span style="color: #d3d3d3">This is an example of an HTML color in JupyterLab</span>
<span style="color: #e0e0e0">This is an example of an HTML color in JupyterLab</span>
<span style="color: #f5f5f5">This is an example of an HTML color in JupyterLab</span>
<span style="color: #ccffcc">This is an example of an HTML color in JupyterLab</span>
<span style="color: #66ff66">This is an example of an HTML color in JupyterLab</span>
<span style="color: #00cc00">This is an example of an HTML color in JupyterLab</span>
<span style="color: #99cc00">This is an example of an HTML color in JupyterLab</span>
<span style="color: #cccc33">This is an example of an HTML color in JupyterLab</span>
<span style="color: #ffff00">This is an example of an HTML color in JupyterLab</span>
<span style="color: #ffcc00">This is an example of an HTML color in JupyterLab</span>
<span style="color: #ff9933">This is an example of an HTML color in JupyterLab</span>
<span style="color: #ff3300">This is an example of an HTML color in JupyterLab</span>
<span style="color: #cc0000">This is an example of an HTML color in JupyterLab</span>
<span style="color: #800000">This is an example of an HTML color in JupyterLab</span>

'#808080'

In [83]:
import os
import re
import json

# .inc file data model
class GeometIncFile:
    def __init__(self, filepath):
        self.filepath = filepath
        self.inc = dict()
        self.__nested_keys = list() # Current nested keys

    def __repr__(self):
        return f"{self.__class__.__name__}('{self.filepath}')"
    
    def _add_nested_key(self, k, is_list=False):
        self._set_nested_key_value(k, dict(), is_list)
        self.__nested_keys.append(k)
    
    def _remove_nested_key(self):
        self.__nested_keys.pop()
        
    def _set_nested_key_value(self, k, v, is_list=False):
        o = self.inc
        for key in self.__nested_keys:
            o = o[key] if type(o) == dict else o[-1][key]
        if type(o) == list:
            o = o[-1]
            
        if k in o:
            if type(o[k]) == dict:
                raise Exception(f"Invalid nested key '{k}' with value '{v}'. Key already exists")
            else:
                o[k].append(v)
        elif is_list:
            o[k] = [v]
        else:
            o[k] = v
        
    def toJSON(self):
        return json.dumps(self.inc, indent=2)

# Regular expressions to match & parse .inc files
class GeometIncRe:
    re_keyword = re.compile(r'^([A-Z]+)') # Keyword e.g. CLASS
    re_empty = re.compile(r'^$') # Nothing
    re_string_sq = re.compile(r"^'(.*)'$") # Single quote string e.g. 'test'
    re_string_dq = re.compile(r'^"(.*)"$') # Double quote string e.g. "test"
    re_int_list = re.compile(r'^((?:[+-]?\d+ *)+)(?:END)?$') # List of integers e.g. 1 2 3
    re_float_list = re.compile(r'^((?:[+-]?\d*\.?\d+ *)+)(?:END)?$') # List of floats e.g. 1.0 2.0 3.0
    re_alphanumeric = re.compile(r'^(\w+)$') # Alphanumeric sequence e.g. test_123
    re_expression = re.compile(r'^\((.+)\)$') # Expression in parentheses e.g. (...)
    re_bracket = re.compile(r'^\[(\w+)\]$') # Alphanumeric sequence in square brackets e.g. [test_123]
    re_inline_keywords = re.compile(r'^([A-Z]+) +.+$') # Multiple inline keywords e.g. CLASS NAME 'test'

# .inc files keywords with special structure features
class GeometIncKey:
    LIST_OF_BLOCK = ['CLASS', 'STYLE'] # Keywords with multiple sub-blocks
    END_OF_BLOCK = ['END'] # End of current block
    
# Enum for match types of keywords.
# Defines how to process the current key/value in GeometIncParser
class GeometIncMatch:
    BLOCK = 1 # Nested block start/end
    VALUE = 2 # Normal key/value
    INLINE = 3 # Inline block
    NO_MATCH = 4 # Regular expressions don't match
    
# Parser for Geomet .inc files
class GeometIncParser:
    # Preprocesses string value
    @staticmethod
    def __preprocess_value(v):
        return v.strip()
    
    # Tries to match passed value to regular expressions
    # Converts value to appropriate type
    # Returns dict of match_type (GeometIncMatch enum) and converted value
    @staticmethod
    def __match_re(v):
        match_type = GeometIncMatch.NO_MATCH
        converted_value = None
        
        if m := GeometIncRe.re_empty.match(v):
            match_type = GeometIncMatch.BLOCK
        elif m := GeometIncRe.re_string_sq.match(v):
            match_type = GeometIncMatch.VALUE
            converted_value = m.group(1)
        elif m := GeometIncRe.re_string_dq.match(v):
            match_type = GeometIncMatch.VALUE
            converted_value = m.group(1)
        elif m := GeometIncRe.re_int_list.match(v):
            match_type = GeometIncMatch.VALUE
            converted_value = [int(v) for v in m.group(1).split()]
        elif m := GeometIncRe.re_float_list.match(v):
            match_type = GeometIncMatch.VALUE
            converted_value = [float(v) for v in m.group(1).split()]
        elif m := GeometIncRe.re_alphanumeric.match(v):
            match_type = GeometIncMatch.VALUE
            converted_value = m.group(1)
        elif m := GeometIncRe.re_expression.match(v):
            match_type = GeometIncMatch.VALUE
            converted_value = m.group(1)
        elif m := GeometIncRe.re_bracket.match(v):
            match_type = GeometIncMatch.VALUE
            converted_value = m.group(1)
        elif m := GeometIncRe.re_inline_keywords.match(v):
            match_type = GeometIncMatch.INLINE
            converted_value = m.group(1)
        
        return dict(match_type=match_type, converted_value=converted_value)
    
    @staticmethod
    def __parse_inc_key_value(f, o, k, v):
        match_type, converted_value = GeometIncParser.__match_re(v).values()
        if match_type == GeometIncMatch.BLOCK:
            if k in GeometIncKey.LIST_OF_BLOCK:
                o._add_nested_key(k, True)
            elif k in GeometIncKey.END_OF_BLOCK:
                o._remove_nested_key()
            else:
                o._add_nested_key(k)
        elif match_type == GeometIncMatch.VALUE:
            o._set_nested_key_value(k, converted_value)
        elif match_type == GeometIncMatch.INLINE:
            GeometIncParser.__parse_inc_key_value(f, o, k, '')
            new_value = GeometIncParser.__preprocess_value(v[len(converted_value):])
            GeometIncParser.__parse_inc_key_value(f, o, converted_value, new_value)
        else:
            raise Exception(f"Invalid keyword '{k}' with value '{v}' in {f}")
            
    # Parses a .inc file and returns GeometIncFile object
    @staticmethod
    def parse_file(f):
        with open(f, 'r') as file:
            lines = file.readlines()
            
        result = GeometIncFile(f)
        for l in lines:
            l = GeometIncParser.__preprocess_value(l)
            if m := GeometIncRe.re_keyword.match(l):
                k = m.group(1)
                v = GeometIncParser.__preprocess_value(l[len(k):])
                GeometIncParser.__parse_inc_key_value(f, result, k, v)
                
        return result
    
    # Parses all .inc files in directory and returns a list of GeometIncFile objects
    @staticmethod
    def parse_dir(d):
        filepaths = [os.path.join(d, f) for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
        return [GeometIncParser.parse_file(f) for f in filepaths]

In [85]:
filepath = '/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/ABSOLUTEVORTICITY.inc'
o = GeometIncParser.parse_file(filepath)
print(o)
print(o.inc)
print(o.toJSON())

GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/ABSOLUTEVORTICITY.inc')
{'CLASS': [{'NAME': '-40 -35 1E-5 s-1', 'GROUP': 'ABSOLUTEVORTICITY', 'EXPRESSION': '[pixel] >= -0.000400 AND [pixel] < -0.000350', 'STYLE': [{'COLOR': [128, 128, 128]}]}, {'NAME': '-35 -30 1E-5 s-1', 'GROUP': 'ABSOLUTEVORTICITY', 'EXPRESSION': '[pixel] >= -0.000350 AND [pixel] < -0.000300', 'STYLE': [{'COLOR': [152, 152, 152]}]}, {'NAME': '-30 -25 1E-5 s-1', 'GROUP': 'ABSOLUTEVORTICITY', 'EXPRESSION': '[pixel] >= -0.000300 AND [pixel] < -0.000250', 'STYLE': [{'COLOR': [169, 169, 169]}]}, {'NAME': '-25 -20 1E-5 s-1', 'GROUP': 'ABSOLUTEVORTICITY', 'EXPRESSION': '[pixel] >= -0.000250 AND [pixel] < -0.000200', 'STYLE': [{'COLOR': [184, 184, 184]}]}, {'NAME': '-20 -15 1E-5 s-1', 'GROUP': 'ABSOLUTEVORTICITY', 'EXPRESSION': '[pixel] >= -0.000200 AND [pixel] < -0.000150', 'STYLE': [{'COLOR': [200, 200, 200]}]}, {'NAME': '-15 -10 1E-5 s-1', 'GROUP': 'ABSOLUTEVORTICITY', 'EXPRESSION': '[pixel] >= -0.0

In [86]:
dirpath = '/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class'
GeometIncParser.parse_dir(dirpath)

[GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/SEA_CURARROW_100MTO2000M.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/ALBEDO-LINEAR.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/DrainageArea_S.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/RADARURPPRECIPR8_Fr.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/SNOWMASS.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/Radar-Coverage_Inv-LightGray.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/HUMIDITYSPEC-50-200mb-LINEAR.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/RADAR_COVERAGE_BLUE-OUTLINE.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/TEMPERATURE-KELVIN.inc'),
 GeometIncFile('/home/mde000/ss6/geomet-parser/geomet/etc/mapserv/class/EATM-PM_KGM2.inc'),
 GeometIn