In [2]:
import re
from word2number import w2n

def is_number(s):
    """Return True when *s* is an unsigned whole-number or decimal literal.

    The string must consist of one or more digits, optionally followed by a
    dot and one or more further digits (``"12"``, ``"12.5"``).  Signs,
    exponents, a leading or trailing dot, and any surrounding whitespace
    cause a ``False`` result.
    """
    # Whole number, or decimal with digits on both sides of the dot.
    numeric_literal = re.compile(r'^(\d+)(\.(\d+))?$')
    return numeric_literal.fullmatch(s) is not None


def text_to_number(text):
    """Convert a phrase of English number words and/or digit literals to a number.

    The phrase is lower-cased, hyphens are treated as spaces, and the result
    is split on whitespace.  Each token is then handled as follows:

    * a digit literal (``"12"`` or ``"12.5"``) is added to the running group,
      as an ``int`` when it has no fractional part and as a ``float`` otherwise;
    * a unit word (zero..nineteen) or tens word (twenty..ninety) is added to
      the running group;
    * ``"hundred"`` multiplies the running group by 100;
    * a larger scale word (thousand..trillion) multiplies the running group,
      adds it to the total, and starts a new group;
    * any other token (for example ``"and"``) is ignored.

    Args:
        text: The phrase to convert, e.g. ``"three hundred and fifty"``.

    Returns:
        The numeric value: an ``int`` unless a decimal literal was present,
        in which case a ``float``.  A phrase with no recognised token gives 0.

    NOTE: a scale word with nothing before it multiplies an empty group, so
    ``"hundred"`` on its own evaluates to 0.
    """
    units = {"zero": 0, "one": 1, "two": 2, "three": 3, "four": 4, "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
             "ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13, "fourteen": 14, "fifteen": 15, "sixteen": 16,
             "seventeen": 17, "eighteen": 18, "nineteen": 19}
    tens = {"twenty": 20, "thirty": 30, "forty": 40, "fifty": 50, "sixty": 60, "seventy": 70, "eighty": 80, "ninety": 90}
    scales = {"hundred": 100, "thousand": 1000, "million": 1000000, "billion": 1000000000, "trillion": 1000000000000}

    num_words = text.replace("-", " ").lower().split()
    current = result = 0
    for word in num_words:
        # Classify a digit literal exactly once.  Two separate checks would
        # both accept an integer token such as "12" and add it twice (and as
        # a float), turning "12" into 24.0.
        literal = re.fullmatch(r"(\d+)(\.\d+)?", word)
        if literal:
            # No fractional group -> keep it an int so callers can tell
            # whole numbers from decimals.
            current += int(word) if literal.group(2) is None else float(word)
        elif word in units:
            current += units[word]
        elif word in tens:
            current += tens[word]
        elif word in scales:
            current *= scales[word]
            # "hundred" stays inside the current group ("three hundred fifty");
            # larger scales close the group and flush it into the total.
            if word != "hundred":
                result += current
                current = 0
    return result + current

def encode_numbers_in_text(text):
    """Wrap each number found in *text* in ``<se>…<de>…<fe>`` markers.

    A number is either a digit literal (optionally with a decimal part) or a
    run of English number words, which may be joined by spaces, hyphens and
    an optional "and".  Every match is converted with ``text_to_number`` and
    replaced by ``<se>SIZE<de>ORIGINAL<fe>`` where ORIGINAL is the matched
    text (stripped) and SIZE is:

    * the number of characters in ``str(value)`` when the value is an int;
    * ``"W.F"`` otherwise, W and F being the lengths of the parts of
      ``str(value)`` before and after the decimal point.

    If anything goes wrong while converting a match, that match is left in
    the text unchanged.

    Args:
        text: The input string to scan.

    Returns:
        A new string with every recognised number wrapped in markers.
    """
    num_word_pattern = re.compile(
        r"\b(\d+(\.\d+)?|zero|one|two|three|four|five|six|seven|eight|nine|ten|"
        r"eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|"
        r"twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|"
        r"hundred|thousand|million|billion|trillion)"
        r"(?:[-\s]+(?:and)?[\s]*(?:zero|one|two|three|four|five|six|seven|eight|nine|ten|"
        r"eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|"
        r"twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion))*\b",
        re.IGNORECASE
    )

    def _tag_match(match):
        phrase = match.group(0)
        try:
            value = text_to_number(phrase)
            rendered = str(value)
            if isinstance(value, int):
                size_tag = len(rendered)
            else:
                # Non-int values are described as "<whole digits>.<fraction digits>".
                pieces = rendered.split('.')
                size_tag = f"{len(pieces[0])}.{len(pieces[1])}"
        except Exception:
            # Best effort: a phrase that cannot be converted stays as written.
            return phrase
        return f"<se>{size_tag}<de>{phrase.strip()}<fe>"

    return num_word_pattern.sub(_tag_match, text)

# Demo: encode a sample sentence that mixes spelled-out numbers with a
# decimal digit literal.
example_text = "There are three hundred and fifty people who are camping in an area of seven hundred and sixty eight feet by four thousand eight hundred feet, in 12864712.2 feet of snow."
encoded_text = encode_numbers_in_text(example_text)
# Bare expression: when run as the last line of a notebook cell this displays
# the encoded string.
encoded_text

'There are <se>3<de>three hundred and fifty<fe> people who are camping in an area of <se>3<de>seven hundred and sixty eight<fe> feet by <se>4<de>four thousand eight hundred<fe> feet, in <se>8.1<de>12864712.2<fe> feet of snow.'