# Cooklang on Material for mkdocs

## Helpers

In [1]:
import re
####################################################################################################
#                                                                                                  #
#                                      Helper Functions                                            #
#                                                                                                  #
#################################################################################################### 

def _trim_single_word(item: str) -> str:
    """Reduce a brace-less match to the single word that follows the marker.

    A match without ``{...}`` only covers one word, so everything after the
    first space (or, when there is no space, after the first ``.``) is
    discarded. Sentence punctuation left directly after the word is removed
    as well, so ``"@salt. Stir"`` yields ``"@salt"``.
    """
    if " " in item:
        item = item.split(" ")[0]
    elif "." in item:
        item = item.split(".")[0]
    return item.rstrip(".,;:!?")


def find_specials(step: str, start_char="#") -> list[str]:
    """Collect the Cooklang tokens in ``step`` that begin with ``start_char``.

    Args:
        step: one line of recipe text.
        start_char: marker to look for: ``@`` (ingredient), ``#`` (cookware)
            or ``~`` (timer).

    Returns:
        The matched tokens in order of appearance. A token either ends at its
        closing ``}`` (e.g. ``@milk{4%cup}``, ``#potato masher{}``) or, when
        no brace section is found, is cut down to a single word (``@salt``).
    """
    specials = ("~", "@", "#")
    matches = []
    item = ""
    matching = False

    for i, x in enumerate(step):
        # Inside an open {...} section a marker character is plain content,
        # e.g. the approximate-value "~" in "@flour{~2%cups}".
        if matching and "{" in item and x in specials:
            item += x
            continue

        if x == start_char:
            # "{~" is an approximate quantity of some other token, not a timer.
            # The i > 0 guard stops step[i - 1] wrapping to the last character.
            if start_char == "~" and i > 0 and step[i - 1] == "{":
                continue
            # A new marker closes a brace-less item that is still open,
            # so "@salt and @pepper" gives two matches instead of one.
            if matching:
                matches.append(_trim_single_word(item))
            matching = True
            item = x
            continue

        if not matching:
            continue

        if x in specials:
            # A different marker ends the current brace-less item.
            matches.append(_trim_single_word(item))
            matching = False
            item = ""
            continue

        item += x
        if x == "}":
            matches.append(item)
            matching = False
            item = ""

    # Item still open at the end of the step: it never reached a closing brace.
    if matching:
        matches.append(_trim_single_word(item))
    return matches

In [2]:
def find_ingredients(step: str) -> list[str]:
    """Return the ``@``-prefixed ingredient tokens found in a recipe step."""
    ingredient_marker = "@"
    return find_specials(step, ingredient_marker)

In [3]:
def parse_ingredient(item: str) -> dict[str, str]:
    """Parse an ingredient token such as ``@salt`` or ``@milk{4%cup}``.

    Args:
        item: ingredient token including the leading ``@``.

    Returns:
        A dict with the keys ``type`` (always ``"ingredient"``), ``name``,
        ``quantity`` and ``units``. A token without a complete ``{...}``
        section gets quantity ``"some"`` and empty units; an empty quantity
        inside the braces becomes ``"as needed"``.

    Raises:
        ValueError: if ``item`` is empty or does not start with ``@``.
    """
    # startswith() also rejects the empty string, where item[0] would raise IndexError.
    if not item.startswith("@"):
        raise ValueError("Ingredients should start with @")

    name, brace, quantity = item.partition("{")
    if not brace or not item.endswith("}"):
        # No complete brace section: everything after "@" is the name.
        return {
            "type": "ingredient",
            "name": item[1:],
            "quantity": "some",
            "units": "",
        }

    # Drop the closing "}" before splitting amount and unit.
    val, units = parse_quantity(quantity[:-1])
    return {
        "type": "ingredient",
        "name": name[1:],
        "quantity": val or "as needed",
        "units": units,
    }

In [4]:
def parse_quantity(item: str) -> list[str]:
    """Split the quantity portion of a token into ``[amount, unit]``.

    e.g. ``"2%kg"`` -> ``["2", "kg"]``

    Without a ``%`` separator the whole text is the amount. A single word
    (such as a bare number) is given the unit ``"number"``; text containing
    spaces, or an empty quantity, gets an empty unit.
    """
    if "%" in item:
        return item.split("%", maxsplit=1)
    # An empty quantity (e.g. "@milk{}") is not a count, so it must not get the "number" unit.
    if not item.strip() or " " in item:
        return [item, ""]
    # A single word with no unit, e.g. "3".
    return [item, "number"]
    

In [5]:
def find_cookware(step: str) -> list[str]:
    """Return the ``#``-prefixed cookware tokens found in a recipe step."""
    cookware_marker = "#"
    return find_specials(step, cookware_marker)

In [6]:
def parse_cookware(item: str) -> str:
    """Return the cookware name from a token such as ``#pot`` or ``#potato masher{}``.

    Args:
        item: cookware token including the leading ``#``.

    Returns:
        The name without the ``#`` marker and without any ``{...}`` section.

    Raises:
        ValueError: if ``item`` is empty or does not start with ``#``.
    """
    # startswith() also rejects the empty string, where item[0] would raise IndexError.
    if not item.startswith("#"):
        raise ValueError("Cookware should start with #")
    # Cut at the first brace so "{}" and a filled section like "{2}" are both dropped.
    return item[1:].split("{", maxsplit=1)[0]

In [7]:
def find_timers(step: str) -> list[str]:
    """Return the ``~``-prefixed timer tokens found in a recipe step."""
    timer_marker = "~"
    return find_specials(step, timer_marker)

In [8]:
def parse_timer(item: str) -> dict[str, str]:
    """Parse a timer token such as ``~eggs{3%minutes}`` or ``~{25%minutes}``.

    Args:
        item: timer token including the leading ``~``.

    Returns:
        A dict with the keys ``type`` (always ``"timer"``), ``name``,
        ``quantity`` and ``units``. A token without a ``{...}`` section is
        returned with empty quantity and units.

    Raises:
        ValueError: if ``item`` is empty or does not start with ``~``.
    """
    # startswith() also rejects the empty string, where item[0] would raise IndexError.
    if not item.startswith("~"):
        raise ValueError("Timer should start with ~")

    name, brace, quantity = item.partition("{")
    if not brace:
        # No duration given; report the name only instead of failing on unpacking.
        return {
            "type": "timer",
            "name": name[1:],
            "quantity": "",
            "units": "",
        }

    # Only strip the closing brace when it is really there.
    if quantity.endswith("}"):
        quantity = quantity[:-1]
    val, units = parse_quantity(quantity)
    return {
        "type": "timer",
        "name": name[1:],
        "quantity": val,
        "units": units,
    }

In [9]:
def _fraction_to_decimal(text):
    """Convert ``a/b`` or a mixed number ``w a/b`` into a decimal string.

    Returns ``'0'`` for a zero denominator and ``text`` unchanged when it
    cannot be read as a fraction.
    """
    whole, _, fraction = text.strip().rpartition(" ")
    numerator, _, denominator = fraction.partition("/")
    try:
        result = float(numerator) / float(denominator)
        if whole.strip():
            result += float(whole)
    except ZeroDivisionError:
        return '0'
    except ValueError:
        return text
    return str(result)


def replace_amount(value):
    """Normalise a recipe quantity string so it can be converted to a number.

    Args:
        value: quantity text, e.g. ``"1/2"``, ``"2-3"``, ``"some"``, ``"5ml"``.

    Returns:
        A string: ranges are reduced to their upper bound, fractions and
        mixed numbers become decimals, purely worded amounts and the phrases
        "as needed" / "to taste" / "to taste (optional)" become ``'0'``, and a
        trailing ``ml`` or ``l`` is removed. Anything else is returned
        lower-cased and stripped.
    """
    # Ranges first: keep the highest value. Doing this before the fraction
    # conversion keeps the "-" of an exponent-formatted float from being split.
    if '-' in value:
        value = value.split('-')[-1]

    # Replace fractions with decimals
    if '/' in value:
        value = _fraction_to_decimal(value)

    # Replace worded items with 0
    if value.isalpha() or re.match(r'^[a-zA-Z\s]+$', value):
        value = '0'

    # Additional replacements
    value = value.lower().strip()
    if value in ('as needed', 'to taste', 'to taste (optional)'):
        value = '0'
    elif value.endswith('ml'):
        # Checked before 'l' so the whole suffix is removed ("5ml" -> "5", not "5m").
        value = value[:-2].strip()
    elif value.endswith('l'):
        value = value[:-1].strip()

    return value

In [10]:
def replace_unit(unit):
    """Normalise a unit string: drop any ``(...)`` remark, upper-case and trim it."""
    without_brackets = re.sub(r'\(.*?\)', '', unit)
    return without_brackets.upper().strip()

## Plantuml steps creator

In [11]:
import re

####################################################################################################
#                                                                                                  #
#             Newlines in the input_string after specified number of characters-(KEEP)             #
#                                                                                                  #
####################################################################################################

def insert_newlines(input_string: str, chars_per_line: int):
    """Wrap ``input_string`` so that lines stay within roughly ``chars_per_line`` characters.

    The text is split into tokens, where a token is either a complete
    ``<tag>...</tag>`` element or a run of non-space characters. Tokens are
    re-joined with single spaces, and a newline plus tab is inserted before
    any token that would push the current line past the limit.
    """
    tokens = re.findall(r'<[^>]+>.*?</[^>]+>|[^<\s]+', input_string)
    pieces = []
    line_length = 0
    for token in tokens:
        token_length = len(token)
        if line_length + token_length > chars_per_line:
            # Start a new, tab-indented line before this token.
            pieces.append("\n\t")
            line_length = 0
        pieces.append(token + " ")
        line_length += token_length + 1
    return "".join(pieces).strip()

In [12]:
####################################################################################################
#                                                                                                  #
#                         Function to return Plantuml compliant String-(KEEP)                      #
#                                                                                                  #
####################################################################################################

def puml(step: str):
    """
    Convert a single recipe step into a PlantUML activity-diagram fragment.

    A step whose upper-cased, stripped text starts with IF and contains THEN is rendered as an
    if / elseif / else / endif construct. Any other non-empty step is rendered as a maroon
    section header (**...**), a comment (-- or [-), or a regular activity box; in these
    branches backticks are removed from the text.
    All text is taken from the upper-cased copy of the step and wrapped with insert_newlines
    so that the PlantUML boxes don't overflow. An empty step returns an empty string.
    """
    out = ""

    # Convert step into uppercase for uniform comparison
    p_step = step.upper().strip()
    # Check if step starts with IF and contains a THEN
    # If so, it is a candidate for the if/then/else syntax of plantuml
    # If not, treat it as a normal step
    if p_step.startswith('IF') and 'THEN' in p_step:
        # Replace 'ELSE IF' with 'ELSEIF' so there is no clash with the final ELSE statement
        if 'ELSE IF' in p_step:
            p_step = p_step.replace('ELSE IF','ELSEIF')
        # Remove every standalone IF from the step
        if_removed = ''.join(re.split(r"\bIF\b", p_step)).strip()
        # Split at standalone ELSE: item 0 is the if/elseif part, item 1 (when present) the else part
        else_removed_l = re.split(r"\bELSE\b", if_removed)
        # Break the if/elseif part (first item of the list above) at every ELSEIF
        elif_removed_l = re.split(r"\bELSEIF\b", else_removed_l[0])
        # Break every item of the list above at THEN and collect the pieces in one flat list
        then_removed_l = []
        for elif_removed in elif_removed_l:
            then_removed_l += re.split(r"\bTHEN\b", elif_removed)
        # Walk the flat list: conditions sit at even indexes, their actions at odd indexes
        # NOTE(review): the i+1 access below assumes the first piece was actually split at a
        # standalone THEN; the substring test above does not guarantee that - confirm.
        i = 0
        while i < len(then_removed_l):
            if i==0:
                # The very first entry in then_removed_l is the condition of the if statement
                # and the second entry is its then statement
                out += f'\tif ({insert_newlines(then_removed_l[i].strip(),20)}?) then (yes)\n\t\t:{insert_newlines(then_removed_l[i+1].strip().capitalize(),30)};\n'
                i = i+2
            elif i % 2 == 0:
                # Every further even entry is an elseif condition
                out+= f'\t(no) elseif ({insert_newlines(then_removed_l[i].strip().capitalize(),20)}?) then (yes)\n'
                i = i+1
            else:
                # Every odd entry is the then statement of the preceding condition
                out+=f'\t\t:{insert_newlines(then_removed_l[i].strip().capitalize(),30)};\n'
                i = i + 1
        # Emit the final else branch; it stays empty when the step has no ELSE part
        if len(else_removed_l)>1:
            out += f'\telse (no)\n\t\t:{insert_newlines(else_removed_l[1].strip().capitalize(),30)};\n'
        else:
            out += f'\telse (no)\n\t\t\n'
        out += f'\tendif\n'
    elif step != '':
        if step.startswith('**') and step.endswith('**'):
            # Section header: rendered as a maroon box with white text
            # NOTE(review): this reassigns `step`, but the next line formats `p_step`,
            # so the ** markers are still present in the emitted text - confirm intent.
            step = step.replace("**","")
            out += f'\t#Maroon:<color: white>{insert_newlines(p_step.replace("`","").strip(),50)}</color>/\n'
        elif step.startswith('--'):
            # If step starts with -- then it is a single line comment (no trailing newline is added)
            out += f'\t{insert_newlines(p_step.replace("`","").strip().capitalize(),50)}'
        elif step.startswith('[-'):
            # If step starts with [- then it is a multiline comment:
            # break it at each newline and wrap every line separately.
            p_steps_split = p_step.splitlines()
            for p_step_split in p_steps_split:
                out += f'\n\t{insert_newlines(p_step_split.replace("`","").strip().capitalize(),50)}'
            out = f"{out}\n"
        else:
            # Any other step gets the standard activity-box formatting
            out += f'\t:{insert_newlines(p_step.replace("`","").strip().capitalize(),50)};\n'
    #out = f'{steps_string}\n\n{out}'
    # Return final markup for the plantuml step
    return out

## Nutritionix API Call

In [13]:
import os
import requests
import json
import pandas as pd

####################################################################################################
#                                                                                                  #
#                Function to convert response from the API into a usable dataframe                 #
#                                                                                                  #
####################################################################################################

# (attr_id, nutrient name, drv, unit) for every nutrient copied from the API response.
# NOTE(review): "drv" is presumably the daily reference value - confirm.
_NUTRIENT_INFO = (
    (208, 'Calories', 2250, 'kcal'),
    (606, 'Saturated Fat', 20, 'g'),
    (204, 'Total Fat', 70, 'g'),
    (205, 'Carbohydrate', 260, 'g'),
    (269, 'Sugars', 30, 'g'),
    (203, 'Protein', 50, 'g'),
    (291, 'Dietary fiber', 30, 'g'),
    (645, 'Monounsaturated Fat', 0.0, 'g'),
    (646, 'Polyunsaturated Fat', 0.0, 'g'),
    (605, 'Trans Fat', 0.0, 'g'),
    (601, 'Cholesterol', 300, 'mg'),
    (307, 'Sodium', 2300, 'mg'),
    (306, 'Potassium K', 3500, 'mg'),
    (301, 'Calcium', 700, 'mg'),
    (303, 'Iron', 11.75, 'mg'),
    (309, 'Zinc', 8.25, 'mg'),
    (317, 'Selenium', 75, 'µg'),
    (328, 'Vitamin D (D2 + D3)', 10, 'µg'),
    (323, 'Vitamin E', 15, 'mg'),
    (415, 'Vitamin B-6', 1.4, 'mg'),
    (418, 'Vitamin B-12', 2.4, 'µg'),
    (262, 'Caffeine', 400, 'mg'),
)


def create_nutrient_dataframe(df, ing_str):
    """Reshape a Nutritionix foods DataFrame into per-100-g nutrient rows.

    Args:
        df: DataFrame with the columns ``food_name``, ``nf_total_carbohydrate``,
            ``nf_dietary_fiber``, ``serving_weight_grams`` and ``full_nutrients``
            (a list of dicts with ``attr_id`` and ``value`` per row).
        ing_str: the ingredient strings that were queried; used as ``input_str``
            when there is exactly one per returned food.

    Returns:
        A new DataFrame with one row per food: identification columns, the net
        carbs per 100 g, and for every tracked nutrient its value scaled to
        100 g plus ``<name>_drv`` and ``<name>_unit`` columns. If ``df`` is not
        a DataFrame it is returned unchanged after printing a notice.
    """
    if not isinstance(df, pd.DataFrame):
        print('Not a dataframe')
        return df

    food_names = df['food_name'].tolist()

    # The query strings can only be paired with the results when the counts match.
    if len(ing_str) == len(food_names):
        input_strs = ing_str
    else:
        input_strs = ['Not all ingredients returned'] * len(food_names)

    net_carbs = []
    nutrient_values = {attr_id: [] for attr_id, *_ in _NUTRIENT_INFO}
    for _, row in df.iterrows():
        weight = row['serving_weight_grams']
        net_carbs.append(
            round(((row['nf_total_carbohydrate'] - row['nf_dietary_fiber']) / weight) * 100, 2)
        )

        # Index the nutrient list once per row; the first entry per attr_id wins.
        first_entry_by_id = {}
        for entry in row['full_nutrients']:
            first_entry_by_id.setdefault(entry.get('attr_id'), entry)

        for attr_id, values in nutrient_values.items():
            entry = first_entry_by_id.get(attr_id)
            if entry is None:
                # Nutrient not reported for this food.
                values.append(0)
            elif weight == 100:
                values.append(entry['value'])
            else:
                # Scale the reported value to 100 g.
                values.append(round((entry['value'] / weight) * 100, 2))

    columns = {
        'input_str': input_strs,
        'source': '[Nutritionix](https://www.nutritionix.com)',
        'food_name': food_names,
        'net_carb': net_carbs,
        'serving_unit': 'gms',
        'serving_weight_grams': '100',
    }
    for attr_id, name, drv, unit in _NUTRIENT_INFO:
        column_name = name.replace(' ', '_')
        columns[column_name] = nutrient_values[attr_id]
        columns[column_name + '_drv'] = drv
        columns[column_name + '_unit'] = unit

    return pd.DataFrame(columns)

In [14]:
####################################################################################################
#                                                                                                  #
#                            Main Function to call Nutritionix API                                 #
#                                                                                                  #
####################################################################################################

def get_nutritionix_data(ingredients_str, timeout=30):
    """Query the Nutritionix natural-language endpoint for ``ingredients_str``.

    The credentials are read from the ``NUTRITIONIX_APP_ID`` and
    ``NUTRITIONIX_APP_KEY`` environment variables.

    Args:
        ingredients_str: one ingredient per line, e.g. ``"Flour - 100 gms"``.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        The DataFrame built by ``create_nutrient_dataframe``; the raw
        normalised response DataFrame if that conversion fails; or ``None``
        when the query is empty, the request fails, or no foods are returned.
    """
    if not ingredients_str:
        return None

    api_url = "https://trackapi.nutritionix.com/v2/natural/nutrients"
    headers = {
        "Content-Type": "application/json",
        "x-app-id": os.environ.get("NUTRITIONIX_APP_ID"),
        "x-app-key": os.environ.get("NUTRITIONIX_APP_KEY"),
        "line_delimited": "TRUE",
    }
    payload = {"query": ingredients_str}

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(api_url, data=json.dumps(payload), headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        nutrition_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error during API request: {e} while trying to get the ingredient {ingredients_str}")
        return None

    if "foods" not in nutrition_data:
        return None

    df = pd.json_normalize(nutrition_data.get('foods', []))
    # An empty result has no columns, so the 'full_nutrients' lookup below would raise KeyError.
    if df.empty:
        return None

    # Make sure every 'full_nutrients' cell is a list; string cells are parsed as JSON.
    df['full_nutrients'] = df['full_nutrients'].apply(
        lambda x: json.loads(x.replace("'", '"')) if not isinstance(x, list) else x
    )

    # Keep only the ingredient name in front of the " -<amount>" part of each line.
    ingredient_list = [ingredient.split(' -')[0] for ingredient in ingredients_str.splitlines()]

    try:
        return create_nutrient_dataframe(df, ingredient_list)
    except Exception as e:
        # Best effort: fall back to the raw response rather than losing the data.
        print(f"An error occurred during DataFrame creation: {e}")
        return df

## Nutrition Label Creator

In [15]:
import pandas as pd
#from parser.helpers import find_ingredients, parse_ingredient, replace_amount, replace_unit
#from parser.Nutritionix_api_call import get_nutritionix_data

####################################################################################################
#                                                                                                  #
#                      Function to create nutrient summary dataframe                               #
#                                                                                                  #
####################################################################################################
    
def calculate_nutrient_summary_per_weight(
    df,
    recipe_weight_in_gms,
    specified_weight,
    nutrient_col_list,
    total_nutrients,
    average_nutrient_drv,
    average_nutrient_unit,
    nutrient_drv_columns,
    nutrient_unit_columns
):
    """Build a one-row DataFrame with the recipe's nutrients scaled to ``specified_weight``.

    Each total in ``total_nutrients`` is divided by ``recipe_weight_in_gms`` and
    multiplied by ``specified_weight``; the drv and unit values are copied
    through as given. ``df`` is accepted but not used.
    """
    summary_row = {'recipe_weight_in_gms': specified_weight}

    # Nutrient amounts for the requested weight
    for nutrient in nutrient_col_list:
        scaled_amount = (total_nutrients[nutrient] / recipe_weight_in_gms) * specified_weight
        summary_row[f'{nutrient}'] = [scaled_amount]

    # Reference values and units do not depend on the weight
    for drv_column in nutrient_drv_columns:
        summary_row[f'{drv_column}'] = [average_nutrient_drv[drv_column]]
    for unit_column in nutrient_unit_columns:
        summary_row[f'{unit_column}'] = [average_nutrient_unit[unit_column]]

    return pd.DataFrame(summary_row, index=[0])

In [16]:
####################################################################################################
#                                                                                                  #
#                          Function to create total nutrient weight dict                           #
#                                          (KEEP)                                                  #
####################################################################################################
def fn_total_df_weight(input_string: str) -> tuple[dict, str, int, str]:
    """Compute nutrient totals for a Cooklang recipe block.

    The recipe's ingredients are parsed, converted to grams via 'unit_db.csv',
    matched by name against 'ingredient_nutrient_db.csv' (both read from the
    current working directory) and summed. Ingredients missing from the
    database are looked up through get_nutritionix_data.

    Returns a 4-tuple:
        1. dict form of the summary DataFrame with four rows: whole recipe,
           100 g, one serving and two servings
        2. markdown text describing ingredients that were not in the database
           ("" when the lookup returned no DataFrame)
        3. the serving size read from the ">> Serving" line (1 by default)
        4. markdown table of the matched ingredients with their net carbs

    NOTE(review): the code assumes at least one ingredient is parsed and at
    least one of them is found in the database ('quantity' column access and
    .iloc[0] below) - confirm how empty recipes should behave.
    """
    lines = input_string.splitlines()
    all_recipe_ingredients = []
    step_count = 1
    serving_size = 1
    for line in lines:
        if line.strip() != "" and not line.startswith(">>"):
            step = line.strip()
            for ingredient in find_ingredients(step):
                parsed_ingredient = parse_ingredient(ingredient)
                parsed_ingredient['step'] = step_count
                all_recipe_ingredients.append(parsed_ingredient)
            if step != '':
                # Section headers (**...**) do not advance the step counter
                if step.startswith('**') and step.endswith('**'):
                    pass
                else:
                    step_count+=1
        elif line.startswith(">> Serving"):
            # Take the first word after ": " as the serving size
            # NOTE(review): a ">> Serving" line without ": " raises IndexError here.
            serving_size_str =  line.lstrip(">> ").strip().split(": ")[1].split(" ")[0]
            # Convert to integer
            try:
                serving_size = int(serving_size_str)  # Convert the string to an integer
            except ValueError:
                serving_size = 1  # Set to 1 if conversion fails
    # Read ingredient and unit csv files
    df_ingredient_db = pd.read_csv('ingredient_nutrient_db.csv')
    df_units_db = pd.read_csv('unit_db.csv')
    # Create a lookup dictionary (unit name -> equivalent grams) from df_units_db
    unit_lookup_dict = df_units_db.set_index('Unit')['eq_gms'].to_dict()
    df_recipe_ingredients = pd.DataFrame(all_recipe_ingredients)
    ##### Code to create nutrition label
    # Quantities that cannot be read as numbers become NaN (errors='coerce')
    df_recipe_ingredients['cleaned_quantity'] = pd.to_numeric(df_recipe_ingredients['quantity'].apply(replace_amount), errors='coerce')
    df_recipe_ingredients['cleaned_unit'] = df_recipe_ingredients['units'].apply(replace_unit)
    # Units missing from the lookup count as 0 grams
    df_recipe_ingredients['quantity_in_gms'] = df_recipe_ingredients['cleaned_quantity'] * df_recipe_ingredients['cleaned_unit'].map(unit_lookup_dict).fillna(0)
    # Upper-case every string cell so names compare against the database's input_str
    # NOTE(review): presumably the database stores input_str upper-cased - verify against the csv.
    df_recipe_ingredients = df_recipe_ingredients.map(lambda x: x.upper() if isinstance(x, str) else x)

    # Perform the left merge with ingredient database
    merged_df = df_recipe_ingredients.merge(df_ingredient_db, left_on='name', right_on='input_str', how='left')

    # Filter to get found entries
    found_entries = merged_df[merged_df['input_str'].notna()]
    # Set nutrient columns
    nutrient_columns = ['net_carb', 'Calories', 'Total_Fat', 'Saturated_Fat', 'Carbohydrate', 'Sugars', 'Protein',
                    'Dietary_fiber', 'Monounsaturated_Fat', 'Polyunsaturated_Fat',
                    'Trans_Fat', 'Cholesterol', 'Sodium', 'Potassium_K',
                    'Calcium', 'Iron', 'Zinc', 'Selenium',
                    'Vitamin_D_(D2_+_D3)', 'Vitamin_E',
                    'Vitamin_B-6', 'Vitamin_B-12', 'Caffeine']

    # found_entries is a slice of another DataFrame so create a copy
    found_entries = found_entries.copy()

    # Cast nutrient columns to float64
    found_entries[nutrient_columns] = found_entries[nutrient_columns].astype('float64')

    # Scale each nutrient from the database serving weight to the quantity used in the recipe
    for nutrient in nutrient_columns:
        found_entries.loc[:, nutrient] = (found_entries[nutrient] / found_entries['serving_weight_grams']) * found_entries['quantity_in_gms']
    # Prepare data for calling calculate_nutrient_summary_per_weight function
    total_weight_df = pd.DataFrame()
    # Define the drv and unit column names (net_carb has neither)
    nutrient_drv_columns = []
    nutrient_unit_columns = []
    for nutrient in nutrient_columns:
        if nutrient != 'net_carb':
            nutrient_drv_columns.append(f'{nutrient}_drv')
            nutrient_unit_columns.append(f'{nutrient}_unit')
    # Calculate the total weight of the recipe in grams (found ingredients only)
    recipe_weight_in_gms = found_entries['quantity_in_gms'].sum()
    # Sum the updated nutrient values for the entire recipe
    total_nutrients = found_entries[nutrient_columns].sum()
    average_nutrient_drv = found_entries[nutrient_drv_columns].mean()
    # Units are taken from the first found ingredient
    average_nutrient_unit = found_entries[nutrient_unit_columns].iloc[0]
    # Weights to summarise: whole recipe, 100 gms, one serving, two servings
    calculation_weights = [recipe_weight_in_gms,100,recipe_weight_in_gms/serving_size, 2*recipe_weight_in_gms/serving_size]

    # One summary row per weight
    for calculation_weight in calculation_weights:
        total_weight_df = pd.concat([total_weight_df,(
            calculate_nutrient_summary_per_weight(
                found_entries, 
                recipe_weight_in_gms,
                calculation_weight,
                nutrient_columns,
                total_nutrients,
                average_nutrient_drv,
                average_nutrient_unit,
                nutrient_drv_columns,
                nutrient_unit_columns
            )
        )], ignore_index=True)

    # Add the percentage-of-drv columns and fix the column order
    new_order = ['recipe_weight_in_gms','net_carb']
    for nutrient in nutrient_columns:
        if nutrient != 'net_carb':
            # A drv of 0 has no meaningful percentage, so leave the cell empty
            if total_weight_df[f'{nutrient}_drv'].iloc[0] == 0:
                total_weight_df[f'{nutrient}_drv_%'] = ""  # Assign special value
            else:
                total_weight_df[f'{nutrient}_drv_%'] = round((total_weight_df[f'{nutrient}'] / total_weight_df[f'{nutrient}_drv']) * 100,2)
            new_order.append(f'{nutrient}')
            new_order.append(f'{nutrient}_unit')
            new_order.append(f'{nutrient}_drv_%')
            new_order.append(f'{nutrient}_drv')
    total_weight_df = total_weight_df[new_order].round(2)
    # Deal with Not found entries
    not_found_entries = merged_df[merged_df['input_str'].isna()]
    # NOTE(review): presumably always true, since not_found_entries is a row selection of merged_df.
    if isinstance(not_found_entries,pd.DataFrame):
        # Query every missing ingredient for a fixed 100 gms
        not_found_string_search = '\n'.join(not_found_entries['name'] + ' - 100 gms')
        not_found_df = get_nutritionix_data(not_found_string_search)
        if isinstance(not_found_df,pd.DataFrame):
            # Indent the table and the csv rows so they nest inside the admonition blocks
            table_string = not_found_df.to_markdown(index=False).replace('\n','\n\t')
            copy_block_string = not_found_df.to_csv(index=False, header=False,sep=',').replace('\n','\n\t\t')
            missing_ingredients_string = f"Following ingredient was not found on database. It's values from Nutirionix database are as shown in the table below.\n\n\t{table_string}\n\n\tIf these are correct, these can be added to ingredient database simply by copying the code block and pasting in the csv file.\n\n\t??? warning \"Copy for ingredient db\"\n\t\t```\n\t\t{copy_block_string}```"
        else:
            missing_ingredients_string = ""
    # Renaming columns for display
    found_entries.rename(columns={
        'name': 'Ingredient',
        'quantity': 'Quantity',
        'quantity_in_gms': 'Qty in gms',
        'source': 'Source',
        'cleaned_unit': 'Unit',
        'net_carb': 'Net Carbs (gms)'
    }, inplace=True)
    net_carbs_table_found_ingredients = found_entries[['Ingredient', 'Quantity', 'Unit', 'Qty in gms', 'Source', 'Net Carbs (gms)']].round(2).to_markdown(index=False)
    return total_weight_df.to_dict(), missing_ingredients_string, serving_size, net_carbs_table_found_ingredients

## Ingredients String Creator

In [17]:
import re
import pandas as pd
#from parser.helpers import find_ingredients, parse_ingredient

###########################################################################################
###                                                                                     ###
###      Function to return recipe ingredients as dict and nutrition_info_addendum      ###
###                                        (KEEP)                                       ###
###########################################################################################

def fn_recipe_ingredients(input_string):
    """
    Function takes cooklang block content as input.
    1) Parses it to get ingredients, qty and units and returns them as a dict
       keyed by the title-cased ingredient name. Each value holds
       'with_units' (list of (amount, unit, step) tuples) and
       'without_units' (set of amounts).
    2) Additionally, the function creates a Nutritionix link for all ingredients
       which is returned as an info admonition along with a code block
       that allows copy of all ingredients.

    Returns a tuple (recipe_ingredients, nutrition_info_addendum).
    """
    # Imported here because urllib is not among this section's existing imports.
    from urllib.parse import quote

    all_recipe_ingredients = []
    step_count = 1
    serving_size = 1
    for line in input_string.splitlines():
        if line.strip() != "" and not line.startswith(">>"):
            step = line.strip()
            for ingredient in find_ingredients(step):
                parsed_ingredient = parse_ingredient(ingredient)
                parsed_ingredient['step'] = step_count
                all_recipe_ingredients.append(parsed_ingredient)
            # Section headers (**...**) do not advance the step counter
            if not (step.startswith('**') and step.endswith('**')):
                step_count += 1
        elif line.startswith(">> Serving"):
            # The first word after ": " is the serving size; fall back to 1
            # when it is missing or not an integer.
            metadata_parts = line.lstrip(">> ").strip().split(": ")
            try:
                serving_size = int(metadata_parts[1].split(" ")[0])
            except (IndexError, ValueError):
                serving_size = 1

    # Organize ingredients
    recipe_ingredients = {}
    for recipe_ingredient in all_recipe_ingredients:
        ingredient_key = recipe_ingredient['name'].title()
        ingredient_amount = recipe_ingredient['quantity']
        ingredient_unit = recipe_ingredient['units']

        if ingredient_key not in recipe_ingredients:
            recipe_ingredients[ingredient_key] = {'with_units': [], 'without_units': set()}

        if ingredient_unit:
            recipe_ingredients[ingredient_key]['with_units'].append(
                (ingredient_amount, ingredient_unit, recipe_ingredient['step'])
            )
        else:
            recipe_ingredients[ingredient_key]['without_units'].add(ingredient_amount)

    # One "<name> - <amount> <UNIT>" line per entry
    ingredient_lines = []
    for ingredient_key, amounts in recipe_ingredients.items():
        for ingredient_amount, ingredient_unit, _step in amounts['with_units']:
            stripped_ingredient_unit = re.sub(r'\(.*?\)', '', ingredient_unit).upper().strip()
            ingredient_lines.append(f"{ingredient_key} - {ingredient_amount} {stripped_ingredient_unit}\n")
        # Add non-unit entries; sorted because set order differs between runs
        for non_unit_amount in sorted(amounts['without_units']):
            ingredient_lines.append(f"{ingredient_key} - {non_unit_amount}\n")
    plain_ingredient_string = "".join(ingredient_lines)

    ###### additional info
    # Percent-encode the whole query: raw "(", ")", "&", "#" or "%" would break
    # the markdown link or the URL. Newline, space and "/" encode as before.
    label_string = quote(plain_ingredient_string, safe='')
    # Indent continuation lines so the text nests inside the admonition's code block
    ingredient_string = plain_ingredient_string.replace('\n','\n\t\t')
    nutrition_label_link = f'(https://www.nutritionix.com/natural-demo?line_delimited&use_raw_foods&q={label_string}&s={serving_size})' + '{target=_blank}'
    nutrition_info_addendum = f'\n\n??? site-info "[Nutritionix Link]{nutrition_label_link}"\n\tCopy the ingredients from below and adjust as needed.\n\t??? site-tip "Copy Ingredients"\n\t\t```\n\t\t{ingredient_string}```\n'

    return recipe_ingredients, nutrition_info_addendum

## Steps Creator

In [18]:
#from parser.helpers import find_cookware, parse_cookware, parse_timer, find_timers, find_ingredients, parse_ingredient 
#from parser.plantuml_steps_creator import puml

####################################################################################################
#                                                                                                  #
#                  Function to get formatted steps and plantuml steps in list                      #
#                                                                                                  #
####################################################################################################

def fn_steps_list(input_str):
    """Turn a Cooklang recipe body into display steps and PlantUML steps.

    Blank lines and metadata lines (starting with ``>>``) are ignored. Every
    other line is stripped and its cookware, timer and ingredient tokens are
    replaced with formatted text. Two variants of each line are maintained in
    parallel: ``step`` (HTML/icon markup for the page) and ``p_step`` (lighter
    ``<i>``/``<b>`` markup that is handed to ``puml``).

    Line classification after token replacement:
      * ``[- ... -]``  block note, possibly spanning several lines
      * ``**...**``    section heading
      * ``--...``      single-line note
      * anything else  numbered step

    Args:
        input_str: Recipe text, one step/note/heading per line.

    Returns:
        A tuple ``(steps_list, p_steps_list, parsed_cookwares)`` where the
        first two are lists of strings (page markup and ``puml`` output
        respectively) and the last is a set of title-cased cookware names.
    """
    parsed_cookwares = set()
    steps_list = []
    p_steps_list = []
    multiline_note = []
    multiline_flag = False
    step_count = 1

    for line in input_str.splitlines():
        if line.strip() == "" or line.startswith(">>"):
            continue

        step = line.strip()
        # Start the PlantUML variant from the current line. Previously p_step
        # was only assigned inside the cookware/timer loops, so a line with
        # neither reused the previous line's text (or "" for the first line).
        p_step = step

        for cookware in find_cookware(step):
            parsed_cookware = parse_cookware(cookware)
            parsed_cookwares.add(parsed_cookware.title())
            p_step = p_step.replace(cookware, f"{parsed_cookware}")
            step = step.replace(cookware, f"{parsed_cookware}")

        for timer in find_timers(step):
            timer_info = parse_timer(timer)
            parsed_timer = timer_info['quantity'] + ' ' + timer_info['units']
            # Update p_step from itself (not from step) so the icon markup
            # added to step for an earlier timer never leaks into p_step.
            p_step = p_step.replace(timer, parsed_timer)
            step = step.replace(timer, f':material-timer-sand-full: {parsed_timer}')

        for ingredient in find_ingredients(step):
            parsed_ingredient = parse_ingredient(ingredient)
            ingredient_name = parsed_ingredient['name']
            ingredient_quantity = parsed_ingredient['quantity']
            ingredient_unit = parsed_ingredient['units']
            if ingredient_unit == '':
                # No unit: show the quantity in parentheses after the name.
                p_step = p_step.replace(ingredient, f'<b>{ingredient_name}</b> ({ingredient_quantity})')
                step = step.replace(ingredient, f'<cookmark><strong>{ingredient_name}</strong></cookmark> ({ingredient_quantity})')
            elif ingredient_unit != 'Number':
                p_step = p_step.replace(ingredient, f'<i>{ingredient_quantity} {ingredient_unit}</i> <b> {ingredient_name}</b>')
                step = step.replace(ingredient, f'<cookmark><em>{ingredient_quantity} {ingredient_unit}</em> <strong> {ingredient_name}</strong></cookmark>')
            else:
                # The 'Number' unit is not printed; only the count is shown.
                p_step = p_step.replace(ingredient, f'<i>{ingredient_quantity}</i> <b> {ingredient_name}</b>')
                step = step.replace(ingredient, f'<cookmark><em>{ingredient_quantity}</em> <strong> {ingredient_name}</strong></cookmark>')

        opens_note = step.startswith('[-')
        closes_note = step.endswith('-]')
        if opens_note or closes_note or multiline_flag:
            multiline_note.append(step)
            if closes_note:
                # Also reached for a one-line "[- note -]"; previously such a
                # line opened a note that swallowed every following step.
                multiline_step = '\n\t'.join(multiline_note).replace('[- ','<multiline>\n\t').replace(' -]','\n</multiline>').replace('[-','<multiline>\n\t').replace('-]','\n</multiline>')
                steps_list.append(multiline_step)
                p_multiline_step = puml('\n'.join(multiline_note)).replace('[- ','note right\n\t').replace(' -]','\n\tend note').replace('[-','note right\n\t').replace('-]','\nend note')
                p_steps_list.append(p_multiline_step)
                multiline_note = []
                multiline_flag = False
            else:
                multiline_flag = True
            continue

        if step.startswith('**') and step.endswith('**'):
            # Section heading: not numbered.
            p_steps_list.append(puml(step))
            steps_list.append(f"<strong>{step.replace('**', '')}</strong>")
        elif step.startswith('--'):
            # Single-line note: not numbered.
            p_note = puml(step).replace('-- ','note right\n\t').replace('--','note right\n\t')
            p_steps_list.append(f"{p_note}\n\tendnote\n")
            step_note = step.replace('--','<note>')
            steps_list.append(f"{step_note}</note>")
        else:
            # NOTE(review): every ':' in the puml() output is rewritten here;
            # presumably puml() emits a single leading ':' - confirm.
            p_steps_list.append(puml(p_step).replace(':', f':<b>Step {step_count}</b>: '))
            steps_list.append(f"<strong>Step {step_count}</strong>: {step}")
            step_count += 1

    return steps_list, p_steps_list, parsed_cookwares

## Main Parser

In [19]:
import re

####################################################################################################
#                                                                                                  #
#                           Function to Extract Cooklang Block                                     #
#                                                                                                  #
####################################################################################################
    
def fn_extract_cooklang_blocks(content):
    """Return the body of every ```cooklang fenced block found in *content*.

    Each body is the text between an opening "```cooklang" and the next
    "```", with surrounding whitespace removed. Blocks are returned in the
    order they appear; an empty list means no block was found.
    """
    # DOTALL lets the lazy group run across line breaks inside the fence.
    fenced_blocks = re.finditer(r'```cooklang(.*?)```', content, flags=re.DOTALL)
    return [found.group(1).strip() for found in fenced_blocks]

## Hooks.py

In [20]:
import os
from jinja2 import Environment, FileSystemLoader
#from parser.ingredients_string_creator import fn_recipe_ingredients
#from parser.nutrition_labels_creator import fn_total_df_weight
#from parser.parse_recipe import fn_extract_cooklang_blocks
#from parser.steps_creator import fn_steps_list

def on_page_markdown(markdown, page, **kwargs):
    """Replace every ```cooklang fenced block in a page with rendered HTML.

    For each block returned by ``fn_extract_cooklang_blocks`` the metadata
    lines (``>> key: value``) are collected, the steps, nutrition data and
    ingredients are computed by the sibling ``fn_*`` functions, and the
    result is rendered through ``recipe_template.html``.

    Args:
        markdown: Markdown source of the page.
        page: Page object; ``page.file.src_path`` is read to locate the page
            and the object itself is passed to the template.
        **kwargs: Accepted and ignored.

    Returns:
        The markdown with each cooklang block replaced by its rendered HTML,
        or the input unchanged when it contains no cooklang block.
    """
    # Local import so the hook does not depend on another cell/module
    # having imported ``re``.
    import re

    current_page_path = page.file.src_path
    current_dir = os.path.dirname(current_page_path)
    base_image_path = 'assets/images/'
    recipes_dir = os.path.abspath('Recipes')
    current_dir_abs = os.path.abspath(current_dir)

    if os.path.commonpath([recipes_dir, current_dir_abs]) == recipes_dir:
        # Page lives under Recipes: point back to the image folder from there.
        relative_path_for_image = os.path.relpath(base_image_path, current_dir_abs)
    else:
        # NOTE(review): this value keeps its trailing '/', unlike the relpath
        # result above; if the template joins with '/', the URL gets a double
        # slash - confirm against recipe_template.html.
        relative_path_for_image = base_image_path

    cooklang_content = fn_extract_cooklang_blocks(markdown)
    if not cooklang_content:
        return markdown

    # Templates are looked up in 'parser' first, then the current directory.
    env = Environment(loader=FileSystemLoader(['parser', '.']))
    template = env.get_template('recipe_template.html')

    for content in cooklang_content:
        meta_data = {}
        for line in content.splitlines():
            if line.strip() == "" or not line.startswith(">>"):
                continue
            # Split on the first ": " only, so values that themselves contain
            # ": " no longer raise on unpacking; lines without a separator
            # are skipped instead of crashing the build.
            parts = line.lstrip(">> ").strip().split(": ", 1)
            if len(parts) != 2:
                continue
            key, value = parts
            meta_data[key.strip()] = value.strip()

        steps_list, p_steps_list, cookware = fn_steps_list(content.strip())
        total_weight_dict, missing_ingredients_string, serving_size, net_carbs_table_found_ingredients = fn_total_df_weight(content.strip())
        recipe_ingredients, nutrition_info_addendum = fn_recipe_ingredients(content.strip())

        rendered_html = template.render(
            image_path=relative_path_for_image,
            recipe_ingredients=recipe_ingredients,
            metadata=meta_data,
            cookware=cookware,
            steps=steps_list,
            process=p_steps_list,
            total_weight_df=total_weight_dict,
            serving_size=serving_size,
            missing_ingredients_string=missing_ingredients_string,
            cooklang_block=content,
            nutrition_info_addendum=nutrition_info_addendum,
            net_carbs_table_found_ingredients=net_carbs_table_found_ingredients,
            page=page,
        )

        # ``content`` was stripped by the extractor, so tolerate any
        # whitespace between the fence markers and the body. An exact
        # "```cooklang\n{content}\n```" match silently left the block
        # unrendered when the source had extra blank lines or spaces.
        block_pattern = r'```cooklang\s*' + re.escape(content) + r'\s*```'
        # A callable replacement keeps backslashes in the HTML literal.
        markdown = re.sub(block_pattern, lambda _match, html=rendered_html: html, markdown)
    return markdown

# Test functions with a sample

In [21]:
# Sample inputs used by the cells below to exercise the parser functions.
relative_path_for_image = "assets/images/"
# Sample Cooklang recipe text: ">>" metadata lines, "**...**" section headings,
# one "--" single-line note and one "[- ... -]" block note spanning two lines.
content = """
>> Title: Low Carb Pizza
>> Serving Size: 6
>> Cooking Time: 30 minutes (Prep Time - 90 minutes)
>> Category: Italian
>> Type: Vegetarian
>> Image: recipe_9_low_carb_pizza.jpg
>> Image-Caption: Low Carb Pizza and Garlic Bread.

**Common Prep Steps**

Add @Olive Oil{1%tbsp} and @sugar{1%tsp} in @water{300%ml} and mix well.
Take a #bowl{} and add @vital wheat gluten{1%cup}.
Add @wheat bran{1/4%cup}, @oat flour{1/4%cup}.
Add @flax meal{1/4%cup}, @Almond Flour{1/2%cup}.
Add @baking powder{1%tsp}, @salt{1%tsp}, @dried yeast{7%gms(1 sachet)}.
Add liquid mix to dry mix and knead for ~{5%minutes}.
Once consolidated, place the dough on oiled #baking sheet{}.
Flatten it with hand at first and with #roller pin{} later to bring it to about 14 inch diameter. 
If you want your pizza to be round then using a #large round utensil{} as guide and #knife or pizza cutter{} trim uneven sides to get a circular shape else just use the #knife or pizza cutter{} to make a rectangular shape.
Cover it with a slightly damp cloth and keep it aside to rise.
Use the trimmed dough to make another ball of dough and flatten it first with hand and then with roller pin to a size of roughly 8 to 10 inch.
Let the two items rise for a ~{40-60%minutes} though leaving for about ~{2%hrs} gets better results.

**Garlic Bread**

Take the smaller base and Using a #fork{} pierce the base equidistantly.
In a #microwaveable bowl{} take @salted butter{30%gms}.
Using a #grater{}, grate @Garlic{3-4%cloves(Large)} into the #microwaveable bowl{}.
Place the #microwaveable bowl{} into #microwave{} for ~{20-30%seconds}.
Using an #oil brush or spatula{} spread the melted butter and Garlic onto the smaller pierced base.
Put this into the #oven{} at 180°C for ~{10%minutes}.
[- This can also be done using Air Fryer at similar temperature. 
Although it will quite possibly require less time than 10 minutes. -]
Take it out after that and cut it into rectangular pieces using #pizza cutter{}.
-- It might be better to let it cool down a bit as it allows for a bit of self-baking.

**Pizza  Toppings**

While any topping can be used, I started by cutting @Yellow Capsicum{35%gms(1/3)} with #knife{}.
Then cut @Brown chestnut mushrooms{30%gms(3-4)}, @onion{50%gms(1/2)} and @canned pineapple{50%gms(1 ring)} using #knife{} and keep them separately for later use.
Cut @Paneer{125%gms} and put in a #small bowl{}.
Add @virgin olive oil{2%tsp}, @tandoori masala{2%tsp}, @dried fenugreek leaves{2%tsp} and @Pink Himalayan Salt{to taste} and mix well.

**Pizza**

Take the larger base and spread evenly [@pasta sauce{3%tbsp}](https://kutt.it/pesto-sauce) on it using #spatula{}.
Then spread a thin layer of @grated mozzarella cheese{40%gms}.
Add all veggie pizza toppings as much as possible without piling up and arranged side to side.
Then add paneer cubes, these can be piled on top of veggies, if there isn't enough space.
Now spread evenly @grated extra mature cheddar cheese{50%gms}.
Then add @jalapenos{10%gms(8-10 rings)}.
Finally sprinkle some more @grated mozzarella cheese{20%gms}.
Put this into the #oven{} at 180°C for ~{20%minutes}.
Take it out after that and cut it using #pizza cutter{}.
"""

## Simulate Hook

In [22]:
import inspect
import os

# Directory part of the filename reported for the frame executing this cell;
# the simulated hook below uses it in place of the page's source directory.
stack = inspect.stack()
current_dir = os.path.split(stack[0].filename)[0]

In [23]:
from jinja2 import Environment, FileSystemLoader
from IPython.display import HTML

# Stand-alone re-run of the hook's rendering steps on the sample `content`,
# loading the template from the current directory.
env = Environment(loader=FileSystemLoader('.'))
template = env.get_template('recipe_template.html')

# Define the base image path
base_image_path = '../assets/images/'
# Check if the current directory is within the Recipes directory
recipes_dir = os.path.abspath('Recipes')  # Absolute path of the Recipes directory
current_dir_abs = os.path.abspath(current_dir)  # Absolute path of the current directory

# Determine the relative path to the images
if os.path.commonpath([recipes_dir, current_dir_abs]) == recipes_dir:
    # current_dir is inside Recipes: express the image folder relative to it
    relative_path_for_image = os.path.relpath(base_image_path, current_dir_abs)
else:
    # Default to the base image path if not in Recipes
    relative_path_for_image = base_image_path

# Plain dict standing in for the page object that is passed to the template.
page = {'title':'low carb pizza', 'url':'/recipes/breakfast%20options/pizza.html'}

meta_data = {}

lines = content.splitlines()
for line in lines:
    if line.strip() != "" and line.startswith(">>"):
        # Split on the first ": " only, so a value containing ": " does not
        # break the unpacking; lines without a separator are skipped.
        parts = line.lstrip(">> ").strip().split(": ", 1)
        if len(parts) == 2:
            key, value = parts
            meta_data[key.strip()] = value.strip()

steps_list, p_steps_list, cookware = fn_steps_list(content.strip())
total_weight_dict, missing_ingredients_string, serving_size, net_carbs_table_found_ingredients = fn_total_df_weight(content.strip())
recipe_ingredients, nutrition_info_addendum = fn_recipe_ingredients(content.strip())

rendered_html = template.render(
    image_path=relative_path_for_image,
    recipe_ingredients=recipe_ingredients,
    metadata=meta_data,
    cookware=cookware,
    steps=steps_list,
    process=p_steps_list,
    total_weight_df=total_weight_dict,
    serving_size=serving_size,
    missing_ingredients_string=missing_ingredients_string,
    cooklang_block=content,
    nutrition_info_addendum=nutrition_info_addendum,
    net_carbs_table_found_ingredients=net_carbs_table_found_ingredients,
    page=page,
)

## Final Output

In [24]:
# Print the raw text produced by the template render so it can be inspected.
print(rendered_html)

<script type="application/ld+json">
	{
		"@context": "https://schema.org",
		"@type": "Recipe",
		"name": "Low Carb Pizza",
		"image": "../assets/images//recipe_9_low_carb_pizza.jpg",
		"description": "Low Carb Pizza and Garlic Bread.",
		"author": {
		"@type": "Person",
		"name": "/recipes/breakfast%20options/pizza.html"
		},
		"recipeCategory": "Vegetarian",
		"recipeCuisine": "Italian",
		"keywords": "",
		"recipeYield": "6",
		"prepTime": "PTM",
		"cookTime": "PT30 minutes (Prep Time - 90 minutes)M",
		"totalTime": "PTM",
		"ingredients": [
				"1 tbsp Olive Oil"
				"1 tsp Sugar"
				"300 ml Water"
				"1 cup Vital Wheat Gluten"
				"1/4 cup Wheat Bran"
				"1/4 cup Oat Flour"
				"1/4 cup Flax Meal"
				"1/2 cup Almond Flour"
				"1 tsp Baking Powder"
				"1 tsp Salt"
				"7 gms(1 sachet) Dried Yeast"
				"30 gms Salted Butter"
				"3-4 cloves(Large) Garlic"
				"35 gms(1/3) Yellow Capsicum"
				"30 gms(3-4) Brown Chestnut Mushrooms"
				"50 gms(1/2) Onion"
				"50 gms(1 ring) Ca

## Final output in HTML

In [25]:
# Run the rendered template output through the `markdown` package and show
# the resulting HTML inline in the notebook.
import markdown
rendered_markdown = markdown.markdown(rendered_html)
display(HTML(rendered_markdown))

S.No,Ingredient,Amount,Step
1.0,Olive Oil,1 tbsp,Step 1
2.0,Sugar,1 tsp,Step 1
3.0,Water,300 ml,Step 1
4.0,Vital Wheat Gluten,1 cup,Step 2
5.0,Wheat Bran,1/4 cup,Step 3
6.0,Oat Flour,1/4 cup,Step 3
7.0,Flax Meal,1/4 cup,Step 4
8.0,Almond Flour,1/2 cup,Step 4
9.0,Baking Powder,1 tsp,Step 5
10.0,Salt,1 tsp,Step 5
