# Extract all ingredients from all recipes and Net Carb Calculation Test

In [363]:
import os
import re
import yaml

def extract_recipe_data(file_path, file_name):
    recipe_data = ''
    with open(file_path, 'r') as file:
        content = file.read()

    # Extract the recipe data section
    if 'parse_recipe()' in content:
        recipe_data = content.split('parse_recipe()}}')[0].split('"""')[1].strip()
        #print(file_name)
        recipe_data = recipe_data + "\n\n" + f':FileName {file_name}'
        return recipe_data
    else:
        return None

def process_directory(directory):
    collated_recipe_data = []
    for root, dirs, files in os.walk(directory):
        for file_name in files:
            if file_name.endswith('.md'):
                file_path = os.path.join(root, file_name)
                recipe_name = ' '.join(file_name.split('.')[0].split('_')[:3]).capitalize()
                print(f'[{recipe_name}]({directory}/{file_name})')
                recipe_data = extract_recipe_data(file_path, file_name)
                
                if recipe_data:
                    #print(f"Recipe data from {file_path}: {file_name}")
                    #print(recipe_data)
                    #print("-" * 40)
                    collated_recipe_data.append(recipe_data)
    return collated_recipe_data
# Example usage
directory_path = './docs/Recipes'
results = ''
results = process_directory(directory_path)
#result = extract_recipe_data(directory_path)
print(results[1])

[00-prepare-to-beat](./docs/Recipes/00-prepare-to-beat.md)
[Tags](./docs/Recipes/tags.md)
[Recipe 1 flaxseed](./docs/Recipes/recipe_1_flaxseed_porridge.md)
[Recipe 2 wheat](./docs/Recipes/recipe_2_wheat_gluten_bread.md)
[Recipe 3 almond](./docs/Recipes/recipe_3_almond_bread.md)
[Recipe 4 keto](./docs/Recipes/recipe_4_keto_dhokla.md)
[Recipe 5 scrambled](./docs/Recipes/recipe_5_scrambled_tofu.md)
[Recipe 1 peanut](./docs/Recipes/recipe_1_peanut_mugcake.md)
[Recipe 2 bottlegourd](./docs/Recipes/recipe_2_bottlegourd_halwa.md)
[Recipe 3 protein](./docs/Recipes/recipe_3_protein_fudge.md)
[Test](./docs/Recipes/test.md)
[Recipe 1 sparkling](./docs/Recipes/recipe_1_sparkling_cinnamon.md)
[Recipe 10 palak](./docs/Recipes/recipe_10_palak_mushroom.md)
[Recipe 11 methi](./docs/Recipes/recipe_11_methi_malai_paneer.md)
[Recipe 1 tava](./docs/Recipes/recipe_1_tava_roti.md)
[Recipe 2 tandoori](./docs/Recipes/recipe_2_tandoori_roti.md)
[Recipe 3 naan](./docs/Recipes/recipe_3_naan_bread.md)
[Recipe 4 ke

# Parsing for cooklang notation

In [306]:
def parse_quantity(item: str) -> list[str, str]:
    """Parse the quantity portion of an ingredient
    e.g. 2%kg
    """
    if "%" not in item:
        return [item, ""]
    return item.split("%", maxsplit=1)


def parse_ingredient(item: str) -> dict[str, str]:
    """Parse an ingredient string
    eg. @salt or @milk{4%cup}
    """
    if item[0] != "@":
        raise ValueError("Ingredients should start with @")
    if item[-1] != "}":
        return {
            "type": "ingredient",
            "name": item[1:],
            "quantity": "some",
            "units": "",
        }
    name, quantity = item.split("{", maxsplit=1)
    val, units = parse_quantity(quantity[0:-1])
    return {
        "type": "ingredient",
        "name": name[1:],
        "quantity": val or "as needed",
        "units": units,
    }
def find_ingredients(step: str) -> list[str]:
    """Find ingredients in a recipe step"""
    return find_specials(step, "@")

def find_specials(step: str, start_char="#") -> list[str]:
    matches = []
    item = ""
    matching: bool = False
    specials = ["~", "@", "#"]
    for i, x in enumerate(step):
        if x == start_char:
            if start_char == "~" and step[i - 1] == "{":
                continue  # Skip - approx value in ingredient
            matching = True
            item += x
            continue
        if matching and x in specials:
            if " " in item:
                item = item.split(" ")[0]
            elif "." in item:
                item = item.split(".")[0]
            matches.append(item)
            matching = False
            item = ""
        if matching and x == "}":
            item += x
            matches.append(item)
            matching = False
            item = ""
        if matching:
            item += x

    if matching:
        if " " in item:
            item = item.split(" ")[0]
        elif "." in item:
            item = item.split(".")[0]
        matches.append(item)
    return matches

def obtain_ingredients(recipe):
    matches = []
    for item in find_ingredients(recipe):
        matches.append(parse_ingredient(item))
    ingredients = {}
    
    #matches
    for match in matches:
        ingredient_name = match['name'].title()
        amount = match['quantity']
        unit = match['units']

        ingredient_key = ingredient_name
        if ingredient_key in ingredients:
            ingredients[ingredient_key].append((amount, unit))
        else:
            ingredients[ingredient_key] = [(amount, unit)]
            
    return ingredients

# Include Filename with ingredients to identify which recipe it belongs to

In [307]:
input_string = ''
file_ingredients = {}
for result in results:
    filename = result.split(":FileName")[1]
    #input_string += result +"\n\n"
    ingredients = obtain_ingredients(result)
    file_ingredients[filename] = ingredients
file_ingredients.keys()
#ingredients

dict_keys([' recipe_1_flaxseed_porridge.md', ' recipe_2_wheat_gluten_bread.md', ' recipe_3_almond_bread.md', ' recipe_4_keto_dhokla.md', ' recipe_5_scrambled_tofu.md', ' recipe_1_peanut_mugcake.md', ' recipe_2_bottlegourd_halwa.md', ' recipe_3_protein_fudge.md', ' test.md', ' recipe_1_sparkling_cinnamon.md', ' recipe_10_palak_mushroom.md', ' recipe_1_tava_roti.md', ' recipe_2_tandoori_roti.md', ' recipe_3_naan_bread.md', ' recipe_4_keto_gatte.md', ' recipe_5_baigan_bharta.md', ' recipe_6_fried_okra.md', ' recipe_7_fried_cauliflower.md', ' recipe_8_soya_malai.md', ' recipe_9_low_carb_pizza.md', ' recipe_1_cheese_crackers.md', ' recipe_2_paneer_crackers.md', ' recipe_3_protein_cutlets.md', ' recipe_4_keto_potato.md', ' recipe_5_salad_for_pasta.md', ' recipe_6_fries.md', ' recipe_7_mint_spinach_dip.md'])

In [308]:
import pandas as pd
from fractions import Fraction

# Create an empty list to store data frames
dfs = []

# Iterate over the` dictionary items and create a data frame for each ingredient
for filename, ingredients in file_ingredients.items():
    ingredients = file_ingredients[filename]
    for ingredient, amounts in ingredients.items():
        temp_df = pd.DataFrame(amounts, columns=['Amount', 'Unit'])
        temp_df['Unit'] = temp_df['Unit'].str.upper()
        temp_df['Recipe_name'] = filename.upper()
        temp_df['Ingredient'] = ingredient.upper()
        dfs.append(temp_df)
# Concatenate the data frames into a single data frame
df = pd.concat(dfs, ignore_index=True)

# Print the DataFrame
df[['Recipe_name', 'Ingredient','Amount', 'Unit']]

Unnamed: 0,Recipe_name,Ingredient,Amount,Unit
0,RECIPE_1_FLAXSEED_PORRIDGE.MD,ALMONDS,1,TABLESPOON
1,RECIPE_1_FLAXSEED_PORRIDGE.MD,WALNUTS,1,TABLESPOON
2,RECIPE_1_FLAXSEED_PORRIDGE.MD,CASHEWS,1,TABLESPOON
3,RECIPE_1_FLAXSEED_PORRIDGE.MD,PISTACHIOS,1,TABLESPOON
4,RECIPE_1_FLAXSEED_PORRIDGE.MD,MACADEMIA NUTS,1,TABLESPOON
...,...,...,...,...
308,RECIPE_7_MINT_SPINACH_DIP.MD,VINE TOMATOES,250,GMS(4 MEDIUM)
309,RECIPE_7_MINT_SPINACH_DIP.MD,VIRGIN OLIVE OIL,1.5,TBSP
310,RECIPE_7_MINT_SPINACH_DIP.MD,RED CAPSICUM,1/6,CHOPPED
311,RECIPE_7_MINT_SPINACH_DIP.MD,PINK HIMALAYAN SALT,to taste,


# Update the all ingredients database to have uniform ingredient qty

In [309]:
import re

def replace_amount(value):
    # Replace fractions with decimals
    if '/' in value:
        numerator, denominator = value.split('/')
        try:
            value = str(float(numerator) / float(denominator))
        except ZeroDivisionError:
            value = '0'
    
    # Replace ranges with the highest value
    if '-' in value:
        value = value.split('-')[-1]
    
    # Replace worded items with 0
    if value.isalpha() or re.match(r'^[a-zA-Z\s]+$', value):
        value = '0'
    
    # Additional replacements
    value = value.lower().strip()
    if value == 'as needed' or value == 'to taste' or value == 'to taste (optional)':
        value = '0'
    elif value.endswith('l') or value.endswith('ml'):
        value = value[:-1]
    
    return value

# Apply the replacement function to the 'Amount' column
df['New Amount'] = df['Amount'].apply(replace_amount)
df['New Amount'].unique()

array(['1', '5', '2.5', '1.5', '300', '0.25', '0.5', '7', '0.75', '0',
       '2', '3', '150', '10', '30', '15', '100', '200', '250', '8', '6',
       '60', '125', '120', '80', '4', '400', '50', '500', '20', '75',
       '25', '35', '40', '0.16666666666666666'], dtype=object)

In [310]:
df['Unit'].unique()

array(['TABLESPOON', 'TABLESPOONS', 'TEASPOON', 'HANDFUL', 'TBSP', 'TSP',
       'ML', 'CUP', 'GMS(1 SACHET)', 'CUP(86G)', '', 'SPRAYS',
       'GMS(MEDIUM SIZED)', 'GMS(MEDIUM SIZED CHOPPED)', 'CLOVES(GRATED)',
       'GMS(1-2 CHOPPED)', 'GMS(2 MEDIUM CHOPPED)',
       'TBSP(1 FRESHLY SQUEEZED)', 'GMS(1 BUNCH)', 'GMS(3 MEDIUM)',
       'CHOPPED', 'CLOVE', 'TEASPOONS', 'MEDIUM SIZE', 'GMS(1 LARGE)',
       'TBSP(1 INCH)', 'CLOVES', 'GMS', 'GRATED', 'GMS(5 PIECES)',
       'GMS(10 PIECES)', 'TSP(5 PEELED CRUSHED)', 'L', 'LITRE',
       'GMS(LARGE)', 'TSP(1 TO 2 CHILLIES)', 'GMS(4 TABLESPOONS)',
       'GMS(2 TEASPOONS)', 'MEDIUM', 'KG(BLACK LARGE)', 'INCH',
       'GMS(4-5 FINELY CHOPPED)', 'GMS(1 SMALL BUNCH, CHOPPED)',
       'GMS(1-2 FINELY CHOPPED, OPTIONAL)', 'GMS(1 SMALL CHOPPED)',
       'GMS(1 SMALL BUNCH)', 'TBSP CHOPPED', 'CLOVES(LARGE)', 'GMS(1/3)',
       'GMS(3-4)', 'GMS(1/2)', 'GMS(1 RING)', 'GMS(8-10%RINGS)', 'BUNCH',
       'BIG', 'CUPS', 'TABLESPOON (OPTIONAL)', 'GMS(1 

# Likewise for unit

In [311]:
import re

def replace_unit(unit):
    # Remove information in brackets
    unit = re.sub(r'\(.*?\)', '', unit)
    
    # Remove trailing whitespace
    unit = unit.upper().strip()
    
    return unit

# Apply the replacement function to the 'Unit' column
df['New Unit'] = df['Unit'].apply(replace_unit)

# Check new dataframe with all ingredients and new coumns

In [312]:
df['New Unit'].unique()

array(['TABLESPOON', 'TABLESPOONS', 'TEASPOON', 'HANDFUL', 'TBSP', 'TSP',
       'ML', 'CUP', 'GMS', '', 'SPRAYS', 'CLOVES', 'CHOPPED', 'CLOVE',
       'TEASPOONS', 'MEDIUM SIZE', 'GRATED', 'L', 'LITRE', 'MEDIUM', 'KG',
       'INCH', 'TBSP CHOPPED', 'BUNCH', 'BIG', 'CUPS'], dtype=object)

In [313]:
df['Ingredient'].unique()

array(['ALMONDS', 'WALNUTS', 'CASHEWS', 'PISTACHIOS', 'MACADEMIA NUTS',
       'GOLDEN FLAXSEED POWDER', 'FULL FAT GREEK YOGURT', 'WATER',
       'ERYTHRITOL', 'BLUEBERRIES', 'RASPBERIIES', 'OLIVE OIL', 'SUGAR',
       'VITAL WHEAT GLUTEN', 'WHEAT BRAN', 'OAT FLOUR', 'FLAX MEAL',
       'ALMOND FLOUR', 'BAKING SODA', 'SALT', 'DRIED YEAST',
       'BLANCHED ALMOND FLOUR', 'BAKING POWDER', 'LUKEWARM WATER',
       'BAKING SPRAY', 'AVOCADO', 'ONION', 'GARLIC', 'TOMATO',
       'GREEN CHILLIES', 'LEMON', 'BLACK PEPPER',
       'FRESH CORRIANDER LEAVES', 'FRESH MINT', 'GREEN CHILLI',
       'CORIANDER AND GREEN MINT DIP', 'BLACK MUSTARD', 'HOT WATER',
       'SUGARFREE TOMATO KETCHUP', 'GINGER', 'RED ONION', 'AVOCADO OIL',
       'CUMIN', 'TURMERIC', 'RED CHILLI POWDER', 'TOFU',
       'PINK HIMALAYAN SALT', 'PEPPER', 'EGG', 'SMOOTH PEANUT BUTTER',
       'BOTTLEGOURD', 'CLARIFIED BUTTER (GHEE)', 'DOUBLE CREAM',
       'CARDAMOM POWDER', 'CASHEW NUTS', 'ALMOND NUTS', 'PISTACHIO NUTS',
     

In [314]:
df

Unnamed: 0,Amount,Unit,Recipe_name,Ingredient,New Amount,New Unit
0,1,TABLESPOON,RECIPE_1_FLAXSEED_PORRIDGE.MD,ALMONDS,1,TABLESPOON
1,1,TABLESPOON,RECIPE_1_FLAXSEED_PORRIDGE.MD,WALNUTS,1,TABLESPOON
2,1,TABLESPOON,RECIPE_1_FLAXSEED_PORRIDGE.MD,CASHEWS,1,TABLESPOON
3,1,TABLESPOON,RECIPE_1_FLAXSEED_PORRIDGE.MD,PISTACHIOS,1,TABLESPOON
4,1,TABLESPOON,RECIPE_1_FLAXSEED_PORRIDGE.MD,MACADEMIA NUTS,1,TABLESPOON
...,...,...,...,...,...,...
308,250,GMS(4 MEDIUM),RECIPE_7_MINT_SPINACH_DIP.MD,VINE TOMATOES,250,GMS
309,1.5,TBSP,RECIPE_7_MINT_SPINACH_DIP.MD,VIRGIN OLIVE OIL,1.5,TBSP
310,1/6,CHOPPED,RECIPE_7_MINT_SPINACH_DIP.MD,RED CAPSICUM,0.16666666666666666,CHOPPED
311,to taste,,RECIPE_7_MINT_SPINACH_DIP.MD,PINK HIMALAYAN SALT,0,


# Create all_recipe_ingredients.csv

In [315]:
df.to_csv('all_recipe_ingredients.csv')

# Create dataframes with unique unit and ingredients

In [316]:
unique_ing_df = df.drop_duplicates(subset='Ingredient')
unique_ing_df.to_csv('unique_ingredients.csv')

In [317]:
unique_unit_df = df.drop_duplicates(subset='New Unit')
unique_unit_df.to_csv('unique_units.csv')

# Functions used in parse_recipe

In [642]:
    import requests
    from thefuzz import fuzz, process
    def read_upload(obj_file,str_sheetname,col_list=None, date_col_list=False, file_type='xlsx', file_name=''):
        dataframe=""    
        try:
            if obj_file is not None:
                if file_type == 'csv':
                    dataframe = pd.read_csv(obj_file, usecols=col_list, parse_dates = date_col_list).applymap(lambda s: s.upper() if type(s) == str else s).fillna('')
                else:
                    dataframe = pd.read_excel(obj_file,sheet_name = str_sheetname, usecols=col_list, parse_dates = date_col_list).applymap(lambda s: s.upper() if type(s) == str else s).fillna('')
            else:
                dataframe= file_name+" is Empty - Provide a file name first"
                #print(dataframe)
        except ValueError as e:
            print("Problem: "+ e.args[0])
        finally:
            return dataframe

    def get_nccdb_nutrient_information(api_key, ingredients, nutrient_ids=[1008, 1005, 1079, 1003, 1004]):
        #print(f'get_nccdb_nutrient_information --- {ingredients}')
        url = "https://api.nal.usda.gov/fdc/v1/foods/search"
        headers = {"Content-Type": "application/json"}
        nutrient_data = []  # List to store the extracted nutrient information
        df_options_nccdb_inner = pd.DataFrame()
        for ingredient in ingredients:
            #print(ingredient)
            params = {
                "query": ingredient,
                "pageSize": 10,
                "api_key": api_key
            }

            response = requests.get(url, headers=headers, params=params)
            response_json = response.json()

            foods = response_json.get('foods', [])
            #print(len(foods))
            if foods:
                for i in range(len(foods)):
                    food = foods[i]
                    #print(food['description'])
                    nutrient_info = {
                        'Food Name': ''.join(food['description'].upper().split(','))
                    }
                    for nutrient in food.get('foodNutrients', []):
                        if nutrient['nutrientId'] in nutrient_ids:
                            nutrient_info[nutrient['nutrientName']] = nutrient['value']
                    nutrient_data.append(nutrient_info)
                df_nccdb = pd.DataFrame(nutrient_data)
                patterns = ['RECIPE', 'CANS', 'BISCUIT', 'NO FRUIT PIECES', \
            'FLAVOURS','BEEF','DISH','LAMB','PIE','DESSERT','INCLUDING',\
            'SAMPLES','MANUFACTURER','CURRY','BRAMLEY','CLOVER','LIGHT MEAT','COOKED',\
           'LEAN AND','EDIBLE PORTION OF','EDIBLE CONVERSION FACTOR','BOILED','STEAMED',\
           'JARS','FISH','ROAST','FRIED','GRILLED', 'MEAT AND','JUICE DRINK', 'CARBONATED',\
           'KEBAB','CRAB','WEIGHT LOSS','FILLETS','CAKE','SALTED', 'SMOKED','GOOSE','HARE',\
            'HAM','BAKED','TURKEY','PUNJABI','TONGUE','SABJI','OX','TUNA','HOMEMADE','WITH SUGAR',\
            'STEWED','FAT FREE','PUDDING','SAUSAGE','PORK','PASTRY','RETAIL','SOUP','SQUID','STEAK',\
            'STEW','WEDGES','SWEETS','BOILED','JAM','INFUSION','BURGER','TOFFEE','TRIFLE','TOPPING',\
           'TURKEY','CASSEROLE','DIP','PICKLED','SAUCE']
                pattern = '|'.join(patterns)  # Combine multiple patterns with the OR operator

                df_nccdb = df_nccdb[~(df_nccdb['Food Name'].str.contains(pattern, flags=re.IGNORECASE))]
                choices = df_nccdb['Food Name'].unique()
                comp_var = process.extract(ingredient, choices, limit=6)
             #   print(comp_var)
              #  print(len(comp_var))

                if comp_var:
                    for i in range(len(comp_var)):
                        if comp_var[i][1] >=90:
                            df_filtered = df_nccdb[df_nccdb['Food Name'] == comp_var[i][0]].reset_index(drop=True)
                            df_filtered['Searched Ingredient'] = ingredient
                            df_options_nccdb_inner = pd.concat([df_options_nccdb_inner, df_filtered])
                            if comp_var[i][1] == 100:
                                break
                    #df_options_nccdb_inner['Searched Ingredient'] = ingredient

        #df = pd.DataFrame(nutrient_data)
        #print(df_options_nccdb_inner.keys())
        if not df_options_nccdb_inner.empty:
            df_options_nccdb_inner['Net Carbs'] = df_o.emptyptions_nccdb_inner['Carbohydrate, by difference'] - df_options_nccdb_inner['Fiber, total dietary'].fillna(0)
        #df_options_nccdb_inner.rename(columns = { 'Carbohydrate, by difference' : 'Carbohydrate (g)','Protein' : 'Protein (g)', 'Total lipid (fat)' : 'Fat (g)', 'Energy' : 'Energy (kcal) (kcal)'}, inplace=True)
        #Total lipid (fat) |   Carbohydrate, by difference |   Energy |   Fiber, total dietary
        #df = df_options_nccdb_inner[['Searched Ingredient','Food Name', 'Carbohydrate (g)', 'Fiber, total dietary', 'Net Carbs', 'Protein (g)', 'Fat (g)', 'Energy (kcal) (kcal)']]
        #print(df)
        return df_options_nccdb_inner#[['Searched Ingredient','Food Name', 'Carbohydrate (g)', 'Fiber, total dietary', 'Net Carbs', 'Protein (g)', 'Fat (g)', 'Energy (kcal) (kcal)']].drop_duplicates(subset='Food Name')     
    
    def get_matches(df):
        """ runs a fuzzy search on provided dataframe """
        #from rapidfuzz import process
        obj_file = './docs/assets/tables/McCance_Widdowsons_Composition_of_Foods_Integrated_Dataset_2021.xlsx'
        str_sheetname = '1.3 Proximates'
        col_list = ['Food Name','Description','Carbohydrate (g)', 'Protein (g)','Fat (g)','Energy (kcal) (kcal)','AOAC fibre (g)']
        df_uk_food_db = read_upload(obj_file,str_sheetname,col_list, date_col_list=False, file_type='xlsx', file_name='McCance_Widdowsons_Composition_of_Foods_Integrated_Dataset_2021.xlsx')
        #print(df_uk_food_db.shape)
        #df_uk_food_db = df_uk_food_db[~[df_uk_food_db['Description'] contains ['Recipe']]]
        patterns = ['RECIPE', 'CANS', 'BISCUIT', 'NO FRUIT PIECES', \
            'FLAVOURS','BEEF','DISH','LAMB','PIE','DESSERT','INCLUDING',\
            'SAMPLES','MANUFACTURER','CURRY','BRAMLEY','CLOVER','LIGHT MEAT','COOKED',\
           'LEAN AND','EDIBLE PORTION OF','EDIBLE CONVERSION FACTOR','BOILED','STEAMED',\
           'JARS','FISH','ROAST','FRIED','GRILLED', 'MEAT AND','JUICE DRINK', 'CARBONATED',\
           'KEBAB','CRAB','WEIGHT LOSS','FILLETS','CAKE','SALTED', 'SMOKED','GOOSE','HARE',\
            'HAM','BAKED','TURKEY','PUNJABI','TONGUE','SABJI','OX','TUNA','HOMEMADE','WITH SUGAR',\
            'STEWED','FAT FREE','PUDDING','SAUSAGE','PORK','PASTRY','RETAIL','SOUP','SQUID','STEAK',\
            'STEW','WEDGES','SWEETS','BOILED','JAM','INFUSION','BURGER','TOFFEE','TRIFLE','TOPPING',\
           'TURKEY','CASSEROLE','DIP','PICKLED','SAUCE']
        pattern = '|'.join(patterns)  # Combine multiple patterns with the OR operator

        df_uk_food_db = df_uk_food_db[~((df_uk_food_db['Description'].str.contains(pattern, flags=re.IGNORECASE))\
                                        | (df_uk_food_db['Food Name'].str.contains(pattern, flags=re.IGNORECASE)))]
        df_options = pd.DataFrame()
        df_options_nccdb = pd.DataFrame()
        df_options_nccdb_search_list = pd.DataFrame()
        filtered_list = []
        choices = df_uk_food_db['Food Name']
        print(df['Ingredient'].unique())
        for item in df['Ingredient'].unique():
            comp_var = process.extract(item, choices, limit=6)
            #print(comp_var)
            #print(item)
            if comp_var:
                for i in range(len(comp_var)):
                    if comp_var[i][1] >=90:
                        df_filtered = df_uk_food_db[df_uk_food_db['Food Name'] == comp_var[i][0]][['Food Name','Carbohydrate (g)', 'Protein (g)','Fat (g)','Energy (kcal) (kcal)','AOAC fibre (g)']]
                        df_filtered['Searched Ingredient'] = item
                        df_options = pd.concat([df_options, df_filtered])
                        if comp_var[i][1] == 100:
                            break
        possible_matches = ''
        api_key = 'DEMO_KEY'
        if df_options.empty:
            possible_matches = ''
            ### Pass the whole ingredient list to nccdb as nothing was found on ukdb
            filtered_list = df['Ingredient'].to_list()
            #print(filtered_list)
        else:
            df_options = df_options[['Searched Ingredient','Food Name', 'Carbohydrate (g)', 'Protein (g)', 'Fat (g)','Energy (kcal) (kcal)', 'AOAC fibre (g)']]
            possible_matches_res = '\n\t\t' + df_options.to_markdown(index=False).replace('\n','\n\t\t')
            possible_matches = f'\t\t*Possible matches in [McCance and Widdowson\'s composition of foods integrated dataset](https://www.gov.uk/government'+\
                f'/publications/composition-of-foods-integrated-dataset-cofid#full-publication-update-history)'+\
                "{target=_blank}"+ f' are shown below:*\n{possible_matches_res}'
            ### Pass the ingredients not found on ukdb to nccdb
            filtered_list = df['Ingredient'][~(df['Ingredient'].isin(df_options['Searched Ingredient']))].to_list()
        
        ### Call nccdb for ingredients not available on ukdb    
        if filtered_list:              
            df_options_nccdb = get_nccdb_nutrient_information(api_key, filtered_list).fillna(0)
            
        if df_options_nccdb.empty:
                possible_matches += ''
        else:
            df_options_nccdb.rename(columns = { 'Carbohydrate, by difference' : 'Carbohydrate (g)','Protein' : 'Protein (g)', 'Total lipid (fat)' : 'Fat (g)', 'Energy' : 'Energy (kcal) (kcal)'}, inplace=True)
            df_options_nccdb = df_options_nccdb[['Searched Ingredient','Food Name', 'Carbohydrate (g)', 'Fiber, total dietary', 'Net Carbs', 'Protein (g)', 'Fat (g)', 'Energy (kcal) (kcal)']].drop_duplicates(subset='Food Name')
            possible_matches_nccdb = '\n\n\t\t' + df_options_nccdb.to_markdown(index=False).replace('\n','\n\t\t')
            possible_matches += f'\n\n\t\t*Possible matches in [U.S. Department of Food Central database](https://fdc.nal.usda.gov/fdc-app.html#/'+\
            f')'+\
            "{target=_blank}"+ f' are shown below:*\n{possible_matches_nccdb}'
        #print(possible_matches)
        return possible_matches
        #return df_options_nccdb,df_options

    
    def get_unformatted_line(input_string: str):
        """Takes input_string as input, and removes all cookland notation from steps 
        and retains metadata as is and returned the steps as a list of lines"""

        #Remove individual timer notation ~{25%minutes} or ~{25-30%minutes}
        pattern = r'~{(\d+)(?:-(\d+))?%([^}]+)}'
        replacement = lambda match: f"{match.group(1)}-{match.group(2)} {match.group(3)}" if match.group(2) else f"{match.group(1)} {match.group(3)}"
        input_string = re.sub(pattern, replacement,input_string)
        #split resulting steps with time formatting removed further remove ingredient and cookware formatting
        lines = input_string.replace("{}", '').\
            replace("@", "").\
            replace("%", " ").\
            replace("#", '').\
            replace("~", '').\
            replace("{", ' (').\
            replace("}", ')').\
            splitlines()
        return lines

    def puml(input_string: str):
        inp_str = ''
        style_str = """
            <style>
            activityDiagram {
              diamond {
                BackgroundColor #darkgreen
                LineColor #white
                FontColor white
              }
            }
            </style>
        """
        lines = get_unformatted_line(input_string)
        #Remove metadata of cooklang that starts with >> and store it in variable inp_str
        for line in lines:
            if line.strip() != "" and not line.startswith(">>"):
                inp_str += f'{line.strip()}\n'
        #steps = inp_str.split('\n')
        steps = inp_str.splitlines()
        steps_string = "<div class=\"grid cards\" markdown>\n\n\n-   ## Steps\n\n\t---"
        out = "\n-   ## Process\n\n\t---\n\n\t```plantuml\n\t@startuml\n\t!theme cerulean\n\t"+style_str+"\n\tstart\n"
        for step in steps:
            # Convert step into uppercase for uniform comparison
            p_step = step.upper()
            # Check if step starts with IF and contains a THEN 
            # If so, it is a candidate for If Then Else syntax of plantuml
            # If not treat it as normal step
            if p_step.startswith('IF') and 'THEN' in p_step:
                # Replace 'ELSE IF' with 'ELSEIF' so there is no clash with final ELSE statement
                if 'ELSE IF' in p_step:
                    p_step = p_step.replace('ELSE IF','ELSEIF')
                # Create a variable to first remove just IF from the step
                if_removed = ''.join(re.split(r"\bIF\b", p_step)).strip()
                # Using above, remove 'ELSE'. This will be a list with two items
                else_removed_l = re.split(r"\bELSE\b", if_removed)
                # Now will If and Else removed, break the sentencefirst item from above list
                # at ELSEIF and store in another list below
                elif_removed_l = re.split(r"\bELSEIF\b", else_removed_l[0])
                # For every item in above list, break it down at THEN and store in a new list
                then_removed_l = []
                for elif_removed in elif_removed_l:
                    then_removed_l += re.split(r"\bTHEN\b", elif_removed)
                # Initiate if loop parsing
                i = 0
                while i < len(then_removed_l): 
                    if i==0:
                        # The very first entry in then_removed_l is condition for if statement 
                        # and second entry is then statement
                        out += f'\tif ({insert_newlines(then_removed_l[i].strip(),20)}?) then (yes)\n\t\t:{insert_newlines(then_removed_l[i+1].strip().capitalize(),30)};\n'
                        i = i+2
                    elif i % 2 == 0:
                        # Add elseif condition 
                        #Logic is that variable then_removed_l has every even item as an elseif condition 
                        # and every odd item as then statement
                        out+= f'\t(no) elseif ({insert_newlines(then_removed_l[i].strip(),20)}?) then (yes)\n'
                        i = i+1
                    else:
                        #Every odd entry is a then statement so use it to create the then statement
                        out+=f'\t\t:{insert_newlines(then_removed_l[i].strip().capitalize(),30)};\n'
                        i = i + 1
                # Check if ELSE exists in the step and if it does include the final else statement
                if len(else_removed_l)>1:
                    out += f'\telse (no)\n\t\t:{insert_newlines(else_removed_l[1].strip().capitalize(),30)};\n'
                out += f'\tendif\n'
            # Ignore empty line in steps
            elif step != '':
                if step.startswith('**') and step.endswith('**'):
                    # If the step starts with ** and ends with **, apply different formatting and remove **
                    step = step.replace("**","")
                    out += f'\t#Black:**{insert_newlines(step.strip(),50)}**/\n'
                    step_line = f"\n\n\t### {step}\n\n"
                else:
                    # If the step does not start with ** and ends with **, apply standard formatting
                    out += f'\t:{insert_newlines(step.strip(),50)};\n'
                    step_line = f"\n\t* {step.strip()}"
                steps_string += step_line
        out += f'\tend\n\t@enduml\n\t```\n\n</div>\n\n'
        out = f'{steps_string}\n\n{out}'
        # Return final markdown for steps and plantuml
        return out

    def parse_cookware(item: str) -> dict[str, str]:
        """Parse cookware item
        e.g. #pot or #potato masher{}
        """
        if item[0] != "#":
            raise ValueError("Cookware should start with #")
        item = item.replace("{}", "")
        return item[1:]


    def parse_quantity(item: str) -> list[str, str]:
        """Parse the quantity portion of an ingredient
        e.g. 2%kg
        """
        if "%" not in item:
            return [item, ""]
        return item.split("%", maxsplit=1)


    def parse_ingredient(item: str) -> dict[str, str]:
        """Parse an ingredient string
        eg. @salt or @milk{4%cup}
        """
        if item[0] != "@":
            raise ValueError("Ingredients should start with @")
        if item[-1] != "}":
            return {
                "type": "ingredient",
                "name": item[1:],
                "quantity": "some",
                "units": "",
            }
        name, quantity = item.split("{", maxsplit=1)
        val, units = parse_quantity(quantity[0:-1])
        return {
            "type": "ingredient",
            "name": name[1:],
            "quantity": val or "as needed",
            "units": units,
        }


    def parse_timer(item: str) -> dict[str, str]:
        """Parse timer string
        e.g. ~eggs{3%minutes} or ~{25%minutes}
        """
        if item[0] != "~":
            raise ValueError("Timer should start with ~")
        name, quantity = item.split("{", maxsplit=1)
        val, units = parse_quantity(quantity[0:-1])
        return {
            "type": "timer",
            "name": name[1:],
            "quantity": val,
            "units": units,
        }

    def find_specials(step: str, start_char="#") -> list[str]:
        matches = []
        item = ""
        matching: bool = False
        specials = ["~", "@", "#"]
        for i, x in enumerate(step):
            if x == start_char:
                if start_char == "~" and step[i - 1] == "{":
                    continue  # Skip - approx value in ingredient
                matching = True
                item += x
                continue
            if matching and x in specials:
                if " " in item:
                    item = item.split(" ")[0]
                elif "." in item:
                    item = item.split(".")[0]
                matches.append(item)
                matching = False
                item = ""
            if matching and x == "}":
                item += x
                matches.append(item)
                matching = False
                item = ""
            if matching:
                item += x

        if matching:
            if " " in item:
                item = item.split(" ")[0]
            elif "." in item:
                item = item.split(".")[0]
            matches.append(item)
        return matches

    def find_cookware(step: str) -> list[str]:
        """Find cookware items in a recipe step"""
        return find_specials(step, "#")


    def find_ingredients(step: str) -> list[str]:
        """Find ingredients in a recipe step"""
        return find_specials(step, "@")


    def find_timers(step: str) -> list[str]:
        """Find timers in a recipe step"""
        return find_specials(step, "~")

    def insert_newlines(input_string: str, chars_per_line: int):
        """Inserts newline in the input_string after specified number of characters from char_per_line"""
        mod = ""    
        for i,x in enumerate(input_string):
            if x in ('`','*'):
                x = ''
            elif x ==' ' and i >= chars_per_line:
                x = "\n\t"
                chars_per_line += chars_per_line
            mod += x    
        return mod

# Main parse_recipe function

In [643]:
    import re
    def parse_recipe(input_string):
        cooklang_block = f'\n??? abstract "Recipe in [Cooklang](https://cooklang.org/)' + '{target=_blank' + '}"\n\t```\n\t' + input_string.replace("\n","\n\t") + '```'
        ingredients = {}
        cookwares = set()
        steps = []
        cooking_data = {}
        ################### Extract Ingredients ###########################
        matches = []
        for item in find_ingredients(input_string):
            matches.append(parse_ingredient(item))
        for match in matches:
            ingredient_name = match['name'].title()
            amount = match['quantity']
            unit = match['units']

            ingredient_key = ingredient_name
            if ingredient_key in ingredients:
                ingredients[ingredient_key].append((amount, unit))
            else:
                ingredients[ingredient_key] = [(amount, unit)]
                
        ##################### Extract cookwares ############################
        cookware_matches = find_cookware(input_string)
        for cookware_match in cookware_matches:
            cookware_name = parse_cookware(cookware_match).title()
            cookwares.add(cookware_name)
        
        ##################### Extract cooking data and steps ################
        lines = get_unformatted_line(input_string)
        
        for line in lines:
            if line.strip() != "" and line.startswith(">>"):
                key, value = line.lstrip(">> ").strip().split(": ")
                cooking_data[key.strip()] = value.strip()
            elif line.strip() != "":
                steps.append(line.strip())
        ###################### Ingredient Block ########################
        ingredient_string = ""
        ingredient_string += "<div class=\"grid cards\" markdown>\n\n\n-   ## Ingredients\n\n\t---\n"
        #ingredient_string += "\n## Ingredients\n\n\t---\n"
        ingredient_count = 1
        for ingredient_name in ingredients.keys():
            #check if ingredient has been used more than once so it can be listed accordingly
            if len(ingredients[ingredient_name]) > 1:
                ingredient_line = f"\t\t{ingredient_count}. {ingredient_name}:"
                ingredient_string += ingredient_line + "\n"
                for amount, unit in ingredients[ingredient_name]:
                    ingredient_line = f"\t\t\t- {amount} {unit}"
                    ingredient_string += ingredient_line + "\n"
            else:
                # if ingredient is used only once, list as a single line with amount and units in same line
                ingredient_line = ""
                for amount, unit in ingredients[ingredient_name]:
                    ingredient_line += f"\t\t{ingredient_count}. {ingredient_name}: {amount} {unit}"
                ingredient_string += ingredient_line + "\n"
            ingredient_count += 1
        ###################### Ingredient Listing ########################    
        if cookwares:
            cookware_string = "\n-   ## Cookwares\n\n\t---\n"
            cookware_count = 1
            for cookware in cookwares:
                cookware_line = f"\t{cookware_count}. *{cookware}*"
                cookware_count += 1
                cookware_string += cookware_line + "\n"
        else:
            cookware_string = ''
        cookware_string += '\n\n</div>\n\n'
        if "Title" in cooking_data:
            title = cooking_data["Title"]
            del cooking_data["Title"]
            cooking_data_string = f"## {title}\n\n"
        else:
            cooking_data_string = ""
        ######################### NET CARB TABLE #################
        df_ingredient_db = pd.read_csv('ingredient_db.csv')
        df_unit_db = pd.read_csv('unit_db.csv')
        # Create an empty list to store data frames
        dfs = []
        # Iterate over the dictionary items and create a data frame for each ingredient
        for ingredient, amounts in ingredients.items():
            temp_df = pd.DataFrame(amounts, columns=['Amount', 'Unit'])
            temp_df['Ingredient'] = ingredient
            dfs.append(temp_df)
        # Concatenate the data frames into a single data frame
        df = pd.concat(dfs, ignore_index=True)
        # Now merge df and df_ingredient_db with inner join to get net carb values for recipe ingredients using code below:
        df_merge = df.merge(df_ingredient_db, how='inner', left_on=df['Ingredient'].str.upper(), right_on=df_ingredient_db['Name'].str.upper())
        # Define functions to calculate net carbs, conversion factor etc
        def replace_amount(value):
            # Replace fractions with decimals
            if '/' in value:
                numerator, denominator = value.split('/')
                try:
                    value = str(float(numerator) / float(denominator))
                except ZeroDivisionError:
                    value = '0'

            # Replace ranges with the highest value
            if '-' in value:
                value = value.split('-')[-1]

            # Replace worded items with 0
            if value.isalpha() or re.match(r'^[a-zA-Z\s]+$', value):
                value = '0'

            # Additional replacements
            value = value.lower().strip()
            if value == 'as needed' or value == 'to taste' or value == 'to taste (optional)':
                value = '0'
            elif value.endswith('l') or value.endswith('ml'):
                value = value[:-1]

            return value
        def replace_unit(unit):
            # Remove information in brackets
            unit = re.sub(r'\(.*?\)', '', unit)

            # Remove trailing whitespace
            unit = unit.upper().strip()

            return unit

        def conv_factor(row):
            filtered_units = df_unit_db[df_unit_db['Unit'].str.upper() == replace_unit(row['Unit_x'])]['eq_gms']
            if not filtered_units.empty:
                conv_factor = float(filtered_units.values[0])
                return conv_factor
            return 0
        def calculate_cal_net_carb(row):
            filtered_units = df_unit_db[df_unit_db['Unit'].str.upper() == replace_unit(row['Unit_x'])]['eq_gms']
            if not filtered_units.empty:
                gms_used_in_recipe = float(filtered_units.values[0]) * float(Fraction(replace_amount(row['Amount'])))
                if row['Net_carb/100gms'] != '0' and row['Net_carb/100gms'] != 0:
                    return (gms_used_in_recipe * float(row['Net_carb/100gms']))/100
            return 0
        def calculate_amt_in_gms(row):
            filtered_units = df_unit_db[df_unit_db['Unit'].str.upper() == replace_unit(row['Unit_x'])]['eq_gms']
            if not filtered_units.empty:
                gms_used_in_recipe = float(filtered_units.values[0]) * float(Fraction(replace_amount(row['Amount'])))
                return gms_used_in_recipe
            return 0
        # Add calculated columns to the dataframe:
        df_merge['Conversion Factor'] = df_merge.apply(conv_factor, axis=1)
        df_merge['Amount used in Recipe(gms)'] = df_merge.apply(calculate_amt_in_gms, axis=1)
        df_merge['Calculated Net Carb in recipe'] = df_merge.apply(calculate_cal_net_carb, axis=1).round(2)
        df_merge.rename(columns = {'Unit_x' : 'Recipe Unit', 'Unit_y' : 'Converted Unit'}, inplace=True)
        
        # Create final markdown table:
        grand_total = f"**{df_merge['Calculated Net Carb in recipe'].sum().round(2)}**"
        result = df_merge[['Ingredient', 'Amount','Recipe Unit','Conversion Factor', 'Amount used in Recipe(gms)', 'Net_carb/100gms','Calculated Net Carb in recipe']]

        # Create a DataFrame for the grand total
        grand_total_df = pd.DataFrame({'Net_carb/100gms': '-','Amount':'-','Recipe Unit': '-','Ingredient': ['**Grand Total**'], 'Calculated Net Carb in recipe': [grand_total], 'Conversion Factor': '-','Amount used in Recipe(gms)': '-' })

        # Concatenate the result DataFrame with the grand total DataFrame
        result = pd.concat([result, grand_total_df], ignore_index=True)
        
        # Find ingredients that are not on ingredient_db
        df_ingredients_not_found =  df[~df['Ingredient'].str.upper().isin(df_ingredient_db['Name'].str.upper())]
        not_found = ''
        if df_ingredients_not_found.empty:
            not_found = ''
        else:
            not_found = ', '.join(df_ingredients_not_found['Ingredient'].unique())
            possible_match = get_matches(df_ingredients_not_found)
            
        # Add the result DataFrame as markdown 
        netcarb_string = f'??? Info "Calculated Net Carb Info (Total Net Carbs for entire dish: {grand_total})"\n\t' + result.to_markdown(index=False).replace("\n","\n\t")
        if not_found != '':
            netcarb_string += '\n\n\t!!! warning "Caution"\n\t\t*The calculation is indicative and my lookup list'+\
            ' did not have net carb values for* **' +\
            not_found +\
            '** *and thus had to be excluded in the calculations above.*\n\n'
            if possible_match != '': 
                netcarb_string += possible_match
        ######################### NET CARB TABLE ####################################
        
        ########################### COOKING DATA STRING #############################
        if "Image" in cooking_data:
            if "Image-Caption" in cooking_data:
                image_data_string = f"## Image\n\n<figure markdown>\n![image](../../assets/images/"+cooking_data["Image"]+"){: style=\"width: 920px;height: 430px;object-fit: contain;\"}\n<figcaption>" + cooking_data["Image-Caption"] + "</figcaption>\n</figure>\n\n"
                del cooking_data["Image-Caption"]
                del cooking_data["Image"]
            else:
                image_data_string = f"## Image\n\n<figure markdown>\n![image](../../assets/images/"+cooking_data["Image"]+"){: style=\"width: 920px;height: 430px;object-fit: contain;\"}\n</figure>\n\n"
                del cooking_data["Image"]
        else:
            image_data_string = ""        
        #cooking_data_string += "<div class=\"grid cards\" markdown>\n\n"
        temp_cooking_data_string = ""
        one_cooking_data_string = ""
        two_cooking_data_string = ""
        three_cooking_data_string = ""
        four_cooking_data_string = ""
        for key, value in cooking_data.items():
            if key in ('Cooking Time','Serving Size','Type', 'Source'):
                if key == 'Cooking Time':
                    one_cooking_data_string = f":material-timer: *{value}*"
                elif key == 'Serving Size':
                    two_cooking_data_string = f", :fontawesome-solid-chart-pie: *{value}*" 
                elif key == 'Type':
                    if value == 'Vegetarian':
                        three_cooking_data_string = f", **{key}**: :leafy_green:"
                    elif value == 'Vegetarian with Egg':
                        three_cooking_data_string = f", **{key}**: :leafy_green::egg:"
                    else:
                        three_cooking_data_string = f", **{key}**: :cut_of_meat:"
                elif key == 'Source':
                    four_cooking_data_string = f", **{key}**: [:material-origin:]({value})"                
            else:
                cooking_data_string += f", **{key}**: *{value}*"
        if one_cooking_data_string != "":
            temp_cooking_data_string = one_cooking_data_string 
        if two_cooking_data_string != "":
            temp_cooking_data_string += two_cooking_data_string
        if three_cooking_data_string != "":
            temp_cooking_data_string += three_cooking_data_string
        if four_cooking_data_string != "":
            temp_cooking_data_string += four_cooking_data_string + "{target=_blank}"
        cooking_data_string = f'<div class=\"grid cards\" align = \"center\" markdown>\n\n-   ' +\
        temp_cooking_data_string + cooking_data_string +\
        f' **Total Net Carbs:** {grand_total}\n\n</div>\n\n'
        ############################## Cooking Data ################################
        steps_dia_string = puml(input_string)
        
        
        
        final_output_string = '\n\n' + image_data_string + cooking_data_string + "\n" + \
        ingredient_string + "\n" + cookware_string + "\n" + netcarb_string + "\n\n\n" +\
        steps_dia_string + cooklang_block 
        return final_output_string

# Check for recipes in dataframe to see if it came correctly

In [635]:
input_string="""
Take @Whole Cinnamon Sticks{infuser full} and put it in the #teapot{} infuser.

Boil @water{0.5%L} using a #kettle or microwaveable jar{} on #hob or microwave{}
Add @eggplant{1%tbsp} and @garam masala{1%tbsp} @amla{1%cup}.
Once boiled, pour it into the teapot and let it steep overnight.

Mix the steeped Cinnamon Tea with @Sparkling water{1.5%Litre}.

Serve cold with @ice{as needed}
"""
print(parse_recipe(input_string))

['Whole Cinnamon Sticks' 'Eggplant' 'Amla']


<div class="grid cards" align = "center" markdown>

-    **Total Net Carbs:** **6.78**

</div>


<div class="grid cards" markdown>


-   ## Ingredients

	---
		1. Whole Cinnamon Sticks: infuser full 
		2. Water: 0.5 L
		3. Eggplant: 1 tbsp
		4. Garam Masala: 1 tbsp
		5. Amla: 1 cup
		6. Sparkling Water: 1.5 Litre
		7. Ice: as needed 


-   ## Cookwares

	---
	1. *Hob Or Microwave*
	2. *Teapot*
	3. *Kettle Or Microwaveable Jar*


</div>


??? Info "Calculated Net Carb Info (Total Net Carbs for entire dish: **6.78**)"
	| Ingredient      | Amount    | Recipe Unit   | Conversion Factor   | Amount used in Recipe(gms)   | Net_carb/100gms   | Calculated Net Carb in recipe   |
	|:----------------|:----------|:--------------|:--------------------|:-----------------------------|:------------------|:--------------------------------|
	| Water           | 0.5       | L             | 1000.0              | 500.0                        | 0.0               

In [584]:
input_string = ''
print(len(results))
input_string = results[9].split(':FileName')
print(input_string[0])

27
>> Serving Size: 4
>> Cooking Time:  15 minutes (Prep Time - 15 minutes)
>> Category: Indian
>> Type: Vegetarian

Heat @Avocado oil{1%tbsp} in a #medium sized pan{}. Finely chop with #knife{} and add @red onion{150%gms(Large)}, @ginger{1%tsp} and @garlic{4%cloves}.
Mix with #spatula{} and sautÃ© for ~{3%minutes}.
Then finely chop @green chilli{1%tsp(1 to 2 chillies)} and add to the #saucepan{}.
Now add @cumin seeds{1%tsp} and @mustard seeds{1%tsp}.
Add fresh @baby spinach{300%gms} and cook for ~{5%minutes.}
Now add @mushrooms{400%gms} and @Pink Himalayan Salt{to taste}.
Cook for a further ~{3%minutes}.
Add @garam masala{1%tsp} and @amchoor powder{1%tsp}.
Continue cooking for another 3 to 4 minutes.
Serve hot.




# Test whole macro function

In [644]:
print(parse_recipe(results[9]))

['Amchoor Powder']


<div class="grid cards" align = "center" markdown>

-   :material-timer: *15 minutes (Prep Time - 15 minutes)*, :fontawesome-solid-chart-pie: *4*, **Type**: :leafy_green:, **Category**: *Indian* **Total Net Carbs:** **29.01**

</div>


<div class="grid cards" markdown>


-   ## Ingredients

	---
		1. Avocado Oil: 1 tbsp
		2. Red Onion: 150 gms(Large)
		3. Ginger: 1 tsp
		4. Garlic: 4 cloves
		5. Green Chilli: 1 tsp(1 to 2 chillies)
		6. Cumin Seeds: 1 tsp
		7. Mustard Seeds: 1 tsp
		8. Baby Spinach: 300 gms
		9. Mushrooms: 400 gms
		10. Pink Himalayan Salt: to taste 
		11. Garam Masala: 1 tsp
		12. Amchoor Powder: 1 tsp


-   ## Cookwares

	---
	1. *Saucepan*
	2. *Spatula*
	3. *Medium Sized Pan*
	4. *Knife*


</div>


??? Info "Calculated Net Carb Info (Total Net Carbs for entire dish: **29.01**)"
	| Ingredient          | Amount   | Recipe Unit          | Conversion Factor   | Amount used in Recipe(gms)   | Net_carb/100gms   | Calculated Net Carb in recipe   |
	|:-

# Generate TOC

In [443]:
import os

def generate_toc(directory, indent='    '):
    output = ''

    # Generate TOC for files in the current directory
    for file_name in sorted(os.listdir(directory)):
        if file_name.endswith('.md'):
            output += f'{indent}- {file_name}\n'  # Add the file to the TOC with indentation

    # Recursively generate TOC for subdirectories
    for dir_name in sorted(os.listdir(directory)):
        subdir = os.path.join(directory, dir_name)
        if os.path.isdir(subdir):
            output += f'{indent}- {dir_name}\n'  # Add the directory to the TOC with indentation
            output += generate_toc(subdir, indent + '    ')

    return output

def generate_toc_full(directory):
    output = f'nav:\n{generate_toc(directory)}'
    return output

print(generate_toc_full('./docs/Recipes'))


nav:
    - 00-prepare-to-beat.md
    - tags.md
    - Breakfast Options
        - recipe_1_flaxseed_porridge.md
        - recipe_2_wheat_gluten_bread.md
        - recipe_3_almond_bread.md
        - recipe_4_keto_dhokla.md
        - recipe_5_scrambled_tofu.md
    - Dessert Options
        - recipe_1_peanut_mugcake.md
        - recipe_2_bottlegourd_halwa.md
        - recipe_3_protein_fudge.md
        - test.md
    - Drinks
        - recipe_1_sparkling_cinnamon.md
    - Main Meal Options
        - recipe_10_palak_mushroom.md
        - recipe_11_methi_malai_paneer.md
        - recipe_1_tava_roti.md
        - recipe_2_tandoori_roti.md
        - recipe_3_naan_bread.md
        - recipe_4_keto_gatte.md
        - recipe_5_baigan_bharta.md
        - recipe_6_fried_okra.md
        - recipe_7_fried_cauliflower.md
        - recipe_8_soya_malai.md
        - recipe_9_low_carb_pizza.md
    - Snacks and Sides
        - recipe_1_cheese_crackers.md
        - recipe_2_paneer_crackers.md
        - recipe_3_

In [500]:
import os

def generate_toc_full(directory):
    def generate_toc(directory, indent='',include_dir_name_flag = False):
        output = ''
        # Generate TOC for files in the current directory
        files = [f for f in os.listdir(directory) if f.endswith('.md')]
        sorted_files = sorted(files, key=lambda x: os.path.getctime(os.path.join(directory, x)))
        for file_name in sorted_files:
            #if file_name not in ['tags.md','index.md','test.md']:
                recipe_name = ' '.join(file_name.split('.')[0].split('_')[2:]).title()
                if recipe_name != '':
                    if '\\' in directory:
                        dir_name_mod = directory.split('\\')[1].replace(' ','%20')
                    else:
                        dir_name_mod_l = directory.split('/')
                        dir_name_mod = dir_name_mod_l[len(dir_name_mod_l)-1].replace(' ','%20')
                    if include_dir_name_flag:
                        output += f'{indent}- [{recipe_name}](./{dir_name_mod}/{file_name})\n'  # Add the file to the TOC with indentation
                    else:
                        output += f'{indent}- [{recipe_name}](./{file_name})\n'  # Add the file to the TOC with indentation
             #   elif file_name == '00-prepare-to-beat.md':
             #       output += f'{indent}1. [Prepare for diabetic diet](./{file_name})\n'  # Add the file to the TOC with indentation

        # Recursively generate TOC for subdirectories
        subdirs = [d for d in os.listdir(directory) if os.path.isdir(os.path.join(directory, d))]
        sorted_subdirs = sorted(subdirs)
        for dir_name in sorted_subdirs:
            subdir = os.path.join(directory, dir_name)
            output += f'\n{indent}1. **{dir_name}**\n\n    ---\n\n'  # Add the directory to the TOC with indentation
            include_dir_name_flag = True
            output += generate_toc(subdir, indent + '    ',include_dir_name_flag)
            

        return output    
    output = f'{generate_toc(directory)}'
    return output

print(generate_toc_full('./docs/Recipes'))
print(generate_toc_full('./docs/Recipes/Dessert Options'))


1. **Breakfast Options**

    ---

    - [Flaxseed Porridge](./Breakfast%20Options/recipe_1_flaxseed_porridge.md)
    - [Wheat Gluten Bread](./Breakfast%20Options/recipe_2_wheat_gluten_bread.md)
    - [Almond Bread](./Breakfast%20Options/recipe_3_almond_bread.md)
    - [Keto Dhokla](./Breakfast%20Options/recipe_4_keto_dhokla.md)
    - [Scrambled Tofu](./Breakfast%20Options/recipe_5_scrambled_tofu.md)

1. **Dessert Options**

    ---

    - [Peanut Mugcake](./Dessert%20Options/recipe_1_peanut_mugcake.md)
    - [Bottlegourd Halwa](./Dessert%20Options/recipe_2_bottlegourd_halwa.md)
    - [Protein Fudge](./Dessert%20Options/recipe_3_protein_fudge.md)

1. **Drinks**

    ---

    - [Sparkling Cinnamon](./Drinks/recipe_1_sparkling_cinnamon.md)

1. **Main Meal Options**

    ---

    - [Tava Roti](./Main%20Meal%20Options/recipe_1_tava_roti.md)
    - [Tandoori Roti](./Main%20Meal%20Options/recipe_2_tandoori_roti.md)
    - [Naan Bread](./Main%20Meal%20Options/recipe_3_naan_bread.md)
    - [Ket

# Interactive mode to scrape nutrient info from Sainsbury's - not used

In [541]:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Instantiate the Chrome driver
driver = webdriver.Chrome('Downloads/chromedriver_win32/chromedriver.exe')

# Navigate to the Sainsbury's website
driver.get('https://www.sainsburys.co.uk/')

# Wait for the cookie banner to appear
cookie_banner = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.ID, 'onetrust-banner-sdk'))
)

# Find the "Accept All" button and click it
accept_button = driver.find_element(By.ID, 'onetrust-accept-btn-handler')
accept_button.click()
print('clicked')

# Find the search input element by ID
search_input = driver.find_element(By.CSS_SELECTOR, '#term')

# Enter the ingredient value
search_input.send_keys('tomatoes')

# Submit the search form
search_input.submit()
# Wait for the search results to load
search_result = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, 'h1.si__title'))
)
# Once the results are loaded, you can proceed to extract the nutritional information
# Find the product link and click on it
product_link = driver.find_element(By.CSS_SELECTOR, 'h2.pt__info__description a.pt__link')
product_link.click()

# Wait for the page to load
driver.implicitly_wait(5)

# Extract the nutritional information
nutritional_info = driver.find_element(By.CLASS_NAME, 'nutritionTable')
print(nutritional_info.text)


# Clean up - close the browser
driver.quit()


  driver = webdriver.Chrome('Downloads/chromedriver_win32/chromedriver.exe')


clicked
Typical values (as sold) 100g contains    % based on RI for Average Adult
Energy 66kJ <4.2kJ -
16kcal <1kcal -
Fat <0.5g - -
Saturates <0.1g - -
Carbohydrate 2.9g - -
Sugars 2.9g - -
Fibre 1.0g - -
Protein <0.5g - -
Salt <0.01g - -


In [542]:
def get_nutritional_info(ingredients):
    # Instantiate the Chrome driver
    driver = webdriver.Chrome('Downloads/chromedriver_win32/chromedriver.exe')

    # Navigate to the Sainsbury's website
    driver.get('https://www.sainsburys.co.uk/')

    # Wait for the cookie banner to appear
    cookie_banner = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.ID, 'onetrust-banner-sdk'))
    )

    # Find the "Accept All" button and click it
    accept_button = driver.find_element(By.ID, 'onetrust-accept-btn-handler')
    accept_button.click()
    print('Clicked on Accept All')

    for ingredient in ingredients:
        # Find the search input element by ID
        search_input = driver.find_element(By.CSS_SELECTOR, '#term')

        # Enter the ingredient value
        search_input.clear()
        search_input.send_keys(ingredient)

        # Submit the search form
        search_input.submit()

        # Wait for the search results to load
        search_result = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, 'h1.si__title'))
        )

        # Once the results are loaded, you can proceed to extract the nutritional information
        # Find the product link and click on it
        product_link = driver.find_element(By.CSS_SELECTOR, 'h2.pt__info__description a.pt__link')
        product_link.click()

        # Wait for the page to load
        driver.implicitly_wait(5)

        # Extract the nutritional information
        nutritional_info = driver.find_element(By.CLASS_NAME, 'nutritionTable')
        print(f'Nutritional Information for {ingredient}:\n{nutritional_info.text}\n')

        # Go back to the homepage
        driver.get('https://www.sainsburys.co.uk/')

    # Clean up - close the browser
    driver.quit()


In [545]:
ingredients = ['tomatoes', 'apples', 'bananas', 'Paneer', 'almonds']
get_nutritional_info(ingredients)

  driver = webdriver.Chrome('Downloads/chromedriver_win32/chromedriver.exe')


Clicked on Accept All
Nutritional Information for tomatoes:
Typical values (as sold) 100g contains    % based on RI for Average Adult
Energy 66kJ <4.2kJ -
16kcal <1kcal -
Fat <0.5g - -
Saturates <0.1g - -
Carbohydrate 2.9g - -
Sugars 2.9g - -
Fibre 1.0g - -
Protein <0.5g - -
Salt <0.01g - -

Nutritional Information for apples:
Typical values     % based on RI for Average Adult
Energy 227kJ <4.2kJ -
54kcal <1kcal -
Fat <0.5g - -
Carbohydrate 11.1g - -
Sugars 11.1g - -
Fibre 1.3g - -
Protein 0.6g - -
Salt <0.01g - -

Nutritional Information for bananas:
100g contains    % based on RI for Average Adult
Energy 362kJ <4.2kJ -
85kcal <1kcal -
Fat <0.5g - -
Saturates <0.1g - -
Carbohydrate 19.3g - -
Sugars 18.1g - -
Fibre 1.4g - -
Protein 1.1g - -
Salt <0.01g - -

Nutritional Information for Paneer:
per: 100 g
Energy 730 kJ/
174 kcal
Fat 8.0 g
of which saturates 5.1 g
Carbohydrate 3.2 g
of which sugars 3.2 g
Protein 22 g
Salt 0.10 g

Nutritional Information for almonds:
per 100 ml
Energy 53 k