In [421]:
import pandas as pd
from itertools import zip_longest
import os

from transformers import pipeline

import warnings
# Suppress every warning issued through the `warnings` module from this point
# on. NOTE(review): this is global for the whole kernel session and also hides
# deprecation notices from the libraries imported above.
warnings.filterwarnings("ignore")

# Install hint (kept commented out so it never runs on "Run All"):
#! pip install transformers

In [422]:
# Loaded NER pipelines, keyed by model name, so that repeated calls to
# ner_food() reuse the model instead of re-loading it on every call.
_NER_PIPELINES = {}


def _load_ner_pipeline(model_name):
    """Return the "ner" pipeline for `model_name`, creating it on first use."""
    if model_name not in _NER_PIPELINES:
        _NER_PIPELINES[model_name] = pipeline("ner", model=model_name)
    return _NER_PIPELINES[model_name]


def _entity_type(label):
    """Return the type part of a tag ("FOOD" for "B-FOOD"); dash-less tags are returned as-is."""
    parts = label.split("-")
    return parts[1] if len(parts) > 1 else label


def _merge_adjacent_entities(tokens):
    """Collapse each run of consecutive tokens sharing an entity type into one entity."""
    merged = []
    for token in tokens:
        if merged and _entity_type(merged[-1]["entity"]) == _entity_type(token["entity"]):
            # Same type as the previous piece: glue the text on and extend the span.
            merged[-1]["word"] += token["word"]
            merged[-1]["end"] = token["end"]
        else:
            # Copy so the pipeline's own output dicts are never mutated.
            merged.append(dict(token))
    return merged


def _clean_word(word, capitalize):
    """Turn a raw token string into display text; missing values become "0"."""
    if pd.isna(word):
        return "0"
    text = str(word).replace("▁", " ").strip()
    return text.lower().capitalize() if capitalize else text


def ner_food(food_name,
             model_name="davanstrien/deberta-v3-base_fine_tuned_food_ner",
             min_score=0.3):
    """Extract foods, quantities and units from a free-text food description.

    The text is run through a token-classification ("ner") pipeline, tokens
    scoring below `min_score` are dropped, consecutive tokens of the same
    entity type are merged, and the result is laid out as a table.

    Parameters
    ----------
    food_name : str
        Text to analyse, e.g. ``"100 gr hamburguers, 1 coke"``.
    model_name : str, optional
        Model identifier passed to ``pipeline("ner", model=...)``.
    min_score : float, optional
        Minimum token score to keep (default 0.3).

    Returns
    -------
    pandas.DataFrame
        Columns ``food, food_start, food_end, quantity, quantity_start,
        quantity_end, unit, unit_start, unit_end``. Text columns are strings
        (food and quantity capitalised, unit only stripped); the ``*_start`` /
        ``*_end`` columns are integer character offsets as reported by the
        pipeline. Missing cells hold ``"0"`` (text) or ``0`` (offsets). The
        frame has zero rows when nothing is recognised.

    Notes
    -----
    Foods, quantities and units are paired by order of appearance (the n-th
    food shares a row with the n-th quantity and the n-th unit), not by their
    proximity in the text.

    The bold header with the input text is printed as a side effect.
    """
    ner = _load_ner_pipeline(model_name)
    tokens = [tok for tok in ner(food_name) if tok["score"] >= min_score]
    entities = _merge_adjacent_entities(tokens)

    # One bucket per output group; insertion order fixes the column order.
    buckets = {"food": [], "quantity": [], "unit": []}
    for ent in entities:
        for kind, bucket in buckets.items():
            if kind.upper() in ent["entity"]:
                bucket.append({
                    kind: ent["word"],
                    f"{kind}_start": ent["start"],
                    f"{kind}_end": ent["end"],
                })
                break

    # Passing `columns` explicitly guarantees that every group contributes its
    # own three columns even when it is empty (e.g. no food was detected).
    frames = [
        pd.DataFrame(bucket, columns=[kind, f"{kind}_start", f"{kind}_end"])
        for kind, bucket in buckets.items()
    ]
    df_edited = pd.concat(frames, axis=1)

    for kind in buckets:
        # Units keep their original casing; foods and quantities are capitalised.
        capitalize = kind != "unit"
        # Whole-column assignment instead of chained `df[col][i] = ...`, which
        # does not write back under pandas Copy-on-Write.
        df_edited[kind] = (
            df_edited[kind]
            .map(lambda word, cap=capitalize: _clean_word(word, cap))
            .astype(str)
        )
        for suffix in ("start", "end"):
            column = f"{kind}_{suffix}"
            df_edited[column] = pd.to_numeric(df_edited[column]).fillna(0).astype(int)

    print(f"\n\n\033[1m{food_name}\033[0m\n")

    return df_edited

In [435]:
# Demo: a sample order mixing a quantity with a unit and several unit-less items.
ner_food(food_name="100 gr hamburguers, 1 coke 1 fanta 1 cofee")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.




[1m100 gr hamburguers, 1 coke 1 fanta 1 cofee[0m



Unnamed: 0,food,food_start,food_end,quantity,quantity_start,quantity_end,unit,unit_start,unit_end
0,Hamburguers,6,18,100,0,3,gr,3,6
1,Coke,21,26,1,19,21,0,0,0
2,Fanta,28,34,1,26,28,0,0,0
3,Cofee,36,42,1,34,36,0,0,0
