In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sentence_transformers import SentenceTransformer, util
import spacy

# Directory from which the T5 model and tokenizer are loaded via
# `from_pretrained` later in the notebook.
output_dir = "./t5_recipe_generator_pretrained_model"

In [6]:
# BERT Contextual Validation
# Sentence-embedding model; its `encode` output is compared with cosine
# similarity in validate_action_ingredient_contextual.
bert_model = SentenceTransformer('all-MiniLM-L6-v2')

def validate_action_ingredient_contextual(action, ingredient, threshold=0.5):
    """
    Decide whether an action and an ingredient are semantically close.

    Each string is embedded separately with `bert_model`, and the two
    embeddings are compared by cosine similarity.

    Args:
        action: Text describing the action (e.g. "fry").
        ingredient: Text describing the ingredient (e.g. "chicken").
        threshold: Similarity value that must be strictly exceeded.

    Returns:
        True when the cosine similarity is greater than `threshold`,
        otherwise False.
    """
    # Encode in the same order as the arguments: action first, then ingredient.
    vectors = [bert_model.encode(text) for text in (action, ingredient)]
    score = util.cos_sim(vectors[0], vectors[1]).item()
    return score > threshold

# Spot-check the contextual validator on a few (action, ingredient) pairs,
# printing one boolean per line.
print(
    *(
        validate_action_ingredient_contextual(verb, item)
        for verb, item in [
            ("melt", "chicken"),
            ("print", "chicken"),
            ("stir", "chicken"),
            ("fry", "in ice"),
            ("fry", "chicken"),
            ("print", "paper"),
            ("melt", "ice"),
        ]
    ),
    sep="\n",
)



False
False
False
False
True
True
True


In [7]:
# Spacy Validation
# Loaded spaCy pipeline; validate_action_ingredient_spacy uses it to build the
# documents whose `similarity` is compared against a threshold.
nlp = spacy.load("en_core_web_md")

def validate_action_ingredient_spacy(action, ingredient, threshold=0.5):
    """
    Decide whether an action and an ingredient are similar according to spaCy.

    Both strings are run through the `nlp` pipeline and compared with the
    resulting document's `similarity` method.

    Args:
        action: Text describing the action (e.g. "fry").
        ingredient: Text describing the ingredient (e.g. "chicken").
        threshold: Similarity value that must be strictly exceeded.

    Returns:
        True when the similarity is greater than `threshold`, otherwise False.
    """
    action_doc = nlp(action)
    ingredient_doc = nlp(ingredient)
    return action_doc.similarity(ingredient_doc) > threshold

# Spot-check the spaCy validator on the same (action, ingredient) pairs,
# printing one boolean per line.
print(
    *(
        validate_action_ingredient_spacy(verb, item)
        for verb, item in [
            ("melt", "chicken"),
            ("print", "chicken"),
            ("stir", "chicken"),
            ("fry", "in ice"),
            ("fry", "chicken"),
            ("print", "paper"),
            ("melt", "ice"),
        ]
    ),
    sep="\n",
)


False
False
True
False
True
True
False


In [63]:
def validate_word_groupings(recipe, ingredients, n=2, threshold=0.5):
    """
    Flag every n-word window of a recipe that matches none of the ingredients.

    The recipe is split on whitespace and each run of `n` consecutive words is
    checked against every ingredient: first with the contextual validator and,
    only if that fails, with the spaCy validator. A window is reported as
    invalid when no ingredient passes either check.

    Args:
        recipe: Recipe text to inspect.
        ingredients: Iterable of ingredient strings to compare against.
        n: Number of consecutive words per window.
        threshold: Similarity threshold forwarded to both validators.

    Returns:
        A tuple `(count, groupings)` where `groupings` is the list of invalid
        windows in order of appearance and `count` is its length.
    """
    def matches_any_ingredient(phrase):
        # Stop at the first ingredient accepted by either validator.
        for ingredient in ingredients:
            if validate_action_ingredient_contextual(phrase, ingredient, threshold):
                return True
            if validate_action_ingredient_spacy(phrase, ingredient, threshold):
                return True
        return False

    tokens = recipe.split()
    windows = (
        " ".join(tokens[start:start + n])
        for start in range(len(tokens) - n + 1)
    )
    rejected = [phrase for phrase in windows if not matches_any_ingredient(phrase)]
    return len(rejected), rejected


def find_best_recipe(ingredients_list, model, tokenizer, num_recipes=10, n=2, threshold=0.5):
    """
    Sample several recipes and return the one with the fewest invalid groupings.

    A prompt is built from `ingredients_list`, up to `num_recipes` recipes are
    sampled from `model`, each is cleaned with `clean_output` and scored with
    `validate_word_groupings`. The first recipe with the lowest invalid count
    wins. The winning count and groupings are printed before returning.

    Args:
        ingredients_list: Ingredient names; joined into the prompt and used as
            the reference set for validation.
        model: Generation model exposing `.to(device)` and `.generate(...)`.
            It is moved to CUDA when available, otherwise CPU.
        tokenizer: Tokenizer matching `model`; used to encode the prompt and
            decode the generated ids.
        num_recipes: Maximum number of recipes to sample.
        n: Window size forwarded to `validate_word_groupings`.
        threshold: Similarity threshold forwarded to `validate_word_groupings`.

    Returns:
        The best cleaned recipe string, or None when `num_recipes` is 0.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    formatted_ingredients = ", ".join(ingredients_list)
    input_text = (
        f"Generate a recipe using these ingredients: {formatted_ingredients}.\n"
        f"Include preparation steps and cooking instructions in a clear, step-by-step format."
    )

    # The prompt is identical for every sample, so tokenize it once instead of
    # once per loop iteration.
    inputs = tokenizer(
        input_text, return_tensors="pt", padding=True, truncation=True, max_length=512
    ).to(device)

    best_recipe = None
    least_invalid_count = float('inf')
    best_invalid_groupings = []

    for _ in range(num_recipes):
        # Sample one recipe. The attention mask is forwarded explicitly so
        # generate() does not have to infer it from the input ids.
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs.get("attention_mask"),
            max_length=512,
            do_sample=True,
            top_p=0.9,
            temperature=0.8,
            no_repeat_ngram_size=3,
        )

        raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        recipe = clean_output(raw_output)

        invalid_count, invalid_groupings = validate_word_groupings(recipe, ingredients_list, n, threshold)

        # Strict comparison: on ties the earliest recipe is kept.
        if invalid_count < least_invalid_count:
            best_recipe = recipe
            least_invalid_count = invalid_count
            best_invalid_groupings = invalid_groupings

        # A count of zero cannot be beaten, so further sampling is wasted work.
        if least_invalid_count == 0:
            break

    print(f"Best Recipe Invalid Count: {least_invalid_count}")
    print(f"Best Recipe Invalid Groupings: {', '.join(best_invalid_groupings)}")
    return best_recipe

def clean_output(output_text):
    """
    Tidy raw model output into a single-line recipe string.

    Steps:
      1. Remove the "<RECIPE>" and "<INGR>" marker tokens.
      2. Collapse every run of whitespace (including newlines) to one space
         and trim both ends.
      3. Upper-case the first character, leaving the rest of the text as is.
      4. Append a final "." when the text does not already end with one.

    Args:
        output_text: Decoded text produced by the model.

    Returns:
        The cleaned string. Empty or whitespace-only input yields ".".
    """
    text = output_text.replace("<RECIPE>", "").replace("<INGR>", "")
    text = " ".join(text.split())

    # Only the first character is upper-cased. str.capitalize() would also
    # lower-case everything after it, mangling units ("350 F"), proper nouns
    # and the starts of later sentences.
    text = text[:1].upper() + text[1:]

    if not text.endswith("."):
        text += "."
    return text


In [65]:
# Example ingredient sets; only the first one is used in the call below.
ingredients_list_1 = [
    "chicken",
    "garlic",
    "onion",
    "salt",
    "wine",
]
ingredients_list_2 = [
    "flour",
    "sugar",
    "butter",
    "eggs",
    "chocolate",
]

# Load the model and its tokenizer from the directory in `output_dir`.
loaded_model = T5ForConditionalGeneration.from_pretrained(output_dir)
loaded_tokenizer = T5Tokenizer.from_pretrained(output_dir)

# Sample ten candidate recipes and keep the one with the fewest invalid
# two-word groupings.
best_recipe = find_best_recipe(
    ingredients_list=ingredients_list_1,
    model=loaded_model,
    tokenizer=loaded_tokenizer,
    num_recipes=10,
    n=2,
    threshold=0.5,
)

print("Best Recipe:", best_recipe, sep="\n")

  similarity = nlp(action).similarity(nlp(ingredient))


Best Recipe Invalid Count: 10
Best Recipe Invalid Groupings: In a, a small, until tender, about 10, 10 minutes, minutes then, a little, little 1, 1 cup, until smooth.
Best Recipe:
In a small bowl combine chicken garlic onion onion salt and wine and cook until tender about 10 minutes then add chicken and garlic to bowl combine toss a little 1 cup chicken broth until smooth.
