In [94]:
import os, json, uuid
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from ultralytics import YOLO
import unicodedata
import re
import requests

In [2]:
# ---- imports ----
import os, json, uuid
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from ultralytics import YOLO
import unicodedata
import re

# ---- tiny normalizer ----
def _strip_accents(s: str) -> str:
    s = unicodedata.normalize("NFD", s)
    s = "".join(ch for ch in s if unicodedata.category(ch) != "Mn")
    return unicodedata.normalize("NFKC", s)

def _norm(s: str) -> str:
    s = _strip_accents((s or "").strip().lower())
    s = re.sub(r"\s+", " ", s)
    return s

# ---- NLP wrapper with symptom2id.json support ----
class SymptomClassifier:
    def __init__(self, model_dir, labels_path=None, device=None, max_length=256):
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.max_length = max_length

        # load human-readable labels
        if labels_path is None:
            labels_path = os.path.join(model_dir, "symptom2id.json")

        if os.path.exists(labels_path):
            with open(labels_path, "r", encoding="utf-8") as f:
                mapping = json.load(f)
            if all(not str(k).isdigit() for k in mapping.keys()):   # symptom -> id
                self.label2id = {str(k): int(v) for k, v in mapping.items()}
                self.id2label = {v: k for k, v in self.label2id.items()}
            else:                                                    # id -> symptom
                self.id2label = {int(k): str(v) for k, v in mapping.items()}
                self.label2id = {v: k for k, v in self.id2label.items()}
        else:
            id2label = getattr(self.model.config, "id2label", None)
            if id2label:
                self.id2label = {int(k): v for k, v in id2label.items()}
            else:
                self.id2label = {i: f"LABEL_{i}" for i in range(self.model.config.num_labels)}

    @torch.inference_mode()
    def predict_one(self, text: str):
        enc = self.tokenizer([text], padding=True, truncation=True,
                             max_length=self.max_length, return_tensors="pt").to(self.device)
        self.model.eval()
        out = self.model(**enc)
        probs = torch.softmax(out.logits, dim=1).detach().cpu().numpy()[0]
        pred_id = int(np.argmax(probs))
        label = self.id2label.get(pred_id, f"LABEL_{pred_id}")
        conf = float(probs[pred_id])
        return label, conf

# ---- instantiate models (adjust paths) ----
MODEL_DIR = r"..\model runs\NLP final model"  # use raw string on Windows
LABELS_PATH = os.path.join(MODEL_DIR, "symptom2id.json")  # if exists

sym_clf = SymptomClassifier(MODEL_DIR, labels_path=LABELS_PATH)

yolo_weights = r"..\notebooks\runs\detect\train 3 - 100 epochs with synthetic\weights\best.pt"
yolo = YOLO(yolo_weights)

# ---- YOLO helper ----
# ---- YOLO helper ---- 
def detect_ingredients_list(image_path: str, conf_threshold: float = 0.1):
    res = yolo(image_path)[0]
    names = res.names  # {id: class_name}
    detected = []
    for i in range(len(res.boxes)):
        cls_id = int(res.boxes.cls[i].item())
        conf = float(res.boxes.conf[i].item())
        if conf < conf_threshold:
            continue
        raw = names.get(cls_id, str(cls_id))
        normed = _norm(raw)   # optional: normalize with your helper
        detected.append(normed)
    return sorted(set(detected))  # unique, normalized list


# ---- main function ----
def NLP_YOLO_predictor_function(text: str, image_path: str) -> pd.DataFrame:
    """
    Runs NLP + YOLO and returns a ONE-ROW DataFrame with list in 'ingredients_list'.
    """
    run_id = str(uuid.uuid4())[:8]
    ts = datetime.now().isoformat(timespec="seconds")

    # Predictions
    symptom_label, conf = sym_clf.predict_one(text)
    ingredients = detect_ingredients_list(image_path)

    # build one-row DF (wrap dict in a list!)
    df_predicted = pd.DataFrame([{
        "run_id": run_id,
        "timestamp": ts,
        "input_text": text,
        "image_path_used": image_path,
        "predicted_symptom": symptom_label,  # human-readable label
        "confidence": conf,
        "ingredients_list": ingredients      # stays as Python list in a single cell
    }])

    deficiency_df = pd.read_csv(r'..\dataset\Nutritional deficiency dataset\deficiencies_dataset.csv')
    deficiency_df = deficiency_df[['sintoma', 'deficiencia de nutrientes', 'disponible en ingredientes']].drop_duplicates().reset_index(drop=True)
    deficiency_df.rename(columns={'sintoma':'predicted_symptom'}, inplace=True)
    df_predicted = pd.merge(df_predicted, deficiency_df, on='predicted_symptom', how='left')  
    df_predicted['deficiencia de nutrientes'] = (df_predicted['deficiencia de nutrientes'].str.split(';').apply(lambda lst: [x.strip() for x in lst if x.strip()])  # strip spaces, drop empties
)  
    return df_predicted

# ---- example call ----
# df_run = NLP_YOLO_predictor_function("últimamente me siento muy cansado", r"..\path\to\fridge.jpg")
# display(df_run)


In [3]:
predicted_df = NLP_YOLO_predictor_function('me duele mucho la cabeza últimamente', r'..\dataset\YOLO - Clean dataset\Real fridge pictures\a1ad4a22-fc60-4d28-8bf6-ac42342e88ca.jpg')


image 1/1 c:\Users\oscar.xu\Desktop\TFM_RecoBite\notebooks\..\dataset\YOLO - Clean dataset\Real fridge pictures\a1ad4a22-fc60-4d28-8bf6-ac42342e88ca.jpg: 768x448 3 Huevoss, 1 Yogur, 8 Limns, 40.4ms
Speed: 3.9ms preprocess, 40.4ms inference, 14.4ms postprocess per image at shape (1, 3, 768, 448)


In [4]:
predicted_df

Unnamed: 0,run_id,timestamp,input_text,image_path_used,predicted_symptom,confidence,ingredients_list,deficiencia de nutrientes,disponible en ingredientes
0,76686894,2025-08-28T00:07:11,me duele mucho la cabeza últimamente,..\dataset\YOLO - Clean dataset\Real fridge pi...,dolor de cabeza,0.5049,"[huevos, limon, yogur]","[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli"


## Translation

In [5]:
recipe_dataset = pd.read_csv(r'..\dataset\Recipes dataset\recipes_dataset.csv')
# Data source is https://huggingface.co/datasets/mbien/recipe_nlg, not uploading because it's too large for Github

In [6]:
en_spa_ingredients = {
    "Tomate": ["tomato", "roma tomato", "cherry tomato", "tomatillo"],
    "Cebolla": ["onion", "red onion", "white onion", "yellow onion", "shallot", "green onion", "spring onion", "scallion"],
    "Patata": ["potato", "new potato", "russet", "yukon gold", "baking potato"],
    "Lechuga/Endivia": ["lettuce", "iceberg", "romaine", "butterhead", "bibb", "cos", "endive", "escarole"],
    "Zanahoria": ["carrot", "baby carrot"],
    "Calabacines": ["zucchini", "courgette", "summer squash"],
    "Pepino": ["cucumber", "english cucumber", "kirby"],
    "Champiñones": ["mushroom", "button mushroom", "cremini", "portobello", "shiitake", "oyster mushroom", "chanterelle", "porcini"],
    "Brocoli": ["broccoli", "broccolini"],
    "Coliflor": ["cauliflower"],

    "Leche": ["milk", "whole milk", "skim milk", "2% milk", "evaporated milk"],
    "Huevos": ["egg", "eggs"],
    "Yogur": ["yogurt", "greek yogurt", "yoghurt"],
    "Queso": ["cheese", "cheddar", "mozzarella", "parmesan", "feta", "gouda", "goat cheese", "blue cheese", "ricotta", "cream cheese", "swiss"],
    "Mantequilla": ["butter", "unsalted butter", "salted butter", "ghee"],

    "Merluza": ["hake"],
    "Gambas/Langostinos": ["shrimp", "prawn", "prawns", "king prawn"],
    "Mix de marisco/molusco": ["seafood mix", "mixed seafood", "clams", "mussels", "oysters", "scallops", "squid", "calamari", "octopus"],
    "Lubina": ["sea bass", "seabass", "branzino", "european seabass"],
    "Salmón": ["salmon"],

    "Plátano": ["banana", "plantain"],
    "Aguacate": ["avocado"],
    "Sandía": ["watermelon"],
    "Limón": ["lemon"],
    "Manzana": ["apple", "granny smith", "gala apple", "fuji apple"],

    "Carne pollo": ["chicken", "chicken breast", "chicken thigh", "chicken leg", "rotisserie chicken", "ground chicken"],
    "Carne cerdo": ["pork", "pork loin", "pork chop", "pork shoulder", "ground pork", "bacon"],
    "Carne vacuno": ["beef", "steak", "ground beef", "sirloin", "ribeye", "chuck", "brisket"],
    "Salchichas": ["sausage", "sausages", "hot dog", "frankfurter", "chorizo", "kielbasa", "bratwurst"],
    "Carne pavo": ["turkey", "ground turkey", "turkey breast", "turkey mince"],
}

spa_spa_ingredients = {
    "Tomate": ["tomate", "jitomate"],
    "Cebolla": ["cebolla", "cebolleta"],
    "Patata": ["patata", "papa"],
    "Lechuga/Endivia": ["lechuga", "endivia", "escarola"],
    "Zanahoria": ["zanahoria"],
    "Calabacines": ["calabacin", "calabacines", "zucchini"],
    "Pepino": ["pepino"],
    "Champiñones": ["champiñon", "champiñones", "seta", "hongos", "portobello", "shiitake"],
    "Brocoli": ["brocoli"],
    "Coliflor": ["coliflor"],
    "Leche": ["leche"],
    "Huevos": ["huevo", "huevos"],
    "Yogur": ["yogur"],
    "Queso": ["queso"],
    "Mantequilla": ["mantequilla", "ghee"],
    "Merluza": ["merluza"],
    "Gambas/Langostinos": ["gamba", "gambas", "langostino", "langostinos", "camarón", "camaron", "camarones"],
    "Mix de marisco/molusco": ["marisco", "molusco", "almeja", "mejillon", "mejillón", "ostras", "calamar", "pulpo"],
    "Lubina": ["lubina"],
    "Salmón": ["salmon", "salmón"],
    "Plátano": ["platano", "plátano", "banana", "banano"],
    "Aguacate": ["aguacate", "palta"],
    "Sandía": ["sandia", "sandía"],
    "Limón": ["limon", "limón"],
    "Manzana": ["manzana"],
    "Carne pollo": ["pollo"],
    "Carne cerdo": ["cerdo"],
    "Carne vacuno": ["vacuno", "ternera", "res"],
    "Salchichas": ["salchicha", "salchichas"],
    "Carne pavo": ["pavo"],
}



In [7]:
import ast, re, unicodedata, torch
from transformers import MarianMTModel, MarianTokenizer

# ---------------------------
# Helpers
# ---------------------------
def strip_accents(s: str) -> str:
    s = unicodedata.normalize("NFD", s)
    s = "".join(ch for ch in s if unicodedata.category(ch) != "Mn")
    return unicodedata.normalize("NFKC", s)

def norm(s: str) -> str:
    s = strip_accents((s or "").strip().lower())
    s = re.sub(r"[^a-z0-9\s\-\/\+]", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

# ---------------------------
# 1) Parse NER strings -> lists (keep phrases)
# ---------------------------
recipe_dataset["NER"] = recipe_dataset["NER"].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)

# 2) Normalize full phrases (do NOT split into words)
recipe_dataset["NER_terms"] = recipe_dataset["NER"].apply(
    lambda phrases: [norm(p) for p in phrases if p]
)

# ---------------------------
# 3) Build reverse indices for rule mapping
# ---------------------------
keywords_eng = {norm(kw): cls for cls, kws in en_spa_ingredients.items() for kw in kws}
keywords_spa = {norm(kw): cls for cls, kws in spa_spa_ingredients.items() for kw in kws}

def rule_map_en(term: str):
    t = norm(term)
    if not t: return None
    if t in keywords_eng:
        return keywords_eng[t]
    for kw, cls in keywords_eng.items():
        if kw and kw in t:
            return cls
    return None

def rule_map_es(term: str):
    t = norm(term)
    if not t: return None
    if t in keywords_spa:
        return keywords_spa[t]
    for kw, cls in keywords_spa.items():
        if kw and kw in t:
            return cls
    return None

# ---------------------------
# 4) Collect unique phrase terms
# ---------------------------
unique_terms = sorted({t for terms in recipe_dataset["NER_terms"] for t in terms if t})
print("Unique phrase-level terms:", len(unique_terms))

# ---------------------------
# 5) Rule-map English phrases; collect unknowns to translate
# ---------------------------
eng_rule_map = {}
unknown_en = []
for term in unique_terms:
    cls = rule_map_en(term)
    if cls:
        eng_rule_map[term] = cls
    else:
        unknown_en.append(term)

print("Rule-mapped (EN):", len(eng_rule_map), " | To translate:", len(unknown_en))


Unique phrase-level terms: 196018
Rule-mapped (EN): 43584  | To translate: 152434


In [8]:
# ---------------------------
# 6) Translate unknown phrases with Marian (GPU if available) + progress
# ---------------------------
mt_name = "Helsinki-NLP/opus-mt-en-es"
device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

mt_tok = MarianTokenizer.from_pretrained(mt_name)
mt_model = MarianMTModel.from_pretrained(mt_name).to(device)
mt_model.eval()

def translate_batch(texts, src_max_len=256, max_new_tokens=128):
    if not texts:
        return []
    with torch.inference_mode():
        inputs = mt_tok(
            texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=src_max_len,
        ).to(device)
        outputs = mt_model.generate(
            **inputs,
            num_beams=1,            # greedy = fastest
            do_sample=False,
            max_new_tokens=max_new_tokens,
            use_cache=True,
        )
        return mt_tok.batch_decode(outputs, skip_special_tokens=True)

BATCH = 96 if device == "cuda" else 24
en2es = {}
total = len(unknown_en)
for i in range(0, total, BATCH):
    chunk = unknown_en[i:i+BATCH]
    trans = translate_batch(chunk)
    for src, tgt in zip(chunk, trans):
        en2es[src] = tgt
    print(f"Progress: {min(i+BATCH, total)}/{total} translated")

print("Translated unknowns:", len(en2es))


Using device: cuda




Progress: 96/152434 translated
Progress: 192/152434 translated
Progress: 288/152434 translated
Progress: 384/152434 translated
Progress: 480/152434 translated
Progress: 576/152434 translated
Progress: 672/152434 translated
Progress: 768/152434 translated
Progress: 864/152434 translated
Progress: 960/152434 translated
Progress: 1056/152434 translated
Progress: 1152/152434 translated
Progress: 1248/152434 translated
Progress: 1344/152434 translated
Progress: 1440/152434 translated
Progress: 1536/152434 translated
Progress: 1632/152434 translated
Progress: 1728/152434 translated
Progress: 1824/152434 translated
Progress: 1920/152434 translated
Progress: 2016/152434 translated
Progress: 2112/152434 translated
Progress: 2208/152434 translated
Progress: 2304/152434 translated
Progress: 2400/152434 translated
Progress: 2496/152434 translated
Progress: 2592/152434 translated
Progress: 2688/152434 translated
Progress: 2784/152434 translated
Progress: 2880/152434 translated
Progress: 2976/152434

In [9]:
# ---------------------------
# 7) Merge mappings (rule-based has priority)
# ---------------------------
full_mapping = {**en2es, **eng_rule_map}  # rule-based overrides MT on conflicts

# ---------------------------
# 8) Map back to dataframe (phrase-by-phrase)
# ---------------------------
def map_list_to_spanish(terms, mapping):
    return [mapping.get(t, t) for t in (terms or [])]

recipe_dataset["NER_terms_es"] = recipe_dataset["NER_terms"].apply(
    lambda ts: map_list_to_spanish(ts, full_mapping)
)

In [10]:
recipe_dataset.to_parquet("recipes_dataset_translated.parquet", engine="pyarrow", index=False)

## Algoritmo de recomendación

In [5]:
translated_recipes = pd.read_csv(r'..\dataset\Recipes dataset\recipes_dataset_translated_ingredients.csv')

In [6]:
translated_recipes.head()

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER,NER_terms,NER_terms_es
0,0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. eva...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"['brown sugar', 'milk', 'vanilla', 'nuts', 'bu...","['brown sugar', 'milk', 'vanilla', 'nuts', 'bu...","['azúcar moreno', 'Leche', 'vainilla', 'frutos..."
1,1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned ...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"['beef', 'chicken breasts', 'cream of mushroom...","['beef', 'chicken breasts', 'cream of mushroom...","['Carne vacuno', 'Carne pollo', 'Champiñones',..."
2,2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"['frozen corn', 'cream cheese', 'butter', 'gar...","['frozen corn', 'cream cheese', 'butter', 'gar...","['Maíz congelado', 'Queso', 'Mantequilla', 'aj..."
3,3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"['chicken', 'chicken gravy', 'cream of mushroo...","['chicken', 'chicken gravy', 'cream of mushroo...","['Carne pollo', 'Carne pollo', 'Champiñones', ..."
4,4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker ...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"['peanut butter', 'graham cracker crumbs', 'bu...","['peanut butter', 'graham cracker crumbs', 'bu...","['Mantequilla', 'migajas de galleta graham', '..."


In [7]:
predicted_df

Unnamed: 0,run_id,timestamp,input_text,image_path_used,predicted_symptom,confidence,ingredients_list,deficiencia de nutrientes,disponible en ingredientes
0,76686894,2025-08-28T00:07:11,me duele mucho la cabeza últimamente,..\dataset\YOLO - Clean dataset\Real fridge pi...,dolor de cabeza,0.5049,"[huevos, limon, yogur]","[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli"


Construiremos un rankeador de recetas de la siguiente forma, se calcularán las siguientes métricas:

# Metrics for Recipe Recommendation

We define the following metrics for evaluating how suitable a recipe is, given the user’s pantry and nutritional deficiencies.

---
`Jaccard Similarity (Recipe vs Fridge)`

**Formula (mathematical):**

$$
\text{Jaccard}(R,F) = \frac{|R \cap F|}{|R \cup F|}
$$

**In words:**  
Take the number of ingredients that appear **in both** the recipe (R) and the fridge (F), and divide by the number of **unique ingredients** across both lists combined.
- Count how many ingredients appear in both the recipe and the fridge (the overlap).
- Count how many unique ingredients there are if you combine recipe + fridge (no duplicates).
- Divide overlap by that unique total.

**Range:** 0–1 (higher is better)

---

2. Missing Penalization

**Formula (mathematical):**

$$
\text{MissingPen}(R,P) = \frac{|R \setminus P|}{|R|}
$$

**In words:**  
Count how many recipe ingredients are **missing from the fridge**, then divide by the total number of recipe ingredients.
- Count ingredients that are in the recipe but not in the fridge (the “missing” ones).
- Divide by the total number of recipe ingredients.

**Range:** 0–1 (lower is better)

---

3. Deficiency Coverage

**Formula (mathematical):**

$$
\text{DefCov}(R,D) =
\begin{cases}
\dfrac{|R \cap Dhelp|}{|Dlist|}, & |D| > 0
\end{cases}
$$

**In words:**  
Take the list of ingredients that can help with the predicted deficiencies (D). Count how many of these appear in the recipe (R), then divide by the total number of deficiency-related ingredients. If the deficiency list is empty, return 0.

- Starting with the list of deficiency-helping ingredients. (Dhelp)
- Counting how many of those appear in the recipe.(R)
- Divide by the size of the deficiency list.(Dlist)
- If the deficiency list is empty, use 0.

**Range:** 0–1 (higher is better)

---

4. Effort (Instruction Length)

**Formula (mathematical):**

$$
\text{Effort}(s) = \frac{T - T_{\min}}{\max(1, \; T_{\max} - T_{\min})}
$$

Where:  
- $T = \tau(s)$ = number of tokens (e.g. words) in the recipe’s instructions text.  
- $T_{\min}, T_{\max}$ = shortest and longest instruction lengths among the compared recipes.

**In words:**  
Count the words in the recipe instructions. Normalize so that the **shortest recipe = 0** and the **longest = 1**. Longer instructions → higher effort.

- Count the words (or tokens) in the recipe’s instructions. Call this T.
- Find the smallest and largest word counts among the recipes you’re comparing (T_min, T_max).
- Normalize: (T − T_min) ÷ (T_max − T_min).

Shortest instructions get 0, longest get 1.

**Range:** 0–1 (lower is better)


In [74]:
def calculate_score_metrics(recipes_df, fridge_list, deficiency_ingredients_list):
    def safe_to_list(x):
        if isinstance(x, list):
            return x
        if isinstance(x, str):
            try:
                return ast.literal_eval(x)
            except Exception:
                return [i.strip() for i in x.strip("[]").split(",") if i.strip()]
        return []

    # Normalize fridge + deficiency lists
    fridge = set(i.strip().lower() for i in safe_to_list(fridge_list) if i)
    deficiencies_ingredients = set(i.strip().lower() for i in safe_to_list(deficiency_ingredients_list) if i)

    df = recipes_df.copy()
    text_col = "directions"

    # Normalize recipe terms
    df["recipe_ingredients"] = df["NER_terms_es"].apply(lambda x: set(i.strip().lower() for i in safe_to_list(x) if i))

    # --- Jaccard ---
    df['inter'] = df["recipe_ingredients"].apply(lambda r: len(r & fridge))
    df['union'] = df["recipe_ingredients"].apply(lambda r: len(r | fridge))
    df["jaccard_score"] = df['inter']/ df['union']

    # --- Missing penalization ---
    df['recipe_size'] = df["recipe_ingredients"].apply(len)
    df['num_needed'] = df["recipe_ingredients"].apply(lambda r: len(r - fridge))
    df["missing_penalization"] = df['num_needed'] / df['recipe_size']

    # --- Deficiency coverage ---
    hits = df["recipe_ingredients"].apply(lambda r: len(r & deficiencies_ingredients))
    df["DRC_abs"] = hits
    df["DRC_coverage"] = (hits / len(deficiencies_ingredients)) if len(deficiencies_ingredients) > 0 else 0.0

    # --- Effort via token count ---
    tokens = df[text_col].fillna("").astype(str).str.findall(r"\w+").str.len()
    t_min, t_max = (int(tokens.min() or 0), int(tokens.max() or 0)) if len(tokens) else (0, 0)
    df["Effort"] = (tokens - t_min) / max(1, t_max - t_min)
    df["tokens"] = tokens
    df.drop(columns=['NER', 'link', 'source', 'num_needed', 'DRC_abs', 'tokens', 'inter', 'union', 'recipe_size'], inplace=True)
    return df

In [75]:
predicted_df.head()

Unnamed: 0,run_id,timestamp,input_text,image_path_used,predicted_symptom,confidence,ingredients_list,deficiencia de nutrientes,disponible en ingredientes
0,76686894,2025-08-28T00:07:11,me duele mucho la cabeza últimamente,..\dataset\YOLO - Clean dataset\Real fridge pi...,dolor de cabeza,0.5049,"[huevos, limon, yogur]","[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli"


In [76]:
row = predicted_df.iloc[0]   # only row in df after the run
# fridge_list = row['ingredients_list']
fridge_list = row['ingredients_list']
nutritional_deficiency_list = row['deficiencia de nutrientes']
deficiency_ingredients_list = row['disponible en ingredientes']
symptom_list = row['predicted_symptom']

recipe_scores = calculate_score_metrics(translated_recipes,
                                  fridge_list=fridge_list,
                                  deficiency_ingredients_list=deficiency_ingredients_list)

In [81]:
recipe_scores = recipe_scores.merge(
    predicted_df[['predicted_symptom', 'deficiencia de nutrientes',
                  'disponible en ingredientes', 'ingredients_list']]
    .rename(columns={'ingredients_list': 'fridge_ingredients_available'}),
    how='cross'
)


In [82]:
recipe_scores.sort_values(by='jaccard_score', ascending=False).head()

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,NER_terms,NER_terms_es,recipe_ingredients,jaccard_score,missing_penalization,DRC_coverage,Effort,predicted_symptom,deficiencia de nutrientes,disponible en ingredientes,fridge_ingredients_available
78427,78427,Peach Praline Muffins,"[""8 oz. peach yogurt"", ""2 eggs"", ""1 box Pillsb...","[""Preheat oven to 350 degrees."", ""Generously b...","['peach yogurt', 'eggs', 'bread mix']","['Yogur', 'Huevos', 'mezcla de pan']","{huevos, yogur, mezcla de pan}",0.5,0.333333,0.166667,0.02855,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]"
2003192,2003192,Yogurt Sorbet,"[""3 large egg whites"", ""2/3 cup sugar"", ""3 cup...","[""In a medium bowl, using a handheld electric ...","['egg whites', 'sugar', 'nonfat yogurt']","['Huevos', 'azúcar', 'Yogur']","{huevos, azúcar, yogur}",0.5,0.333333,0.166667,0.02373,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]"
1980932,1980932,Hair Masque,"[""1 mango, ripe"", ""1 tablespoon plain yogurt"",...","[""Blend together in food processor."", ""Divide ...","['mango', 'plain yogurt', 'egg yolks']","['mango', 'Yogur', 'Huevos']","{yogur, mango, huevos}",0.5,0.333333,0.166667,0.005562,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]"
432639,432639,Fruit Sherbet,"[""12 oz. frozen fruit juice concentrate"", ""1 1...","[""In food processor or blender, blend juice, e...","['fruit juice', 'nonfat yogurt', 'regular cont...","['Jugo de frutas', 'Yogur', 'Yogur', 'Huevos']","{huevos, jugo de frutas, yogur}",0.5,0.333333,0.166667,0.012977,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]"
2146286,2146286,Frizz Control Conditioner,"[""4 tablespoons yogurt"", ""1 egg"", ""1 lemon""]","[""Beat yogurt."", ""Add egg mix well add lemon j...","['yogurt', 'egg', 'lemon']","['Yogur', 'Huevos', 'Limón']","{huevos, limón, yogur}",0.5,0.333333,0.166667,0.03337,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]"


## Scoring system

Dado que no tenemos datos de interacción o preferencia de usuarios por las recetas, de momento para el prototipo y para empezar utilizaremos pesos arbitrarios decididos por opinión personal y contexto de negocio que luego se podrá tunear y mejorar utilizando modelos de Machine Learning para obtener los mejores pesos de las métricas de las recetas en base a la preferencia del consumidor.

**Razón detrás de los pesos**

- **Jaccard** (0.5, positivo)
Encourages recipes that use more of the ingredients already in the fridge (waste reduction, availability).

- **Missing Penalization (0.35, negative)**
Discourages recipes requiring many new ingredients. Symmetric weight with Jaccard since both capture pantry fit, but from different angles.

- **Deficiency Coverage (0.20, positive)**
Adds nutritional alignment when deficiencies exist. Medium weight ensures health considerations matter but don’t dominate feasibility.

- **Effort (0.10, negative)**
Penalizes very long instructions. Smaller weight since instruction length is a noisy proxy for complexity y también es el factor menos importante de todos los dispoibles

In [66]:
def rank_recipes(df,
                 w_jac=0.5, w_drc=0.25, w_mp=0.15, w_effort=0.10,
                 top_n=10):
    """
    Add a final_score column and return top-N recipes.
    """
    df = df.copy()
    df["final_score"] = (
        w_jac   * df["jaccard_score"] +
        w_drc   * df["DRC_coverage"] -
        w_mp    * df["missing_penalization"] -
        w_effort* df["Effort"]
    )
    return df.sort_values("final_score", ascending=False).head(top_n)

rank_recipes(recipe_scores)

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,NER_terms,NER_terms_es,recipe,jaccard_score,recipe_size,missing_penalization,DRC_coverage,Effort,predicted_symptom,deficiencia de nutrientes,disponible en ingredientes,ingredients_list,final_score
196793,196793,Low-Fat Pancakes Or Waffles,"[""2 c. Low-fat Baking Mix"", ""2 egg whites"", ""1...","[""Mix all ingredients together; beat for 2 min...","['egg whites', 'yogurt', 'milk']","['Huevos', 'Yogur', 'Leche']","{leche, huevos, yogur}",0.5,3,0.333333,0.333333,0.010382,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.282295
1980932,1980932,Hair Masque,"[""1 mango, ripe"", ""1 tablespoon plain yogurt"",...","[""Blend together in food processor."", ""Divide ...","['mango', 'plain yogurt', 'egg yolks']","['mango', 'Yogur', 'Huevos']","{yogur, mango, huevos}",0.5,3,0.333333,0.166667,0.005562,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.24111
12689,12689,Low-Fat Brownies,"[""1 family size Duncan Hines brownie mix"", ""1/...","[""Combine box mix, yogurt and egg whites."", ""P...","['brownie mix', 'low-fat vanilla yogurt', 'egg...","['brownie mix', 'Yogur', 'Huevos']","{yogur, brownie mix, huevos}",0.5,3,0.333333,0.166667,0.008528,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.240814
1786382,1786382,Yoghurt Hair Care,"[""1 egg, beaten"", ""3 -4 tablespoons almond ext...","[""Lightly beat an egg in a bowl."", ""Add yoghur...","['egg', 'almond', 'yoghurt']","['Huevos', 'almendra', 'Yogur']","{huevos, almendra, yogur}",0.5,3,0.333333,0.166667,0.00964,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.240703
184007,184007,"Lady Penelope(Frothy, Cadillac-Pink Yogurt Dri...","[""1 part strawberry syrup"", ""1 egg white"", ""1 ...","[""Put all ingredients into a bowl and whiz up ...","['strawberry syrup', 'egg', 'yogurt']","['Almidón de fresa', 'Huevos', 'Yogur']","{huevos, yogur, almidón de fresa}",0.5,3,0.333333,0.166667,0.010382,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.240628
432639,432639,Fruit Sherbet,"[""12 oz. frozen fruit juice concentrate"", ""1 1...","[""In food processor or blender, blend juice, e...","['fruit juice', 'nonfat yogurt', 'regular cont...","['Jugo de frutas', 'Yogur', 'Yogur', 'Huevos']","{huevos, jugo de frutas, yogur}",0.5,3,0.333333,0.166667,0.012977,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.240369
1686993,1686993,The Secret Ingredient In This Pasta Dough Is Y...,"[""1 cup all purpose flour"", ""1 1/2 tablespoons...","[""Knead all ingredients together in a bowl unt...","['flour', 'yogurt', 'egg']","['Harina', 'Yogur', 'Huevos']","{huevos, harina, yogur}",0.5,3,0.333333,0.166667,0.020022,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.239664
463099,463099,Low-Fat Brownie Cookies,"[""1 (21.5 oz.) pkg. Betty Crocker fudge browni...","[""Spray cookie sheet with Pam."", ""Mix brownie ...","['betty crocker fudge brownie mix', 'egg white...","['betty crocker fudge brownie mix', 'Huevos', ...","{huevos, betty crocker fudge brownie mix, yogur}",0.5,3,0.333333,0.166667,0.020393,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.239627
2003192,2003192,Yogurt Sorbet,"[""3 large egg whites"", ""2/3 cup sugar"", ""3 cup...","[""In a medium bowl, using a handheld electric ...","['egg whites', 'sugar', 'nonfat yogurt']","['Huevos', 'azúcar', 'Yogur']","{huevos, azúcar, yogur}",0.5,3,0.333333,0.166667,0.02373,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.239294
78427,78427,Peach Praline Muffins,"[""8 oz. peach yogurt"", ""2 eggs"", ""1 box Pillsb...","[""Preheat oven to 350 degrees."", ""Generously b...","['peach yogurt', 'eggs', 'bread mix']","['Yogur', 'Huevos', 'mezcla de pan']","{huevos, yogur, mezcla de pan}",0.5,3,0.333333,0.166667,0.02855,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.238812


## LLM as a judge

Primero obtenemos las top 10 recetas según el sistema de scoring que se ha construido

In [None]:
## Paso 1) Obtenemos las top 10 recetas según el sistema de puntuación que hemos construido
top10_recipes = rank_recipes(recipe_scores, top_n=10)
top10_recipes

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,NER_terms,NER_terms_es,recipe_ingredients,jaccard_score,missing_penalization,DRC_coverage,Effort,predicted_symptom,deficiencia de nutrientes,disponible en ingredientes,fridge_ingredients_available,final_score
196793,196793,Low-Fat Pancakes Or Waffles,"[""2 c. Low-fat Baking Mix"", ""2 egg whites"", ""1...","[""Mix all ingredients together; beat for 2 min...","['egg whites', 'yogurt', 'milk']","['Huevos', 'Yogur', 'Leche']","{leche, huevos, yogur}",0.5,0.333333,0.333333,0.010382,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.282295
1980932,1980932,Hair Masque,"[""1 mango, ripe"", ""1 tablespoon plain yogurt"",...","[""Blend together in food processor."", ""Divide ...","['mango', 'plain yogurt', 'egg yolks']","['mango', 'Yogur', 'Huevos']","{yogur, mango, huevos}",0.5,0.333333,0.166667,0.005562,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.24111
12689,12689,Low-Fat Brownies,"[""1 family size Duncan Hines brownie mix"", ""1/...","[""Combine box mix, yogurt and egg whites."", ""P...","['brownie mix', 'low-fat vanilla yogurt', 'egg...","['brownie mix', 'Yogur', 'Huevos']","{yogur, brownie mix, huevos}",0.5,0.333333,0.166667,0.008528,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.240814
1786382,1786382,Yoghurt Hair Care,"[""1 egg, beaten"", ""3 -4 tablespoons almond ext...","[""Lightly beat an egg in a bowl."", ""Add yoghur...","['egg', 'almond', 'yoghurt']","['Huevos', 'almendra', 'Yogur']","{huevos, almendra, yogur}",0.5,0.333333,0.166667,0.00964,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.240703
184007,184007,"Lady Penelope(Frothy, Cadillac-Pink Yogurt Dri...","[""1 part strawberry syrup"", ""1 egg white"", ""1 ...","[""Put all ingredients into a bowl and whiz up ...","['strawberry syrup', 'egg', 'yogurt']","['Almidón de fresa', 'Huevos', 'Yogur']","{huevos, yogur, almidón de fresa}",0.5,0.333333,0.166667,0.010382,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.240628
432639,432639,Fruit Sherbet,"[""12 oz. frozen fruit juice concentrate"", ""1 1...","[""In food processor or blender, blend juice, e...","['fruit juice', 'nonfat yogurt', 'regular cont...","['Jugo de frutas', 'Yogur', 'Yogur', 'Huevos']","{huevos, jugo de frutas, yogur}",0.5,0.333333,0.166667,0.012977,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.240369
1686993,1686993,The Secret Ingredient In This Pasta Dough Is Y...,"[""1 cup all purpose flour"", ""1 1/2 tablespoons...","[""Knead all ingredients together in a bowl unt...","['flour', 'yogurt', 'egg']","['Harina', 'Yogur', 'Huevos']","{huevos, harina, yogur}",0.5,0.333333,0.166667,0.020022,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.239664
463099,463099,Low-Fat Brownie Cookies,"[""1 (21.5 oz.) pkg. Betty Crocker fudge browni...","[""Spray cookie sheet with Pam."", ""Mix brownie ...","['betty crocker fudge brownie mix', 'egg white...","['betty crocker fudge brownie mix', 'Huevos', ...","{huevos, betty crocker fudge brownie mix, yogur}",0.5,0.333333,0.166667,0.020393,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.239627
2003192,2003192,Yogurt Sorbet,"[""3 large egg whites"", ""2/3 cup sugar"", ""3 cup...","[""In a medium bowl, using a handheld electric ...","['egg whites', 'sugar', 'nonfat yogurt']","['Huevos', 'azúcar', 'Yogur']","{huevos, azúcar, yogur}",0.5,0.333333,0.166667,0.02373,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.239294
78427,78427,Peach Praline Muffins,"[""8 oz. peach yogurt"", ""2 eggs"", ""1 box Pillsb...","[""Preheat oven to 350 degrees."", ""Generously b...","['peach yogurt', 'eggs', 'bread mix']","['Yogur', 'Huevos', 'mezcla de pan']","{huevos, yogur, mezcla de pan}",0.5,0.333333,0.166667,0.02855,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.238812


In [85]:
# Paso 2) Funciones de preprocesamiento para procesar los texto de los top 10 filas que hemos obtenido con rank_recipes

def safe_to_list(value):
    """
    En las columnas a veces hay listas reales (["a","b"]), 
    pero también se pueden encontrar  'listas como texto' ("['a','b']") y a veces simples cadenas "a, b".
    Esta función hace que siempre se devuelva en formato list[str].
    """
    if isinstance(value, list):
        return value
    if isinstance(value, str):
        stripped = value.strip()
        if (stripped.startswith("[") and stripped.endswith("]")) or (stripped.startswith("(") and stripped.endswith(")")):
            try:
                parsed = ast.literal_eval(stripped)
                if isinstance(parsed, (list, tuple)):
                    return list(parsed)
            except Exception:
                pass
        return [item.strip() for item in value.split(",") if item.strip()]
    return []

def format_ingredients(value):
    """
    Esta función es para:
    - Imprimir ingredientes de forma limpia y consistente en el prompt al LLM.
    - Convierte lo que haya (lista real, lista como texto, cadena) a 'a, b, c'.
    """
    ingredients_list = safe_to_list(value) if value is not None else []
    return ", ".join(map(str, ingredients_list))

def format_directions(value, max_chars=900):
    """
    Las instrucciones de la receta a veces viene aparece como una lista de pasos, otras como puramente texto y diferentes formatos. 
    Esta función lo normaliza en un único string legible, y lo recorta para no exceder la ventana de contexto del modelo.
    """
    if isinstance(value, list):
        text = " ".join(map(str, value))
    elif isinstance(value, str):
        possible_list = None
        stripped = value.strip()
        if stripped.startswith("[") and stripped.endswith("]"):
            try:
                possible_list = ast.literal_eval(stripped)
            except Exception:
                possible_list = None
        text = " ".join(possible_list) if isinstance(possible_list, list) else value
    else:
        text = ""
    return text[:max_chars]


In [130]:
top10_df.head(1)

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,NER_terms,NER_terms_es,recipe_ingredients,jaccard_score,missing_penalization,DRC_coverage,Effort,predicted_symptom,deficiencia de nutrientes,disponible en ingredientes,fridge_ingredients_available,final_score
196793,196793,Low-Fat Pancakes Or Waffles,"[""2 c. Low-fat Baking Mix"", ""2 egg whites"", ""1...","[""Mix all ingredients together; beat for 2 min...","['egg whites', 'yogurt', 'milk']","['Huevos', 'Yogur', 'Leche']","{leche, huevos, yogur}",0.5,0.333333,0.333333,0.010382,dolor de cabeza,"[magnesio, riboflavina (b2), potasio]","yogur, leche, platano, patata, tomate, brocoli","[huevos, limon, yogur]",0.282295


In [131]:
# Paso 3) Construir el contexto para el LLM

def make_context_from_top10(top10df) -> str:
    """
    - Toma las 10 filas seleccionadas por la puntuación
    - Para cada receta, construye un bloque con title, recipe ingredients y directions (se llama directions, pero simplemente son las instrucciones de la receta
    - Esto construye el contexto para que el LLM pueda "pensar" y evaluar
    """
    blocks = []
    for _, row in top10_df.iterrows():
        title_text = str(row.get("title", "")).strip()
        ingredients_text = format_ingredients(row.get("ingredients"))
        directions_text = format_directions(row.get("directions"), max_chars=900)

        jaccard = float(row.get("jaccard_score", 0.0))
        drc_cov = float(row.get("DRC_coverage", 0.0))
        missing_pen = float(row.get("missing_penalization", 0.0))
        effort_norm = float(row.get("Effort", 0.0))

        block = (
            f"Title: {title_text}\n"
            f"Recipe ingredients: {ingredients_text}\n"
            f"Directions: {directions_text}\n"
            f"[signals] jaccard={jaccard:.3f} drc_coverage={drc_cov:.3f} "
            f"missing_penalization={missing_pen:.3f} effort={effort_norm:.3f}"
        )
        blocks.append(block)

    return "\n\n---\n\n".join(blocks)


In [143]:
# Paso 4) Construir el prompt para el LLM

def build_user_prompt_readable(user_text, detected_symptom, deficiency_list, available_ingredients, context_text, top_n=3):
    """
    - Define las instrucciones que damos al LLM para que juzgue y evalue en español.
    - Especifica criterios de decisión (reevaluación de disponibilidad en la nevera, si se han cubierto las deficiencias, el esfuerzo, etc).
    - Exige salida en JSON
    - Añadimos el contexto con las 10 recetas que se ha construido previamente en el apso anterior.
    """
    deficiencies_str = ", ".join(deficiency_list) if isinstance(deficiency_list, list) else str(deficiency_list)
    available_str    = ", ".join(available_ingredients) if isinstance(available_ingredients, list) else str(available_ingredients)

    return f"""
Eres un asistente culinario y nutricional que se encargará de escoger las 3 mejores recetas para un usuario en base a los inputs del usuario y el contexto que te voy a dar.

El input del usuario son los siguientes:
Entrada del usuario: {user_text}
Síntoma detectado: {detected_symptom}
Deficiencias a cubrir: {deficiencies_str}
Ingredientes disponibles: {available_str}

Tienes a continuación 10 recetas candidatas (contexto).
Tarea: elige las {top_n} MEJORES recetas usando SOLO el contexto.
Criterios:
- Tienes que verificar si la receta de verdad es para una comida. Es decir, descarta recetas de cremas, mascarillas y otras cosas que usan ingredientes de comida pero realmente no son para la consumición
- Tienes que verificar que la receta utiliza ingredientes que el usuario tiene en la nevera, dentro de lo posible, evitar casos donde el usuario solo tiene un ingrediente
- Tienes que verificar si la receta cubriría con la posible deficiencie nutricional
- Si hay algún empate o dudas, verifica que la receta es de esfuerzo razonable (menos pasos)

El texto que debes devolver tiene que seguir el siguiente formato, para cada receta elegida, tienes que dirigirte al usuario y explicarle:
- Primero, como texto general decirle el posible síntoma que has detectado y que deficiencias nutriocionales puede padecer de {deficiency_list}
- Segundo, decirle que en base a lo anterior y los ingredientes detectados en la nevera ({available_str}) le recomiendas las siguientes 3 recetas

Para cada receta:
- El nombre de la receta
- Los ingredientes necesarios de la receta
- Explica primero posible síntoma que has detectado (asegurar de mencionar que siempre es mejor visitar el médico) y las deficiencias nutricionales que pueden causarlo. 
- explica en 1–2 frases por qué ayuda al síntoma (cita qué alimentos o ingredientes cubren las deficiencias)
- sugiere algunas sustituciones lógicas o sugiere qué ingredientes el usuario podría saltarse de la receta. (No hace falta que sustituyas ingredientes comunes que todo el mundo tendría como agua, sal, aceite, etc.)
- Las instrucciones de la receta


Contexto:
{context_text}
""".strip()


In [144]:
# Paso 5) Llamamos el chat de OLLAMA

def call_ollama_chat(model_name, system_prompt_text, user_prompt_text, temperature=0.2, num_ctx=8192):
    """
    Se envian dos mensajes al servidor local de Ollama:
      1) system: establece el comportamiento general ("usa solo el contexto", "JSON estricto").
      2) user: el proceso de evaluación que debe tomar el LLM con criterios y el contexto de recetas.
    """
    url = "http://localhost:11434/api/chat"
    payload = {
        "model": model_name,
        "messages": [
            {"role": "system", "content": system_prompt_text},
            {"role": "user",   "content": user_prompt_text}
        ],
        "options": {"temperature": temperature, "num_ctx": num_ctx},
        "stream": False
    }
    response = requests.post(url, json=payload, timeout=120)
    response.raise_for_status()
    return response.json()["message"]["content"]


In [145]:
def run_rag_over_top10_human_output(
    top10_df: pd.DataFrame,
    user_text: str,
    model_name: str = "llama3.1:8b",
    top_n_final: int = 3
) -> str:
    """
    Flujo completo:
    1) Lee metadatos del run (síntoma, deficiencias, disponibles) de la primera fila.
    2) Construye CONTEXTO con las 10 recetas (incluye DIRECTIONS).
    3) Construye prompt para salida legible (viñetas).
    4) Llama a Ollama y devuelve TEXTO listo para el usuario final.
    """
    first_row = top10_df.iloc[0]
    detected_symptom    = str(first_row.get("predicted_symptom", "")).strip()
    deficiency_list     = safe_to_list(first_row.get("deficiencia de nutrientes", []))
    available_from_col  = safe_to_list(first_row.get("disponible en ingredientes", []))
    available_from_fridge  = safe_to_list(first_row.get("ingredients_list", []))
    available_ingredients = sorted({ing.lower() for ing in available_from_fridge})

    context_text = make_context_from_top10(top10_df)

    system_prompt_text = (
        "Eres estricto: solo usas el CONTEXTO proporcionado; "
        "si algo no aparece en el contexto, di que no está disponible. "
        "Responde de forma clara para un usuario final."
    )
    user_prompt_text = build_user_prompt_readable(
        user_text=user_text,
        detected_symptom=detected_symptom,
        deficiency_list=deficiency_list,
        available_ingredients=available_ingredients,
        context_text=context_text,
        top_n=top_n_final
    )

    return call_ollama_chat(
        model_name=model_name,
        system_prompt_text=system_prompt_text,
        user_prompt_text=user_prompt_text,
        temperature=0.2  # baja temperatura = más consistente
    )


In [146]:
rag_result = run_rag_over_top10_human_output(
    top10_df=top10_df,
    user_text="me duele la cabeza últimamente",
    model_name="llama3.1:8b",
    top_n_final=3
)

In [147]:
print(rag_result)

**Posible síntoma detectado:** dolor de cabeza
**Deficiencias nutricionales que pueden causarlo:** magnesio, riboflavina (b2), potasio

En base a lo anterior y los ingredientes detectados en la nevera (yogur, huevos, leche descremada) te recomiendo las siguientes 3 recetas:

**Receta 1: Low-Fat Pancakes Or Waffles**

* Ingredientes necesarios: 2 c. de mezcla para panqueques o waffles baja en grasa, 2 claras de huevo, 1/2 taza de yogur no grasoso
* Explicación: El dolor de cabeza puede estar relacionado con la deficiencia de magnesio y potasio. El yogur es una buena fuente de ambos nutrientes.
* Sustituciones lógicas: Puedes usar leche descremada en lugar de agua para agregar más potasio a la receta.
* Instrucciones: Mezcla todos los ingredientes juntos, bate durante 2 minutos. Cocina en una sartén no adherente con un spray antiadherente.

**Receta 2: Fruit Sherbet**

* Ingredientes necesarios: 12 oz de concentrado de jugo de frutas congeladas, 1 1/2 tazas de yogur no grasoso, 1 contene