# Parte 1: Entender el sitio web objetivo

- Analizar la estructura de la página web objetivo

- Identificar los elementos HTML que contienen los datos buscados

Integrantes:

- Ramirez Mishel

- Zaldubide Danna

In [None]:
# Mount Google Drive at /content/drive inside this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%pip install beautifulsoup4



In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0.post1


In [None]:
# Librerias
import requests
from bs4 import BeautifulSoup
import time
import re
import os
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import regexp_tokenize

# Fetch the NLTK data used later in the notebook: the stopword lists and the
# WordNet corpus for WordNetLemmatizer. omw-1.4 is presumably required by the
# lemmatizer on recent NLTK versions -- TODO confirm.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

## Obtener enlaces válidos

In [None]:
# Category page where the crawl starts.
base_url = 'https://www.allrecipes.com/recipes/17562/dinner/'
# Pages already requested by crawl_recipes, so no page is fetched twice.
visited = set()
# Recipe URLs collected so far (shared, mutated by crawl_recipes).
recipe_urls = set()

def is_real_recipe_url(href):
    """Tell whether ``href`` is an individual allrecipes.com recipe page.

    Only URLs shaped like ``https://www.allrecipes.com/<name>-recipe-<id>``
    (with an optional trailing slash) are accepted.

    Returns:
        The ``re.Match`` object when the URL matches, otherwise ``None``;
        callers rely on its truthiness.
    """
    recipe_pattern = r'^https://www\.allrecipes\.com/.+-recipe-\d+/?$'
    found = re.match(recipe_pattern, href)
    return found

def crawl_recipes(url, max_recipes):
    """Recursively crawl allrecipes.com listing pages, collecting recipe URLs.

    Results accumulate in the module-level ``recipe_urls`` set, and every page
    requested is recorded in the module-level ``visited`` set so it is never
    fetched twice.

    Args:
        url: Page to fetch and scan for links.
        max_recipes: Stop collecting once ``recipe_urls`` holds this many URLs.

    Returns:
        None. Network/HTTP failures are reported with ``print`` and the page
        is skipped.
    """
    if url in visited or len(recipe_urls) >= max_recipes:
        return
    visited.add(url)

    try:
        response = requests.get(url, timeout=10)
        # An error page (403/404/5xx) must not be parsed as a listing.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error en {url}: {e}")
        return
    finally:
        # Politeness delay after *every* request. Sleeping only after the link
        # loop would let the recursive descent fire requests back-to-back.
        time.sleep(1)

    soup = BeautifulSoup(response.text, 'html.parser')

    for link in soup.find_all('a', href=True):
        if len(recipe_urls) >= max_recipes:
            break

        href = link['href'].strip()
        # Skip javascript links, anchors, relative links and other sites.
        if not href.startswith('http') or 'allrecipes.com' not in href:
            continue

        if is_real_recipe_url(href):
            # Keep only valid recipe pages; the set ignores duplicates.
            recipe_urls.add(href)
        elif '/recipes/' in href:
            # Follow category/sub-category pages only; the recursive call
            # itself skips pages that were already visited.
            crawl_recipes(href, max_recipes)

In [None]:
# Text file that stores the collected recipe URLs, one per line.
output_file = 'recetas_urls.txt'
# Folder that receives the downloaded recipe pages.
html_folder = 'recetas_html'

def guardar_urls_en_txt():
    """Write the collected recipe URLs to ``output_file``, sorted, one per line.

    Reads the module-level ``recipe_urls`` set and overwrites any existing file.
    """
    with open(output_file, 'w', encoding='utf-8') as out:
        out.writelines(f"{recipe_url}\n" for recipe_url in sorted(recipe_urls))
    print(f"\nURLs guardadas en: {output_file}")

## Descargar HTML válidos

In [None]:
# Download the HTML of each recipe into the output folder
def descargar_html_de_recetas(max_recetas):
    """Download the pages listed in ``output_file`` into ``html_folder``.

    Each page is saved as ``receta_<i>.html`` where ``i`` is the 1-based
    position of the URL in the file, so numbering stays stable even when some
    downloads fail.

    Args:
        max_recetas: Maximum number of URLs (from the top of the file) to fetch.

    Returns:
        None. Failures are reported with ``print`` and the URL is skipped.
    """
    if not os.path.exists(output_file):
        print("No se encontró el archivo de URLs. Ejecuta primero el crawling.")
        return

    os.makedirs(html_folder, exist_ok=True)

    with open(output_file, 'r', encoding='utf-8') as f:
        urls = [line.strip() for line in f if line.strip()]

    urls = urls[:max_recetas]

    print(f"\nDescargando {len(urls)} recetas...")
    for i, url in enumerate(urls, 1):
        filename = f"receta_{i}.html"
        path = os.path.join(html_folder, filename)
        try:
            response = requests.get(url, timeout=10)
            # Never store an error page (403/404/5xx) as if it were a recipe;
            # it would end up in the corpus as an empty record.
            response.raise_for_status()
            with open(path, 'w', encoding='utf-8') as html_file:
                html_file.write(response.text)
        except (requests.RequestException, OSError) as e:
            print(f"Error al descargar {url}: {e}")
        else:
            print(f"Guardado: {filename}")
        finally:
            # Throttle after every attempt, including failed ones.
            time.sleep(1)

In [None]:
# Run the pipeline: crawl, show the recipe URLs found, save them, download the pages
MAX_RECETAS = 100
crawl_recipes(base_url, MAX_RECETAS)
print(f"Se encontraron {len(recipe_urls)} recetas válidas:")
for url in sorted(recipe_urls):
    print(url)

guardar_urls_en_txt()
descargar_html_de_recetas(MAX_RECETAS)

Se encontraron 91 recetas válidas:
https://www.allrecipes.com/15-minute-creamy-garlic-basil-pasta-recipe-11759962
https://www.allrecipes.com/asian-inspired-pork-and-mushroom-sloppy-joes-recipe-7369808
https://www.allrecipes.com/avocado-caprese-salad-recipe-8737146
https://www.allrecipes.com/ayam-bakar-indonesian-grilled-chicken-recipe-11766874
https://www.allrecipes.com/baked-breakfast-taquitos-recipe-7368609
https://www.allrecipes.com/bang-bang-blended-chicken-burger-recipe-7370938
https://www.allrecipes.com/big-batch-limoncello-basil-spritz-recipe-8598096
https://www.allrecipes.com/bihari-kabab-recipe-7509081
https://www.allrecipes.com/bobotie-south-african-beef-casserole-recipe-8738400
https://www.allrecipes.com/caesar-butter-recipe-8641116
https://www.allrecipes.com/cannellini-bean-salad-with-pesto-recipe-8662294
https://www.allrecipes.com/caper-pesto-recipe-8741129
https://www.allrecipes.com/caprese-stuffed-portobello-mushrooms-recipe-8622413
https://www.allrecipes.com/caramel-app

## Extraer información

In [None]:
def extract_information(html):
    """Parse one allrecipes.com recipe page into a flat dict of fields.

    Args:
        html: Full HTML source of the recipe page, as a string.

    Returns:
        A dict with the keys ``title``, ``description``, ``valoration``,
        ``time``, ``servings``, ``ingredients``, ``instructions``,
        ``nutrition`` and ``image_url``. ``ingredients`` and ``instructions``
        are lists of strings (possibly empty); every other value is a string,
        or ``None`` when the page does not contain it.
    """
    soup = BeautifulSoup(html, "html.parser")

    def get_meta_content(prop):
        """Return the stripped ``content`` of ``<meta property=prop>``, or None."""
        tag = soup.find("meta", {"property": prop})
        return tag["content"].strip() if tag and tag.has_attr("content") else None

    # Title and description
    title = get_meta_content("og:title")
    description = get_meta_content("og:description")

    # Rating
    valoration_section = soup.find("div", attrs={
        "data-tracking-category": "User Recipe Action",
        "class": "comp mm-recipes-review-bar__rating mntl-text-block text-label-300"
    })
    valoration = valoration_section.text.strip() if valoration_section else None

    # Total time and servings: gather the label -> value pairs of the details
    # box. A repeated label keeps its last value.
    details = {}
    for item in soup.select(".mm-recipes-details__item"):
        label_tag = item.select_one(".mm-recipes-details__label")
        value_tag = item.select_one(".mm-recipes-details__value")

        if not label_tag or not value_tag:
            continue

        details[label_tag.text.strip().rstrip(':')] = value_tag.text.strip()

    # Named total_time (not time) so the imported `time` module is not shadowed.
    total_time = details.get("Total Time")
    servings = details.get("Servings")

    # Ingredients
    ingredients = [
        li.text.strip()
        for li in soup.find_all("li", class_="mm-recipes-structured-ingredients__list-item")
    ]

    # Preparation steps. The " " separator keeps text from adjacent nodes
    # (e.g. the step text and a photo credit) from being fused together as in
    # "Gather all ingredients.Kyle Carpenter / ...".
    instructions = [
        li.get_text(" ", strip=True)
        for li in soup.select("li.mntl-sc-block-group--LI")
    ]

    # Nutrition summary, rendered as "Label: value; Label: value; ..."
    nutrition = None
    nutrition_table = soup.find("table", class_="mm-recipes-nutrition-facts-summary__table")

    if nutrition_table:
        nutrition_data = []
        for row in nutrition_table.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) == 2:
                # The first cell holds the value, the second one its label.
                value = cells[0].get_text(strip=True)
                label = cells[1].get_text(strip=True)
                nutrition_data.append(f"{label}: {value}")
        nutrition = "; ".join(nutrition_data) if nutrition_data else None

    # Image
    image_url = get_meta_content("og:image")

    return {
        "title": title,
        "description": description,
        "valoration": valoration,
        "time": total_time,
        "servings": servings,
        "ingredients": ingredients,
        "instructions": instructions,
        "nutrition": nutrition,
        "image_url": image_url
    }


## Construir corpus en dataframe

In [None]:
# Folder holding the downloaded recipe pages
carpeta = "recetas_html"
corpus = []

# Parse every HTML file in the folder into one record per recipe
for archivo in os.listdir(carpeta):
    if archivo.endswith(".html"):
        ruta = os.path.join(carpeta, archivo)
        with open(ruta, "r", encoding="utf-8") as f:
            html = f.read()
            receta_info = extract_information(html)
            # Remember which file each record came from
            receta_info["filename"] = archivo
            corpus.append(receta_info)

# Build the DataFrame
df = pd.DataFrame(corpus)

# Move 'filename' to the front, keeping the other columns in their current order
columnas = ['filename'] + [col for col in df.columns if col != 'filename']
df = df[columnas]
df

Unnamed: 0,filename,title,description,valoration,time,servings,ingredients,instructions,nutrition,image_url
0,receta_31.html,Garlicky Grilled Pesto Chicken,This garlicky grilled pesto chicken uses fresh...,4.3,1 hr,4,"[2 cups firmly packed fresh basil leaves, plus...",[Gather all ingredients.Kyle Carpenter / Food ...,Calories: 766; Fat: 50g; Carbs: 3g; Protein: 73g,https://www.allrecipes.com/thmb/CPqZq1O4a6CEft...
1,receta_48.html,Mint Oreo Icebox Cake,This mint Oreo icebox cake uses few ingredient...,5.0,8 hrs 15 mins,10,"[2 ½ cups heavy cream, 1 cup confectioners sug...",[Beat heavy cream in the bowl of a stand mixer...,Calories: 644; Fat: 40g; Carbs: 68g; Protein: 6g,https://www.allrecipes.com/thmb/7wA60L-q8hLAnO...
2,receta_71.html,Spring Roll Bowl,This delicious spring roll bowl is full of cri...,4.8,25 mins,4,"[4 ounces vermicelli noodles, 1 pound large sh...",[Gather all ingredients.Dotdash Meredith Food ...,Calories: 574; Fat: 32g; Carbs: 38g; Protein: 38g,https://www.allrecipes.com/thmb/lZMOmRAl54GD6L...
3,receta_56.html,Pesto Chicken Caprese,This pesto chicken caprese comes out wonderful...,,30 mins,4,[2 (10- to 12-ounce) skinless boneless chicken...,[Gather all ingredients. Preheat the oven to 4...,Calories: 506; Fat: 28g; Carbs: 4g; Protein: 56g,https://www.allrecipes.com/thmb/cigJur0bm353m8...
4,receta_84.html,Thai Peanut Butter Ramen,This Thai peanut butter ramen is one of many w...,4.3,15 mins,3,"[2 (3 ounce) packages instant ramen noodles, s...",[Bring a large pot of water to a boil over hig...,Calories: 503; Fat: 27g; Carbs: 57g; Protein: 15g,https://www.allrecipes.com/thmb/-K5EVttM9Vrqzp...
...,...,...,...,...,...,...,...,...,...,...
86,receta_22.html,Chive Butter,"An herb butter, like this chive butter, is gre...",,2 hrs 5 mins,8,"[1/2 cup unsalted butter, softened, 2 tablespo...",[Place butter in a small bowl at room temperat...,Calories: 102; Fat: 12g; Carbs: 0g; Protein: 0g,https://www.allrecipes.com/thmb/BW-MV1DhV4X0U-...
87,receta_34.html,Greek Steak Marinade,"This Greek steak marinade, with 3 ingredients ...",,5 mins,4,"[1/4 cup extra-virgin olive oil, 2 tablespoons...","[Whisk extra virgin olive oil, oregano, Greek ...",Calories: 126; Fat: 14g; Carbs: 2g; Protein: 0g,https://www.allrecipes.com/thmb/McM-C_XEPykhSx...
88,receta_64.html,Shallot and Chive Boursin Dip,This simple shallot and chive Boursin dip come...,,5 mins,6,[1 (5.2 ounce) package shallot & chive Gournay...,"[Combine Boursin, mayonnaise, yogurt, pickle b...",Calories: 169; Fat: 15g; Carbs: 5g; Protein: 3g,https://www.allrecipes.com/thmb/PTcFHX6esqXQJ-...
89,receta_76.html,Strawberry Lime-Sage Shrub,"This strawberry lime-sage shrub is a bright, z...",,5 hrs,16,"[4 limes, divided, 1 cup white sugar, 1 cup wa...",[Zest and juice 2 limes to yield 1 teaspoon ze...,Calories: 63; Fat: 0g; Carbs: 16g; Protein: 0g,https://www.allrecipes.com/thmb/gnvnfksk6kQhzk...


In [None]:
def _build_raw_text(row):
    """Join one recipe's fields into a single labelled, line-separated text.

    Plain string concatenation of the columns glued the fields together
    ("...chicken9 hrs 15 minsCalories...") and turned the whole document into
    NaN whenever one field was missing. Here every section goes on its own
    line and missing or empty sections are simply left out.

    Args:
        row: A DataFrame row with the columns ``title``, ``description``,
            ``ingredients``, ``time``, ``nutrition`` and ``instructions``.

    Returns:
        The assembled text ("" when every field is missing).
    """
    def _as_text(value, sep):
        # Lists (ingredients / instructions) are joined with `sep`; missing
        # scalars (None / NaN) become "" so the section is dropped below.
        if isinstance(value, list):
            return sep.join(value)
        if value is None or pd.isna(value):
            return ""
        return str(value)

    labelled_fields = (
        (None, _as_text(row['title'], ', ')),
        (None, _as_text(row['description'], ', ')),
        ('Ingredients', _as_text(row['ingredients'], ', ')),
        ('Total time', _as_text(row['time'], ', ')),
        ('Nutrition', _as_text(row['nutrition'], ', ')),
        ('Instructions', _as_text(row['instructions'], '\n')),
    )
    parts = [
        text if label is None else f"{label}: {text}"
        for label, text in labelled_fields
        if text
    ]
    return '\n'.join(parts)


df['raw'] = df.apply(_build_raw_text, axis=1)
df

Unnamed: 0,filename,title,description,valoration,time,servings,ingredients,instructions,nutrition,image_url,raw
0,receta_31.html,Garlicky Grilled Pesto Chicken,This garlicky grilled pesto chicken uses fresh...,4.3,1 hr,4,"[2 cups firmly packed fresh basil leaves, plus...",[Gather all ingredients.Kyle Carpenter / Food ...,Calories: 766; Fat: 50g; Carbs: 3g; Protein: 73g,https://www.allrecipes.com/thmb/CPqZq1O4a6CEft...,Garlicky Grilled Pesto Chicken. This garlicky ...
1,receta_48.html,Mint Oreo Icebox Cake,This mint Oreo icebox cake uses few ingredient...,5.0,8 hrs 15 mins,10,"[2 ½ cups heavy cream, 1 cup confectioners sug...",[Beat heavy cream in the bowl of a stand mixer...,Calories: 644; Fat: 40g; Carbs: 68g; Protein: 6g,https://www.allrecipes.com/thmb/7wA60L-q8hLAnO...,Mint Oreo Icebox Cake. This mint Oreo icebox c...
2,receta_71.html,Spring Roll Bowl,This delicious spring roll bowl is full of cri...,4.8,25 mins,4,"[4 ounces vermicelli noodles, 1 pound large sh...",[Gather all ingredients.Dotdash Meredith Food ...,Calories: 574; Fat: 32g; Carbs: 38g; Protein: 38g,https://www.allrecipes.com/thmb/lZMOmRAl54GD6L...,Spring Roll Bowl. This delicious spring roll b...
3,receta_56.html,Pesto Chicken Caprese,This pesto chicken caprese comes out wonderful...,,30 mins,4,[2 (10- to 12-ounce) skinless boneless chicken...,[Gather all ingredients. Preheat the oven to 4...,Calories: 506; Fat: 28g; Carbs: 4g; Protein: 56g,https://www.allrecipes.com/thmb/cigJur0bm353m8...,Pesto Chicken Caprese. This pesto chicken capr...
4,receta_84.html,Thai Peanut Butter Ramen,This Thai peanut butter ramen is one of many w...,4.3,15 mins,3,"[2 (3 ounce) packages instant ramen noodles, s...",[Bring a large pot of water to a boil over hig...,Calories: 503; Fat: 27g; Carbs: 57g; Protein: 15g,https://www.allrecipes.com/thmb/-K5EVttM9Vrqzp...,Thai Peanut Butter Ramen. This Thai peanut but...
...,...,...,...,...,...,...,...,...,...,...,...
86,receta_22.html,Chive Butter,"An herb butter, like this chive butter, is gre...",,2 hrs 5 mins,8,"[1/2 cup unsalted butter, softened, 2 tablespo...",[Place butter in a small bowl at room temperat...,Calories: 102; Fat: 12g; Carbs: 0g; Protein: 0g,https://www.allrecipes.com/thmb/BW-MV1DhV4X0U-...,"Chive Butter. An herb butter, like this chive ..."
87,receta_34.html,Greek Steak Marinade,"This Greek steak marinade, with 3 ingredients ...",,5 mins,4,"[1/4 cup extra-virgin olive oil, 2 tablespoons...","[Whisk extra virgin olive oil, oregano, Greek ...",Calories: 126; Fat: 14g; Carbs: 2g; Protein: 0g,https://www.allrecipes.com/thmb/McM-C_XEPykhSx...,Greek Steak Marinade. This Greek steak marinad...
88,receta_64.html,Shallot and Chive Boursin Dip,This simple shallot and chive Boursin dip come...,,5 mins,6,[1 (5.2 ounce) package shallot & chive Gournay...,"[Combine Boursin, mayonnaise, yogurt, pickle b...",Calories: 169; Fat: 15g; Carbs: 5g; Protein: 3g,https://www.allrecipes.com/thmb/PTcFHX6esqXQJ-...,Shallot and Chive Boursin Dip. This simple sha...
89,receta_76.html,Strawberry Lime-Sage Shrub,"This strawberry lime-sage shrub is a bright, z...",,5 hrs,16,"[4 limes, divided, 1 cup white sugar, 1 cup wa...",[Zest and juice 2 limes to yield 1 teaspoon ze...,Calories: 63; Fat: 0g; Carbs: 16g; Protein: 0g,https://www.allrecipes.com/thmb/gnvnfksk6kQhzk...,Strawberry Lime-Sage Shrub. This strawberry li...


## Preprocesamiento

In [None]:
# English stopwords, held in a set for fast membership tests in preprocess_doc
stop_words = set(stopwords.words('english'))
# Single lemmatizer instance reused by every preprocess_doc call
lemmatizer = WordNetLemmatizer()

def preprocess_doc(doc):
    """Normalise a text for embedding.

    The text is lower-cased and split into ``\\w+`` tokens; English stopwords
    are removed and each remaining token is lemmatized.

    Args:
        doc: Text to normalise. Any non-string value (``None``, NaN, ...)
            yields ``""``.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    if isinstance(doc, str):
        words = regexp_tokenize(doc.lower(), r'\w+')
        kept = (word for word in words if word not in stop_words)
        return ' '.join(lemmatizer.lemmatize(word) for word in kept)
    return ""


In [None]:
# Normalise every raw recipe text; a 'raw' value that is not a string becomes ""
df["preprocessed"] = df["raw"].apply(preprocess_doc)
df

Unnamed: 0,filename,title,description,valoration,time,servings,ingredients,instructions,nutrition,image_url,raw,preprocessed
0,receta_31.html,Garlicky Grilled Pesto Chicken,This garlicky grilled pesto chicken uses fresh...,4.3,1 hr,4,"[2 cups firmly packed fresh basil leaves, plus...",[Gather all ingredients.Kyle Carpenter / Food ...,Calories: 766; Fat: 50g; Carbs: 3g; Protein: 73g,https://www.allrecipes.com/thmb/CPqZq1O4a6CEft...,Garlicky Grilled Pesto Chicken. This garlicky ...,garlicky grilled pesto chicken garlicky grille...
1,receta_48.html,Mint Oreo Icebox Cake,This mint Oreo icebox cake uses few ingredient...,5.0,8 hrs 15 mins,10,"[2 ½ cups heavy cream, 1 cup confectioners sug...",[Beat heavy cream in the bowl of a stand mixer...,Calories: 644; Fat: 40g; Carbs: 68g; Protein: 6g,https://www.allrecipes.com/thmb/7wA60L-q8hLAnO...,Mint Oreo Icebox Cake. This mint Oreo icebox c...,mint oreo icebox cake mint oreo icebox cake us...
2,receta_71.html,Spring Roll Bowl,This delicious spring roll bowl is full of cri...,4.8,25 mins,4,"[4 ounces vermicelli noodles, 1 pound large sh...",[Gather all ingredients.Dotdash Meredith Food ...,Calories: 574; Fat: 32g; Carbs: 38g; Protein: 38g,https://www.allrecipes.com/thmb/lZMOmRAl54GD6L...,Spring Roll Bowl. This delicious spring roll b...,spring roll bowl delicious spring roll bowl fu...
3,receta_56.html,Pesto Chicken Caprese,This pesto chicken caprese comes out wonderful...,,30 mins,4,[2 (10- to 12-ounce) skinless boneless chicken...,[Gather all ingredients. Preheat the oven to 4...,Calories: 506; Fat: 28g; Carbs: 4g; Protein: 56g,https://www.allrecipes.com/thmb/cigJur0bm353m8...,Pesto Chicken Caprese. This pesto chicken capr...,pesto chicken caprese pesto chicken caprese co...
4,receta_84.html,Thai Peanut Butter Ramen,This Thai peanut butter ramen is one of many w...,4.3,15 mins,3,"[2 (3 ounce) packages instant ramen noodles, s...",[Bring a large pot of water to a boil over hig...,Calories: 503; Fat: 27g; Carbs: 57g; Protein: 15g,https://www.allrecipes.com/thmb/-K5EVttM9Vrqzp...,Thai Peanut Butter Ramen. This Thai peanut but...,thai peanut butter ramen thai peanut butter ra...
...,...,...,...,...,...,...,...,...,...,...,...,...
86,receta_22.html,Chive Butter,"An herb butter, like this chive butter, is gre...",,2 hrs 5 mins,8,"[1/2 cup unsalted butter, softened, 2 tablespo...",[Place butter in a small bowl at room temperat...,Calories: 102; Fat: 12g; Carbs: 0g; Protein: 0g,https://www.allrecipes.com/thmb/BW-MV1DhV4X0U-...,"Chive Butter. An herb butter, like this chive ...",chive butter herb butter like chive butter gre...
87,receta_34.html,Greek Steak Marinade,"This Greek steak marinade, with 3 ingredients ...",,5 mins,4,"[1/4 cup extra-virgin olive oil, 2 tablespoons...","[Whisk extra virgin olive oil, oregano, Greek ...",Calories: 126; Fat: 14g; Carbs: 2g; Protein: 0g,https://www.allrecipes.com/thmb/McM-C_XEPykhSx...,Greek Steak Marinade. This Greek steak marinad...,greek steak marinade greek steak marinade 3 in...
88,receta_64.html,Shallot and Chive Boursin Dip,This simple shallot and chive Boursin dip come...,,5 mins,6,[1 (5.2 ounce) package shallot & chive Gournay...,"[Combine Boursin, mayonnaise, yogurt, pickle b...",Calories: 169; Fat: 15g; Carbs: 5g; Protein: 3g,https://www.allrecipes.com/thmb/PTcFHX6esqXQJ-...,Shallot and Chive Boursin Dip. This simple sha...,shallot chive boursin dip simple shallot chive...
89,receta_76.html,Strawberry Lime-Sage Shrub,"This strawberry lime-sage shrub is a bright, z...",,5 hrs,16,"[4 limes, divided, 1 cup white sugar, 1 cup wa...",[Zest and juice 2 limes to yield 1 teaspoon ze...,Calories: 63; Fat: 0g; Carbs: 16g; Protein: 0g,https://www.allrecipes.com/thmb/gnvnfksk6kQhzk...,Strawberry Lime-Sage Shrub. This strawberry li...,strawberry lime sage shrub strawberry lime sag...


## Transformación a Embeddings

In [None]:
# --- Load the sentence-embedding model ---
model = SentenceTransformer('all-MiniLM-L6-v2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# --- Helper that attaches an embeddings column to a DataFrame ---
def add_embeddings(df, column='preprocessed', model=model):
    """Encode ``df[column]`` with ``model`` and store the vectors in the frame.

    Args:
        df: DataFrame to extend. It is modified in place.
        column: Name of the text column to encode.
        model: Object exposing ``encode(texts, show_progress_bar=...)``;
            defaults to the notebook-level ``model``.

    Returns:
        The same ``df`` object, now carrying an ``embeddings`` column with one
        list of floats per row.
    """
    print("Generando embeddings...")
    texts = df[column].tolist()
    vectors = model.encode(texts, show_progress_bar=True)
    df["embeddings"] = vectors.tolist()
    print("Embeddings generados y agregados al DataFrame.")
    return df


In [None]:
# add_embeddings modifies df in place and returns it, so df_sections is the same object as df
df_sections = add_embeddings(df, column='preprocessed')
df_sections

Generando embeddings...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Embeddings generados y agregados al DataFrame.


Unnamed: 0,filename,title,description,valoration,time,servings,ingredients,instructions,nutrition,image_url,raw,preprocessed,embeddings
0,receta_31.html,Garlicky Grilled Pesto Chicken,This garlicky grilled pesto chicken uses fresh...,4.3,1 hr,4,"[2 cups firmly packed fresh basil leaves, plus...",[Gather all ingredients.Kyle Carpenter / Food ...,Calories: 766; Fat: 50g; Carbs: 3g; Protein: 73g,https://www.allrecipes.com/thmb/CPqZq1O4a6CEft...,Garlicky Grilled Pesto Chicken. This garlicky ...,garlicky grilled pesto chicken garlicky grille...,"[-0.06378534436225891, -0.026391921564936638, ..."
1,receta_48.html,Mint Oreo Icebox Cake,This mint Oreo icebox cake uses few ingredient...,5.0,8 hrs 15 mins,10,"[2 ½ cups heavy cream, 1 cup confectioners sug...",[Beat heavy cream in the bowl of a stand mixer...,Calories: 644; Fat: 40g; Carbs: 68g; Protein: 6g,https://www.allrecipes.com/thmb/7wA60L-q8hLAnO...,Mint Oreo Icebox Cake. This mint Oreo icebox c...,mint oreo icebox cake mint oreo icebox cake us...,"[-0.023016946390271187, -0.03419332578778267, ..."
2,receta_71.html,Spring Roll Bowl,This delicious spring roll bowl is full of cri...,4.8,25 mins,4,"[4 ounces vermicelli noodles, 1 pound large sh...",[Gather all ingredients.Dotdash Meredith Food ...,Calories: 574; Fat: 32g; Carbs: 38g; Protein: 38g,https://www.allrecipes.com/thmb/lZMOmRAl54GD6L...,Spring Roll Bowl. This delicious spring roll b...,spring roll bowl delicious spring roll bowl fu...,"[-0.0390448272228241, -0.01403211709111929, -0..."
3,receta_56.html,Pesto Chicken Caprese,This pesto chicken caprese comes out wonderful...,,30 mins,4,[2 (10- to 12-ounce) skinless boneless chicken...,[Gather all ingredients. Preheat the oven to 4...,Calories: 506; Fat: 28g; Carbs: 4g; Protein: 56g,https://www.allrecipes.com/thmb/cigJur0bm353m8...,Pesto Chicken Caprese. This pesto chicken capr...,pesto chicken caprese pesto chicken caprese co...,"[-0.0244549959897995, -0.052012231200933456, -..."
4,receta_84.html,Thai Peanut Butter Ramen,This Thai peanut butter ramen is one of many w...,4.3,15 mins,3,"[2 (3 ounce) packages instant ramen noodles, s...",[Bring a large pot of water to a boil over hig...,Calories: 503; Fat: 27g; Carbs: 57g; Protein: 15g,https://www.allrecipes.com/thmb/-K5EVttM9Vrqzp...,Thai Peanut Butter Ramen. This Thai peanut but...,thai peanut butter ramen thai peanut butter ra...,"[-0.09066343307495117, -0.05139745771884918, 0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,receta_22.html,Chive Butter,"An herb butter, like this chive butter, is gre...",,2 hrs 5 mins,8,"[1/2 cup unsalted butter, softened, 2 tablespo...",[Place butter in a small bowl at room temperat...,Calories: 102; Fat: 12g; Carbs: 0g; Protein: 0g,https://www.allrecipes.com/thmb/BW-MV1DhV4X0U-...,"Chive Butter. An herb butter, like this chive ...",chive butter herb butter like chive butter gre...,"[-0.0414767824113369, -0.06992742419242859, 0...."
87,receta_34.html,Greek Steak Marinade,"This Greek steak marinade, with 3 ingredients ...",,5 mins,4,"[1/4 cup extra-virgin olive oil, 2 tablespoons...","[Whisk extra virgin olive oil, oregano, Greek ...",Calories: 126; Fat: 14g; Carbs: 2g; Protein: 0g,https://www.allrecipes.com/thmb/McM-C_XEPykhSx...,Greek Steak Marinade. This Greek steak marinad...,greek steak marinade greek steak marinade 3 in...,"[-0.047157417982816696, -0.04599572345614433, ..."
88,receta_64.html,Shallot and Chive Boursin Dip,This simple shallot and chive Boursin dip come...,,5 mins,6,[1 (5.2 ounce) package shallot & chive Gournay...,"[Combine Boursin, mayonnaise, yogurt, pickle b...",Calories: 169; Fat: 15g; Carbs: 5g; Protein: 3g,https://www.allrecipes.com/thmb/PTcFHX6esqXQJ-...,Shallot and Chive Boursin Dip. This simple sha...,shallot chive boursin dip simple shallot chive...,"[-0.05039184167981148, -0.07827013731002808, 0..."
89,receta_76.html,Strawberry Lime-Sage Shrub,"This strawberry lime-sage shrub is a bright, z...",,5 hrs,16,"[4 limes, divided, 1 cup white sugar, 1 cup wa...",[Zest and juice 2 limes to yield 1 teaspoon ze...,Calories: 63; Fat: 0g; Carbs: 16g; Protein: 0g,https://www.allrecipes.com/thmb/gnvnfksk6kQhzk...,Strawberry Lime-Sage Shrub. This strawberry li...,strawberry lime sage shrub strawberry lime sag...,"[-0.023219430819153786, -0.08700355887413025, ..."


## Indexación en base de datos vectorial

In [None]:
# Stack the per-row embedding lists into a single float32 matrix (one row per recipe)
embeddings = np.asarray(df_sections['embeddings'].tolist(), dtype='float32')

dimension = embeddings.shape[1]
print(dimension)

# Flat L2 index over vectors of that dimension
index = faiss.IndexFlatL2(dimension)

index.add(embeddings)
print(index)

384
<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7f3455f751a0> >


## Búsqueda y obtención del contexto

In [None]:
# --- Top-k most similar recipes ---
query = "Peri Peri Chicken"

# The index holds embeddings of the *preprocessed* texts, so the query goes
# through the same pipeline. Fall back to the raw query if nothing survives
# stopword removal.
query_text = preprocess_doc(query) or query
query_embedding = model.encode([query_text]).astype('float32')

# Never ask for more neighbours than the index holds: FAISS pads missing
# results with -1, which iloc would silently map to the last row.
k = max(1, min(10, index.ntotal))
distances, indices = index.search(query_embedding, k)

# --- Print the results ---
print("\n=== TOP SECCIONES MÁS SIMILARES ===")
for distance, idx in zip(distances[0], indices[0]):
    if idx < 0:
        # Padding entry: no neighbour was found for this slot.
        continue
    row = df_sections.iloc[idx]
    print(f"Archivo: {row['filename']}")
    # The raw (not preprocessed) text is what gets shown, so label it as such.
    print(f"Texto original: {row['raw']}")
    print(f"Distancia: {distance:.4f}")
    print("-" * 80)


=== TOP SECCIONES MÁS SIMILARES ===
Archivo: receta_55.html
Texto preprocesado: Peri Peri Chicken. This peri peri chicken is made with my take on African peri peri sauce using fresh and dried chiles. The marinade is incredibly flavorful and gives the chicken a beautiful color as well. Ingredients: 6 cloves garlic, 4 red Fresno chili peppers, seeded, 1 habanero pepper, seeded, 1/2 cup diced red bell pepper, or other sweet pepper, 1 tablespoon smoked paprika, 1 tablespoon fresh thyme leaves, 1 1/2 teaspoons kosher salt, 1 teaspoon freshly ground black pepper, 1 teaspoon ground cayenne pepper, 1 teaspoon white sugar, 1/2 teaspoon onion powder, 1/2 cup mild olive oil, or vegetable oil, 1/3 cup sherry vinegar, 2 lemons, juiced, 4 chicken leg/thigh quarters, kosher salt to taste to season chicken9 hrs 15 minsCalories: 501; Fat: 34g; Carbs: 37g; Protein: 20gCombine garlic, Fresno chili peppers, habanero pepper, bell pepper, smoked paprika, thyme, kosher salt, black pepper, cayenne pepper, su

## Generación de Respuesta

In [None]:
# Read the key from the environment instead of pasting it into the notebook,
# so the secret is never saved or shared together with the file. Falls back to
# '' (the previous placeholder value) when the variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')

In [None]:
# OpenAI client used for the answer-generation step
client = OpenAI(api_key=OPENAI_API_KEY)

In [None]:
# Raw text of the retrieved recipes, separated by blank lines, used as the LLM context
context = "\n\n".join(df_sections.iloc[indices[0]]["raw"].values)

In [None]:
# Prompt template (in Spanish): instructions, then the retrieved context, then the user query
prompt = f"""Eres una aplicación de Retrieval Augmented Generation que siempre responde en español. Usa el siguiente contexto para responder la pregunta, y la respuesta debe incluir:
- El título y descripción de una receta
- La lista completa de ingredientes
- El tiempo total de preparación
- Información nutricional relevante
- Instrucciones paso a paso para cocinar la receta
Si la respuesta no está en el contexto, di que no sabes.

Contexto:
{context}

Pregunta:
El usuario está preguntando sobre: {query}
"""

In [None]:
# Send the assembled prompt to the model and print the generated text
response = client.responses.create(
    model="gpt-4.1",
    temperature=0.3,
    input=prompt
)

print(response.output_text)

**Título:** Pollo Peri Peri

**Descripción:**  
Este pollo peri peri está preparado con una salsa inspirada en la receta africana, utilizando chiles frescos y secos. El marinado aporta un sabor intenso y un color vibrante al pollo, que se puede cocinar a la parrilla o al horno. Es ideal para quienes buscan un platillo picante y lleno de matices.

---

**Lista de ingredientes:**
- 6 dientes de ajo
- 4 chiles Fresno rojos, sin semillas
- 1 chile habanero, sin semillas
- 1/2 taza de pimiento rojo en cubos (o cualquier pimiento dulce)
- 1 cucharada de pimentón ahumado
- 1 cucharada de hojas frescas de tomillo
- 1 1/2 cucharaditas de sal kosher
- 1 cucharadita de pimienta negra recién molida
- 1 cucharadita de cayena en polvo
- 1 cucharadita de azúcar blanca
- 1/2 cucharadita de polvo de cebolla
- 1/2 taza de aceite de oliva suave o aceite vegetal
- 1/3 taza de vinagre de jerez
- Jugo de 2 limones
- 4 cuartos traseros de pollo (muslo y pierna)
- Sal kosher al gusto para sazonar el pollo

--