In [ ]:
# --- 1. Instalar dependencias ---
!pip install llama-index==0.10.33 llama-index-embeddings-openai pypdf pandas requests

In [ ]:
# --- 2. Imports ---
import os
import pandas as pd
from pathlib import Path
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Document
from llama_index.embeddings.openai import OpenAIEmbedding

In [ ]:
# --- 3. Configure OpenAI ---
# Keep any key that is already exported in the environment; the placeholder is
# only a fallback and must be replaced with a real key before querying.
# Never commit a real key inside the notebook.
os.environ.setdefault('OPENAI_API_KEY', "TU_API_KEY")

In [ ]:
# --- 4. Load PDFs and HTML ---
# SimpleDirectoryReader accepts a single `input_dir` (there is no `input_dirs`
# parameter), so each source folder is read separately and the results merged.
source_dirs = ["../data/pdfs", "../data/html"]
docs = []
for source_dir in source_dirs:
    docs.extend(SimpleDirectoryReader(input_dir=source_dir).load_data())
print(f"Se cargaron {len(docs)} documentos de PDF/HTML")

In [ ]:
# --- 5. Load CSV (projects.csv) ---
# Each CSV row becomes one Document whose text is a one-line summary of the
# project (name, client, year, tech, short description).
df = pd.read_csv("../data/projects.csv")
csv_docs = [
    Document(
        text=f"Project: {row.project_name}, Client: {row.client}, Year: {row.year}, Tech: {row.tech}, Desc: {row.short_description}"
    )
    for row in df.itertuples()
]

print(f"Se cargaron {len(csv_docs)} documentos desde CSV")
# NOTE: this mutates `docs` in place, so re-running this cell without
# re-running the loading cell appends the CSV documents a second time.
docs.extend(csv_docs)

In [ ]:
# --- 6. Build embeddings + index ---
# Embed all loaded documents with OpenAI's "text-embedding-3-small" model and
# build a vector index over them.
embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
)
index = VectorStoreIndex.from_documents(
    documents=docs,
    embed_model=embed_model,
)

In [ ]:
# --- 7. Create the query engine ---
# Wrap the vector index in a query engine using the library defaults
# (no retriever or LLM options are overridden here).
query_engine = index.as_query_engine()

In [ ]:
# --- 8. Example queries ---
# Send each sample question to the query engine and print the question
# followed by the engine's answer.
sample_questions = (
    "¿Qué servicios ofrece Sophilabs?",
    "Mencioná un proyecto y la tecnología usada.",
)
for question in sample_questions:
    response = query_engine.query(question)
    print(f"Q: {question}\n")
    print("A:", response, "\n")