In [4]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from chromadb.config import Settings
from chromadb import PersistentClient  # Cambiar a PersistentClient según la nueva arquitectura de ChromaDB

# 1. Cargar el archivo CSV
df = pd.read_csv('./data/wiki_movie_plots_deduped.csv')  # Ajusta la ruta si es necesario
titles = df['Title'].tolist()
plots = df['Plot'].tolist()

# 2. Configurar ChromaDB
client = PersistentClient(
    settings=Settings(persist_directory="chroma_db", chroma_db_impl="duckdb+parquet")
)
collection = client.get_or_create_collection(name="movies")

# 3. Generar e insertar embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')  # Usa otro modelo si lo prefieres
print("Generando embeddings...")
embeddings = model.encode(plots, show_progress_bar=True)

# Insertar documentos en ChromaDB
print("Insertando documentos en ChromaDB...")
for i, (title, plot, embedding) in enumerate(zip(titles, plots, embeddings)):
    collection.add(
        documents=[plot],
        metadatas=[{"Title": title}],
        ids=[f"doc_{i}"],  # ID único para cada documento
        embeddings=[embedding.tolist()]  # Convertir a lista para evitar errores
    )

# 4. Realizar consultas
query = "Película sobre aventuras en el espacio"  # Cambia esta consulta según lo que necesites
query_embedding = model.encode([query])
results = collection.query(query_embeddings=query_embedding, n_results=5)

# Mostrar resultados
print("\nResultados de la consulta:")
for doc, meta in zip(results["documents"], results["metadatas"]):
    print(f"Título: {meta['Title']}, Trama: {doc}")

# 5. Evaluar resultados (puedes agregar métodos adicionales aquí)
# Si deseas implementar FAISS, TF-IDF, o BM25, necesitarás agregar bibliotecas y lógica adicionales.


ValueError: [91mYou are using a deprecated configuration of Chroma.

[94mIf you do not have data you wish to migrate, you only need to change how you construct
your Chroma client. Please see the "New Clients" section of https://docs.trychroma.com/deployment/migration.
________________________________________________________________________________________________

If you do have data you wish to migrate, we have a migration tool you can use in order to
migrate your data to the new Chroma architecture.
Please `pip install chroma-migrate` and run `chroma-migrate` to migrate your data and then
change how you construct your Chroma client.

See https://docs.trychroma.com/deployment/migration for more information or join our discord at https://discord.gg/8g5FESbj for help![0m