In [1]:
from pathlib import Path
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

In [2]:
import os

def load_env(path="."):
    """Load ``NAME=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Parameters
    ----------
    path : str or os.PathLike
        Path to the env file. If it points to a directory (as the default
        ``"."`` does), the file named ``.env`` inside that directory is read.

    Raises
    ------
    ValueError
        If a non-blank, non-comment line contains no ``=`` separator.
    OSError
        If the file cannot be opened.
    """
    # The default "." is a directory, which open() cannot read; fall back to
    # the conventional ".env" file inside it.
    if os.path.isdir(path):
        path = os.path.join(path, ".env")

    with open(path, "r") as f:
        for lineno, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            # Blank lines and "#" comments carry no assignment.
            if not line or line.startswith("#"):
                continue
            # Split on the FIRST "=" only: values such as base64-padded keys
            # may legitimately contain "=" themselves.
            name, sep, value = line.partition("=")
            if not sep:
                raise ValueError(
                    f"{path}: line {lineno} is not a NAME=VALUE assignment: {line!r}"
                )
            # Drop whitespace around the separator, then surrounding double quotes.
            os.environ[name.strip()] = value.strip().strip('"')

# Build the .env location from the working directory's parent instead of a
# machine-specific absolute path, so the notebook runs on other checkouts.
load_env(path=Path.cwd().parent / ".env")

In [3]:
# Directory of the persisted Chroma collection, resolved from the parent of
# the current working directory.
vectordb_path = Path.cwd().parent.joinpath(
    "experiments", "hemophilia", "knowledge_bases", "chroma_abstracts"
)

# Open the persisted collection, using OpenAI embeddings for the queries.
embeddings = OpenAIEmbeddings()
vectordb = Chroma(
    embedding_function=embeddings,
    persist_directory=str(vectordb_path),
)

Using embedded DuckDB with persistence: data will be stored in: /home/rocabrera/Desktop/retrieval-knowledge-gpt/experiments/hemophilia/knowledge_bases/chroma_abstracts


In [4]:
# Natural-language question used to probe the vector store.
query = "Is gene therapy a cure for hemophilia?"
# Each entry of the result is a (Document, score) pair.
docs = vectordb.similarity_search_with_score(query)
# Display the first returned pair.
docs[0]

(Document(page_content='Gene therapy provides hope for a cure for patients with hemophilia by establishing continuous endogenous expression of factor VIII or factor IX following transfer of a functional gene copy to replace the hemophilic patient\'s own defective gene. Hemophilia may be considered a "low-hanging fruit" for gene therapy because a small increment in blood factor levels (≥2% of normal) significantly improves the bleeding tendency from severe to moderate, eliminating most spontaneous bleeds. After decades of research, the first trial to provide clear evidence of efficiency after gene transfer in patients with hemophilia B using adeno-associated virus vectors was reported by the authors\' group in 2011. This has been followed by unprecedented activity in this area, with the commencement of seven new early-phase trials involving >55 patients with hemophilia A or hemophilia B. These studies have, in large part, generated promising clinical data that lay a strong foundation fo

In [5]:
# Split the first result into the Document and its score.
document, score = docs[0]
# NOTE(review): presumably a distance (lower = closer match) — confirm against the vector store docs.
score

0.17245423793792725

In [6]:
# Show the text of the retrieved document.
document.page_content

'Gene therapy provides hope for a cure for patients with hemophilia by establishing continuous endogenous expression of factor VIII or factor IX following transfer of a functional gene copy to replace the hemophilic patient\'s own defective gene. Hemophilia may be considered a "low-hanging fruit" for gene therapy because a small increment in blood factor levels (≥2% of normal) significantly improves the bleeding tendency from severe to moderate, eliminating most spontaneous bleeds. After decades of research, the first trial to provide clear evidence of efficiency after gene transfer in patients with hemophilia B using adeno-associated virus vectors was reported by the authors\' group in 2011. This has been followed by unprecedented activity in this area, with the commencement of seven new early-phase trials involving >55 patients with hemophilia A or hemophilia B. These studies have, in large part, generated promising clinical data that lay a strong foundation for gene therapy to move 

O documento contém os metadados utilizados na indexação. No nosso caso, estamos interessados na fonte, isto é, no arquivo com o qual a consulta tem a maior similaridade.

In [7]:
# Keep only the file name of the path stored under the "source" metadata key.
Path(document.metadata["source"]).name

'28835123.txt'

# Referências

- [Persisting Vector DB](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/chroma.html)