In [1]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
import chromadb
from chromadb.utils import embedding_functions


# Identifiers shared by the raw chromadb handle and the LangChain wrapper below.
# Defining them once keeps both views pointed at the same collection and model.
COLLECTION_NAME = "eligibility_criteria_collection"
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
# NOTE(review): pinned to one specific GPU index; presumably this breaks on hosts
# that do not expose that device — confirm before sharing the notebook.
EMBEDDING_DEVICE = "cuda:4"

# Persistent Chroma client; no path is passed, so chromadb picks its default location.
client = chromadb.PersistentClient()

# Embedding function attached to the raw chromadb collection handle.
em = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBEDDING_MODEL,
    device=EMBEDDING_DEVICE,
)

# Chroma's default HNSW space is "l2", NOT cosine, so cosine distance has to be
# requested explicitly through the collection metadata.
collection = client.get_or_create_collection(
    COLLECTION_NAME,
    metadata={"hnsw:space": "cosine"},
    embedding_function=em,
)

# LangChain-side embedder for the same model name (no device is passed here, so it
# uses the LangChain default rather than EMBEDDING_DEVICE).
embedding_function = HuggingFaceEmbeddings(
    model_name=f"sentence-transformers/{EMBEDDING_MODEL}"
)

# LangChain vector-store wrapper over the same client and collection name.
db = Chroma(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding_function=embedding_function,
)


  from .autonotebook import tqdm as notebook_tqdm


In [28]:
db.similarity_search?

Signature:
db.similarity_search(
    query: 'str',
    k: 'int' = 4,
    filter: 'Optional[Dict[str, str]]' = None,
    **kwargs: 'Any',
) -> 'List[Document]'
Docstring:
Run similarity search with Chroma.

Args:
    query (str): Query text to search for.
    k (int): Number of results to return. Defaults to 4.
    filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

Returns:
    List[Document]: List of documents most similar to the query text.
File:      ~/miniconda3/lib/python3.11/site-packages/langchain_community

In [25]:
# Free-text patient statement to match against the stored eligibility criteria.
query = "Patient does not have KRAS mutation"

# Each result is a (Document, score) pair; no k is passed, so the method's
# default result count applies.
docs = db.similarity_search_with_score(query=query)

In [26]:
# Display the (Document, score) pairs returned by the search cell.
# NOTE(review): the first hit listed ("Patients with KRAS gene mutation ...") states
# the opposite of the query ("does not have KRAS mutation"), so the similarity score
# does not appear to account for negation — verify before relying on this ranking.
docs

[(Document(page_content='Patients with KRAS gene mutation confirmed by the central laboratory', metadata={'criteria': 'Inclusion Criteria', 'sub-criteria': 'Inclusion Criteria'}),
  0.1526924967765808),
 (Document(page_content='Absence of a mutation in KRAS {KRAS wild type} is required for patients enrolled in cohort 2', metadata={'criteria': 'Inclusion Criteria', 'sub-criteria': 'Inclusion Criteria'}),
  0.15496045351028442),
 (Document(page_content='Patients with KRAS/NRAS mutation', metadata={'criteria': 'Exclusion Criteria', 'sub-criteria': 'Exclusion Criteria'}),
  0.17447936534881592),
 (Document(page_content='KRAS mutation', metadata={'criteria': 'Inclusion Criteria', 'sub-criteria': 'Inclusion Criteria'}),
  0.18968480825424194)]