In [1]:
# !pip install chromadb -q
# !pip install sentence-transformers -q     

In [2]:
import chromadb

# Default Chroma client (in-memory per the Chroma docs; nothing is persisted here).
client = chromadb.Client()

# get_or_create_collection keeps this cell re-runnable: create_collection raises
# when a collection named "test" already exists on this client.
collection = client.get_or_create_collection("test")

In [None]:
# Add sample documents to the collection

In [5]:
# Seed the demo collection with two short texts. No embeddings are supplied,
# so the collection computes them from the documents.
collection.add(
    ids=["id1", "id2"],
    metadatas=[{"category": "animal"}, {"category": "vehicle"}],
    documents=["This is a document about cat", "This is a document about car"],
)


C:\Users\intel\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:36<00:00, 2.30MiB/s]


In [6]:
# Fetch the single nearest document to the query text "vehicle".
results = collection.query(n_results=1, query_texts=["vehicle"])
results

{'ids': [['id2']],
 'distances': [[0.8069301843643188]],
 'metadatas': [[{'category': 'vehicle'}]],
 'embeddings': None,
 'documents': [['This is a document about car']],
 'uris': None,
 'data': None}

In [13]:
import os

def read_files_from_folder(folder_path, encoding="utf-8"):
    """Read every ``.txt`` file located directly inside ``folder_path``.

    Args:
        folder_path: Directory to scan. Sub-directories are not traversed.
        encoding: Text encoding used to decode each file. Passed explicitly so
            the result does not depend on the platform's locale default.

    Returns:
        A list of ``{"file_name": <name>, "content": <text>}`` dicts, ordered
        by file name.

    Raises:
        OSError: If ``folder_path`` cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file is not valid in ``encoding``.
    """
    file_data = []

    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # (and any ids derived from list positions) stable between runs.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        # Skip non-text entries, including directories whose name ends in ".txt".
        if not (file_name.endswith(".txt") and os.path.isfile(file_path)):
            continue
        with open(file_path, "r", encoding=encoding) as file:
            file_data.append({"file_name": file_name, "content": file.read()})

    return file_data

In [14]:
# Raw string: "\C" and "\d" are invalid escape sequences in a regular string
# literal (SyntaxWarning on Python 3.12+). The resulting path value is unchanged.
folder_path = r"D:\CromaDB\data"  # your folder path
file_data = read_files_from_folder(folder_path)

In [16]:
# Display the loaded records (one dict per .txt file: file name and content).
file_data

[{'file_name': 'Different Types of Pet Animals.txt',
  'content': 'Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.'},
 {'file_name': 'Health Care for Pets.txt',
  'content': 'Routine health care is crucial for pets to live long, happy lives. Regular vet check-ups help catch potential issues early and keep vaccinations up to date. Dental care is also essential to prevent diseases in pets, especially in dogs and cats. Regular grooming, parasite control, and weight management are other important aspects of pet health care.'},
 {'file_name': 'Nutrition Needs of Pet Animals.txt',
  'content': '

In [17]:
# Build the three parallel lists expected by the collection:
# document text, source-file metadata, and 1-based string ids.
documents = [record['content'] for record in file_data]
metadatas = [{'source': record['file_name']} for record in file_data]
ids = [str(position) for position in range(1, len(file_data) + 1)]
     

In [18]:
# Display the per-document metadata (the source file name of each document).
metadatas

[{'source': 'Different Types of Pet Animals.txt'},
 {'source': 'Health Care for Pets.txt'},
 {'source': 'Nutrition Needs of Pet Animals.txt'},
 {'source': 'The Emotional Bond Between Humans and Pets.txt'},
 {'source': 'Training and Behaviour of Pets.txt'}]

In [19]:
# Display the generated string ids, one per document.
ids

['1', '2', '3', '4', '5']

In [22]:
# get_or_create_collection + upsert keep this cell re-runnable:
# create_collection raises if "pet_collection" already exists on this client,
# and upsert replaces records whose ids are already present.
pet_collection = client.get_or_create_collection("pet_collection")
pet_collection.upsert(
    documents=documents,
    metadatas=metadatas,
    ids=ids
)

In [23]:
# Ask for the single best-matching pet document for a natural-language question.
results = pet_collection.query(
    n_results=1,
    query_texts=["What are the different kinds of pets people commonly own?"],
)
results

{'ids': [['1']],
 'distances': [[0.7325009107589722]],
 'metadatas': [[{'source': 'Different Types of Pet Animals.txt'}]],
 'embeddings': None,
 'documents': [['Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.']],
 'uris': None,
 'data': None}

In [24]:
# Same kind of query, but with a where_document filter: only documents whose
# text contains "reptiles" are candidates for the result.
results = pet_collection.query(
    where_document={"$contains":"reptiles"},
    n_results=1,
    query_texts=["What are the emotional benefits of owning a pet?"],
)
results

{'ids': [['1']],
 'distances': [[0.837824821472168]],
 'metadatas': [[{'source': 'Different Types of Pet Animals.txt'}]],
 'embeddings': None,
 'documents': [['Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.']],
 'uris': None,
 'data': None}

In [27]:
from sentence_transformers import SentenceTransformer
# Load the 'paraphrase-MiniLM-L3-v2' sentence-embedding model. Its encode() method
# is used in the following cells to embed both the documents and the queries.
# NOTE(review): presumably the weights are downloaded on first use — confirm.
model = SentenceTransformer('paraphrase-MiniLM-L3-v2')

In [28]:
# Rebuild the parallel input lists, this time with an explicit embedding per
# document produced by the sentence-transformers model (one encode call each).
documents = [record['content'] for record in file_data]
embeddings = [model.encode(text).tolist() for text in documents]
metadatas = [{'source': record['file_name']} for record in file_data]
ids = [str(position) for position in range(1, len(file_data) + 1)]

In [29]:
# get_or_create_collection + upsert keep this cell re-runnable:
# create_collection raises if "pet_collection_emb" already exists on this client,
# and upsert replaces records whose ids are already present.
# Embeddings are passed explicitly, so the stored vectors come from `model`.
pet_collection_emb = client.get_or_create_collection("pet_collection_emb")
pet_collection_emb.upsert(
    documents=documents,
    embeddings=embeddings,
    metadatas=metadatas,
    ids=ids
)

In [30]:
# Embed the question with the same model used for the stored documents,
# then search by vector instead of by raw text.
query = "What are the different kinds of pets people commonly own?"
input_em = model.encode(query).tolist()

results = pet_collection_emb.query(n_results=1, query_embeddings=[input_em])
results

{'ids': [['1']],
 'distances': [[12.040447235107422]],
 'metadatas': [[{'source': 'Different Types of Pet Animals.txt'}]],
 'embeddings': None,
 'documents': [['Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.']],
 'uris': None,
 'data': None}

In [31]:
# Second vector search: encode the question, then fetch the closest document.
query = "foods that are recommended for  dogs?"
input_em = model.encode(query).tolist()

results = pet_collection_emb.query(n_results=1, query_embeddings=[input_em])
results

{'ids': [['3']],
 'distances': [[17.14393424987793]],
 'metadatas': [[{'source': 'Nutrition Needs of Pet Animals.txt'}]],
 'embeddings': None,
 'documents': [['Proper nutrition is vital for the health and wellbeing of pets. Dogs and cats require a balanced diet that includes proteins, carbohydrates, and fats. Some may even have specific dietary needs based on their breed or age. Birds typically thrive on a diet of seeds, fruits, and vegetables, while reptiles have diverse diets ranging from live insects to fresh produce. Fish diets depend greatly on the species, with some needing live food and others subsisting on flakes or pellets.']],
 'uris': None,
 'data': None}