In [4]:
from langchain_chroma import Chroma

In [5]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_ollama import OllamaEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
documents = TextLoader("speech.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=100,chunk_overlap=20)
docs = text_splitter.split_documents(documents)

Created a chunk of size 164, which is longer than the specified 100
Created a chunk of size 141, which is longer than the specified 100


In [7]:
embeddings = OllamaEmbeddings(model="gemma:2b")
db = Chroma.from_documents(docs,embeddings)

In [8]:
query = "What is Generative AI"
results = db.similarity_search(query)
results

[Document(id='a6f2c8a0-cdfa-4f2a-9abe-4a67f55e4602', metadata={'source': 'speech.txt'}, page_content="It operates on neural networks and deep learning algorithms that can generate novel content in response to a user's natural language prompt."),
 Document(id='9daf6259-bbfc-4782-9cfc-360cf0739e35', metadata={'source': 'speech.txt'}, page_content='Generative AI is a type of artificial intelligence that creates new content, such as text, images, video, audio, and code, by learning patterns from existing data.'),
 Document(id='8fbba5cf-7ca0-40d3-9a8c-6d4db490b177', metadata={'source': 'speech.txt'}, page_content='While it enhances productivity and offers numerous applications, it also presents risks like generating inaccurate information (hallucinations) and raises concerns about intellectual property and job displacement.')]

## SAVE & LOAD

In [9]:
db = Chroma.from_documents(docs,embeddings,persist_directory="./chroma_db")

In [10]:
new_db = Chroma(persist_directory="./chroma_db",embedding_function= embeddings)

In [11]:
new_db.similarity_search(query)

[Document(id='8244e4ce-7024-4c99-9ef3-f5993731c59a', metadata={'source': 'speech.txt'}, page_content="It operates on neural networks and deep learning algorithms that can generate novel content in response to a user's natural language prompt."),
 Document(id='748ff395-b60e-4b57-be1c-4a7443b2e96a', metadata={'source': 'speech.txt'}, page_content='Generative AI is a type of artificial intelligence that creates new content, such as text, images, video, audio, and code, by learning patterns from existing data.'),
 Document(id='f2fcc9a8-774a-4821-9bf5-9752950f7a14', metadata={'source': 'speech.txt'}, page_content='While it enhances productivity and offers numerous applications, it also presents risks like generating inaccurate information (hallucinations) and raises concerns about intellectual property and job displacement.')]

### RETRIEVER

In [12]:
retriever = db.as_retriever()
retriever.invoke(query)

[Document(id='8244e4ce-7024-4c99-9ef3-f5993731c59a', metadata={'source': 'speech.txt'}, page_content="It operates on neural networks and deep learning algorithms that can generate novel content in response to a user's natural language prompt."),
 Document(id='748ff395-b60e-4b57-be1c-4a7443b2e96a', metadata={'source': 'speech.txt'}, page_content='Generative AI is a type of artificial intelligence that creates new content, such as text, images, video, audio, and code, by learning patterns from existing data.'),
 Document(id='f2fcc9a8-774a-4821-9bf5-9752950f7a14', metadata={'source': 'speech.txt'}, page_content='While it enhances productivity and offers numerous applications, it also presents risks like generating inaccurate information (hallucinations) and raises concerns about intellectual property and job displacement.')]