In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Both keys are passed to model constructors in later cells (HF_API_KEY to
# MistralQA, OPENAI_API_KEY to the OpenAI-backed models). Read both with
# os.environ[...] so a missing key raises a KeyError naming the variable right
# here, instead of HF_API_KEY silently becoming None and being passed on as
# api_key=None.
HF_API_KEY = os.environ['HF_API_KEY']
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

In [2]:
from Indox import IndoxRetrievalAugmentation
from Indox.qa_models import OpenAiQA
from Indox.qa_models import MistralQA
from Indox.qa_models import DspyCotQA

In [3]:
from Indox.embeddings import OpenAiEmbedding
from Indox.embeddings import HuggingFaceEmbedding

In [4]:
from Indox.splitter import SplitWithClustering

In [5]:
# Create the retrieval-augmentation object that every later cell calls into.
# NOTE(review): this instance has the same name as the `Indox` package. The
# `from Indox... import ...` statements do not bind that name, so nothing breaks
# here, but a later `import Indox` would rebind it. Renaming (e.g. to `indox`)
# would require updating every subsequent cell.
Indox = IndoxRetrievalAugmentation()

In [6]:
# One question-answering backend per provider. Each object is later handed to
# `Indox.answer_question` as `qa_model` so the answers can be compared.
openai_qa = OpenAiQA(
    api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo-0125",
)
mistral_qa = MistralQA(
    api_key=HF_API_KEY,
    model="mistralai/Mistral-7B-Instruct-v0.2",
)
dspy_qa = DspyCotQA(
    api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo",
)

In [7]:
# Embedding backends. `openai_embeddings` is the one used by the splitting and
# vector-store cells below; `hugging_face_embedding` is built here but not used
# by any later cell in this part of the notebook. Per the recorded log, creating
# it loads the SentenceTransformer model on the CPU.
openai_embeddings = OpenAiEmbedding(
    model="text-embedding-3-small",
    openai_api_key=OPENAI_API_KEY,
)
hugging_face_embedding = HuggingFaceEmbedding(
    model_name="multi-qa-mpnet-base-cos-v1",
)

2024-05-13 10:16:52,637 - INFO - Load pretrained SentenceTransformer: multi-qa-mpnet-base-cos-v1
2024-05-13 10:16:53,004 - INFO - Use pytorch device: cpu


In [8]:
# Display the active configuration of the Indox instance. The recorded output
# shows clustering, embedding model, postgres, prompt, splitter and summary-model
# settings.
Indox.config

{'clustering': {'dim': 10, 'threshold': 0.1},
 'embedding_model': 'sbert',
 'postgres': {'conn_string': 'postgresql+psycopg2://postgres:xxx@localhost:port/db_name'},
 'prompts': {'document_relevancy_prompt': "You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals.\nGive a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.\nProvide the binary score as a JSON with a single key 'score' and no preamble or explanation.\nHere is the retrieved document:\n{document}\nHere is the user question:\n{question}",
  'summary_model': {'content': 'You are a helpful assistant. Give a detailed summary of the documentation provided'}},
 'splitter': 'semantic-text-splitter',
 'summary_model': {'max_tokens': 100,
  'min_len': 30,
  'model_name': 'gpt-3.5-turbo-0125'},

In [9]:
# Initialise the Indox instance before chunking and storing documents; the
# recorded run produced no output.
# NOTE(review): presumably this applies the configuration shown above — confirm
# against the Indox documentation.
Indox.initialize()

In [10]:
# Input document passed to both splitting approaches below. Judging by the
# recorded chunk output, it contains the Cinderella fairy tale.
# NOTE(review): relative path — presumably resolved against the kernel's working
# directory; confirm.
file_path = "sample.txt"

In [11]:
# Split the document with the clustering-based splitter. In the recorded run this
# issued OpenAI embedding and chat-completion requests and reported
# "Create 14 chunks: 13 leaf chunks plus 1 extra chunks."
docs_cluster = SplitWithClustering(
    file_path=file_path,
    embeddings=openai_embeddings,
    max_chunk_size=300,
)

Starting processing...


2024-05-13 10:16:54,959 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


--Generated 3 clusters--


2024-05-13 10:17:08,456 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-05-13 10:17:11,610 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-05-13 10:17:15,252 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-05-13 10:17:16,518 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


--Generated 1 clusters--


2024-05-13 10:17:20,995 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Create 14 chunks: 13 leaf chunks plus 1 extra chunks.
End Chunking & Clustering process.


In [13]:
# Preview only the first two chunks: displaying the whole list writes the full
# document text into the saved notebook and buries the cells that follow.
docs_cluster[:2]

["The wife of a rich man fell sick, and as she felt that her end was drawing near, she called her only daughter to her bedside and said, dear child, be good and pious, and then the good God will always protect you, and I will look down on you from heaven and be near you.  Thereupon she closed her eyes and departed.  Every day the maiden went out to her mother's grave, and wept, and she remained pious and good.  When winter came the snow spread a white sheet over the grave, and by the time the spring sun had drawn it off again, the man had taken another wife. The woman had brought with her into the house two daughters, who were beautiful and fair of face, but vile and black of heart. Now began a bad time for the poor step-child.  Is the stupid goose to sit in the parlor with us, they said.  He who wants to eat bread must earn it.  Out with the kitchen-wench.  They took her pretty clothes away from her, put an old grey bedgown on her, and gave her wooden shoes.  Just look at the proud pr

In [10]:
# Chunk the same file through the Indox instance; these `docs` (not
# `docs_cluster`) are what gets written to the vector store below.
docs = Indox.create_chunks(
    file_path=file_path,
    unstructured=True,
    content_type="text",
)

Starting processing...
End Chunking process.


In [11]:
# Attach the instance to the "sample" collection, embedding with the OpenAI
# model. The recorded output shows a Chroma telemetry notice followed by
# "Connection established successfully."
Indox.connect_to_vectorstore(
    collection_name="sample",
    embeddings=openai_embeddings,
)

2024-05-12 20:07:56,406 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


Connection established successfully.


In [12]:
# Embed and store the chunks in the connected vector store. The call's return
# value is displayed as the cell output (a ChromaVectorStore object in the
# recorded run).
Indox.store_in_vectorstore(chunks=docs)

2024-05-12 20:08:04,725 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-05-12 20:08:06,457 - INFO - Document added successfully to the vector store.


<Indox.vectorstore.ChromaVectorStore at 0x225fec85d00>

In [14]:
# Question put to each QA model below. The wording is model input, so it is kept
# exactly as written.
query = "how cinderella reach her happy ending?"

In [15]:
# Answer the query with the OpenAI QA model; the recorded log shows one
# chat-completions request.
response_openai = Indox.answer_question(
    query=query,
    qa_model=openai_qa,
)

2024-05-12 19:25:36,174 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [16]:
# Element 0 of the response is the answer text (a plain string in the recorded
# output). What the remaining elements hold is not shown here.
response_openai[0]

"Cinderella reached her happy ending by attending the royal wedding in a beautiful dress and slippers embroidered with silk and silver. Her step-sisters and step-mother did not recognize her and thought she was a foreign princess. The prince approached her, danced with her, and was captivated by her beauty. Despite trying to escape and hiding in the pigeon-house, the prince eventually found her and realized she was the mysterious maiden he had been searching for. This led to Cinderella's happy ending as she was reunited with the prince and they lived happily ever after."

In [17]:
# Same query, answered by the Mistral QA model for comparison.
response_mistral = Indox.answer_question(
    query=query,
    qa_model=mistral_qa,
)

In [18]:
# Element 0 of the response is the answer text.
# NOTE(review): the recorded answer stops mid-sentence — possibly a
# generation-length limit on the Mistral backend; confirm.
response_mistral[0]

"Cinderella reached her happy ending by escaping from the prince and hiding in various places, such as a pigeon-house or her mother's, while wearing the beautiful dress and slippers she had received from the fairy godmother. Despite her step-sisters and step-mother not recognizing her, she eventually went to the wedding and was identified by the prince when they danced together. After her identity was revealed, she was welcomed back into her father's"

In [19]:
# Same query, answered by the DSPy chain-of-thought QA model for comparison.
response_dspy = Indox.answer_question(
    query=query,
    qa_model=dspy_qa,
)

In [20]:
# Element 0 of the response is the answer text (a plain string in the recorded
# output).
response_dspy[0]

"Cinderella reached her happy ending by attending the prince's ball, where she danced with him and caught his attention. Despite trying to escape him, the prince eventually found her and realized she was the mysterious maiden he had been searching for. They danced until evening, and the prince decided to accompany her home to find out who she was. Cinderella managed to escape him once more, but the prince sought her out and eventually found her. The story ends with the prince recognizing Cinderella as the one he had been searching for, leading to their happily ever after."