In [25]:
import os
from dotenv import load_dotenv

load_dotenv()  # load variables from a .env file (if present) into the process environment

# Both keys are needed by later cells: HF_API_KEY is passed to MistralQA and
# OPENAI_API_KEY to OpenAiQA, DspyCotQA and OpenAiEmbedding. Read them the same
# way so a missing key raises KeyError here instead of HF_API_KEY silently
# becoming None and being handed to a client.
HF_API_KEY = os.environ['HF_API_KEY']
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

In [26]:
from Indox import IndoxRetrievalAugmentation
from Indox.qa_models import OpenAiQA
from Indox.qa_models import MistralQA
from Indox.qa_models import DspyCotQA

In [27]:
from Indox.embeddings import OpenAiEmbedding
from Indox.embeddings import HuggingFaceEmbedding

In [28]:
from Indox.splitter import SplitWithClustering
from Indox.splitter import SplitUnstructured

In [29]:
# Instantiate IndoxRetrievalAugmentation; later cells use this object's
# config, initialize, connect_to_vectorstore, store_in_vectorstore and
# answer_question members.
# NOTE(review): the variable reuses the package name `Indox`. A plain
# `import Indox` run afterwards would rebind it to the module — consider a
# distinct name (e.g. `indox`) across the notebook.
Indox = IndoxRetrievalAugmentation()

In [30]:
# One QA wrapper per backend; each is later passed to answer_question with the
# same query.
openai_qa = OpenAiQA(
    api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo-0125",
)

# The Mistral wrapper is given the HF_API_KEY credential, not the OpenAI one.
mistral_qa = MistralQA(
    api_key=HF_API_KEY,
    model="mistralai/Mistral-7B-Instruct-v0.2",
)

dspy_qa = DspyCotQA(
    api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo",
)

In [31]:
# Two embedding backends are built here; the later cells in this notebook only
# pass hugging_face_embedding (to the clustering splitter and the vector store).
openai_embeddings = OpenAiEmbedding(
    model="text-embedding-3-small",
    openai_api_key=OPENAI_API_KEY,
)

# In the recorded run this logs loading a SentenceTransformer on the CPU device.
hugging_face_embedding = HuggingFaceEmbedding(
    model_name="multi-qa-mpnet-base-cos-v1",
)

2024-05-13 11:49:13,924 - INFO - Load pretrained SentenceTransformer: multi-qa-mpnet-base-cos-v1
2024-05-13 11:49:14,263 - INFO - Use pytorch device: cpu


In [33]:
# Display the current configuration as the cell output (a nested dict in the
# recorded run, with postgres, prompts, splitter, summary_model, tokenizer and
# vector_store entries).
Indox.config

{'postgres': {'conn_string': 'postgresql+psycopg2://postgres:xxx@localhost:port/db_name'},
 'prompts': {'document_relevancy_prompt': "You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals.\nGive a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.\nProvide the binary score as a JSON with a single key 'score' and no preamble or explanation.\nHere is the retrieved document:\n{document}\nHere is the user question:\n{question}",
  'summary_model': {'content': 'You are a helpful assistant. Give a detailed summary of the documentation provided'}},
 'splitter': 'semantic-text-splitter',
 'summary_model': {'max_tokens': 100,
  'min_len': 30,
  'model_name': 'gpt-3.5-turbo-0125'},
 'tokenizer': 'openai',
 'vector_store': 'chroma'}

In [34]:
# Run the library's initialization step before any vector-store calls.
# NOTE(review): what initialize() sets up is not visible from this notebook —
# presumably it applies the values in Indox.config; confirm against the library.
Indox.initialize()

In [35]:
# Input document for both splitters; a relative path, so it is resolved against
# the kernel's current working directory.
file_path = "sample.txt"

In [36]:
# Chunk the file with the clustering splitter, using the HuggingFace embedding.
# The recorded run also logs an OpenAI chat-completions request during this
# call, so it needs network access and a valid OpenAI key.
# NOTE(review): the unit of chunk_size (tokens vs. characters) is not visible
# here — confirm against the splitter's documentation.
docs_cluster = SplitWithClustering(
    file_path=file_path,
    embeddings=hugging_face_embedding,
    chunk_size=400,
)

Starting processing...
--Generated 1 clusters--


2024-05-13 11:49:43,676 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


End Chunking & Clustering process.


In [37]:
# Number of items produced by the clustering splitter (10 in the recorded run).
len(docs_cluster)

10

In [38]:
# Chunk the same file with the unstructured splitter, treating it as plain text.
docs_unstructured = SplitUnstructured(
    file_path=file_path,
    content_type="text",
)

Starting processing...
End Chunking process.


In [39]:
# Number of items produced by the unstructured splitter (33 in the recorded run).
len(docs_unstructured)

33

In [40]:
# Attach to the "sample" collection using the HuggingFace embedding.
# The configuration displayed earlier names 'chroma' as the vector store.
Indox.connect_to_vectorstore(
    collection_name="sample",
    embeddings=hugging_face_embedding,
)

2024-05-13 11:49:44,024 - INFO - Collection sample is not created.


Connection established successfully.


In [41]:
# Index the unstructured-splitter chunks. docs_cluster is not stored anywhere
# in the visible cells, so the answers below are retrieved from
# docs_unstructured only.
# The call's return value is displayed as the cell output (a ChromaVectorStore
# repr in the recorded run).
Indox.store_in_vectorstore(chunks=docs_unstructured)

2024-05-13 11:49:47,898 - INFO - Document added successfully to the vector store.


<Indox.vectorstore.ChromaVectorStore at 0x1d1fede95b0>

In [42]:
# Question sent unchanged to each of the three QA models below.
# NOTE(review): the wording is ungrammatical ("how did Cinderella reach ...");
# left as-is because the recorded answers were generated from this exact text.
query = "how cinderella reach her happy ending?"

In [43]:
# Answer the query with the OpenAI QA wrapper; the recorded run logs one
# chat-completions request for this call.
response_openai = Indox.answer_question(
    query=query,
    qa_model=openai_qa,
)

2024-05-13 11:49:51,693 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [44]:
# First element of the response; in the recorded run this is the answer text.
response_openai[0]

"Cinderella reached her happy ending by persevering through her hardships, remaining kind and hopeful, and ultimately being recognized for her true worth. Despite being mistreated by her step-family and forced to endure a life of servitude, Cinderella maintained a positive attitude and never lost hope. With the help of her fairy godmother, she was able to attend the royal ball and capture the prince's heart with her inner beauty and grace. In the end, Cinderella's true identity was revealed, and she was able to break free from her oppressive circumstances and find happiness and love with the prince."

In [45]:
# Same query, answered through the Mistral QA wrapper.
response_mistral = Indox.answer_question(
    query=query,
    qa_model=mistral_qa,
)

In [46]:
# First element of the response (the answer text in the recorded run).
# NOTE(review): the recorded answer stops mid-sentence ("... When"), which
# looks like a generation-length limit on the Mistral backend — confirm and
# raise the limit if full answers are needed.
response_mistral[0]

'Cinderella reaches her happy ending by going to the royal ball in disguise with the help of her fairy godmother, who transforms a pumpkin into a carriage, horses, and mice into footmen, and transforms her rags into a beautiful gown. At the ball, the prince falls in love with her, but they do not recognize each other. After the ball ends, Cinderella loses her glass slipper and the prince searches for the owner. When'

In [49]:
# Same query, answered through the DSPy chain-of-thought QA wrapper.
response_dspy = Indox.answer_question(
    query=query,
    qa_model=dspy_qa,
)

In [50]:
# First element of the response (the answer text in the recorded run).
response_dspy[0]

'Cinderella reached her happy ending by attending the royal ball with the help of her fairy godmother and capturing the heart of the prince.'