In [1]:
import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file (python-dotenv), then read the
# credentials this notebook needs.
load_dotenv()

# Hugging Face token: only read by the Mistral QA model set up further down.
HF_API_KEY = os.getenv('HF_API_KEY')
# Indox OpenAI key: read by both the QA model and the embedding model that
# the vector-store and question-answering cells run on.
INDOX_OPENAI_API_KEY = os.getenv("INDOX_OPENAI_API_KEY")

# os.getenv returns None for a missing variable; fail here with a readable
# message instead of handing api_key=None to the API clients later.
if not INDOX_OPENAI_API_KEY:
    raise RuntimeError(
        "INDOX_OPENAI_API_KEY is not set. Add it to your .env file or export "
        "it in the environment before running this notebook."
    )

In [2]:
from Indox import IndoxRetrievalAugmentation
from Indox.QaModels import MistralQA
from Indox.Embeddings import HuggingFaceEmbedding
from Indox.DataLoaderSplitter import UnstructuredLoadAndSplit
from Indox.DataLoaderSplitter import ClusteredSplit

In [3]:
from Indox.QaModels import IndoxApiOpenAiQa
from Indox.Embeddings import IndoxOpenAIEmbedding

In [4]:
# Pipeline object that the later cells call for the vector-store connection,
# document storage, and question answering.
# NOTE(review): the variable is spelled exactly like the `Indox` package.
# `from Indox.<module> import ...` statements still resolve the package, but
# the identical name is easy to misread; consider renaming it (e.g. `indox`)
# in one coordinated change across all cells.
Indox = IndoxRetrievalAugmentation()

In [5]:
# Hugging Face option: Mistral-7B-Instruct as the QA model, together with the
# default HuggingFaceEmbedding (the cell's log output reports which
# SentenceTransformer checkpoint gets loaded).
mistral_qa = MistralQA(
    api_key=HF_API_KEY,
    model="mistralai/Mistral-7B-Instruct-v0.2",
)
embed_hf = HuggingFaceEmbedding()

2024-05-23 14:59:42,412 - INFO - Load pretrained SentenceTransformer: multi-qa-mpnet-base-cos-v1
2024-05-23 14:59:42,942 - INFO - Use pytorch device: cpu


In [6]:
# Indox-hosted OpenAI option: one key drives both the QA model and the
# embedding model used by the vector-store cells below.
qa_indox_api_openai = IndoxApiOpenAiQa(api_key=INDOX_OPENAI_API_KEY)
embed_indox_api_openai = IndoxOpenAIEmbedding(
    api_key=INDOX_OPENAI_API_KEY,
    model="text-embedding-ada-002",
)

In [7]:
# Input document shared by both splitting strategies in this notebook.
# The path is relative, so it resolves against the kernel's working directory.
file_path = "sample.txt"

# Load and split with the Unstructured library

In [28]:
# Load the document and split it into chunks with the Unstructured-based
# loader. max_chunk_size caps each chunk at 400 (unit not visible here —
# presumably characters; TODO confirm against the Indox docs).
data = UnstructuredLoadAndSplit(
    file_path=file_path,
    max_chunk_size=400,
)

Starting processing...
End Chunking process.


In [29]:
# Attach the pipeline to the "sample" collection, embedding with the Indox
# OpenAI embedding model.
Indox.connect_to_vectorstore(
    embeddings=embed_indox_api_openai,
    collection_name="sample",
)

2024-05-23 14:57:21,809 - INFO - Collection sample is not created.


Connection established successfully.


In [30]:
# Add the Unstructured chunks to the collection connected in the previous cell.
# NOTE(review): the retrieved contexts shown further down contain the same
# chunk twice, which suggests this call appends on every run rather than
# upserting — TODO confirm, and avoid re-running this cell against a
# collection that is already populated.
Indox.store_in_vectorstore(data)

2024-05-23 14:58:36,189 - INFO - Document added successfully to the vector store.


<Indox.vectorstore.ChromaVectorStore at 0x15405f10bc0>

In [13]:
# Question asked against both collections (Unstructured split and clustered
# split) so that the two answers can be compared.
query = "How cinderella reach her happy ending?"

In [32]:
# Ask the question against the currently connected collection, answering with
# the Indox OpenAI QA model. The following cells index the result as
# [0] (answer) and [1] (retrieved context).
response = Indox.answer_question(
    query=query,
    qa_model=qa_indox_api_openai,
)

In [33]:
# First element of the response: the generated answer text.
response[0]

"Cinderella reached her happy ending by remaining kind, patient, and true to herself despite facing mistreatment from her step-family. With the help of her fairy godmother and the magical assistance of birds, she was able to attend the royal festival and capture the heart of the king's son. Despite challenges and obstacles, Cinderella's pure heart and unwavering spirit ultimately led her to her happy ending of marrying the prince and living happily ever after."

In [34]:
# Second element holds the retrieval results; index 0 is the list of chunk
# texts that were retrieved for the query.
response[1][0]

['cinderella expressed a wish, the bird threw down to her what she\n\nhad wished for.\n\nIt happened, however, that the king gave orders for a festival\n\nwhich was to last three days, and to which all the beautiful young\n\ngirls in the country were invited, in order that his son might choose\n\nhimself a bride. When the two step-sisters heard that they too were',
 'cinderella expressed a wish, the bird threw down to her what she\n\nhad wished for.\n\nIt happened, however, that the king gave orders for a festival\n\nwhich was to last three days, and to which all the beautiful young\n\ngirls in the country were invited, in order that his son might choose\n\nhimself a bride. When the two step-sisters heard that they too were',
 "danced with her only, and if any one invited her to dance, he said\n\nthis is my partner.\n\nWhen evening came, cinderella wished to leave, and the king's\n\nson was anxious to go with her, but she escaped from him so quickly\n\nthat he could not follow her. The

# Clustering with Indox OpenAI API embedding

In [8]:
# Split the same document with the clustering-based splitter, using the Indox
# OpenAI embedding model. The cell's log shows cluster generation plus
# chat-completion requests (presumably per-cluster summaries — TODO confirm).
data_2 = ClusteredSplit(
    file_path=file_path,
    embeddings=embed_indox_api_openai,
)

Starting processing...
--Generated 6 clusters--


2024-05-23 15:01:08,339 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-05-23 15:01:14,482 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-05-23 15:01:20,520 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-05-23 15:01:25,332 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-05-23 15:01:28,762 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-05-23 15:01:31,761 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


--Generated 1 clusters--


2024-05-23 15:01:51,529 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


End Chunking & Clustering process.


In [10]:
# Point the same pipeline object at a separate collection, "sample_2", so the
# clustered chunks are kept apart from the "sample" collection.
Indox.connect_to_vectorstore(
    embeddings=embed_indox_api_openai,
    collection_name="sample_2",
)

2024-05-23 15:02:27,308 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


Connection established successfully.


In [11]:
# Add the clustered chunks to the collection connected in the previous cell
# ("sample_2"). The returned vector-store object is echoed as the cell output.
Indox.store_in_vectorstore(data_2)

2024-05-23 15:05:04,654 - INFO - Document added successfully to the vector store.


<Indox.vectorstore.ChromaVectorStore at 0x1ca4a0e5130>

In [14]:
# Same question and QA model as the first run, now answered against the
# collection connected most recently ("sample_2").
response_2 = Indox.answer_question(
    query=query,
    qa_model=qa_indox_api_openai,
)

In [15]:
# Generated answer for the clustered-split run, for comparison with the
# answer from the Unstructured-split run.
response_2[0]

"Cinderella reaches her happy ending in the classic fairy tale by overcoming the mistreatment and cruelty from her step-family with the help of magical elements such as a hazel tree and a bird. These magical entities assist Cinderella in attending a royal festival where she captures the attention of the prince. Despite obstacles and challenges, including her stepmother's attempts to prevent her from attending the festival, Cinderella's kindness, perseverance, and the magical assistance she receives ultimately lead her to her happy ending. The prince recognizes her true worth, and they live happily ever after, symbolizing Cinderella's triumph over adversity and finding her true love and happiness."