In [1]:
import sys
import os

# Directory containing the indoxArcg sources. Set the INDOXARCG_PATH
# environment variable to point at a different checkout; the original
# developer path remains the fallback so existing setups are unaffected.
# An empty INDOXARCG_PATH is treated the same as an unset one.
module_path = os.path.abspath(
    os.environ.get('INDOXARCG_PATH') or 'E:/Codes/inDox/libs/indoxArcg'
)
# Append only when absent so re-running this cell does not grow sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)




In [2]:
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv before reading the key
# (presumably from a local .env file — confirm the file location).
load_dotenv()

# Subscript access (not .get) so a missing key raises KeyError in this cell
# instead of surfacing later as an authentication failure.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

In [3]:
from indoxArcg.llms import OpenAi
from indoxArcg.embeddings import OpenAiEmbedding
from indoxArcg.data_loaders import Txt,DoclingReader
from indoxArcg.splitter import RecursiveCharacterTextSplitter,SemanticTextSplitter
from indoxArcg.pipelines.cag import CAG, KVCache
from pprint import pprint

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ASHKAN\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASHKAN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
# Chat model and embedding model, both authenticated with OPENAI_API_KEY.
llm = OpenAi(
    api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
)
embed_model = OpenAiEmbedding(
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)



[32mINFO[0m: [1mInitializing OpenAi with model: gpt-4o-mini[0m
[32mINFO[0m: [1mOpenAi initialized successfully[0m
[32mINFO[0m: [1mInitialized OpenAiEmbedding with model: text-embedding-3-small[0m


In [5]:
# Load the sample text file and split it using the splitter's default settings.
txt_loader = Txt(txt_path="sample.txt")
splitter = RecursiveCharacterTextSplitter()
docs = txt_loader.load()
split_docs = splitter.split_text(text=docs)

# Show the number of chunks, then the first chunk, each on its own line.
print(len(split_docs), split_docs[0], sep="\n")


38
The wife of a rich man fell sick, and as she felt that her end

was drawing near, she called her only daughter to her bedside and

said, dear child, be good and pious, and then the

good God will always protect you, and I will look down on you

from heaven and be near you.  Thereupon she closed her eyes and

departed.  Every day the maiden went out to her mother's grave,


In [6]:
# Convert the PDF with DoclingReader, export the converted document to plain
# text, then split that text with the semantic splitter.
pdf_file_path = "LLM output verification.pdf"  # no directory part: resolved against the working directory
docling_reader = DoclingReader(file_path=pdf_file_path)
pdf_doc = docling_reader.load()
text_docs = pdf_doc.document.export_to_text()
semantic_splitter = SemanticTextSplitter()  # constructed with no arguments, i.e. its defaults
pdf_doc_split = semantic_splitter.split_text(text_docs)

2025-01-22 18:50:03,575 - docling.document_converter - INFO - Going to convert document batch...
2025-01-22 18:50:05,382 - docling.utils.accelerator_utils - INFO - Accelerator device: 'cpu'
2025-01-22 18:50:07,318 - docling.utils.accelerator_utils - INFO - Accelerator device: 'cpu'
2025-01-22 18:50:07,852 - docling.utils.accelerator_utils - INFO - Accelerator device: 'cpu'
2025-01-22 18:50:08,230 - docling.pipeline.base_pipeline - INFO - Processing document LLM output verification.pdf
2025-01-22 18:50:29,571 - docling.document_converter - INFO - Finished converting document LLM output verification.pdf in 26.03 sec.


In [7]:
# Inspect the second chunk (index 1) produced from the PDF text.
pprint(pdf_doc_split[1])

('zkLLM is a cryptographic framework designed to ensure verifiable execution '
 'of large language models (LLMs) using Zero-Knowledge Proofs (ZKPs) . The key '
 'idea is that a third-party executor can prove they used the specified model '
 'to compute the given input and produce the output without revealing the '
 'underlying model parameters or the input data. zkLLM achieves this through '
 'efficient protocols tailored for LLM operations, such as transformer '
 'attention mechanisms, enabling secure and scalable verification.\n'
 '\n'
 'This innovative approach is detailed in the paper, zkLLM: Zero Knowledge '
 'Proofs for Large Language Models , which introduces core components like '
 'tlookup for non-arithmetic operations and zkAttn for attention mechanisms. '
 'The official implementation is available on: '
 'https://github.com/jvhs0706/zkllm-ccs2024\n'
 '\n'
 'Detailed Explanation of zkLLM\n'
 '\n'
 'The zkLLM framework introduces an innovative method for verifiable '
 'computa

In [8]:
# Optional variant (disabled): a CAG pipeline that also uses the embedding model.
# cag_with_embedding = CAG(
#     llm=llm,
#     embedding_model=embed_model,
#     cache=KVCache(),
# )
# cache_embed_key = "embed_cache"
# cag_with_embedding.preload_documents(split_docs, cache_embed_key)

# Text-file chunks. This pipeline gets its own name so it stays reachable
# after the PDF pipeline is created below.
cache_no_embed_key = "no_embed_cache"
cag_without_embedding_txt = CAG(llm=llm, cache=KVCache())
cag_without_embedding_txt.preload_documents(split_docs, cache_no_embed_key)

# PDF chunks. `cag_without_embedding` refers to this PDF pipeline in the
# cells that follow.
cache_no_embed_key_pdf = "no_embed_cache_pdf"
cag_without_embedding = CAG(llm=llm, cache=KVCache())
cag_without_embedding.preload_documents(pdf_doc_split, cache_no_embed_key_pdf)

[32mINFO[0m: [1mPrecomputing KV cache for 38 document chunks...[0m
[32mINFO[0m: [1mKV cache saved: kv_cache\no_embed_cache.pkl[0m
[32mINFO[0m: [1mPreloaded 38 document chunks into KV cache[0m
[32mINFO[0m: [1mPrecomputing KV cache for 16 document chunks...[0m
[32mINFO[0m: [1mKV cache saved: kv_cache\no_embed_cache_pdf.pkl[0m
[32mINFO[0m: [1mPreloaded 16 document chunks into KV cache[0m


In [9]:
# One question per document set: the first targets the text-file story,
# the second targets the PDF.
query_cinderella = "How Cinderella reach her happy ending?"
query_pdf = "how users could earn tokens?"

In [10]:
# pprint(cag_with_embedding.infer(query_cinderella,cache_key=cache_embed_key))

In [15]:
# TF-IDF similarity search over the PDF cache with a 0.3 similarity threshold.
response_tfidf = cag_without_embedding.infer(
    query_pdf,
    cache_key=cache_no_embed_key_pdf,
    similarity_search_type="tfidf",
    similarity_threshold=0.3,
)
pprint(response_tfidf)

[32mINFO[0m: [1mRetrieving relevant context...[0m
[32mINFO[0m: [1mSelected 0 relevant chunks from cache[0m
[32mINFO[0m: [1mPerforming inference with filtered context...[0m


2025-01-22 18:53:59,656 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'The context does not provide sufficient information to answer this query.'


In [18]:
# BM25 similarity search with a 0.7 similarity threshold.
# query_pdf asks about the PDF, so it must be run against the PDF cache key
# (cache_no_embed_key_pdf), not the text-file cache key.
response_bm25 = cag_without_embedding.infer(
    query_pdf,
    cache_key=cache_no_embed_key_pdf,
    similarity_search_type="bm25",
    similarity_threshold=0.7,
)
pprint(response_bm25)

[32mINFO[0m: [1mRetrieving relevant context...[0m
[32mINFO[0m: [1mSelected 5 relevant chunks from cache[0m
[32mINFO[0m: [1mPerforming inference with filtered context...[0m


2025-01-22 18:43:29,137 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


('Users can earn tokens by participating in the verification process of '
 'computations performed by third-party executors. The earning process '
 'includes:\n'
 '\n'
 '- **Computation Tasks**: Users can submit computation tasks where they run a '
 'large language model (LLM) and submit the result. If the result is verified, '
 'they receive the token as a reward.\n'
 '- **Computation Rewards**: Users will earn tokens by successfully completing '
 'computations. The reward structure will be tiered based on the complexity of '
 'tasks completed, with higher rewards for more challenging verifications.\n'
 '- **Staking Rewards**: Users can stake their tokens to support network '
 'security and operations. In return, they will receive additional tokens as '
 'staking rewards, promoting long-term holding and reducing circulating '
 'supply.')


In [16]:
# Jaccard similarity search with a 0.1 similarity threshold.
# query_pdf asks about the PDF, so it must be run against the PDF cache key
# (cache_no_embed_key_pdf), not the text-file cache key.
response_jaccard = cag_without_embedding.infer(
    query_pdf,
    cache_key=cache_no_embed_key_pdf,
    similarity_search_type="jaccard",
    similarity_threshold=0.1,
)
pprint(response_jaccard)

[32mINFO[0m: [1mRetrieving relevant context...[0m
[32mINFO[0m: [1mSelected 0 relevant chunks from cache[0m
[32mINFO[0m: [1mPerforming inference with filtered context...[0m


2025-01-22 18:54:56,373 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'The context does not provide sufficient information to answer this query.'


In [17]:
# Jaccard similarity search with smart retrieval enabled.
# query_pdf asks about the PDF, so it must be run against the PDF cache key
# (cache_no_embed_key_pdf), not the text-file cache key.
# NOTE(review): this rebinds `response_jaccard`, replacing the result of the
# plain Jaccard run; the name is kept so existing references still resolve.
response_jaccard = cag_without_embedding.infer(
    query_pdf,
    cache_key=cache_no_embed_key_pdf,
    similarity_search_type="jaccard",
    similarity_threshold=0.1,
    smart_retrieval=True,
)
pprint(response_jaccard)

[32mINFO[0m: [1mRetrieving relevant context...[0m
[32mINFO[0m: [1mUsing smart retrieval[0m
[32mINFO[0m: [1mSelected 0 relevant chunks from cache[0m
[32mINFO[0m: [1mPerforming web search for additional context[0m


2025-01-22 18:55:43,705 - primp - INFO - response: https://html.duckduckgo.com/html 200 26971
2025-01-22 18:55:46,083 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32mINFO[0m: [1mRelevant doc[0m


2025-01-22 18:55:46,986 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32mINFO[0m: [1mRelevant doc[0m


2025-01-22 18:55:48,049 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32mINFO[0m: [1mRelevant doc[0m


2025-01-22 18:55:48,912 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32mINFO[0m: [1mRelevant doc[0m


2025-01-22 18:55:49,872 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32mINFO[0m: [1mRelevant doc[0m
[32mINFO[0m: [1mPerforming inference with filtered context...[0m


2025-01-22 18:55:51,932 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


('Users can earn tokens by using the Grass platform, as the more they engage '
 'with it, the more tokens they will accumulate. Additionally, users can earn '
 'tokens by participating in Play-to-Earn (P2E) games, such as Tamadoge, where '
 'players raise and nurture digital pets to earn TAMA tokens. Furthermore, '
 'users may also earn tokens through delegating tokens to validators, who can '
 'then generate rewards based on the tokens delegated to them.')
