In [1]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# Stand-alone splitter with default settings. It is not used by the ingestion
# pipeline; it only serves the optional manual inspection at the end of this cell.
parser = SentenceSplitter()

# Read the contents of ./files into `documents`, the input of the ingestion run.
reader = SimpleDirectoryReader(
    input_dir="./files",
)
documents = reader.load_data()

# Optional debugging aid: uncomment to split manually and dump the first node.
# nodes = parser.get_nodes_from_documents(documents)
# print(vars(nodes[0]))


In [2]:
from llama_index.core import Document
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.ingestion import IngestionPipeline, IngestionCache

import nest_asyncio
import qdrant_client

# Required when running inside a Jupyter notebook: patch asyncio so that
# nested event loops are allowed.
nest_asyncio.apply()

# Qdrant client configured with the ":memory:" location (in-memory vector DB),
# wrapped as a llama-index vector store on the "test_store" collection.
client = qdrant_client.QdrantClient(location=":memory:")
vector_store = QdrantVectorStore(
    client=client,
    collection_name="test_store",
)

# Transformations handed to the pipeline, in this order:
# sentence splitting (200-token chunks, no overlap), title extraction, embedding.
ingestion_steps = [
    SentenceSplitter(chunk_size=200, chunk_overlap=0),
    TitleExtractor(),
    OpenAIEmbedding(),
]

# The pipeline is bound to the vector store created above.
pipeline = IngestionPipeline(
    transformations=ingestion_steps,
    vector_store=vector_store,
)

# Reuse transformation results across kernel restarts through an on-disk cache.
#
# Order matters:
#   1. load a previously saved cache (if any) BEFORE running, so the run can
#      take advantage of it;
#   2. run the pipeline;
#   3. persist AFTER the run, so the file contains this run's results.
#
# Persisting before the run only ever writes the still-empty cache of a freshly
# built pipeline (and overwrites whatever an earlier session had saved).
from pathlib import Path

CACHE_PATH = "./pipeline_cache/llama_cache.json"

if Path(CACHE_PATH).is_file():
    pipeline.cache = IngestionCache.from_persist_path(CACHE_PATH)

nodes = pipeline.run(documents=documents)

# Save the now-populated cache for the next session.
pipeline.cache.persist(CACHE_PATH)



100%|██████████| 5/5 [00:01<00:00,  2.55it/s]
100%|██████████| 4/4 [00:00<00:00,  7.55it/s]
100%|██████████| 5/5 [00:00<00:00,  5.91it/s]
100%|██████████| 5/5 [00:00<00:00,  5.83it/s]
100%|██████████| 4/4 [00:00<00:00,  8.45it/s]
100%|██████████| 4/4 [00:01<00:00,  3.20it/s]
100%|██████████| 5/5 [00:01<00:00,  4.31it/s]
100%|██████████| 5/5 [00:00<00:00,  5.92it/s]
100%|██████████| 3/3 [00:00<00:00,  4.44it/s]
  self._client.create_payload_index(


In [3]:
from llama_index.core import VectorStoreIndex

# Build the index directly on top of the vector store that the ingestion
# pipeline was configured to write to, instead of re-ingesting the documents.
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# querying

In [5]:
from llama_index.llms.openrouter import OpenRouter
import os

# LLM accessed through OpenRouter. The API key is read from the environment so
# it never appears in the notebook; a missing variable raises KeyError here.
llm = OpenRouter(
    model="qwen/qwen2.5-vl-72b-instruct:free",
    api_key=os.environ['OPENROUTER_API_KEY'],
    context_window=128000,
    max_tokens=8192,
)

# Query engine over the index, answering with the "tree_summarize" response mode.
query_engine = index.as_query_engine(
    response_mode="tree_summarize",
    llm=llm,
)

question = "what does commerce on the internet rely on"
response = query_engine.query(question)
print(response)

Commerce on the internet relies on trust-based models that involve financial institutions mediating disputes. This system, however, has inherent weaknesses, such as the inability to conduct completely non-reversible transactions and the increased costs associated with mediation. These factors limit the practicality of small transactions and introduce uncertainties in payments. To overcome these challenges, an electronic payment system based on cryptographic proof is proposed, which would allow direct transactions between parties without the need for a trusted third party. This system aims to protect sellers from fraud and buyers through routine escrow mechanisms, all while eliminating the need for a central authority to oversee transactions.


In [7]:
from llama_index.core.evaluation import FaithfulnessEvaluator

# Ask a second question through the same query engine and show the answer.
response = query_engine.query("how does the paper define an electronic coin?")
print(response)

# Run the faithfulness evaluator (driven by the same LLM) on that response.
evaluator = FaithfulnessEvaluator(llm=llm)
eval_result = evaluator.evaluate_response(response=response)

# Last expression of the cell: displays the evaluator's pass/fail verdict.
eval_result.passing

An electronic coin is defined as a chain of digital signatures. Each owner transfers the coin to the next by digitally signing a hash of the previous transaction and the public key of the next owner, then appending these to the end of the coin. This allows a payee to verify the signatures and confirm the chain of ownership.


True