In [1]:
import os
from PyPDF2 import PdfReader
from typing import List, Tuple, Dict, Any
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.chains import RetrievalQA
from datasets import load_dataset
from langchain.schema import Document
import numpy as np

# DASHSCOPE_API_KEY = os.getenv("DASHSCOPE_API_KEY")

In [2]:
from llama_cpp import Llama
from langchain.embeddings.base import Embeddings

# 自定义 LangChain 的 Embeddings 类封装
class LlamaCppEmbeddings(Embeddings):
    def __init__(self, model_path: str):
        self.llm = Llama(model_path=model_path, embedding=True)

    def embed_documents(self, texts):
        # return [self.llm.embed(text)["data"][0]["embedding"] for text in texts]
        embeddings = []
        for text in texts:
            result = self.llm.embed(text)
            if isinstance(result, list) and isinstance(result[0], list):
                embeddings.append(result[0])
            else:
                embeddings.append(result)
        return embeddings

    def embed_query(self, text):
        # return self.llm.embed(text)["data"][0]["embedding"]
        result = self.llm.embed(text)
        return result[0] if isinstance(result, list) and isinstance(result[0], list) else result

In [3]:

class Proof:
    """
    隐私证明数据结构
    """
    def __init__(self, doc_id: str, score: float, vector: List[float], proof_data: Any):
        self.doc_id = doc_id
        self.score = score
        self.vector = vector
        self.proof_data = proof_data

In [None]:
class Client:
    """
    轻量级rag客户端，负责数据集加载、向量存储构建与检索。
    """
    def __init__(self, model_path: str = "./models/Qwen3-Embedding/Qwen3-Embedding-0.6B-Q8_0.gguf", 
                vectorstore_path: str = "faiss_db"): # dashscope_api_key: str,使用api调用embedding模型
        os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
        self.vectorstore_path = vectorstore_path
        # self.embeddings = DashScopeEmbeddings(
        #     model="text-embedding-v1",
        #     dashscope_api_key=dashscope_api_key
        # )
        self.embeddings = LlamaCppEmbeddings(model_path=model_path)
        self.db: FAISS = None

    def _chunk_text(self, text: str, chunk_size=1000, overlap= 200) -> list[str]:
        """
        将文本分块处理，使用递归字符分割器。
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=overlap
        )
        return splitter.split_text(text)

    def _read_pdfs(self, pdf_paths: List[str]) -> str:
        text = []
        for path in pdf_paths:
            reader = PdfReader(path)
            for page in reader.pages:
                text.append(page.extract_text() or "")
        return "\n".join(text)

    def build_vectorstore_from_pdf(self, pdf_paths: list[str]) -> None:
        """
        处理pdf文件并构建FAISS向量存储。
        """
        raw = self._read_pdfs(pdf_paths)
        chunks = self._chunk_text(raw)
        self.db = FAISS.from_texts(chunks, embedding=self.embeddings)
        self.db.save_local(self.vectorstore_path)
        print(f"Vectorstore built at '{self.vectorstore_path}' with {len(chunks)} chunks.")

    def build_vectorstore_from_wiki(self, sample_size=100, batch_size=10):
        # 启用streaming模式在线读取huggingface datasets
        dataset = load_dataset("wikimedia/wikipedia", "20231101.en", streaming=True)
        iterator = iter(dataset["train"])
        texts = []
        count = 0
        for item in iterator:
            if count >= sample_size:
                break
            text = item.get("text", "")
            if text:
                texts.append(text)
                count += 1
        print(f"Total collected Wikipedia texts: {len(texts)}")

        # 分块并批量处理
        all_chunks = []
        for i, text in enumerate(texts):
            chunks = self._chunk_text(text)
            all_chunks.extend(chunks)

            # 每 batch_size 保存一次，防止内存溢出
            if len(all_chunks) >= batch_size or i == len(texts) - 1:
                if self.db is None:
                    self.db = FAISS.from_texts(all_chunks, embedding=self.embeddings)
                else:
                    self.db.add_texts(all_chunks)
                all_chunks.clear()
                print(f"Processed {i+1}/{len(texts)} articles...")

        # 保存向量库
        if self.db:
            self.db.save_local(self.vectorstore_path)
            print(f"Vectorstore saved to {self.vectorstore_path}")
        else:
            print("No data processed.")

    def load_vectorstore(self) -> None:
        """
        加载已保存的向量存储，并初始化检索器。
        """
        if not os.path.exists(self.vectorstore_path):
            raise FileNotFoundError(f"Vectorstore directory '{self.vectorstore_path}' not found.")
        self.db = FAISS.load_local(
            self.vectorstore_path,
            embeddings=self.embeddings,
            allow_dangerous_deserialization=True
        )
        self.retriever = self.db.as_retriever()
        print("Vectorstore loaded and retriever initialized.")

    def retrieve(self, query: str, top_k: int = 5) -> Tuple[List[Document], List[List[float]]]:
        """
        对输入 query 执行检索，返回 top_k 最相似文档及其向量。
        Output:
          docs: List[langchain.schema.Document]
          vectors: List[List[float]]
        """
        if self.db is None:
            raise RuntimeError("Vectorstore 未初始化，调用 load_vectorstore 或 build 方法先初始化。")
        # 生成 query 向量
        q_vec = self.embeddings.embed_query(query)
        print(q_vec)
        # 确保转换成 2D NumPy 数组，FAISS 要求 shape = (n_queries, dim)
        if isinstance(q_vec, list):
            q_vec = np.array(q_vec, dtype="float32").reshape(1, -1)
        # 使用 FAISS 原生 index.search
        D, I = self.db.index.search(q_vec, top_k)  # D: 距离, I: 索引ID
        docs = []
        vecs = []

        for idx in I[0]:
            if idx == -1:
                continue
            # 用 LangChain 的 docstore 获取 Document
            doc_id = self.db.index_to_docstore_id[idx]
            doc = self.db.docstore.search(doc_id)
            docs.append(doc)

            # 从 FAISS 中 reconstruct 向量
            vec = self.db.index.reconstruct(int(idx))
            vecs.append(vec)

        return docs, vecs
        # results = self.db.index.search(q_vec, top_k)
        # print(results)
        # ids, scores = results[1].tolist()[0], results[0].tolist()[0]
        # docs = [self.db.docstore.search(id) for id in ids]
        # vecs = [self.db.index.reconstruct(id) for id in ids]
        # return docs, vecs


    def query(self, question: str, top_k: int = 5) -> Tuple[List[Proof], List[float]]:
        """
        基于检索结果计算 Proof，并返回 proofs 列表及 query 向量。
        """
        docs, vecs = self.retrieve(question, top_k)
        proofs: List[Proof] = []
        for doc, vec in zip(docs, vecs):
            proof_data = {"doc_id": doc.metadata["source"], "merkle_path": []}
            proofs.append(Proof(doc.metadata.get("source", ""), score=0.0, vector=vec, proof_data=proof_data))
        q_vec = self.embeddings.embed_query(question)
        return proofs, q_vec
    
    # 这一段query包含了调用llm生成答案部分，一种是调用ollama部署的llm，一种是调用api并使用agent工具
    # def query(self, question: str) -> str:
    #     """
    #     基于已加载的向量存储进行查询，并返回生成的答案。
    #     """
    #     if self.retriever is None:
    #         raise RuntimeError("Retriever not initialized. Call 'load_vectorstore()' first.")
    #     llm = ChatOllama(model=self.ollama_model)
    #     qa_chain = RetrievalQA.from_chain_type(
    #         llm=llm,
    #         retriever=self.retriever,
    #         chain_type="stuff",
    #         return_source_documents=False
    #     )
    #     result = qa_chain.invoke({"query": question})
    #     return result["result"]
        # 创建检索工具
        # retrieval_tool = create_retriever_tool(
        #     self.retriever,
        #     name="pdf_extractor",
        #     description="Tool to answer queries based on the processed PDF content."
        # )

        # prompt = ChatPromptTemplate.from_messages([
        #     ("system", 
        #      """
        #      你是AI助手，请根据提供的上下文回答问题，确保提供所有细节，
        #      如果答案不在上下文中，请说 '答案不在上下文中'，不要提供错误的答案
        #      """),
        #     ("human", "{input}"),
        #     ("placeholder", "{agent_scratchpad}")
        # ])
        # agent = create_tool_calling_agent(llm, [retrieval_tool], prompt)
        # executor = AgentExecutor(agent=agent, tools=[retrieval_tool], verbose=False)
        # result = executor.invoke({"input": question})
        # return result.get('output', '')


In [None]:
# 初始化 RAG 客户端
client = Client()
# 构建或加载向量库
pdf_files = ['D:\RAGnet\paper.pdf']
client.build_vectorstore_from_pdf(pdf_files)
client.load_vectorstore()
docs, vecs = client.retrieve("What is the main topic of the paper?")

llama_model_loader: loaded meta data with 36 key-value pairs and 310 tensors from ./models/Qwen3-Embedding/Qwen3-Embedding-0.6B-Q8_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen3
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen3 Embedding 0.6b
llama_model_loader: - kv   3:                           general.basename str              = qwen3-embedding
llama_model_loader: - kv   4:                         general.size_label str              = 0.6B
llama_model_loader: - kv   5:                            general.license str              = apache-2.0
llama_model_loader: - kv   6:                   general.base_model.count u32              = 1
llama_model_loader: - kv  

KeyboardInterrupt: 

In [5]:
client = Client()
client.build_vectorstore_from_wiki(sample_size=100, batch_size=10)

llama_model_loader: loaded meta data with 36 key-value pairs and 310 tensors from ./models/Qwen3-Embedding/Qwen3-Embedding-0.6B-Q8_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen3
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen3 Embedding 0.6b
llama_model_loader: - kv   3:                           general.basename str              = qwen3-embedding
llama_model_loader: - kv   4:                         general.size_label str              = 0.6B
llama_model_loader: - kv   5:                            general.license str              = apache-2.0
llama_model_loader: - kv   6:                   general.base_model.count u32              = 1
llama_model_loader: - kv  

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Total collected Wikipedia texts: 100


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     534.01 ms /   100 tokens (    5.34 ms per token,   187.26 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     542.37 ms /   101 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1063.42 ms /   200 tokens (    5.32 ms per token,   188.07 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1077.61 ms /   201 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     244.87 ms /    46 tokens (    5.32 ms per token,   187.85 tokens per second)
llama_perf_context_print:        eval time = 

Processed 1/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1079.40 ms /   207 tokens (    5.21 ms per token,   191.77 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1087.11 ms /   208 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     983.51 ms /   187 tokens (    5.26 ms per token,   190.14 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     995.83 ms /   188 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1113.48 ms /   206 tokens (    5.41 ms per token,   185.01 tokens per second)
llama_perf_context_print:        eval time = 

Processed 2/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     830.32 ms /   163 tokens (    5.09 ms per token,   196.31 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     838.16 ms /   164 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     632.53 ms /   118 tokens (    5.36 ms per token,   186.55 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     640.12 ms /   119 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     777.06 ms /   151 tokens (    5.15 ms per token,   194.32 tokens per second)
llama_perf_context_print:        eval time = 

Processed 3/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     942.32 ms /   180 tokens (    5.24 ms per token,   191.02 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     950.54 ms /   181 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1217.42 ms /   222 tokens (    5.48 ms per token,   182.35 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1225.57 ms /   223 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     514.43 ms /    90 tokens (    5.72 ms per token,   174.95 tokens per second)
llama_perf_context_print:        eval time = 

Processed 4/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     705.03 ms /   118 tokens (    5.97 ms per token,   167.37 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     712.12 ms /   119 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1071.97 ms /   205 tokens (    5.23 ms per token,   191.24 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1079.62 ms /   206 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     677.80 ms /   121 tokens (    5.60 ms per token,   178.52 tokens per second)
llama_perf_context_print:        eval time = 

Processed 5/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     712.22 ms /   116 tokens (    6.14 ms per token,   162.87 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     718.75 ms /   117 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1142.85 ms /   214 tokens (    5.34 ms per token,   187.25 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1151.31 ms /   215 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     371.69 ms /    64 tokens (    5.81 ms per token,   172.19 tokens per second)
llama_perf_context_print:        eval time = 

Processed 6/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     545.23 ms /   102 tokens (    5.35 ms per token,   187.08 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     551.26 ms /   103 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     710.42 ms /   137 tokens (    5.19 ms per token,   192.84 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     716.67 ms /   138 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     446.45 ms /    87 tokens (    5.13 ms per token,   194.87 tokens per second)
llama_perf_context_print:        eval time = 

Processed 7/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1093.22 ms /   212 tokens (    5.16 ms per token,   193.92 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1101.96 ms /   213 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1407.58 ms /   218 tokens (    6.46 ms per token,   154.88 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1416.62 ms /   219 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1305.39 ms /   227 tokens (    5.75 ms per token,   173.89 tokens per second)
llama_perf_context_print:        eval time = 

Processed 8/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1416.70 ms /   264 tokens (    5.37 ms per token,   186.35 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1425.93 ms /   265 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1372.50 ms /   227 tokens (    6.05 ms per token,   165.39 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1380.12 ms /   228 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     859.66 ms /   161 tokens (    5.34 ms per token,   187.28 tokens per second)
llama_perf_context_print:        eval time = 

Processed 10/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1076.92 ms /   209 tokens (    5.15 ms per token,   194.07 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1086.11 ms /   210 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     660.95 ms /   112 tokens (    5.90 ms per token,   169.45 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     666.26 ms /   113 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1008.46 ms /   181 tokens (    5.57 ms per token,   179.48 tokens per second)
llama_perf_context_print:        eval time = 

Processed 13/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     941.55 ms /   185 tokens (    5.09 ms per token,   196.49 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     948.77 ms /   186 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     901.48 ms /   175 tokens (    5.15 ms per token,   194.13 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     909.54 ms /   176 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1034.51 ms /   200 tokens (    5.17 ms per token,   193.33 tokens per second)
llama_perf_context_print:        eval time = 

Processed 14/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     889.33 ms /   174 tokens (    5.11 ms per token,   195.65 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     897.59 ms /   175 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     590.41 ms /   113 tokens (    5.22 ms per token,   191.39 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     595.75 ms /   114 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     521.34 ms /   104 tokens (    5.01 ms per token,   199.49 tokens per second)
llama_perf_context_print:        eval time = 

Processed 15/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1124.45 ms /   201 tokens (    5.59 ms per token,   178.75 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1135.32 ms /   202 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1420.00 ms /   254 tokens (    5.59 ms per token,   178.87 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1428.53 ms /   255 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1441.73 ms /   257 tokens (    5.61 ms per token,   178.26 tokens per second)
llama_perf_context_print:        eval time = 

Processed 17/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1172.83 ms /   207 tokens (    5.67 ms per token,   176.50 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1182.04 ms /   208 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     702.82 ms /   119 tokens (    5.91 ms per token,   169.32 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     710.71 ms /   120 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1088.05 ms /   208 tokens (    5.23 ms per token,   191.17 tokens per second)
llama_perf_context_print:        eval time = 

Processed 18/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     527.67 ms /    97 tokens (    5.44 ms per token,   183.83 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     532.88 ms /    98 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     998.92 ms /   192 tokens (    5.20 ms per token,   192.21 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1006.22 ms /   193 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     625.12 ms /    93 tokens (    6.72 ms per token,   148.77 tokens per second)
llama_perf_context_print:        eval time = 

Processed 19/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     840.58 ms /   162 tokens (    5.19 ms per token,   192.72 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     849.65 ms /   163 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     726.78 ms /   144 tokens (    5.05 ms per token,   198.13 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     734.17 ms /   145 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1096.37 ms /   215 tokens (    5.10 ms per token,   196.10 tokens per second)
llama_perf_context_print:        eval time = 

Processed 20/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     726.66 ms /   137 tokens (    5.30 ms per token,   188.53 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     733.99 ms /   138 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     817.21 ms /   158 tokens (    5.17 ms per token,   193.34 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     824.93 ms /   159 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1117.35 ms /   190 tokens (    5.88 ms per token,   170.05 tokens per second)
llama_perf_context_print:        eval time = 

Processed 21/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1090.93 ms /   211 tokens (    5.17 ms per token,   193.41 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1099.36 ms /   212 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     669.31 ms /   132 tokens (    5.07 ms per token,   197.22 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     676.98 ms /   133 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     638.66 ms /   126 tokens (    5.07 ms per token,   197.29 tokens per second)
llama_perf_context_print:        eval time = 

Processed 22/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     816.17 ms /   157 tokens (    5.20 ms per token,   192.36 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     824.30 ms /   158 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1494.38 ms /   280 tokens (    5.34 ms per token,   187.37 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1503.61 ms /   281 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1551.03 ms /   297 tokens (    5.22 ms per token,   191.49 tokens per second)
llama_perf_context_print:        eval time = 

Processed 24/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     994.26 ms /   137 tokens (    7.26 ms per token,   137.79 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1003.03 ms /   138 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1391.14 ms /   199 tokens (    6.99 ms per token,   143.05 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1400.26 ms /   200 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1028.26 ms /   194 tokens (    5.30 ms per token,   188.67 tokens per second)
llama_perf_context_print:        eval time = 

Processed 25/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1067.45 ms /   204 tokens (    5.23 ms per token,   191.11 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1076.45 ms /   205 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     980.77 ms /   184 tokens (    5.33 ms per token,   187.61 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     988.53 ms /   185 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     349.69 ms /    72 tokens (    4.86 ms per token,   205.89 tokens per second)
llama_perf_context_print:        eval time = 

Processed 27/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1177.29 ms /   224 tokens (    5.26 ms per token,   190.27 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1187.16 ms /   225 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     433.24 ms /    87 tokens (    4.98 ms per token,   200.81 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     438.04 ms /    88 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     744.49 ms /   145 tokens (    5.13 ms per token,   194.76 tokens per second)
llama_perf_context_print:        eval time = 

Processed 28/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     776.52 ms /   151 tokens (    5.14 ms per token,   194.46 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     783.36 ms /   152 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1453.20 ms /   208 tokens (    6.99 ms per token,   143.13 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1466.02 ms /   209 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     915.72 ms /   151 tokens (    6.06 ms per token,   164.90 tokens per second)
llama_perf_context_print:        eval time = 

Processed 29/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1012.88 ms /   197 tokens (    5.14 ms per token,   194.50 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1021.59 ms /   198 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     309.13 ms /    58 tokens (    5.33 ms per token,   187.62 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     313.20 ms /    59 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1245.13 ms /   239 tokens (    5.21 ms per token,   191.95 tokens per second)
llama_perf_context_print:        eval time = 

Processed 30/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     871.95 ms /   168 tokens (    5.19 ms per token,   192.67 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     880.93 ms /   169 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1137.37 ms /   218 tokens (    5.22 ms per token,   191.67 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1146.57 ms /   219 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     914.30 ms /   167 tokens (    5.47 ms per token,   182.65 tokens per second)
llama_perf_context_print:        eval time = 

Processed 31/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     879.16 ms /   141 tokens (    6.24 ms per token,   160.38 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     886.67 ms /   142 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1024.25 ms /   157 tokens (    6.52 ms per token,   153.28 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1033.08 ms /   158 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1395.02 ms /   212 tokens (    6.58 ms per token,   151.97 tokens per second)
llama_perf_context_print:        eval time = 

Processed 32/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1043.62 ms /   143 tokens (    7.30 ms per token,   137.02 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1054.94 ms /   144 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     690.77 ms /   128 tokens (    5.40 ms per token,   185.30 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     699.60 ms /   129 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     776.06 ms /   149 tokens (    5.21 ms per token,   192.00 tokens per second)
llama_perf_context_print:        eval time = 

Processed 33/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     983.64 ms /   181 tokens (    5.43 ms per token,   184.01 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     993.46 ms /   182 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     935.09 ms /   181 tokens (    5.17 ms per token,   193.57 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     942.84 ms /   182 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     762.43 ms /   152 tokens (    5.02 ms per token,   199.36 tokens per second)
llama_perf_context_print:        eval time = 

Processed 34/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     579.25 ms /   107 tokens (    5.41 ms per token,   184.72 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     586.86 ms /   108 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     896.57 ms /   172 tokens (    5.21 ms per token,   191.84 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     903.78 ms /   173 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     537.07 ms /   108 tokens (    4.97 ms per token,   201.09 tokens per second)
llama_perf_context_print:        eval time = 

Processed 35/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1018.63 ms /   195 tokens (    5.22 ms per token,   191.43 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1027.63 ms /   196 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     635.94 ms /   120 tokens (    5.30 ms per token,   188.70 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     642.69 ms /   121 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     766.88 ms /   147 tokens (    5.22 ms per token,   191.69 tokens per second)
llama_perf_context_print:        eval time = 

Processed 36/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     570.85 ms /   105 tokens (    5.44 ms per token,   183.94 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     576.58 ms /   106 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     916.73 ms /   176 tokens (    5.21 ms per token,   191.99 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     924.34 ms /   177 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     919.12 ms /   179 tokens (    5.13 ms per token,   194.75 tokens per second)
llama_perf_context_print:        eval time = 

Processed 37/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     772.12 ms /   150 tokens (    5.15 ms per token,   194.27 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     780.12 ms /   151 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     732.15 ms /   137 tokens (    5.34 ms per token,   187.12 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     739.43 ms /   138 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     558.91 ms /   103 tokens (    5.43 ms per token,   184.29 tokens per second)
llama_perf_context_print:        eval time = 

Processed 38/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1056.18 ms /   203 tokens (    5.20 ms per token,   192.20 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1066.54 ms /   204 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     513.63 ms /    96 tokens (    5.35 ms per token,   186.90 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     518.88 ms /    97 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     973.07 ms /   190 tokens (    5.12 ms per token,   195.26 tokens per second)
llama_perf_context_print:        eval time = 

Processed 39/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1307.29 ms /   250 tokens (    5.23 ms per token,   191.23 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1316.78 ms /   251 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1279.91 ms /   229 tokens (    5.59 ms per token,   178.92 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1288.25 ms /   230 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     761.21 ms /   148 tokens (    5.14 ms per token,   194.43 tokens per second)
llama_perf_context_print:        eval time = 

Processed 41/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1023.73 ms /   195 tokens (    5.25 ms per token,   190.48 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1031.83 ms /   196 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     566.28 ms /   113 tokens (    5.01 ms per token,   199.55 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     571.93 ms /   114 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1290.69 ms /   219 tokens (    5.89 ms per token,   169.68 tokens per second)
llama_perf_context_print:        eval time = 

Processed 42/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     938.84 ms /   136 tokens (    6.90 ms per token,   144.86 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     946.54 ms /   137 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1065.71 ms /   175 tokens (    6.09 ms per token,   164.21 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1074.77 ms /   176 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     470.03 ms /    83 tokens (    5.66 ms per token,   176.58 tokens per second)
llama_perf_context_print:        eval time = 

Processed 43/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     741.62 ms /   141 tokens (    5.26 ms per token,   190.13 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     749.43 ms /   142 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     973.49 ms /   188 tokens (    5.18 ms per token,   193.12 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     981.00 ms /   189 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1175.13 ms /   219 tokens (    5.37 ms per token,   186.36 tokens per second)
llama_perf_context_print:        eval time = 

Processed 44/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     583.72 ms /    98 tokens (    5.96 ms per token,   167.89 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     590.16 ms /    99 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1034.90 ms /   175 tokens (    5.91 ms per token,   169.10 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1044.84 ms /   176 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1496.45 ms /   237 tokens (    6.31 ms per token,   158.38 tokens per second)
llama_perf_context_print:        eval time = 

Processed 45/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     827.32 ms /   135 tokens (    6.13 ms per token,   163.18 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     836.17 ms /   136 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1163.16 ms /   188 tokens (    6.19 ms per token,   161.63 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1172.61 ms /   189 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     851.67 ms /   141 tokens (    6.04 ms per token,   165.56 tokens per second)
llama_perf_context_print:        eval time = 

Processed 47/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1091.98 ms /   203 tokens (    5.38 ms per token,   185.90 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1101.71 ms /   204 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     867.32 ms /   160 tokens (    5.42 ms per token,   184.48 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     875.00 ms /   161 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1037.64 ms /   202 tokens (    5.14 ms per token,   194.67 tokens per second)
llama_perf_context_print:        eval time = 

Processed 48/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1059.37 ms /   199 tokens (    5.32 ms per token,   187.85 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1068.09 ms /   200 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1131.32 ms /   210 tokens (    5.39 ms per token,   185.62 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1145.37 ms /   211 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1085.89 ms /   203 tokens (    5.35 ms per token,   186.94 tokens per second)
llama_perf_context_print:        eval time = 

Processed 49/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     994.68 ms /   188 tokens (    5.29 ms per token,   189.00 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1004.33 ms /   189 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     725.76 ms /   139 tokens (    5.22 ms per token,   191.52 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     734.13 ms /   140 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     539.28 ms /   103 tokens (    5.24 ms per token,   190.99 tokens per second)
llama_perf_context_print:        eval time = 

Processed 50/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     604.12 ms /   115 tokens (    5.25 ms per token,   190.36 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     612.29 ms /   116 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     967.19 ms /   188 tokens (    5.14 ms per token,   194.38 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     975.84 ms /   189 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     786.50 ms /   148 tokens (    5.31 ms per token,   188.18 tokens per second)
llama_perf_context_print:        eval time = 

Processed 51/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     846.90 ms /   159 tokens (    5.33 ms per token,   187.74 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     855.90 ms /   160 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     781.03 ms /   151 tokens (    5.17 ms per token,   193.33 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     787.96 ms /   152 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     693.99 ms /   133 tokens (    5.22 ms per token,   191.64 tokens per second)
llama_perf_context_print:        eval time = 

Processed 52/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     855.84 ms /   153 tokens (    5.59 ms per token,   178.77 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     864.46 ms /   154 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     396.74 ms /    72 tokens (    5.51 ms per token,   181.48 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     401.98 ms /    73 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     902.00 ms /   174 tokens (    5.18 ms per token,   192.90 tokens per second)
llama_perf_context_print:        eval time = 

Processed 53/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     747.35 ms /   138 tokens (    5.42 ms per token,   184.65 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     755.24 ms /   139 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     705.48 ms /   130 tokens (    5.43 ms per token,   184.27 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     712.91 ms /   131 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     523.84 ms /   102 tokens (    5.14 ms per token,   194.71 tokens per second)
llama_perf_context_print:        eval time = 

Processed 54/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     598.32 ms /   111 tokens (    5.39 ms per token,   185.52 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     604.24 ms /   112 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1039.03 ms /   194 tokens (    5.36 ms per token,   186.71 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1048.17 ms /   195 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1031.05 ms /   191 tokens (    5.40 ms per token,   185.25 tokens per second)
llama_perf_context_print:        eval time = 

Processed 56/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     926.57 ms /   180 tokens (    5.15 ms per token,   194.27 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     935.12 ms /   181 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     831.97 ms /   156 tokens (    5.33 ms per token,   187.51 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     839.35 ms /   157 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1087.15 ms /   195 tokens (    5.58 ms per token,   179.37 tokens per second)
llama_perf_context_print:        eval time = 

Processed 57/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     431.09 ms /    80 tokens (    5.39 ms per token,   185.58 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     436.88 ms /    81 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     783.30 ms /   152 tokens (    5.15 ms per token,   194.05 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     791.48 ms /   153 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     974.83 ms /   185 tokens (    5.27 ms per token,   189.78 tokens per second)
llama_perf_context_print:        eval time = 

Processed 58/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     835.45 ms /   154 tokens (    5.42 ms per token,   184.33 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     843.89 ms /   155 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     807.93 ms /   154 tokens (    5.25 ms per token,   190.61 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     815.59 ms /   155 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     691.32 ms /   136 tokens (    5.08 ms per token,   196.73 tokens per second)
llama_perf_context_print:        eval time = 

Processed 59/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     744.31 ms /   136 tokens (    5.47 ms per token,   182.72 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     750.89 ms /   137 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     780.74 ms /   152 tokens (    5.14 ms per token,   194.69 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     788.71 ms /   153 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     934.44 ms /   180 tokens (    5.19 ms per token,   192.63 tokens per second)
llama_perf_context_print:        eval time = 

Processed 60/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     421.64 ms /    73 tokens (    5.78 ms per token,   173.13 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     430.09 ms /    74 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     977.33 ms /   185 tokens (    5.28 ms per token,   189.29 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     986.92 ms /   186 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     632.95 ms /   116 tokens (    5.46 ms per token,   183.27 tokens per second)
llama_perf_context_print:        eval time = 

Processed 61/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1111.59 ms /   208 tokens (    5.34 ms per token,   187.12 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1121.41 ms /   209 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     608.14 ms /   116 tokens (    5.24 ms per token,   190.74 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     614.77 ms /   117 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     926.90 ms /   180 tokens (    5.15 ms per token,   194.20 tokens per second)
llama_perf_context_print:        eval time = 

Processed 62/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     793.01 ms /   141 tokens (    5.62 ms per token,   177.80 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     802.69 ms /   142 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     737.22 ms /   138 tokens (    5.34 ms per token,   187.19 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     744.09 ms /   139 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     409.19 ms /    75 tokens (    5.46 ms per token,   183.29 tokens per second)
llama_perf_context_print:        eval time = 

Processed 63/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1021.16 ms /   195 tokens (    5.24 ms per token,   190.96 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1030.33 ms /   196 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     919.91 ms /   170 tokens (    5.41 ms per token,   184.80 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     934.69 ms /   171 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1129.10 ms /   209 tokens (    5.40 ms per token,   185.10 tokens per second)
llama_perf_context_print:        eval time = 

Processed 65/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     719.93 ms /   137 tokens (    5.25 ms per token,   190.30 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     727.29 ms /   138 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     881.54 ms /   166 tokens (    5.31 ms per token,   188.31 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     889.18 ms /   167 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     889.85 ms /   149 tokens (    5.97 ms per token,   167.44 tokens per second)
llama_perf_context_print:        eval time = 

Processed 66/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1124.91 ms /   213 tokens (    5.28 ms per token,   189.35 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1133.73 ms /   214 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     791.94 ms /   150 tokens (    5.28 ms per token,   189.41 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     798.58 ms /   151 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     960.12 ms /   183 tokens (    5.25 ms per token,   190.60 tokens per second)
llama_perf_context_print:        eval time = 

Processed 68/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1126.48 ms /   212 tokens (    5.31 ms per token,   188.20 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1137.18 ms /   213 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     856.79 ms /   161 tokens (    5.32 ms per token,   187.91 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     863.94 ms /   162 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     900.18 ms /   174 tokens (    5.17 ms per token,   193.29 tokens per second)
llama_perf_context_print:        eval time = 

Processed 69/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1257.54 ms /   233 tokens (    5.40 ms per token,   185.28 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1267.18 ms /   234 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1100.05 ms /   196 tokens (    5.61 ms per token,   178.17 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1110.94 ms /   197 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     580.07 ms /   104 tokens (    5.58 ms per token,   179.29 tokens per second)
llama_perf_context_print:        eval time = 

Processed 70/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     610.57 ms /   117 tokens (    5.22 ms per token,   191.62 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     617.23 ms /   118 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     746.49 ms /   140 tokens (    5.33 ms per token,   187.55 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     753.42 ms /   141 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     503.27 ms /    95 tokens (    5.30 ms per token,   188.77 tokens per second)
llama_perf_context_print:        eval time = 

Processed 71/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     946.15 ms /   179 tokens (    5.29 ms per token,   189.19 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     955.42 ms /   180 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     899.34 ms /   166 tokens (    5.42 ms per token,   184.58 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     907.18 ms /   167 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     857.40 ms /   163 tokens (    5.26 ms per token,   190.11 tokens per second)
llama_perf_context_print:        eval time = 

Processed 73/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     850.51 ms /   167 tokens (    5.09 ms per token,   196.35 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     859.11 ms /   168 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     782.74 ms /   147 tokens (    5.32 ms per token,   187.80 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     790.14 ms /   148 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1172.70 ms /   215 tokens (    5.45 ms per token,   183.34 tokens per second)
llama_perf_context_print:        eval time = 

Processed 74/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1000.24 ms /   188 tokens (    5.32 ms per token,   187.96 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1009.77 ms /   189 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     537.47 ms /   101 tokens (    5.32 ms per token,   187.92 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     543.18 ms /   102 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1316.53 ms /   190 tokens (    6.93 ms per token,   144.32 tokens per second)
llama_perf_context_print:        eval time = 

Processed 75/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     545.24 ms /    94 tokens (    5.80 ms per token,   172.40 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     552.24 ms /    95 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     415.74 ms /    78 tokens (    5.33 ms per token,   187.62 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     421.14 ms /    79 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1050.15 ms /   200 tokens (    5.25 ms per token,   190.45 tokens per second)
llama_perf_context_print:        eval time = 

Processed 77/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1298.18 ms /   221 tokens (    5.87 ms per token,   170.24 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1308.67 ms /   222 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     342.17 ms /    45 tokens (    7.60 ms per token,   131.51 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     349.70 ms /    46 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1163.83 ms /   216 tokens (    5.39 ms per token,   185.59 tokens per second)
llama_perf_context_print:        eval time = 

Processed 78/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     740.78 ms /   135 tokens (    5.49 ms per token,   182.24 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     747.58 ms /   136 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     537.95 ms /   104 tokens (    5.17 ms per token,   193.33 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     544.22 ms /   105 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     830.24 ms /   158 tokens (    5.25 ms per token,   190.31 tokens per second)
llama_perf_context_print:        eval time = 

Processed 79/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1182.70 ms /   215 tokens (    5.50 ms per token,   181.79 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1192.58 ms /   216 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1411.23 ms /   255 tokens (    5.53 ms per token,   180.69 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1420.94 ms /   256 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1159.13 ms /   217 tokens (    5.34 ms per token,   187.21 tokens per second)
llama_perf_context_print:        eval time = 

Processed 80/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     852.19 ms /   159 tokens (    5.36 ms per token,   186.58 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     861.67 ms /   160 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     384.80 ms /    74 tokens (    5.20 ms per token,   192.31 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     389.98 ms /    75 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1133.80 ms /   208 tokens (    5.45 ms per token,   183.45 tokens per second)
llama_perf_context_print:        eval time = 

Processed 81/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1115.24 ms /   202 tokens (    5.52 ms per token,   181.13 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1126.11 ms /   203 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     380.30 ms /    71 tokens (    5.36 ms per token,   186.69 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     385.24 ms /    72 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     989.44 ms /   190 tokens (    5.21 ms per token,   192.03 tokens per second)
llama_perf_context_print:        eval time = 

Processed 82/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1308.74 ms /   240 tokens (    5.45 ms per token,   183.38 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1321.47 ms /   241 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1917.95 ms /   336 tokens (    5.71 ms per token,   175.19 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1933.36 ms /   337 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     892.49 ms /   130 tokens (    6.87 ms per token,   145.66 tokens per second)
llama_perf_context_print:        eval time = 

Processed 84/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     244.12 ms /    33 tokens (    7.40 ms per token,   135.18 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     249.31 ms /    34 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1237.12 ms /   173 tokens (    7.15 ms per token,   139.84 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1246.18 ms /   174 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1508.57 ms /   195 tokens (    7.74 ms per token,   129.26 tokens per second)
llama_perf_context_print:        eval time = 

Processed 86/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     320.05 ms /    59 tokens (    5.42 ms per token,   184.35 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     325.81 ms /    60 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     401.09 ms /    75 tokens (    5.35 ms per token,   186.99 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     406.47 ms /    76 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     876.30 ms /   170 tokens (    5.15 ms per token,   194.00 tokens per second)
llama_perf_context_print:        eval time = 

Processed 87/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     749.13 ms /   143 tokens (    5.24 ms per token,   190.89 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     757.14 ms /   144 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1382.89 ms /   251 tokens (    5.51 ms per token,   181.50 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1393.35 ms /   252 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     685.40 ms /   108 tokens (    6.35 ms per token,   157.57 tokens per second)
llama_perf_context_print:        eval time = 

Processed 88/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1601.65 ms /   292 tokens (    5.49 ms per token,   182.31 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1613.69 ms /   293 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1962.16 ms /   327 tokens (    6.00 ms per token,   166.65 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1983.94 ms /   328 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1582.95 ms /   283 tokens (    5.59 ms per token,   178.78 tokens per second)
llama_perf_context_print:        eval time = 

Processed 90/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1123.96 ms /   208 tokens (    5.40 ms per token,   185.06 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1134.51 ms /   209 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     371.24 ms /    68 tokens (    5.46 ms per token,   183.17 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     375.91 ms /    69 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     977.88 ms /   187 tokens (    5.23 ms per token,   191.23 tokens per second)
llama_perf_context_print:        eval time = 

Processed 91/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     885.66 ms /   164 tokens (    5.40 ms per token,   185.17 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     894.01 ms /   165 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1136.41 ms /   211 tokens (    5.39 ms per token,   185.67 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1152.08 ms /   212 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1007.59 ms /   188 tokens (    5.36 ms per token,   186.58 tokens per second)
llama_perf_context_print:        eval time = 

Processed 92/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     950.08 ms /   176 tokens (    5.40 ms per token,   185.25 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     957.67 ms /   177 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1158.17 ms /   216 tokens (    5.36 ms per token,   186.50 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1173.93 ms /   217 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     378.97 ms /    73 tokens (    5.19 ms per token,   192.63 tokens per second)
llama_perf_context_print:        eval time = 

Processed 93/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     582.04 ms /    88 tokens (    6.61 ms per token,   151.19 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     588.37 ms /    89 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1031.22 ms /   189 tokens (    5.46 ms per token,   183.28 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1041.05 ms /   190 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1470.01 ms /   251 tokens (    5.86 ms per token,   170.75 tokens per second)
llama_perf_context_print:        eval time = 

Processed 94/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1235.84 ms /   215 tokens (    5.75 ms per token,   173.97 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1245.93 ms /   216 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     595.81 ms /    93 tokens (    6.41 ms per token,   156.09 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     601.80 ms /    94 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1220.31 ms /   216 tokens (    5.65 ms per token,   177.00 tokens per second)
llama_perf_context_print:        eval time = 

Processed 96/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     575.32 ms /   102 tokens (    5.64 ms per token,   177.29 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     582.57 ms /   103 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     510.49 ms /   103 tokens (    4.96 ms per token,   201.77 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     516.21 ms /   104 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     656.38 ms /   119 tokens (    5.52 ms per token,   181.30 tokens per second)
llama_perf_context_print:        eval time = 

Processed 97/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1089.62 ms /   205 tokens (    5.32 ms per token,   188.14 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1099.44 ms /   206 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1146.07 ms /   215 tokens (    5.33 ms per token,   187.60 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1155.89 ms /   216 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     974.46 ms /   175 tokens (    5.57 ms per token,   179.59 tokens per second)
llama_perf_context_print:        eval time = 

Processed 98/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     932.98 ms /   175 tokens (    5.33 ms per token,   187.57 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     941.74 ms /   176 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     958.11 ms /   181 tokens (    5.29 ms per token,   188.91 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     966.49 ms /   182 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     933.33 ms /   178 tokens (    5.24 ms per token,   190.71 tokens per second)
llama_perf_context_print:        eval time = 

Processed 99/100 articles...


llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1107.17 ms /   208 tokens (    5.32 ms per token,   187.87 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1116.86 ms /   209 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =    1288.92 ms /   231 tokens (    5.58 ms per token,   179.22 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1297.92 ms /   232 tokens
llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =     697.45 ms /   126 tokens (    5.54 ms per token,   180.66 tokens per second)
llama_perf_context_print:        eval time = 

Processed 100/100 articles...
Vectorstore saved to faiss_db


In [6]:
client.load_vectorstore()

Vectorstore loaded and retriever initialized.


In [7]:
docs, vecs = client.retrieve("What is Abkhaz alphabet?", top_k=5)

llama_perf_context_print:        load time =     539.69 ms
llama_perf_context_print: prompt eval time =      88.81 ms /     8 tokens (   11.10 ms per token,    90.08 tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =      97.99 ms /     9 tokens


[3.9952266216278076, -16.86469268798828, 0.44079113006591797, -12.166587829589844, 2.918513774871826, -1.7121270895004272, 1.0849204063415527, -50.32857894897461, 1.9396106004714966, 6.704031467437744, -2.515451192855835, -1.750204086303711, 15.22193431854248, 0.6257795691490173, -7.34955358505249, 2.6439383029937744, -12.371712684631348, 4.164959907531738, -17.326148986816406, 3.081815481185913, -0.8964845538139343, -0.3004414737224579, -3.47270131111145, -6.245206832885742, -0.5454325675964355, -1.021567940711975, -2.2698965072631836, -15.29393196105957, -0.0789736956357956, -2.1437089443206787, 6.799513816833496, 5.4126877784729, -11.917704582214355, 1.8241513967514038, 0.07318972796201706, 0.7108349204063416, -1.1951684951782227, -2.7781360149383545, -3.081969976425171, 0.49996235966682434, 2.287668466567993, -0.5753247141838074, 1.0012001991271973, -1.489026665687561, -2.2935118675231934, -7.600603103637695, 2.375309944152832, 2.7748098373413086, 1.7590562105178833, 2.028740644454