# Postgres RAG

## Define the  Service Context

In [1]:
import os

from llama_index import SimpleDirectoryReader

input_path = os.path.expanduser("~/iCloud/nvAlt/")
# documents_path = os.path.expanduser("~/Desktop/tmp/nvAlt/")
documents = SimpleDirectoryReader(
    input_dir=input_path,
    exclude_hidden=True,
    exclude=["Notes & Settings"]
).load_data()

In [2]:
from llama_index import (
  LangchainEmbedding,
  ServiceContext,
  VectorStoreIndex,
  set_global_service_context,
)
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

from langchain.embeddings.huggingface import HuggingFaceEmbeddings

model_path = os.path.expanduser("~/ai/models/llama2/llama-2-13b-chat.Q5_K_M.gguf")
llm = LlamaCPP(
    model_path=model_path,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens
    context_window=4096,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 1},
    # transform inputs into Llama2 format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

embed_model = LangchainEmbedding(
  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

set_global_service_context(service_context)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /Users/kevinkirkup/ai/models/llama2/llama-2-13b-chat.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  5120, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 13824,  5120,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q5

## Create the Vector Database

### Make sure we set the PGVector Vector Size since we are using HuggingFaceEmbedding

https://github.com/langchain-ai/langchain/pull/3964

In [3]:
os.environ["PGVECTOR_VECTOR_SIZE"] = "768"

In [7]:
import psycopg2

from llama_index.vector_stores import PGVectorStore
from llama_index import StorageContext

CONNECTION_STRING = "postgresql://mercury:m3ssenger@localhost:5432/postgres"
DATABASE_NAME = "nvalt_vector_db"

conn = psycopg2.connect(CONNECTION_STRING)
conn.autocommit = True

with conn.cursor() as c:
    c.execute(f"DROP DATABASE IF EXISTS {DATABASE_NAME}")
    c.execute(f"CREATE DATABASE {DATABASE_NAME}")

## Create the Vector Storage context

In [8]:
from sqlalchemy import make_url

url = make_url(CONNECTION_STRING)
vector_store = PGVectorStore.from_params(
    database=DATABASE_NAME,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="vector_data",
    embed_dim=768,  # openai embedding dimension
)

storage_context = StorageContext.from_defaults(
    vector_store=vector_store
)

# Create Tools for extracting Text

## Create the Metadata Extractors

In [9]:
from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    KeywordExtractor,
    MetadataExtractor,
    TitleExtractor,
    SummaryExtractor,
    QuestionsAnsweredExtractor
)
from llama_index.text_splitter import TokenTextSplitter

model_path = os.path.expanduser("~/ai/models/llama2/llama-2-13b-chat.Q5_K_M.gguf")
summary_llm = LlamaCPP(
    model_path=model_path,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens
    context_window=4096,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 1},
    # transform inputs into Llama2 format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

metadata_extractor = MetadataExtractor(
    extractors=[
        TitleExtractor(nodes=5),
        # QuestionsAnsweredExtractor(questions=3),
        SummaryExtractor(llm=summary_llm),
        KeywordExtractor(keywords=5),
    ],
)
text_splitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128)

node_parser = SimpleNodeParser.from_defaults(
    metadata_extractor=metadata_extractor,
    text_splitter=text_splitter,
)

******
Could not load OpenAI model. Using default LlamaCPP=llama2-13b-chat. If you intended to use OpenAI, please check your OPENAI_API_KEY.
Original error:
No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys

******
******
Could not load OpenAI model. Using default LlamaCPP=llama2-13b-chat. If you intended to use OpenAI, please check your OPENAI_API_KEY.
Original error:
No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys

******


llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /Users/kevinkirkup/ai/models/llama2/llama-2-13b-chat.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  5120, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 13824,  5120,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q5

## Parse the doucuments we want to index

In [10]:
nodes = node_parser.get_nodes_from_documents(
    documents=documents,
    show_progress=True,
)

Parsing documents into nodes:   0%|          | 0/192 [00:00<?, ?it/s]


llama_print_timings:        load time =  1763.77 ms
llama_print_timings:      sample time =    20.78 ms /    32 runs   (    0.65 ms per token,  1539.94 tokens per second)
llama_print_timings: prompt eval time =  2296.48 ms /   658 tokens (    3.49 ms per token,   286.53 tokens per second)
llama_print_timings:        eval time =   916.53 ms /    31 runs   (   29.57 ms per token,    33.82 tokens per second)
llama_print_timings:       total time =  3269.48 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1763.77 ms
llama_print_timings:      sample time =    42.49 ms /    66 runs   (    0.64 ms per token,  1553.42 tokens per second)
llama_print_timings: prompt eval time =  1770.87 ms /   542 tokens (    3.27 ms per token,   306.06 tokens per second)
llama_print_timings:        eval time =  1907.06 ms /    65 runs   (   29.34 ms per token,    34.08 tokens per second)
llama_print_timings:       total time =  3784.58 ms
Llama.generate: prefix-match hit

llama_pri

Extracting summaries:   0%|          | 0/642 [00:00<?, ?it/s]


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =    87.51 ms /   125 runs   (    0.70 ms per token,  1428.34 tokens per second)
llama_print_timings: prompt eval time =  3309.41 ms /   800 tokens (    4.14 ms per token,   241.74 tokens per second)
llama_print_timings:        eval time =  5030.92 ms /   124 runs   (   40.57 ms per token,    24.65 tokens per second)
llama_print_timings:       total time =  8565.58 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   120.42 ms /   173 runs   (    0.70 ms per token,  1436.59 tokens per second)
llama_print_timings: prompt eval time =  2240.64 ms /   536 tokens (    4.18 ms per token,   239.22 tokens per second)
llama_print_timings:        eval time =  6948.50 ms /   172 runs   (   40.40 ms per token,    24.75 tokens per second)
llama_print_timings:       total time =  9500.05 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   121.66 ms /   176 runs   (    0.69 ms per token,  1446.71 tokens per second)
llama_print_timings: prompt eval time =  2256.09 ms /   543 tokens (    4.15 ms per token,   240.68 tokens per second)
llama_print_timings:        eval time =  7146.50 ms /   175 runs   (   40.84 ms per token,    24.49 tokens per second)
llama_print_timings:       total time =  9715.04 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   165.33 ms /   238 runs   (    0.69 ms per token,  1439.55 tokens per second)
llama_print_timings: prompt eval time =  2239.95 ms /   532 tokens (    4.21 ms per token,   237.51 tokens per second)
llama_print_timings:        eval time =  9669.01 ms /   237 runs   (   40.80 ms per token,    24.51 tokens per second)
llama_print_timings:       total time = 12340.64 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   107.72 ms /   154 runs   (    0.70 ms per token,  1429.67 tokens per second)
llama_print_timings: prompt eval time =  2354.71 ms /   567 tokens (    4.15 ms per token,   240.79 tokens per second)
llama_print_timings:        eval time =  6220.52 ms /   153 runs   (   40.66 ms per token,    24.60 tokens per second)
llama_print_timings:       total time =  8855.63 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   112.22 ms /   160 runs   (    0.70 ms per token,  1425.83 tokens per second)
llama_print_timings: prompt eval time =  2489.27 ms /   577 tokens (    4.31 ms per token,   231.79 tokens per second)
llama_print_timings:        eval time =  6453.42 ms /   159 runs   (   40.59 ms per token,    24.64 tokens per second)
llama_print_timings:       total time =  9234.32 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =    81.77 ms /   118 runs   (    0.69 ms per token,  1443.02 tokens per second)
llama_print_timings: prompt eval time =  2490.55 ms /   579 tokens (    4.30 ms per token,   232.48 tokens per second)
llama_print_timings:        eval time =  4775.43 ms /   117 runs   (   40.82 ms per token,    24.50 tokens per second)
llama_print_timings:       total time =  7472.30 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   110.76 ms /   158 runs   (    0.70 ms per token,  1426.49 tokens per second)
llama_print_timings: prompt eval time =  2364.44 ms /   570 tokens (    4.15 ms per token,   241.07 tokens per second)
llama_print_timings:        eval time =  6406.38 ms /   157 runs   (   40.80 ms per token,    24.51 tokens per second)
llama_print_timings:       total time =  9052.78 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =    99.29 ms /   148 runs   (    0.67 ms per token,  1490.63 tokens per second)
llama_print_timings: prompt eval time =  2354.12 ms /   568 tokens (    4.14 ms per token,   241.28 tokens per second)
llama_print_timings:        eval time =  5993.51 ms /   147 runs   (   40.77 ms per token,    24.53 tokens per second)
llama_print_timings:       total time =  8607.15 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =    79.06 ms /   118 runs   (    0.67 ms per token,  1492.58 tokens per second)
llama_print_timings: prompt eval time =  2025.40 ms /   483 tokens (    4.19 ms per token,   238.47 tokens per second)
llama_print_timings:        eval time =  4722.94 ms /   117 runs   (   40.37 ms per token,    24.77 tokens per second)
llama_print_timings:       total time =  6950.55 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   171.47 ms /   256 runs   (    0.67 ms per token,  1492.99 tokens per second)
llama_print_timings: prompt eval time =  2504.90 ms /   597 tokens (    4.20 ms per token,   238.33 tokens per second)
llama_print_timings:        eval time = 10499.36 ms /   255 runs   (   41.17 ms per token,    24.29 tokens per second)
llama_print_timings:       total time = 13456.50 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   105.97 ms /   158 runs   (    0.67 ms per token,  1490.99 tokens per second)
llama_print_timings: prompt eval time =  2485.67 ms /   588 tokens (    4.23 ms per token,   236.56 tokens per second)
llama_print_timings:        eval time =  6421.87 ms /   157 runs   (   40.90 ms per token,    24.45 tokens per second)
llama_print_timings:       total time =  9181.42 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   139.65 ms /   198 runs   (    0.71 ms per token,  1417.84 tokens per second)
llama_print_timings: prompt eval time =  1783.34 ms /   418 tokens (    4.27 ms per token,   234.39 tokens per second)
llama_print_timings:        eval time =  7922.34 ms /   197 runs   (   40.21 ms per token,    24.87 tokens per second)
llama_print_timings:       total time = 10070.49 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   203.00 ms /   256 runs   (    0.79 ms per token,  1261.10 tokens per second)
llama_print_timings: prompt eval time =  2046.11 ms /   488 tokens (    4.19 ms per token,   238.50 tokens per second)
llama_print_timings:        eval time = 10476.23 ms /   255 runs   (   41.08 ms per token,    24.34 tokens per second)
llama_print_timings:       total time = 13067.15 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   183.79 ms /   256 runs   (    0.72 ms per token,  1392.88 tokens per second)
llama_print_timings: prompt eval time =  2588.85 ms /   531 tokens (    4.88 ms per token,   205.11 tokens per second)
llama_print_timings:        eval time = 10754.02 ms /   255 runs   (   42.17 ms per token,    23.71 tokens per second)
llama_print_timings:       total time = 13830.14 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   144.20 ms /   202 runs   (    0.71 ms per token,  1400.82 tokens per second)
llama_print_timings: prompt eval time =  1685.94 ms /   372 tokens (    4.53 ms per token,   220.65 tokens per second)
llama_print_timings:        eval time =  8316.43 ms /   201 runs   (   41.38 ms per token,    24.17 tokens per second)
llama_print_timings:       total time = 10376.99 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   138.90 ms /   196 runs   (    0.71 ms per token,  1411.08 tokens per second)
llama_print_timings: prompt eval time =  2743.55 ms /   544 tokens (    5.04 ms per token,   198.28 tokens per second)
llama_print_timings:        eval time =  8484.30 ms /   195 runs   (   43.51 ms per token,    22.98 tokens per second)
llama_print_timings:       total time = 11602.71 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   137.84 ms /   190 runs   (    0.73 ms per token,  1378.36 tokens per second)
llama_print_timings: prompt eval time =  2774.55 ms /   557 tokens (    4.98 ms per token,   200.75 tokens per second)
llama_print_timings:        eval time =  8278.12 ms /   189 runs   (   43.80 ms per token,    22.83 tokens per second)
llama_print_timings:       total time = 11426.19 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   186.45 ms /   256 runs   (    0.73 ms per token,  1373.04 tokens per second)
llama_print_timings: prompt eval time =  2695.64 ms /   545 tokens (    4.95 ms per token,   202.18 tokens per second)
llama_print_timings:        eval time = 10681.74 ms /   255 runs   (   41.89 ms per token,    23.87 tokens per second)
llama_print_timings:       total time = 13862.99 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   167.17 ms /   232 runs   (    0.72 ms per token,  1387.83 tokens per second)
llama_print_timings: prompt eval time =  2694.35 ms /   556 tokens (    4.85 ms per token,   206.36 tokens per second)
llama_print_timings:        eval time =  9709.87 ms /   231 runs   (   42.03 ms per token,    23.79 tokens per second)
llama_print_timings:       total time = 12841.01 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   135.53 ms /   189 runs   (    0.72 ms per token,  1394.51 tokens per second)
llama_print_timings: prompt eval time =  2365.39 ms /   512 tokens (    4.62 ms per token,   216.46 tokens per second)
llama_print_timings:        eval time =  8063.75 ms /   188 runs   (   42.89 ms per token,    23.31 tokens per second)
llama_print_timings:       total time = 10793.05 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   184.62 ms /   254 runs   (    0.73 ms per token,  1375.78 tokens per second)
llama_print_timings: prompt eval time =  2630.68 ms /   530 tokens (    4.96 ms per token,   201.47 tokens per second)
llama_print_timings:        eval time = 10707.89 ms /   253 runs   (   42.32 ms per token,    23.63 tokens per second)
llama_print_timings:       total time = 13821.11 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   143.87 ms /   200 runs   (    0.72 ms per token,  1390.11 tokens per second)
llama_print_timings: prompt eval time =   993.86 ms /   194 tokens (    5.12 ms per token,   195.20 tokens per second)
llama_print_timings:        eval time =  8174.69 ms /   199 runs   (   41.08 ms per token,    24.34 tokens per second)
llama_print_timings:       total time =  9546.52 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   173.78 ms /   241 runs   (    0.72 ms per token,  1386.81 tokens per second)
llama_print_timings: prompt eval time =  2623.87 ms /   521 tokens (    5.04 ms per token,   198.56 tokens per second)
llama_print_timings:        eval time = 10091.44 ms /   240 runs   (   42.05 ms per token,    23.78 tokens per second)
llama_print_timings:       total time = 13178.16 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   186.38 ms /   256 runs   (    0.73 ms per token,  1373.52 tokens per second)
llama_print_timings: prompt eval time =   991.63 ms /   202 tokens (    4.91 ms per token,   203.71 tokens per second)
llama_print_timings:        eval time = 10528.84 ms /   255 runs   (   41.29 ms per token,    24.22 tokens per second)
llama_print_timings:       total time = 11992.44 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   125.75 ms /   175 runs   (    0.72 ms per token,  1391.66 tokens per second)
llama_print_timings: prompt eval time =   701.04 ms /   160 tokens (    4.38 ms per token,   228.23 tokens per second)
llama_print_timings:        eval time =  7109.78 ms /   174 runs   (   40.86 ms per token,    24.47 tokens per second)
llama_print_timings:       total time =  8128.49 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   158.78 ms /   221 runs   (    0.72 ms per token,  1391.84 tokens per second)
llama_print_timings: prompt eval time =  1334.45 ms /   299 tokens (    4.46 ms per token,   224.06 tokens per second)
llama_print_timings:        eval time =  9140.28 ms /   220 runs   (   41.55 ms per token,    24.07 tokens per second)
llama_print_timings:       total time = 10882.29 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =    93.53 ms /   130 runs   (    0.72 ms per token,  1389.93 tokens per second)
llama_print_timings: prompt eval time =   690.68 ms /   129 tokens (    5.35 ms per token,   186.77 tokens per second)
llama_print_timings:        eval time =  5269.94 ms /   129 runs   (   40.85 ms per token,    24.48 tokens per second)
llama_print_timings:       total time =  6198.46 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   168.43 ms /   233 runs   (    0.72 ms per token,  1383.36 tokens per second)
llama_print_timings: prompt eval time =  1125.72 ms /   244 tokens (    4.61 ms per token,   216.75 tokens per second)
llama_print_timings:        eval time =  9536.97 ms /   232 runs   (   41.11 ms per token,    24.33 tokens per second)
llama_print_timings:       total time = 11097.66 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   173.79 ms /   240 runs   (    0.72 ms per token,  1380.99 tokens per second)
llama_print_timings: prompt eval time =  2703.73 ms /   574 tokens (    4.71 ms per token,   212.30 tokens per second)
llama_print_timings:        eval time = 10016.04 ms /   239 runs   (   41.91 ms per token,    23.86 tokens per second)
llama_print_timings:       total time = 13175.55 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   124.07 ms /   172 runs   (    0.72 ms per token,  1386.27 tokens per second)
llama_print_timings: prompt eval time =  2735.55 ms /   570 tokens (    4.80 ms per token,   208.37 tokens per second)
llama_print_timings:        eval time =  7248.66 ms /   171 runs   (   42.39 ms per token,    23.59 tokens per second)
llama_print_timings:       total time = 10311.66 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   182.94 ms /   256 runs   (    0.71 ms per token,  1399.38 tokens per second)
llama_print_timings: prompt eval time =  2869.61 ms /   583 tokens (    4.92 ms per token,   203.16 tokens per second)
llama_print_timings:        eval time = 10789.85 ms /   255 runs   (   42.31 ms per token,    23.63 tokens per second)
llama_print_timings:       total time = 14142.65 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   182.93 ms /   256 runs   (    0.71 ms per token,  1399.43 tokens per second)
llama_print_timings: prompt eval time =  1870.06 ms /   389 tokens (    4.81 ms per token,   208.01 tokens per second)
llama_print_timings:        eval time = 10877.31 ms /   255 runs   (   42.66 ms per token,    23.44 tokens per second)
llama_print_timings:       total time = 13238.00 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   133.88 ms /   186 runs   (    0.72 ms per token,  1389.30 tokens per second)
llama_print_timings: prompt eval time =  1016.63 ms /   198 tokens (    5.13 ms per token,   194.76 tokens per second)
llama_print_timings:        eval time =  7660.38 ms /   185 runs   (   41.41 ms per token,    24.15 tokens per second)
llama_print_timings:       total time =  9023.15 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   103.48 ms /   144 runs   (    0.72 ms per token,  1391.51 tokens per second)
llama_print_timings: prompt eval time =  2069.42 ms /   430 tokens (    4.81 ms per token,   207.79 tokens per second)
llama_print_timings:        eval time =  5894.15 ms /   143 runs   (   41.22 ms per token,    24.26 tokens per second)
llama_print_timings:       total time =  8223.68 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   105.36 ms /   145 runs   (    0.73 ms per token,  1376.23 tokens per second)
llama_print_timings: prompt eval time =  1920.38 ms /   416 tokens (    4.62 ms per token,   216.62 tokens per second)
llama_print_timings:        eval time =  5959.72 ms /   144 runs   (   41.39 ms per token,    24.16 tokens per second)
llama_print_timings:       total time =  8142.04 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   143.36 ms /   204 runs   (    0.70 ms per token,  1423.03 tokens per second)
llama_print_timings: prompt eval time =  2183.51 ms /   441 tokens (    4.95 ms per token,   201.97 tokens per second)
llama_print_timings:        eval time =  8705.24 ms /   203 runs   (   42.88 ms per token,    23.32 tokens per second)
llama_print_timings:       total time = 11272.62 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   129.60 ms /   184 runs   (    0.70 ms per token,  1419.79 tokens per second)
llama_print_timings: prompt eval time =  3016.55 ms /   595 tokens (    5.07 ms per token,   197.25 tokens per second)
llama_print_timings:        eval time =  7837.44 ms /   183 runs   (   42.83 ms per token,    23.35 tokens per second)
llama_print_timings:       total time = 11202.65 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   146.51 ms /   209 runs   (    0.70 ms per token,  1426.56 tokens per second)
llama_print_timings: prompt eval time =  2835.04 ms /   565 tokens (    5.02 ms per token,   199.29 tokens per second)
llama_print_timings:        eval time =  8874.96 ms /   208 runs   (   42.67 ms per token,    23.44 tokens per second)
llama_print_timings:       total time = 12105.77 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   140.20 ms /   197 runs   (    0.71 ms per token,  1405.12 tokens per second)
llama_print_timings: prompt eval time =  1732.30 ms /   376 tokens (    4.61 ms per token,   217.05 tokens per second)
llama_print_timings:        eval time =  8156.31 ms /   196 runs   (   41.61 ms per token,    24.03 tokens per second)
llama_print_timings:       total time = 10250.87 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   118.50 ms /   168 runs   (    0.71 ms per token,  1417.73 tokens per second)
llama_print_timings: prompt eval time =  1027.79 ms /   219 tokens (    4.69 ms per token,   213.08 tokens per second)
llama_print_timings:        eval time =  6922.14 ms /   167 runs   (   41.45 ms per token,    24.13 tokens per second)
llama_print_timings:       total time =  8263.29 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   129.30 ms /   181 runs   (    0.71 ms per token,  1399.90 tokens per second)
llama_print_timings: prompt eval time =  2947.25 ms /   591 tokens (    4.99 ms per token,   200.53 tokens per second)
llama_print_timings:        eval time =  7605.80 ms /   180 runs   (   42.25 ms per token,    23.67 tokens per second)
llama_print_timings:       total time = 10886.56 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   154.20 ms /   216 runs   (    0.71 ms per token,  1400.80 tokens per second)
llama_print_timings: prompt eval time =  2986.81 ms /   594 tokens (    5.03 ms per token,   198.87 tokens per second)
llama_print_timings:        eval time =  9187.21 ms /   215 runs   (   42.73 ms per token,    23.40 tokens per second)
llama_print_timings:       total time = 12585.09 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   182.62 ms /   256 runs   (    0.71 ms per token,  1401.79 tokens per second)
llama_print_timings: prompt eval time =  2843.56 ms /   575 tokens (    4.95 ms per token,   202.21 tokens per second)
llama_print_timings:        eval time = 11054.51 ms /   255 runs   (   43.35 ms per token,    23.07 tokens per second)
llama_print_timings:       total time = 14380.11 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   184.58 ms /   256 runs   (    0.72 ms per token,  1386.97 tokens per second)
llama_print_timings: prompt eval time =  2712.99 ms /   576 tokens (    4.71 ms per token,   212.31 tokens per second)
llama_print_timings:        eval time = 10804.61 ms /   255 runs   (   42.37 ms per token,    23.60 tokens per second)
llama_print_timings:       total time = 13999.31 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   130.95 ms /   183 runs   (    0.72 ms per token,  1397.46 tokens per second)
llama_print_timings: prompt eval time =  2861.45 ms /   582 tokens (    4.92 ms per token,   203.39 tokens per second)
llama_print_timings:        eval time =  7595.26 ms /   182 runs   (   41.73 ms per token,    23.96 tokens per second)
llama_print_timings:       total time = 10791.65 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   186.79 ms /   256 runs   (    0.73 ms per token,  1370.54 tokens per second)
llama_print_timings: prompt eval time =  2889.14 ms /   592 tokens (    4.88 ms per token,   204.90 tokens per second)
llama_print_timings:        eval time = 10959.36 ms /   255 runs   (   42.98 ms per token,    23.27 tokens per second)
llama_print_timings:       total time = 14354.58 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   154.13 ms /   213 runs   (    0.72 ms per token,  1381.91 tokens per second)
llama_print_timings: prompt eval time =  3162.65 ms /   616 tokens (    5.13 ms per token,   194.77 tokens per second)
llama_print_timings:        eval time =  9096.81 ms /   212 runs   (   42.91 ms per token,    23.30 tokens per second)
llama_print_timings:       total time = 12680.22 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   143.22 ms /   196 runs   (    0.73 ms per token,  1368.52 tokens per second)
llama_print_timings: prompt eval time =   465.68 ms /    96 tokens (    4.85 ms per token,   206.15 tokens per second)
llama_print_timings:        eval time =  7957.48 ms /   195 runs   (   40.81 ms per token,    24.51 tokens per second)
llama_print_timings:       total time =  8788.45 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   156.98 ms /   218 runs   (    0.72 ms per token,  1388.73 tokens per second)
llama_print_timings: prompt eval time =  1271.08 ms /   280 tokens (    4.54 ms per token,   220.29 tokens per second)
llama_print_timings:        eval time =  8989.03 ms /   217 runs   (   41.42 ms per token,    24.14 tokens per second)
llama_print_timings:       total time = 10661.63 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   103.53 ms /   145 runs   (    0.71 ms per token,  1400.52 tokens per second)
llama_print_timings: prompt eval time =  2402.08 ms /   510 tokens (    4.71 ms per token,   212.32 tokens per second)
llama_print_timings:        eval time =  5935.22 ms /   144 runs   (   41.22 ms per token,    24.26 tokens per second)
llama_print_timings:       total time =  8604.06 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   115.81 ms /   162 runs   (    0.71 ms per token,  1398.82 tokens per second)
llama_print_timings: prompt eval time =  1253.40 ms /   266 tokens (    4.71 ms per token,   212.22 tokens per second)
llama_print_timings:        eval time =  6608.31 ms /   161 runs   (   41.05 ms per token,    24.36 tokens per second)
llama_print_timings:       total time =  8153.31 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =    86.82 ms /   122 runs   (    0.71 ms per token,  1405.22 tokens per second)
llama_print_timings: prompt eval time =   703.37 ms /   130 tokens (    5.41 ms per token,   184.83 tokens per second)
llama_print_timings:        eval time =  4978.86 ms /   121 runs   (   41.15 ms per token,    24.30 tokens per second)
llama_print_timings:       total time =  5905.63 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =    85.36 ms /   121 runs   (    0.71 ms per token,  1417.48 tokens per second)
llama_print_timings: prompt eval time =   466.68 ms /    82 tokens (    5.69 ms per token,   175.71 tokens per second)
llama_print_timings:        eval time =  4945.20 ms /   120 runs   (   41.21 ms per token,    24.27 tokens per second)
llama_print_timings:       total time =  5633.38 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   107.90 ms /   151 runs   (    0.71 ms per token,  1399.47 tokens per second)
llama_print_timings: prompt eval time =   713.55 ms /   139 tokens (    5.13 ms per token,   194.80 tokens per second)
llama_print_timings:        eval time =  6216.50 ms /   150 runs   (   41.44 ms per token,    24.13 tokens per second)
llama_print_timings:       total time =  7209.57 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   110.15 ms /   153 runs   (    0.72 ms per token,  1389.04 tokens per second)
llama_print_timings: prompt eval time =   570.72 ms /   121 tokens (    4.72 ms per token,   212.01 tokens per second)
llama_print_timings:        eval time =  6182.76 ms /   152 runs   (   40.68 ms per token,    24.58 tokens per second)
llama_print_timings:       total time =  7029.12 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   105.61 ms /   149 runs   (    0.71 ms per token,  1410.85 tokens per second)
llama_print_timings: prompt eval time =  1550.35 ms /   304 tokens (    5.10 ms per token,   196.09 tokens per second)
llama_print_timings:        eval time =  6377.26 ms /   148 runs   (   43.09 ms per token,    23.21 tokens per second)
llama_print_timings:       total time =  8220.87 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   101.16 ms /   142 runs   (    0.71 ms per token,  1403.70 tokens per second)
llama_print_timings: prompt eval time =   911.93 ms /   167 tokens (    5.46 ms per token,   183.13 tokens per second)
llama_print_timings:        eval time =  5848.44 ms /   141 runs   (   41.48 ms per token,    24.11 tokens per second)
llama_print_timings:       total time =  7031.74 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   144.08 ms /   202 runs   (    0.71 ms per token,  1402.00 tokens per second)
llama_print_timings: prompt eval time =  3248.87 ms /   638 tokens (    5.09 ms per token,   196.38 tokens per second)
llama_print_timings:        eval time =  8919.36 ms /   201 runs   (   44.37 ms per token,    22.54 tokens per second)
llama_print_timings:       total time = 12588.74 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   103.28 ms /   144 runs   (    0.72 ms per token,  1394.20 tokens per second)
llama_print_timings: prompt eval time =  3539.41 ms /   696 tokens (    5.09 ms per token,   196.64 tokens per second)
llama_print_timings:        eval time =  6240.52 ms /   143 runs   (   43.64 ms per token,    22.91 tokens per second)
llama_print_timings:       total time = 10079.35 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   183.26 ms /   256 runs   (    0.72 ms per token,  1396.94 tokens per second)
llama_print_timings: prompt eval time =  1971.23 ms /   391 tokens (    5.04 ms per token,   198.35 tokens per second)
llama_print_timings:        eval time = 11108.41 ms /   255 runs   (   43.56 ms per token,    22.96 tokens per second)
llama_print_timings:       total time = 13619.81 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   146.26 ms /   204 runs   (    0.72 ms per token,  1394.80 tokens per second)
llama_print_timings: prompt eval time =   755.99 ms /   160 tokens (    4.72 ms per token,   211.64 tokens per second)
llama_print_timings:        eval time =  8690.47 ms /   203 runs   (   42.81 ms per token,    23.36 tokens per second)
llama_print_timings:       total time =  9872.61 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   181.85 ms /   256 runs   (    0.71 ms per token,  1407.73 tokens per second)
llama_print_timings: prompt eval time =  1330.93 ms /   238 tokens (    5.59 ms per token,   178.82 tokens per second)
llama_print_timings:        eval time = 11510.28 ms /   255 runs   (   45.14 ms per token,    22.15 tokens per second)
llama_print_timings:       total time = 13383.75 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   135.20 ms /   188 runs   (    0.72 ms per token,  1390.49 tokens per second)
llama_print_timings: prompt eval time =   515.03 ms /    78 tokens (    6.60 ms per token,   151.45 tokens per second)
llama_print_timings:        eval time =  8196.69 ms /   187 runs   (   43.83 ms per token,    22.81 tokens per second)
llama_print_timings:       total time =  9109.28 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   129.56 ms /   182 runs   (    0.71 ms per token,  1404.78 tokens per second)
llama_print_timings: prompt eval time =  2929.45 ms /   535 tokens (    5.48 ms per token,   182.63 tokens per second)
llama_print_timings:        eval time =  8007.42 ms /   181 runs   (   44.24 ms per token,    22.60 tokens per second)
llama_print_timings:       total time = 11319.56 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   139.80 ms /   196 runs   (    0.71 ms per token,  1402.01 tokens per second)
llama_print_timings: prompt eval time =  2036.18 ms /   390 tokens (    5.22 ms per token,   191.53 tokens per second)
llama_print_timings:        eval time =  8566.07 ms /   195 runs   (   43.93 ms per token,    22.76 tokens per second)
llama_print_timings:       total time = 11013.78 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   174.20 ms /   243 runs   (    0.72 ms per token,  1394.93 tokens per second)
llama_print_timings: prompt eval time =  1986.71 ms /   377 tokens (    5.27 ms per token,   189.76 tokens per second)
llama_print_timings:        eval time = 11194.47 ms /   242 runs   (   46.26 ms per token,    21.62 tokens per second)
llama_print_timings:       total time = 13698.72 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   123.54 ms /   173 runs   (    0.71 ms per token,  1400.33 tokens per second)
llama_print_timings: prompt eval time =  2518.79 ms /   469 tokens (    5.37 ms per token,   186.20 tokens per second)
llama_print_timings:        eval time =  7956.22 ms /   172 runs   (   46.26 ms per token,    21.62 tokens per second)
llama_print_timings:       total time = 10841.57 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   182.86 ms /   256 runs   (    0.71 ms per token,  1400.00 tokens per second)
llama_print_timings: prompt eval time =  2770.83 ms /   530 tokens (    5.23 ms per token,   191.28 tokens per second)
llama_print_timings:        eval time = 11301.70 ms /   255 runs   (   44.32 ms per token,    22.56 tokens per second)
llama_print_timings:       total time = 14616.42 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   183.21 ms /   256 runs   (    0.72 ms per token,  1397.30 tokens per second)
llama_print_timings: prompt eval time =  2355.41 ms /   460 tokens (    5.12 ms per token,   195.29 tokens per second)
llama_print_timings:        eval time = 11245.38 ms /   255 runs   (   44.10 ms per token,    22.68 tokens per second)
llama_print_timings:       total time = 14144.49 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   182.53 ms /   256 runs   (    0.71 ms per token,  1402.53 tokens per second)
llama_print_timings: prompt eval time =  2943.66 ms /   546 tokens (    5.39 ms per token,   185.48 tokens per second)
llama_print_timings:        eval time = 11451.63 ms /   255 runs   (   44.91 ms per token,    22.27 tokens per second)
llama_print_timings:       total time = 14936.14 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   133.85 ms /   189 runs   (    0.71 ms per token,  1412.02 tokens per second)
llama_print_timings: prompt eval time =  1062.95 ms /   196 tokens (    5.42 ms per token,   184.39 tokens per second)
llama_print_timings:        eval time =  8214.18 ms /   188 runs   (   43.69 ms per token,    22.89 tokens per second)
llama_print_timings:       total time =  9670.50 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   118.20 ms /   165 runs   (    0.72 ms per token,  1395.99 tokens per second)
llama_print_timings: prompt eval time =   724.15 ms /   157 tokens (    4.61 ms per token,   216.80 tokens per second)
llama_print_timings:        eval time =  6741.96 ms /   164 runs   (   41.11 ms per token,    24.33 tokens per second)
llama_print_timings:       total time =  7774.72 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   181.22 ms /   256 runs   (    0.71 ms per token,  1412.63 tokens per second)
llama_print_timings: prompt eval time =  2220.85 ms /   432 tokens (    5.14 ms per token,   194.52 tokens per second)
llama_print_timings:        eval time = 11114.84 ms /   255 runs   (   43.59 ms per token,    22.94 tokens per second)
llama_print_timings:       total time = 13835.59 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   140.65 ms /   197 runs   (    0.71 ms per token,  1400.62 tokens per second)
llama_print_timings: prompt eval time =  1921.67 ms /   409 tokens (    4.70 ms per token,   212.84 tokens per second)
llama_print_timings:        eval time =  8194.67 ms /   196 runs   (   41.81 ms per token,    23.92 tokens per second)
llama_print_timings:       total time = 10484.30 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  2261.89 ms
llama_print_timings:      sample time =   104.69 ms /   147 runs   (    0.71 ms per token,  1404.09 tokens per second)
llama_print_timings: prompt eval time =   868.33 ms /   180 tokens (    4.82 ms per token,   207.29 tokens per second)
llama_print_timings:        eval time =  5968.79 ms /   146 runs   (   40.88 ms per token,    24.46 tokens per second)
llama_print_timings:       total time =  7107.73 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    23.37 ms /    32 runs   (    0.73 ms per token,  1369.57 tokens per second)
llama_print_timings: prompt eval time =  1683.27 ms /   514 tokens (    3.27 ms per token,   305.36 tokens per second)
llama_print_timings:        eval time =   916.62 ms /    31 runs   (   29.57 ms per token,    33.82 tokens per second)
llama_print_timings:       total time =  2662.74 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    30.02 ms /    42 runs   (    0.71 ms per token,  1399.16 tokens per second)
llama_print_timings: prompt eval time =   792.98 ms /   251 tokens (    3.16 ms per token,   316.53 tokens per second)
llama_print_timings:        eval time =  1169.99 ms /    41 runs   (   28.54 ms per token,    35.04 tokens per second)
llama_print_timings:       total time =  2040.24 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    35.04 ms /    49 runs   (    0.72 ms per token,  1398.40 tokens per second)
llama_print_timings: prompt eval time =  1866.02 ms /   557 tokens (    3.35 ms per token,   298.50 tokens per second)
llama_print_timings:        eval time =  1439.55 ms /    48 runs   (   29.99 ms per token,    33.34 tokens per second)
llama_print_timings:       total time =  3392.94 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   180.98 ms /   256 runs   (    0.71 ms per token,  1414.49 tokens per second)
llama_print_timings: prompt eval time =  1767.86 ms /   515 tokens (    3.43 ms per token,   291.31 tokens per second)
llama_print_timings:        eval time =  7769.92 ms /   255 runs   (   30.47 ms per token,    32.82 tokens per second)
llama_print_timings:       total time = 10013.56 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.40 ms /    45 runs   (    0.70 ms per token,  1433.08 tokens per second)
llama_print_timings: prompt eval time =  1983.72 ms /   585 tokens (    3.39 ms per token,   294.90 tokens per second)
llama_print_timings:        eval time =  1324.43 ms /    44 runs   (   30.10 ms per token,    33.22 tokens per second)
llama_print_timings:       total time =  3386.54 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   182.31 ms /   256 runs   (    0.71 ms per token,  1404.19 tokens per second)
llama_print_timings: prompt eval time =  1980.60 ms /   583 tokens (    3.40 ms per token,   294.36 tokens per second)
llama_print_timings:        eval time =  7779.16 ms /   255 runs   (   30.51 ms per token,    32.78 tokens per second)
llama_print_timings:       total time = 10230.03 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    37.28 ms /    52 runs   (    0.72 ms per token,  1394.92 tokens per second)
llama_print_timings: prompt eval time =  1933.00 ms /   564 tokens (    3.43 ms per token,   291.77 tokens per second)
llama_print_timings:        eval time =  1524.72 ms /    51 runs   (   29.90 ms per token,    33.45 tokens per second)
llama_print_timings:       total time =  3550.98 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.06 ms /    43 runs   (    0.72 ms per token,  1384.37 tokens per second)
llama_print_timings: prompt eval time =  2041.44 ms /   580 tokens (    3.52 ms per token,   284.11 tokens per second)
llama_print_timings:        eval time =  1261.65 ms /    42 runs   (   30.04 ms per token,    33.29 tokens per second)
llama_print_timings:       total time =  3382.27 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.18 ms /    43 runs   (    0.73 ms per token,  1379.13 tokens per second)
llama_print_timings: prompt eval time =  1994.76 ms /   560 tokens (    3.56 ms per token,   280.74 tokens per second)
llama_print_timings:        eval time =  1250.01 ms /    42 runs   (   29.76 ms per token,    33.60 tokens per second)
llama_print_timings:       total time =  3328.55 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.53 ms /    44 runs   (    0.72 ms per token,  1395.72 tokens per second)
llama_print_timings: prompt eval time =  2001.03 ms /   567 tokens (    3.53 ms per token,   283.35 tokens per second)
llama_print_timings:        eval time =  1287.48 ms /    43 runs   (   29.94 ms per token,    33.40 tokens per second)
llama_print_timings:       total time =  3368.58 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   183.82 ms /   256 runs   (    0.72 ms per token,  1392.68 tokens per second)
llama_print_timings: prompt eval time =   481.44 ms /   141 tokens (    3.41 ms per token,   292.87 tokens per second)
llama_print_timings:        eval time =  7242.02 ms /   255 runs   (   28.40 ms per token,    35.21 tokens per second)
llama_print_timings:       total time =  8205.65 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    36.07 ms /    50 runs   (    0.72 ms per token,  1386.12 tokens per second)
llama_print_timings: prompt eval time =  2002.12 ms /   608 tokens (    3.29 ms per token,   303.68 tokens per second)
llama_print_timings:        eval time =  1470.28 ms /    49 runs   (   30.01 ms per token,    33.33 tokens per second)
llama_print_timings:       total time =  3563.88 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    35.83 ms /    50 runs   (    0.72 ms per token,  1395.36 tokens per second)
llama_print_timings: prompt eval time =  1287.04 ms /   408 tokens (    3.15 ms per token,   317.01 tokens per second)
llama_print_timings:        eval time =  1418.17 ms /    49 runs   (   28.94 ms per token,    34.55 tokens per second)
llama_print_timings:       total time =  2796.59 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    30.49 ms /    43 runs   (    0.71 ms per token,  1410.34 tokens per second)
llama_print_timings: prompt eval time =   324.59 ms /    89 tokens (    3.65 ms per token,   274.19 tokens per second)
llama_print_timings:        eval time =  1170.76 ms /    42 runs   (   27.88 ms per token,    35.87 tokens per second)
llama_print_timings:       total time =  1570.98 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   179.57 ms /   256 runs   (    0.70 ms per token,  1425.65 tokens per second)
llama_print_timings: prompt eval time =  1624.52 ms /   503 tokens (    3.23 ms per token,   309.63 tokens per second)
llama_print_timings:        eval time =  7661.11 ms /   255 runs   (   30.04 ms per token,    33.29 tokens per second)
llama_print_timings:       total time =  9752.57 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    34.90 ms /    48 runs   (    0.73 ms per token,  1375.56 tokens per second)
llama_print_timings: prompt eval time =   688.39 ms /   208 tokens (    3.31 ms per token,   302.16 tokens per second)
llama_print_timings:        eval time =  1329.22 ms /    47 runs   (   28.28 ms per token,    35.36 tokens per second)
llama_print_timings:       total time =  2103.57 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    27.75 ms /    38 runs   (    0.73 ms per token,  1369.12 tokens per second)
llama_print_timings: prompt eval time =  1259.15 ms /   384 tokens (    3.28 ms per token,   304.97 tokens per second)
llama_print_timings:        eval time =  1063.61 ms /    37 runs   (   28.75 ms per token,    34.79 tokens per second)
llama_print_timings:       total time =  2397.23 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    27.56 ms /    40 runs   (    0.69 ms per token,  1451.43 tokens per second)
llama_print_timings: prompt eval time =   928.35 ms /   288 tokens (    3.22 ms per token,   310.23 tokens per second)
llama_print_timings:        eval time =  1122.08 ms /    39 runs   (   28.77 ms per token,    34.76 tokens per second)
llama_print_timings:       total time =  2119.70 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    32.42 ms /    45 runs   (    0.72 ms per token,  1388.20 tokens per second)
llama_print_timings: prompt eval time =  1687.61 ms /   487 tokens (    3.47 ms per token,   288.57 tokens per second)
llama_print_timings:        eval time =  1295.18 ms /    44 runs   (   29.44 ms per token,    33.97 tokens per second)
llama_print_timings:       total time =  3065.37 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.64 ms /    44 runs   (    0.72 ms per token,  1390.56 tokens per second)
llama_print_timings: prompt eval time =  2101.49 ms /   587 tokens (    3.58 ms per token,   279.33 tokens per second)
llama_print_timings:        eval time =  1291.85 ms /    43 runs   (   30.04 ms per token,    33.29 tokens per second)
llama_print_timings:       total time =  3473.10 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    34.80 ms /    50 runs   (    0.70 ms per token,  1436.86 tokens per second)
llama_print_timings: prompt eval time =  1917.93 ms /   549 tokens (    3.49 ms per token,   286.25 tokens per second)
llama_print_timings:        eval time =  1462.83 ms /    49 runs   (   29.85 ms per token,    33.50 tokens per second)
llama_print_timings:       total time =  3469.81 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    25.86 ms /    37 runs   (    0.70 ms per token,  1431.00 tokens per second)
llama_print_timings: prompt eval time =   585.88 ms /   174 tokens (    3.37 ms per token,   296.99 tokens per second)
llama_print_timings:        eval time =  1008.00 ms /    36 runs   (   28.00 ms per token,    35.71 tokens per second)
llama_print_timings:       total time =  1658.97 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    28.13 ms /    39 runs   (    0.72 ms per token,  1386.47 tokens per second)
llama_print_timings: prompt eval time =  1192.47 ms /   380 tokens (    3.14 ms per token,   318.67 tokens per second)
llama_print_timings:        eval time =  1104.52 ms /    38 runs   (   29.07 ms per token,    34.40 tokens per second)
llama_print_timings:       total time =  2367.83 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    28.91 ms /    41 runs   (    0.71 ms per token,  1418.29 tokens per second)
llama_print_timings: prompt eval time =  1089.02 ms /   327 tokens (    3.33 ms per token,   300.27 tokens per second)
llama_print_timings:        eval time =  1159.58 ms /    40 runs   (   28.99 ms per token,    34.50 tokens per second)
llama_print_timings:       total time =  2320.65 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    27.54 ms /    40 runs   (    0.69 ms per token,  1452.38 tokens per second)
llama_print_timings: prompt eval time =   394.38 ms /   102 tokens (    3.87 ms per token,   258.64 tokens per second)
llama_print_timings:        eval time =  1087.17 ms /    39 runs   (   27.88 ms per token,    35.87 tokens per second)
llama_print_timings:       total time =  1551.34 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    27.98 ms /    41 runs   (    0.68 ms per token,  1465.28 tokens per second)
llama_print_timings: prompt eval time =  1985.36 ms /   562 tokens (    3.53 ms per token,   283.07 tokens per second)
llama_print_timings:        eval time =  1195.74 ms /    40 runs   (   29.89 ms per token,    33.45 tokens per second)
llama_print_timings:       total time =  3253.94 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    27.46 ms /    40 runs   (    0.69 ms per token,  1456.93 tokens per second)
llama_print_timings: prompt eval time =  1945.79 ms /   547 tokens (    3.56 ms per token,   281.12 tokens per second)
llama_print_timings:        eval time =  1164.37 ms /    39 runs   (   29.86 ms per token,    33.49 tokens per second)
llama_print_timings:       total time =  3179.87 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    40.55 ms /    59 runs   (    0.69 ms per token,  1454.99 tokens per second)
llama_print_timings: prompt eval time =  1873.54 ms /   543 tokens (    3.45 ms per token,   289.83 tokens per second)
llama_print_timings:        eval time =  1738.69 ms /    58 runs   (   29.98 ms per token,    33.36 tokens per second)
llama_print_timings:       total time =  3715.25 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   177.96 ms /   256 runs   (    0.70 ms per token,  1438.55 tokens per second)
llama_print_timings: prompt eval time =  2030.26 ms /   490 tokens (    4.14 ms per token,   241.35 tokens per second)
llama_print_timings:        eval time =  7655.37 ms /   255 runs   (   30.02 ms per token,    33.31 tokens per second)
llama_print_timings:       total time = 10174.92 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   177.75 ms /   256 runs   (    0.69 ms per token,  1440.19 tokens per second)
llama_print_timings: prompt eval time =   852.39 ms /   244 tokens (    3.49 ms per token,   286.26 tokens per second)
llama_print_timings:        eval time =  7366.74 ms /   255 runs   (   28.89 ms per token,    34.62 tokens per second)
llama_print_timings:       total time =  8717.02 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    33.83 ms /    49 runs   (    0.69 ms per token,  1448.42 tokens per second)
llama_print_timings: prompt eval time =  2029.12 ms /   525 tokens (    3.86 ms per token,   258.73 tokens per second)
llama_print_timings:        eval time =  1440.63 ms /    48 runs   (   30.01 ms per token,    33.32 tokens per second)
llama_print_timings:       total time =  3557.51 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    36.07 ms /    49 runs   (    0.74 ms per token,  1358.51 tokens per second)
llama_print_timings: prompt eval time =  1972.46 ms /   525 tokens (    3.76 ms per token,   266.17 tokens per second)
llama_print_timings:        eval time =  1439.67 ms /    48 runs   (   29.99 ms per token,    33.34 tokens per second)
llama_print_timings:       total time =  3504.84 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   176.51 ms /   256 runs   (    0.69 ms per token,  1450.37 tokens per second)
llama_print_timings: prompt eval time =  1629.03 ms /   426 tokens (    3.82 ms per token,   261.51 tokens per second)
llama_print_timings:        eval time =  7617.05 ms /   255 runs   (   29.87 ms per token,    33.48 tokens per second)
llama_print_timings:       total time =  9719.11 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    30.97 ms /    45 runs   (    0.69 ms per token,  1452.83 tokens per second)
llama_print_timings: prompt eval time =   402.23 ms /   110 tokens (    3.66 ms per token,   273.48 tokens per second)
llama_print_timings:        eval time =  1229.19 ms /    44 runs   (   27.94 ms per token,    35.80 tokens per second)
llama_print_timings:       total time =  1711.63 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    23.92 ms /    35 runs   (    0.68 ms per token,  1463.15 tokens per second)
llama_print_timings: prompt eval time =   824.70 ms /   250 tokens (    3.30 ms per token,   303.14 tokens per second)
llama_print_timings:        eval time =   976.08 ms /    34 runs   (   28.71 ms per token,    34.83 tokens per second)
llama_print_timings:       total time =  1861.82 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    30.05 ms /    44 runs   (    0.68 ms per token,  1464.42 tokens per second)
llama_print_timings: prompt eval time =   216.81 ms /    51 tokens (    4.25 ms per token,   235.23 tokens per second)
llama_print_timings:        eval time =  1196.93 ms /    43 runs   (   27.84 ms per token,    35.93 tokens per second)
llama_print_timings:       total time =  1489.69 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    22.41 ms /    32 runs   (    0.70 ms per token,  1427.62 tokens per second)
llama_print_timings: prompt eval time =  2168.32 ms /   595 tokens (    3.64 ms per token,   274.41 tokens per second)
llama_print_timings:        eval time =   940.50 ms /    31 runs   (   30.34 ms per token,    32.96 tokens per second)
llama_print_timings:       total time =  3165.60 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    20.88 ms /    30 runs   (    0.70 ms per token,  1436.85 tokens per second)
llama_print_timings: prompt eval time =  2176.04 ms /   606 tokens (    3.59 ms per token,   278.49 tokens per second)
llama_print_timings:        eval time =   876.42 ms /    29 runs   (   30.22 ms per token,    33.09 tokens per second)
llama_print_timings:       total time =  3111.04 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   177.05 ms /   256 runs   (    0.69 ms per token,  1445.89 tokens per second)
llama_print_timings: prompt eval time =  2160.72 ms /   580 tokens (    3.73 ms per token,   268.43 tokens per second)
llama_print_timings:        eval time =  7817.88 ms /   255 runs   (   30.66 ms per token,    32.62 tokens per second)
llama_print_timings:       total time = 10454.73 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   177.13 ms /   256 runs   (    0.69 ms per token,  1445.26 tokens per second)
llama_print_timings: prompt eval time =   698.97 ms /   217 tokens (    3.22 ms per token,   310.46 tokens per second)
llama_print_timings:        eval time =  7334.34 ms /   255 runs   (   28.76 ms per token,    34.77 tokens per second)
llama_print_timings:       total time =  8521.06 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    64.55 ms /    94 runs   (    0.69 ms per token,  1456.26 tokens per second)
llama_print_timings: prompt eval time =  2235.40 ms /   600 tokens (    3.73 ms per token,   268.41 tokens per second)
llama_print_timings:        eval time =  2809.56 ms /    93 runs   (   30.21 ms per token,    33.10 tokens per second)
llama_print_timings:       total time =  5212.10 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    35.14 ms /    51 runs   (    0.69 ms per token,  1451.46 tokens per second)
llama_print_timings: prompt eval time =  2004.07 ms /   564 tokens (    3.55 ms per token,   281.43 tokens per second)
llama_print_timings:        eval time =  1501.87 ms /    50 runs   (   30.04 ms per token,    33.29 tokens per second)
llama_print_timings:       total time =  3595.70 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.98 ms /    47 runs   (    0.68 ms per token,  1469.48 tokens per second)
llama_print_timings: prompt eval time =  2137.61 ms /   587 tokens (    3.64 ms per token,   274.61 tokens per second)
llama_print_timings:        eval time =  1386.21 ms /    46 runs   (   30.14 ms per token,    33.18 tokens per second)
llama_print_timings:       total time =  3606.85 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.77 ms /    46 runs   (    0.69 ms per token,  1448.04 tokens per second)
llama_print_timings: prompt eval time =  2098.38 ms /   578 tokens (    3.63 ms per token,   275.45 tokens per second)
llama_print_timings:        eval time =  1352.64 ms /    45 runs   (   30.06 ms per token,    33.27 tokens per second)
llama_print_timings:       total time =  3531.01 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    44.27 ms /    65 runs   (    0.68 ms per token,  1468.43 tokens per second)
llama_print_timings: prompt eval time =  1865.55 ms /   521 tokens (    3.58 ms per token,   279.27 tokens per second)
llama_print_timings:        eval time =  1909.23 ms /    64 runs   (   29.83 ms per token,    33.52 tokens per second)
llama_print_timings:       total time =  3887.45 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   176.02 ms /   256 runs   (    0.69 ms per token,  1454.36 tokens per second)
llama_print_timings: prompt eval time =  2293.91 ms /   648 tokens (    3.54 ms per token,   282.49 tokens per second)
llama_print_timings:        eval time =  7863.41 ms /   255 runs   (   30.84 ms per token,    32.43 tokens per second)
llama_print_timings:       total time = 10632.23 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    29.53 ms /    43 runs   (    0.69 ms per token,  1456.15 tokens per second)
llama_print_timings: prompt eval time =  2088.86 ms /   608 tokens (    3.44 ms per token,   291.07 tokens per second)
llama_print_timings:        eval time =  1271.77 ms /    42 runs   (   30.28 ms per token,    33.02 tokens per second)
llama_print_timings:       total time =  3437.02 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    43.58 ms /    63 runs   (    0.69 ms per token,  1445.62 tokens per second)
llama_print_timings: prompt eval time =  2092.06 ms /   594 tokens (    3.52 ms per token,   283.93 tokens per second)
llama_print_timings:        eval time =  1875.52 ms /    62 runs   (   30.25 ms per token,    33.06 tokens per second)
llama_print_timings:       total time =  4080.70 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.25 ms /    45 runs   (    0.69 ms per token,  1439.95 tokens per second)
llama_print_timings: prompt eval time =  1654.01 ms /   485 tokens (    3.41 ms per token,   293.23 tokens per second)
llama_print_timings:        eval time =  1307.95 ms /    44 runs   (   29.73 ms per token,    33.64 tokens per second)
llama_print_timings:       total time =  3041.86 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    28.92 ms /    42 runs   (    0.69 ms per token,  1452.28 tokens per second)
llama_print_timings: prompt eval time =   895.06 ms /   274 tokens (    3.27 ms per token,   306.13 tokens per second)
llama_print_timings:        eval time =  1179.36 ms /    41 runs   (   28.76 ms per token,    34.76 tokens per second)
llama_print_timings:       total time =  2149.86 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.01 ms /    45 runs   (    0.69 ms per token,  1450.96 tokens per second)
llama_print_timings: prompt eval time =  1972.86 ms /   605 tokens (    3.26 ms per token,   306.66 tokens per second)
llama_print_timings:        eval time =  1325.09 ms /    44 runs   (   30.12 ms per token,    33.21 tokens per second)
llama_print_timings:       total time =  3377.62 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    29.96 ms /    43 runs   (    0.70 ms per token,  1435.29 tokens per second)
llama_print_timings: prompt eval time =  1983.05 ms /   563 tokens (    3.52 ms per token,   283.91 tokens per second)
llama_print_timings:        eval time =  1248.81 ms /    42 runs   (   29.73 ms per token,    33.63 tokens per second)
llama_print_timings:       total time =  3309.51 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.67 ms /    46 runs   (    0.69 ms per token,  1452.34 tokens per second)
llama_print_timings: prompt eval time =   893.34 ms /   257 tokens (    3.48 ms per token,   287.68 tokens per second)
llama_print_timings:        eval time =  1290.07 ms /    45 runs   (   28.67 ms per token,    34.88 tokens per second)
llama_print_timings:       total time =  2263.86 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    31.53 ms /    46 runs   (    0.69 ms per token,  1459.07 tokens per second)
llama_print_timings: prompt eval time =   989.17 ms /   289 tokens (    3.42 ms per token,   292.17 tokens per second)
llama_print_timings:        eval time =  1295.26 ms /    45 runs   (   28.78 ms per token,    34.74 tokens per second)
llama_print_timings:       total time =  2364.64 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    57.57 ms /    83 runs   (    0.69 ms per token,  1441.70 tokens per second)
llama_print_timings: prompt eval time =   617.17 ms /   178 tokens (    3.47 ms per token,   288.41 tokens per second)
llama_print_timings:        eval time =  2310.47 ms /    82 runs   (   28.18 ms per token,    35.49 tokens per second)
llama_print_timings:       total time =  3083.20 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    25.78 ms /    38 runs   (    0.68 ms per token,  1474.24 tokens per second)
llama_print_timings: prompt eval time =   511.21 ms /   151 tokens (    3.39 ms per token,   295.38 tokens per second)
llama_print_timings:        eval time =  1044.75 ms /    37 runs   (   28.24 ms per token,    35.42 tokens per second)
llama_print_timings:       total time =  1622.19 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    28.67 ms /    41 runs   (    0.70 ms per token,  1429.97 tokens per second)
llama_print_timings: prompt eval time =  2130.53 ms /   599 tokens (    3.56 ms per token,   281.15 tokens per second)
llama_print_timings:        eval time =  1220.68 ms /    40 runs   (   30.52 ms per token,    32.77 tokens per second)
llama_print_timings:       total time =  3426.37 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    29.94 ms /    43 runs   (    0.70 ms per token,  1436.30 tokens per second)
llama_print_timings: prompt eval time =  1329.41 ms /   397 tokens (    3.35 ms per token,   298.63 tokens per second)
llama_print_timings:        eval time =  1226.63 ms /    42 runs   (   29.21 ms per token,    34.24 tokens per second)
llama_print_timings:       total time =  2631.63 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    38.45 ms /    56 runs   (    0.69 ms per token,  1456.47 tokens per second)
llama_print_timings: prompt eval time =  2529.51 ms /   680 tokens (    3.72 ms per token,   268.83 tokens per second)
llama_print_timings:        eval time =  1682.37 ms /    55 runs   (   30.59 ms per token,    32.69 tokens per second)
llama_print_timings:       total time =  4316.32 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    34.67 ms /    51 runs   (    0.68 ms per token,  1471.14 tokens per second)
llama_print_timings: prompt eval time =  2219.42 ms /   658 tokens (    3.37 ms per token,   296.47 tokens per second)
llama_print_timings:        eval time =  1528.27 ms /    50 runs   (   30.57 ms per token,    32.72 tokens per second)
llama_print_timings:       total time =  3837.23 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   177.97 ms /   256 runs   (    0.70 ms per token,  1438.43 tokens per second)
llama_print_timings: prompt eval time =  2002.45 ms /   575 tokens (    3.48 ms per token,   287.15 tokens per second)
llama_print_timings:        eval time =  7761.40 ms /   255 runs   (   30.44 ms per token,    32.85 tokens per second)
llama_print_timings:       total time = 10241.76 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   175.09 ms /   256 runs   (    0.68 ms per token,  1462.13 tokens per second)
llama_print_timings: prompt eval time =   596.44 ms /   184 tokens (    3.24 ms per token,   308.50 tokens per second)
llama_print_timings:        eval time =  7313.71 ms /   255 runs   (   28.68 ms per token,    34.87 tokens per second)
llama_print_timings:       total time =  8373.69 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    37.37 ms /    56 runs   (    0.67 ms per token,  1498.73 tokens per second)
llama_print_timings: prompt eval time =   393.47 ms /    98 tokens (    4.02 ms per token,   249.07 tokens per second)
llama_print_timings:        eval time =  1532.54 ms /    55 runs   (   27.86 ms per token,    35.89 tokens per second)
llama_print_timings:       total time =  2020.96 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    25.04 ms /    36 runs   (    0.70 ms per token,  1437.58 tokens per second)
llama_print_timings: prompt eval time =  2315.38 ms /   610 tokens (    3.80 ms per token,   263.46 tokens per second)
llama_print_timings:        eval time =  1065.02 ms /    35 runs   (   30.43 ms per token,    32.86 tokens per second)
llama_print_timings:       total time =  3448.76 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    23.72 ms /    34 runs   (    0.70 ms per token,  1433.39 tokens per second)
llama_print_timings: prompt eval time =  1686.58 ms /   469 tokens (    3.60 ms per token,   278.08 tokens per second)
llama_print_timings:        eval time =  1043.88 ms /    33 runs   (   31.63 ms per token,    31.61 tokens per second)
llama_print_timings:       total time =  2793.31 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    25.21 ms /    36 runs   (    0.70 ms per token,  1427.78 tokens per second)
llama_print_timings: prompt eval time =  1350.43 ms /   371 tokens (    3.64 ms per token,   274.73 tokens per second)
llama_print_timings:        eval time =  1057.60 ms /    35 runs   (   30.22 ms per token,    33.09 tokens per second)
llama_print_timings:       total time =  2476.99 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    22.83 ms /    32 runs   (    0.71 ms per token,  1401.73 tokens per second)
llama_print_timings: prompt eval time =  1559.95 ms /   409 tokens (    3.81 ms per token,   262.19 tokens per second)
llama_print_timings:        eval time =   920.47 ms /    31 runs   (   29.69 ms per token,    33.68 tokens per second)
llama_print_timings:       total time =  2545.33 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    23.50 ms /    34 runs   (    0.69 ms per token,  1446.81 tokens per second)
llama_print_timings: prompt eval time =  2254.80 ms /   520 tokens (    4.34 ms per token,   230.62 tokens per second)
llama_print_timings:        eval time =   988.04 ms /    33 runs   (   29.94 ms per token,    33.40 tokens per second)
llama_print_timings:       total time =  3309.91 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    47.78 ms /    69 runs   (    0.69 ms per token,  1444.24 tokens per second)
llama_print_timings: prompt eval time =  1335.18 ms /   417 tokens (    3.20 ms per token,   312.32 tokens per second)
llama_print_timings:        eval time =  2004.11 ms /    68 runs   (   29.47 ms per token,    33.93 tokens per second)
llama_print_timings:       total time =  3461.33 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    27.77 ms /    40 runs   (    0.69 ms per token,  1440.35 tokens per second)
llama_print_timings: prompt eval time =  1011.68 ms /   295 tokens (    3.43 ms per token,   291.60 tokens per second)
llama_print_timings:        eval time =  1129.58 ms /    39 runs   (   28.96 ms per token,    34.53 tokens per second)
llama_print_timings:       total time =  2210.74 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    24.83 ms /    36 runs   (    0.69 ms per token,  1449.98 tokens per second)
llama_print_timings: prompt eval time =  2074.61 ms /   535 tokens (    3.88 ms per token,   257.88 tokens per second)
llama_print_timings:        eval time =  1056.14 ms /    35 runs   (   30.18 ms per token,    33.14 tokens per second)
llama_print_timings:       total time =  3198.82 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    46.45 ms /    67 runs   (    0.69 ms per token,  1442.50 tokens per second)
llama_print_timings: prompt eval time =  1879.38 ms /   514 tokens (    3.66 ms per token,   273.49 tokens per second)
llama_print_timings:        eval time =  1981.04 ms /    66 runs   (   30.02 ms per token,    33.32 tokens per second)
llama_print_timings:       total time =  3986.64 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   175.60 ms /   256 runs   (    0.69 ms per token,  1457.83 tokens per second)
llama_print_timings: prompt eval time =  1908.98 ms /   527 tokens (    3.62 ms per token,   276.06 tokens per second)
llama_print_timings:        eval time =  7748.33 ms /   255 runs   (   30.39 ms per token,    32.91 tokens per second)
llama_print_timings:       total time = 10122.67 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    48.20 ms /    71 runs   (    0.68 ms per token,  1473.18 tokens per second)
llama_print_timings: prompt eval time =  1484.98 ms /   436 tokens (    3.41 ms per token,   293.61 tokens per second)
llama_print_timings:        eval time =  2071.66 ms /    70 runs   (   29.60 ms per token,    33.79 tokens per second)
llama_print_timings:       total time =  3679.89 ms
Llama.generate: prefix-match hit

llama_pri


llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =   175.81 ms /   256 runs   (    0.69 ms per token,  1456.13 tokens per second)
llama_print_timings: prompt eval time =  1953.57 ms /   527 tokens (    3.71 ms per token,   269.76 tokens per second)
llama_print_timings:        eval time =  7767.15 ms /   255 runs   (   30.46 ms per token,    32.83 tokens per second)
llama_print_timings:       total time = 10184.67 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1914.18 ms
llama_print_timings:      sample time =    60.72 ms /    87 runs   (    0.70 ms per token,  1432.74 tokens per second)
llama_print_timings: prompt eval time =  2033.96 ms /   555 tokens (    3.66 ms per token,   272.87 tokens per second)
llama_print_timings:        eval time =  2596.92 ms /    86 runs   (   30.20 ms per token,    33.12 tokens per second)
llama_print_timings:       total time =  4787.68 ms
Llama.generate: prefix-match hit

llama_pri

# Create Vector index

### Create the vectors for our Nodes and store them in the database

In [11]:
for idx, node in enumerate(nodes):
    if "\x00" in node.text:
        print(f"Found in node {idx}")


In [15]:
from llama_index import VectorStoreIndex

index = VectorStoreIndex(
    nodes,
    service_context=service_context,
    storage_context=storage_context,
    show_progress=True,
)

Generating embeddings:   0%|          | 0/642 [00:00<?, ?it/s]

In [16]:
query_engine = index.as_query_engine()

In [19]:
response = query_engine.query("Where can I find a command on how to encode video with ffmpeg?")

Llama.generate: prefix-match hit

llama_print_timings:        load time =  9314.94 ms
llama_print_timings:      sample time =   140.84 ms /   202 runs   (    0.70 ms per token,  1434.30 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =  9774.27 ms /   202 runs   (   48.39 ms per token,    20.67 tokens per second)
llama_print_timings:       total time = 10138.87 ms


In [21]:
import textwrap

print(textwrap.fill(str(response), 100))

  Sure! Based on the given context, here's the answer to your query:  To encode video with ffmpeg,
you can use the following command: ```css ffmpeg -i input.mp4 -c:v libx264 -crf 18 output.mp4 ```
This command will encode the input video file (input.mp4) using the H.264 codec with a constant rate
factor (CRF) of 18, and output the encoded video to a new file (output.mp4).  You can also specify
other options to customize the encoding process, such as the input and output color spaces,
framerate, and other settings. For more information, you can refer to the ffmpeg documentation or
the x264 settings documentation.  I hope this helps! Let me know if you have any further questions
or if there's anything else I can help with.
