In [16]:
import os

# Directory holding the GGUF model files. Defaults to the current user's
# Downloads folder; set the GGUF_MODEL_DIR environment variable to point
# somewhere else without editing the notebook.
MODEL_DIR = os.environ.get('GGUF_MODEL_DIR', os.path.expanduser('~/Downloads'))

# Candidate models, as plain string paths (the index into this list selects
# which model gets loaded further down).
models = [
    os.path.join(MODEL_DIR, file_name)
    for file_name in (
        'Llama-3.2-1B-Instruct.Q5_K_S.gguf',          # smaller llama model
        'Llama-3.2-1B-Instruct.Q8_0.gguf',            # larger llama model
        'DeepSeek-R1-Distill-Qwen-1.5B-Q6_K.gguf',    # smaller supposedly better model
        'DeepSeek-R1-Distill-Qwen-1.5B-f32.gguf',     # 3 GB model
        'DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf',    # larger size model
        'DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf',  # Q4_K_M quantization of the same model
    )
]

In [None]:
import json

def load_prompts(file_path):
    """
    Loads the list of prompt strings from a JSON file.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default encoding, because the prompts contain non-ASCII
    characters (e.g. typographic apostrophes).

    Args:
        file_path (str): Path to the JSON file containing the prompt strings.

    Returns:
        list: The parsed JSON content, expected to be a list of prompt strings.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Name of the JSON file the prompts are read from.
output_file = 'new_prompts.json'
# Load back the list to verify
loaded_prompts = load_prompts(output_file)
# Report how many prompts were loaded as a quick sanity check.
print(f"Loaded {len(loaded_prompts)} prompts from {output_file}")


Loaded 1319 prompts from new_prompts.json


In [4]:
# Print the first loaded prompt to inspect its format.
print(loaded_prompts[0])

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?

### Response: Let's think step by step.


In [5]:
'''
This class takes in the path to a gguf file representing an LLM
and allows a user to interact with the LLM running on a CPU.

'''

from llama_cpp import Llama

class CPUModel:
    """Small wrapper around a llama_cpp ``Llama`` instance loaded from a gguf file."""

    # Sampling parameters that are forwarded only when the caller supplies
    # them, so the library's own defaults apply otherwise (passing an
    # explicit None would override those defaults with an invalid value).
    _OPTIONAL_SAMPLING_KEYS = ('temperature', 'top_p')

    def __init__(self, model_path):
        """Load the model.

        Args:
            model_path: Path to the gguf file.
        """
        self.llm = Llama(model_path=model_path)

    def get_response(self, args: dict):
        """Return the complete response (a dictionary) of the LLM for an input.

        Args:
            args: Dict containing ``input_text`` (str), the input to the LLM,
                plus the optional model parameters ``temperature`` (float),
                ``max_tokens`` (int) and ``top_p`` (float).

        Returns:
            Whatever the underlying model call returns.

        Raises:
            KeyError: If ``args`` has no ``input_text`` entry.
        """
        # max_tokens is always forwarded (None when absent), exactly as before.
        call_kwargs = {'max_tokens': args.get('max_tokens')}
        for key in self._OPTIONAL_SAMPLING_KEYS:
            value = args.get(key)
            if value is not None:
                call_kwargs[key] = value
        return self.llm(args['input_text'], **call_kwargs)

    def get_text_response(self, args: dict):
        """Return only the text of the first "choice" the model outputs.

        Args:
            args: Same dict as accepted by ``get_response``.

        Returns:
            The ``text`` field of the first entry in the response's ``choices``.
        """
        response = self.get_response(args)
        return response['choices'][0]['text']

    # to be implemented
    def evaluate(self):
        """Placeholder; currently does nothing and returns None."""
        pass

In [17]:
# Instantiate the wrapper around the first entry of `models`.
cpu_model = CPUModel(models[0])

llama_model_load_from_file_impl: using device Metal (Apple M3 Pro) - 12282 MiB free
llama_model_loader: loaded meta data with 35 key-value pairs and 147 tensors from /Users/kunalbhandarkar/Downloads/Llama-3.2-1B-Instruct.Q5_K_S.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Models Meta Llama Llama 3.2 1B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = models-meta-llama-Llama-3.2
llama_model_loader: - kv   5:                         general.size_label str              = 1

In [1]:
# Generation settings together with the prompt for a quick smoke test.
args = {
    'temperature': 0.5,
    'top_p': 0.3,
    'max_tokens': 100,
    'input_text': 'Tell me a knock knock joke',
}

# Fetch only the generated text, then show it followed by its length.
response = cpu_model.get_text_response(args)
print('RESPONSE:', response, len(response), sep='\n')

NameError: name 'cpu_model' is not defined

In [20]:
# get a sense for average time taken per generation over the first LIMIT prompts

import time

LIMIT = 100
# Never index past the end of the prompt list if it holds fewer than LIMIT entries.
num_runs = min(LIMIT, len(loaded_prompts))
# Accumulate into `total_time` rather than `sum`, which would shadow the
# builtin sum() for every cell executed afterwards.
total_time = 0.0
for i in range(num_runs):
    print('ITERATION', i)
    # Per-iteration copy so the shared `args` dict is not mutated by this cell.
    run_args = {**args, 'input_text': loaded_prompts[i]}
    # perf_counter() is monotonic and meant for measuring short durations.
    start_time = time.perf_counter()
    cpu_model.get_response(run_args)
    total_time += time.perf_counter() - start_time

if num_runs:
    print('Average time taken:', total_time / num_runs)
else:
    # Avoid dividing by zero when there is nothing to time.
    print('Average time taken: n/a (no prompts loaded)')

ITERATION 0


Llama.generate: 22 prefix-match hit, remaining 74 prompt tokens to eval
llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =    2945.16 ms /    75 tokens (   39.27 ms per token,    25.47 tokens per second)
llama_perf_context_print:        eval time =    1205.49 ms /    99 runs   (   12.18 ms per token,    82.12 tokens per second)
llama_perf_context_print:       total time =    2893.21 ms /   174 tokens
Llama.generate: 22 prefix-match hit, remaining 36 prompt tokens to eval


ITERATION 1


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     354.44 ms /    36 tokens (    9.85 ms per token,   101.57 tokens per second)
llama_perf_context_print:        eval time =    1202.91 ms /    99 runs   (   12.15 ms per token,    82.30 tokens per second)
llama_perf_context_print:       total time =    1576.98 ms /   135 tokens
Llama.generate: 22 prefix-match hit, remaining 59 prompt tokens to eval


ITERATION 2


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     336.36 ms /    59 tokens (    5.70 ms per token,   175.41 tokens per second)
llama_perf_context_print:        eval time =    1335.01 ms /    99 runs   (   13.48 ms per token,    74.16 tokens per second)
llama_perf_context_print:       total time =    1691.03 ms /   158 tokens
Llama.generate: 22 prefix-match hit, remaining 44 prompt tokens to eval


ITERATION 3


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     417.03 ms /    44 tokens (    9.48 ms per token,   105.51 tokens per second)
llama_perf_context_print:        eval time =    1191.13 ms /    99 runs   (   12.03 ms per token,    83.11 tokens per second)
llama_perf_context_print:       total time =    1627.04 ms /   143 tokens
Llama.generate: 22 prefix-match hit, remaining 118 prompt tokens to eval


ITERATION 4


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     396.94 ms /   118 tokens (    3.36 ms per token,   297.28 tokens per second)
llama_perf_context_print:        eval time =    1283.92 ms /    99 runs   (   12.97 ms per token,    77.11 tokens per second)
llama_perf_context_print:       total time =    1700.98 ms /   217 tokens
Llama.generate: 22 prefix-match hit, remaining 64 prompt tokens to eval


ITERATION 5


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     477.74 ms /    64 tokens (    7.46 ms per token,   133.96 tokens per second)
llama_perf_context_print:        eval time =    1237.57 ms /    99 runs   (   12.50 ms per token,    80.00 tokens per second)
llama_perf_context_print:       total time =    1734.65 ms /   163 tokens
Llama.generate: 22 prefix-match hit, remaining 51 prompt tokens to eval


ITERATION 6


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     691.84 ms /    51 tokens (   13.57 ms per token,    73.72 tokens per second)
llama_perf_context_print:        eval time =    1730.24 ms /    99 runs   (   17.48 ms per token,    57.22 tokens per second)
llama_perf_context_print:       total time =    2443.33 ms /   150 tokens
Llama.generate: 22 prefix-match hit, remaining 76 prompt tokens to eval


ITERATION 7


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     769.40 ms /    76 tokens (   10.12 ms per token,    98.78 tokens per second)
llama_perf_context_print:        eval time =    1300.88 ms /    99 runs   (   13.14 ms per token,    76.10 tokens per second)
llama_perf_context_print:       total time =    2090.32 ms /   175 tokens
Llama.generate: 22 prefix-match hit, remaining 111 prompt tokens to eval


ITERATION 8


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     900.86 ms /   111 tokens (    8.12 ms per token,   123.22 tokens per second)
llama_perf_context_print:        eval time =    1242.88 ms /    99 runs   (   12.55 ms per token,    79.65 tokens per second)
llama_perf_context_print:       total time =    2163.43 ms /   210 tokens
Llama.generate: 22 prefix-match hit, remaining 67 prompt tokens to eval


ITERATION 9


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     342.52 ms /    67 tokens (    5.11 ms per token,   195.61 tokens per second)
llama_perf_context_print:        eval time =    1291.80 ms /    99 runs   (   13.05 ms per token,    76.64 tokens per second)
llama_perf_context_print:       total time =    1653.74 ms /   166 tokens
Llama.generate: 22 prefix-match hit, remaining 68 prompt tokens to eval


ITERATION 10


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     589.99 ms /    68 tokens (    8.68 ms per token,   115.26 tokens per second)
llama_perf_context_print:        eval time =    1244.91 ms /    99 runs   (   12.57 ms per token,    79.52 tokens per second)
llama_perf_context_print:       total time =    1854.58 ms /   167 tokens
Llama.generate: 22 prefix-match hit, remaining 72 prompt tokens to eval


ITERATION 11


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     379.19 ms /    72 tokens (    5.27 ms per token,   189.88 tokens per second)
llama_perf_context_print:        eval time =    1228.96 ms /    99 runs   (   12.41 ms per token,    80.56 tokens per second)
llama_perf_context_print:       total time =    1627.43 ms /   171 tokens
Llama.generate: 22 prefix-match hit, remaining 76 prompt tokens to eval


ITERATION 12


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     373.22 ms /    76 tokens (    4.91 ms per token,   203.63 tokens per second)
llama_perf_context_print:        eval time =    1251.94 ms /    99 runs   (   12.65 ms per token,    79.08 tokens per second)
llama_perf_context_print:       total time =    1645.62 ms /   175 tokens
Llama.generate: 22 prefix-match hit, remaining 68 prompt tokens to eval


ITERATION 13


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     515.00 ms /    68 tokens (    7.57 ms per token,   132.04 tokens per second)
llama_perf_context_print:        eval time =    1197.47 ms /    99 runs   (   12.10 ms per token,    82.67 tokens per second)
llama_perf_context_print:       total time =    1731.74 ms /   167 tokens
Llama.generate: 22 prefix-match hit, remaining 59 prompt tokens to eval


ITERATION 14


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     325.87 ms /    59 tokens (    5.52 ms per token,   181.06 tokens per second)
llama_perf_context_print:        eval time =    1154.67 ms /    99 runs   (   11.66 ms per token,    85.74 tokens per second)
llama_perf_context_print:       total time =    1499.34 ms /   158 tokens
Llama.generate: 22 prefix-match hit, remaining 100 prompt tokens to eval


ITERATION 15


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     792.07 ms /   100 tokens (    7.92 ms per token,   126.25 tokens per second)
llama_perf_context_print:        eval time =    1324.52 ms /    99 runs   (   13.38 ms per token,    74.74 tokens per second)
llama_perf_context_print:       total time =    2137.58 ms /   199 tokens
Llama.generate: 22 prefix-match hit, remaining 60 prompt tokens to eval


ITERATION 16


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     333.35 ms /    60 tokens (    5.56 ms per token,   179.99 tokens per second)
llama_perf_context_print:        eval time =    1110.76 ms /    99 runs   (   11.22 ms per token,    89.13 tokens per second)
llama_perf_context_print:       total time =    1462.26 ms /   159 tokens
Llama.generate: 22 prefix-match hit, remaining 63 prompt tokens to eval


ITERATION 17


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     430.09 ms /    63 tokens (    6.83 ms per token,   146.48 tokens per second)
llama_perf_context_print:        eval time =    1254.28 ms /    99 runs   (   12.67 ms per token,    78.93 tokens per second)
llama_perf_context_print:       total time =    1703.85 ms /   162 tokens
Llama.generate: 22 prefix-match hit, remaining 39 prompt tokens to eval


ITERATION 18


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     305.71 ms /    39 tokens (    7.84 ms per token,   127.57 tokens per second)
llama_perf_context_print:        eval time =    1292.55 ms /    99 runs   (   13.06 ms per token,    76.59 tokens per second)
llama_perf_context_print:       total time =    1617.96 ms /   138 tokens
Llama.generate: 22 prefix-match hit, remaining 74 prompt tokens to eval


ITERATION 19


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     663.26 ms /    74 tokens (    8.96 ms per token,   111.57 tokens per second)
llama_perf_context_print:        eval time =    1265.02 ms /    99 runs   (   12.78 ms per token,    78.26 tokens per second)
llama_perf_context_print:       total time =    1948.40 ms /   173 tokens
Llama.generate: 22 prefix-match hit, remaining 70 prompt tokens to eval


ITERATION 20


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     509.93 ms /    70 tokens (    7.28 ms per token,   137.27 tokens per second)
llama_perf_context_print:        eval time =    1148.44 ms /    99 runs   (   11.60 ms per token,    86.20 tokens per second)
llama_perf_context_print:       total time =    1677.30 ms /   169 tokens
Llama.generate: 22 prefix-match hit, remaining 54 prompt tokens to eval


ITERATION 21


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     601.87 ms /    54 tokens (   11.15 ms per token,    89.72 tokens per second)
llama_perf_context_print:        eval time =    1158.39 ms /    99 runs   (   11.70 ms per token,    85.46 tokens per second)
llama_perf_context_print:       total time =    1779.35 ms /   153 tokens
Llama.generate: 22 prefix-match hit, remaining 65 prompt tokens to eval


ITERATION 22


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     411.56 ms /    65 tokens (    6.33 ms per token,   157.94 tokens per second)
llama_perf_context_print:        eval time =    1150.56 ms /    99 runs   (   11.62 ms per token,    86.04 tokens per second)
llama_perf_context_print:       total time =    1580.66 ms /   164 tokens
Llama.generate: 22 prefix-match hit, remaining 48 prompt tokens to eval


ITERATION 23


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     306.77 ms /    48 tokens (    6.39 ms per token,   156.47 tokens per second)
llama_perf_context_print:        eval time =    1218.31 ms /    99 runs   (   12.31 ms per token,    81.26 tokens per second)
llama_perf_context_print:       total time =    1544.22 ms /   147 tokens
Llama.generate: 22 prefix-match hit, remaining 46 prompt tokens to eval


ITERATION 24


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     335.47 ms /    46 tokens (    7.29 ms per token,   137.12 tokens per second)
llama_perf_context_print:        eval time =    1124.76 ms /    99 runs   (   11.36 ms per token,    88.02 tokens per second)
llama_perf_context_print:       total time =    1478.56 ms /   145 tokens
Llama.generate: 22 prefix-match hit, remaining 74 prompt tokens to eval


ITERATION 25


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     834.60 ms /    74 tokens (   11.28 ms per token,    88.67 tokens per second)
llama_perf_context_print:        eval time =    1113.29 ms /    99 runs   (   11.25 ms per token,    88.93 tokens per second)
llama_perf_context_print:       total time =    1966.20 ms /   173 tokens
Llama.generate: 22 prefix-match hit, remaining 74 prompt tokens to eval


ITERATION 26


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     363.82 ms /    74 tokens (    4.92 ms per token,   203.40 tokens per second)
llama_perf_context_print:        eval time =    1113.85 ms /    99 runs   (   11.25 ms per token,    88.88 tokens per second)
llama_perf_context_print:       total time =    1495.78 ms /   173 tokens
Llama.generate: 22 prefix-match hit, remaining 66 prompt tokens to eval


ITERATION 27


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     371.63 ms /    66 tokens (    5.63 ms per token,   177.60 tokens per second)
llama_perf_context_print:        eval time =    1152.44 ms /    99 runs   (   11.64 ms per token,    85.90 tokens per second)
llama_perf_context_print:       total time =    1542.45 ms /   165 tokens
Llama.generate: 22 prefix-match hit, remaining 57 prompt tokens to eval


ITERATION 28


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     741.80 ms /    57 tokens (   13.01 ms per token,    76.84 tokens per second)
llama_perf_context_print:        eval time =    1163.94 ms /    99 runs   (   11.76 ms per token,    85.06 tokens per second)
llama_perf_context_print:       total time =    1924.70 ms /   156 tokens
Llama.generate: 22 prefix-match hit, remaining 78 prompt tokens to eval


ITERATION 29


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     517.49 ms /    78 tokens (    6.63 ms per token,   150.73 tokens per second)
llama_perf_context_print:        eval time =    1179.35 ms /    99 runs   (   11.91 ms per token,    83.94 tokens per second)
llama_perf_context_print:       total time =    1715.76 ms /   177 tokens
Llama.generate: 22 prefix-match hit, remaining 45 prompt tokens to eval


ITERATION 30


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     355.17 ms /    45 tokens (    7.89 ms per token,   126.70 tokens per second)
llama_perf_context_print:        eval time =    1176.45 ms /    99 runs   (   11.88 ms per token,    84.15 tokens per second)
llama_perf_context_print:       total time =    1550.83 ms /   144 tokens
Llama.generate: 22 prefix-match hit, remaining 70 prompt tokens to eval


ITERATION 31


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     731.23 ms /    70 tokens (   10.45 ms per token,    95.73 tokens per second)
llama_perf_context_print:        eval time =    1186.75 ms /    99 runs   (   11.99 ms per token,    83.42 tokens per second)
llama_perf_context_print:       total time =    1936.93 ms /   169 tokens
Llama.generate: 22 prefix-match hit, remaining 50 prompt tokens to eval


ITERATION 32


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     406.67 ms /    50 tokens (    8.13 ms per token,   122.95 tokens per second)
llama_perf_context_print:        eval time =    1214.58 ms /    99 runs   (   12.27 ms per token,    81.51 tokens per second)
llama_perf_context_print:       total time =    1640.61 ms /   149 tokens
Llama.generate: 22 prefix-match hit, remaining 38 prompt tokens to eval


ITERATION 33


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     339.29 ms /    38 tokens (    8.93 ms per token,   112.00 tokens per second)
llama_perf_context_print:        eval time =    1174.25 ms /    99 runs   (   11.86 ms per token,    84.31 tokens per second)
llama_perf_context_print:       total time =    1532.21 ms /   137 tokens
Llama.generate: 22 prefix-match hit, remaining 50 prompt tokens to eval


ITERATION 34


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     370.86 ms /    50 tokens (    7.42 ms per token,   134.82 tokens per second)
llama_perf_context_print:        eval time =    1146.24 ms /    99 runs   (   11.58 ms per token,    86.37 tokens per second)
llama_perf_context_print:       total time =    1535.75 ms /   149 tokens
Llama.generate: 22 prefix-match hit, remaining 58 prompt tokens to eval


ITERATION 35


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     302.47 ms /    58 tokens (    5.22 ms per token,   191.75 tokens per second)
llama_perf_context_print:        eval time =    1177.88 ms /    99 runs   (   11.90 ms per token,    84.05 tokens per second)
llama_perf_context_print:       total time =    1500.07 ms /   157 tokens
Llama.generate: 22 prefix-match hit, remaining 52 prompt tokens to eval


ITERATION 36


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     509.69 ms /    52 tokens (    9.80 ms per token,   102.02 tokens per second)
llama_perf_context_print:        eval time =    1166.18 ms /    99 runs   (   11.78 ms per token,    84.89 tokens per second)
llama_perf_context_print:       total time =    1694.77 ms /   151 tokens
Llama.generate: 22 prefix-match hit, remaining 70 prompt tokens to eval


ITERATION 37


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     491.06 ms /    70 tokens (    7.02 ms per token,   142.55 tokens per second)
llama_perf_context_print:        eval time =    1214.45 ms /    99 runs   (   12.27 ms per token,    81.52 tokens per second)
llama_perf_context_print:       total time =    1724.45 ms /   169 tokens
Llama.generate: 23 prefix-match hit, remaining 52 prompt tokens to eval


ITERATION 38


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     357.17 ms /    52 tokens (    6.87 ms per token,   145.59 tokens per second)
llama_perf_context_print:        eval time =    1145.17 ms /    99 runs   (   11.57 ms per token,    86.45 tokens per second)
llama_perf_context_print:       total time =    1520.80 ms /   151 tokens
Llama.generate: 22 prefix-match hit, remaining 83 prompt tokens to eval


ITERATION 39


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     576.45 ms /    83 tokens (    6.95 ms per token,   143.98 tokens per second)
llama_perf_context_print:        eval time =    1193.24 ms /    99 runs   (   12.05 ms per token,    82.97 tokens per second)
llama_perf_context_print:       total time =    1789.31 ms /   182 tokens
Llama.generate: 22 prefix-match hit, remaining 54 prompt tokens to eval


ITERATION 40


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     665.47 ms /    54 tokens (   12.32 ms per token,    81.15 tokens per second)
llama_perf_context_print:        eval time =     998.95 ms /    89 runs   (   11.22 ms per token,    89.09 tokens per second)
llama_perf_context_print:       total time =    1680.40 ms /   143 tokens
Llama.generate: 22 prefix-match hit, remaining 134 prompt tokens to eval


ITERATION 41


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     430.69 ms /   134 tokens (    3.21 ms per token,   311.13 tokens per second)
llama_perf_context_print:        eval time =    1165.14 ms /    99 runs   (   11.77 ms per token,    84.97 tokens per second)
llama_perf_context_print:       total time =    1615.24 ms /   233 tokens
Llama.generate: 22 prefix-match hit, remaining 91 prompt tokens to eval


ITERATION 42


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     439.99 ms /    91 tokens (    4.84 ms per token,   206.82 tokens per second)
llama_perf_context_print:        eval time =    1125.62 ms /    99 runs   (   11.37 ms per token,    87.95 tokens per second)
llama_perf_context_print:       total time =    1583.99 ms /   190 tokens
Llama.generate: 22 prefix-match hit, remaining 63 prompt tokens to eval


ITERATION 43


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     444.16 ms /    63 tokens (    7.05 ms per token,   141.84 tokens per second)
llama_perf_context_print:        eval time =    1187.24 ms /    99 runs   (   11.99 ms per token,    83.39 tokens per second)
llama_perf_context_print:       total time =    1650.50 ms /   162 tokens
Llama.generate: 22 prefix-match hit, remaining 82 prompt tokens to eval


ITERATION 44


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     503.75 ms /    82 tokens (    6.14 ms per token,   162.78 tokens per second)
llama_perf_context_print:        eval time =    1164.28 ms /    99 runs   (   11.76 ms per token,    85.03 tokens per second)
llama_perf_context_print:       total time =    1686.89 ms /   181 tokens
Llama.generate: 22 prefix-match hit, remaining 104 prompt tokens to eval


ITERATION 45


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     787.07 ms /   104 tokens (    7.57 ms per token,   132.14 tokens per second)
llama_perf_context_print:        eval time =    1223.14 ms /    99 runs   (   12.35 ms per token,    80.94 tokens per second)
llama_perf_context_print:       total time =    2030.09 ms /   203 tokens
Llama.generate: 22 prefix-match hit, remaining 100 prompt tokens to eval


ITERATION 46


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     450.52 ms /   100 tokens (    4.51 ms per token,   221.97 tokens per second)
llama_perf_context_print:        eval time =    1134.40 ms /    99 runs   (   11.46 ms per token,    87.27 tokens per second)
llama_perf_context_print:       total time =    1603.91 ms /   199 tokens
Llama.generate: 22 prefix-match hit, remaining 56 prompt tokens to eval


ITERATION 47


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     416.68 ms /    56 tokens (    7.44 ms per token,   134.39 tokens per second)
llama_perf_context_print:        eval time =    1119.93 ms /    99 runs   (   11.31 ms per token,    88.40 tokens per second)
llama_perf_context_print:       total time =    1554.84 ms /   155 tokens
Llama.generate: 22 prefix-match hit, remaining 47 prompt tokens to eval


ITERATION 48


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     324.36 ms /    47 tokens (    6.90 ms per token,   144.90 tokens per second)
llama_perf_context_print:        eval time =    1107.07 ms /    99 runs   (   11.18 ms per token,    89.43 tokens per second)
llama_perf_context_print:       total time =    1449.49 ms /   146 tokens
Llama.generate: 22 prefix-match hit, remaining 52 prompt tokens to eval


ITERATION 49


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     302.95 ms /    52 tokens (    5.83 ms per token,   171.64 tokens per second)
llama_perf_context_print:        eval time =    1126.51 ms /    99 runs   (   11.38 ms per token,    87.88 tokens per second)
llama_perf_context_print:       total time =    1447.87 ms /   151 tokens
Llama.generate: 22 prefix-match hit, remaining 45 prompt tokens to eval


ITERATION 50


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     511.25 ms /    45 tokens (   11.36 ms per token,    88.02 tokens per second)
llama_perf_context_print:        eval time =    1148.47 ms /    99 runs   (   11.60 ms per token,    86.20 tokens per second)
llama_perf_context_print:       total time =    1678.64 ms /   144 tokens
Llama.generate: 22 prefix-match hit, remaining 58 prompt tokens to eval


ITERATION 51


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     341.41 ms /    58 tokens (    5.89 ms per token,   169.88 tokens per second)
llama_perf_context_print:        eval time =    1153.43 ms /    99 runs   (   11.65 ms per token,    85.83 tokens per second)
llama_perf_context_print:       total time =    1513.53 ms /   157 tokens
Llama.generate: 22 prefix-match hit, remaining 70 prompt tokens to eval


ITERATION 52


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     356.67 ms /    70 tokens (    5.10 ms per token,   196.26 tokens per second)
llama_perf_context_print:        eval time =    1120.70 ms /    99 runs   (   11.32 ms per token,    88.34 tokens per second)
llama_perf_context_print:       total time =    1495.81 ms /   169 tokens
Llama.generate: 22 prefix-match hit, remaining 107 prompt tokens to eval


ITERATION 53


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     429.03 ms /   107 tokens (    4.01 ms per token,   249.40 tokens per second)
llama_perf_context_print:        eval time =    1115.78 ms /    99 runs   (   11.27 ms per token,    88.73 tokens per second)
llama_perf_context_print:       total time =    1563.24 ms /   206 tokens
Llama.generate: 22 prefix-match hit, remaining 91 prompt tokens to eval


ITERATION 54


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     429.38 ms /    91 tokens (    4.72 ms per token,   211.93 tokens per second)
llama_perf_context_print:        eval time =    1199.51 ms /    99 runs   (   12.12 ms per token,    82.53 tokens per second)
llama_perf_context_print:       total time =    1648.16 ms /   190 tokens
Llama.generate: 22 prefix-match hit, remaining 59 prompt tokens to eval


ITERATION 55


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     300.35 ms /    59 tokens (    5.09 ms per token,   196.44 tokens per second)
llama_perf_context_print:        eval time =    1116.35 ms /    99 runs   (   11.28 ms per token,    88.68 tokens per second)
llama_perf_context_print:       total time =    1434.98 ms /   158 tokens
Llama.generate: 22 prefix-match hit, remaining 54 prompt tokens to eval


ITERATION 56


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     298.83 ms /    54 tokens (    5.53 ms per token,   180.70 tokens per second)
llama_perf_context_print:        eval time =    1129.73 ms /    99 runs   (   11.41 ms per token,    87.63 tokens per second)
llama_perf_context_print:       total time =    1446.95 ms /   153 tokens
Llama.generate: 22 prefix-match hit, remaining 82 prompt tokens to eval


ITERATION 57


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     370.17 ms /    82 tokens (    4.51 ms per token,   221.52 tokens per second)
llama_perf_context_print:        eval time =    1114.85 ms /    99 runs   (   11.26 ms per token,    88.80 tokens per second)
llama_perf_context_print:       total time =    1503.73 ms /   181 tokens
Llama.generate: 22 prefix-match hit, remaining 89 prompt tokens to eval


ITERATION 58


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     526.78 ms /    89 tokens (    5.92 ms per token,   168.95 tokens per second)
llama_perf_context_print:        eval time =    1146.25 ms /    99 runs   (   11.58 ms per token,    86.37 tokens per second)
llama_perf_context_print:       total time =    1691.91 ms /   188 tokens
Llama.generate: 22 prefix-match hit, remaining 40 prompt tokens to eval


ITERATION 59


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     404.64 ms /    40 tokens (   10.12 ms per token,    98.85 tokens per second)
llama_perf_context_print:        eval time =    1142.02 ms /    99 runs   (   11.54 ms per token,    86.69 tokens per second)
llama_perf_context_print:       total time =    1565.46 ms /   139 tokens
Llama.generate: 23 prefix-match hit, remaining 45 prompt tokens to eval


ITERATION 60


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     314.83 ms /    45 tokens (    7.00 ms per token,   142.94 tokens per second)
llama_perf_context_print:        eval time =    1130.83 ms /    99 runs   (   11.42 ms per token,    87.55 tokens per second)
llama_perf_context_print:       total time =    1464.26 ms /   144 tokens
Llama.generate: 22 prefix-match hit, remaining 67 prompt tokens to eval


ITERATION 61


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     360.46 ms /    67 tokens (    5.38 ms per token,   185.87 tokens per second)
llama_perf_context_print:        eval time =    1112.90 ms /    99 runs   (   11.24 ms per token,    88.96 tokens per second)
llama_perf_context_print:       total time =    1491.50 ms /   166 tokens
Llama.generate: 22 prefix-match hit, remaining 72 prompt tokens to eval


ITERATION 62


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     383.42 ms /    72 tokens (    5.33 ms per token,   187.78 tokens per second)
llama_perf_context_print:        eval time =    1113.18 ms /    99 runs   (   11.24 ms per token,    88.93 tokens per second)
llama_perf_context_print:       total time =    1515.04 ms /   171 tokens
Llama.generate: 22 prefix-match hit, remaining 76 prompt tokens to eval


ITERATION 63


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     600.84 ms /    76 tokens (    7.91 ms per token,   126.49 tokens per second)
llama_perf_context_print:        eval time =    1106.99 ms /    99 runs   (   11.18 ms per token,    89.43 tokens per second)
llama_perf_context_print:       total time =    1726.28 ms /   175 tokens
Llama.generate: 22 prefix-match hit, remaining 100 prompt tokens to eval


ITERATION 64


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     442.46 ms /   100 tokens (    4.42 ms per token,   226.01 tokens per second)
llama_perf_context_print:        eval time =    1153.37 ms /    99 runs   (   11.65 ms per token,    85.84 tokens per second)
llama_perf_context_print:       total time =    1614.34 ms /   199 tokens
Llama.generate: 22 prefix-match hit, remaining 57 prompt tokens to eval


ITERATION 65


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     383.22 ms /    57 tokens (    6.72 ms per token,   148.74 tokens per second)
llama_perf_context_print:        eval time =    1109.75 ms /    99 runs   (   11.21 ms per token,    89.21 tokens per second)
llama_perf_context_print:       total time =    1511.07 ms /   156 tokens
Llama.generate: 22 prefix-match hit, remaining 68 prompt tokens to eval


ITERATION 66


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     339.08 ms /    68 tokens (    4.99 ms per token,   200.54 tokens per second)
llama_perf_context_print:        eval time =    1108.47 ms /    99 runs   (   11.20 ms per token,    89.31 tokens per second)
llama_perf_context_print:       total time =    1465.84 ms /   167 tokens
Llama.generate: 22 prefix-match hit, remaining 61 prompt tokens to eval


ITERATION 67


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     353.75 ms /    61 tokens (    5.80 ms per token,   172.44 tokens per second)
llama_perf_context_print:        eval time =    1108.52 ms /    99 runs   (   11.20 ms per token,    89.31 tokens per second)
llama_perf_context_print:       total time =    1480.65 ms /   160 tokens
Llama.generate: 22 prefix-match hit, remaining 46 prompt tokens to eval


ITERATION 68


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     295.27 ms /    46 tokens (    6.42 ms per token,   155.79 tokens per second)
llama_perf_context_print:        eval time =    1106.42 ms /    99 runs   (   11.18 ms per token,    89.48 tokens per second)
llama_perf_context_print:       total time =    1419.83 ms /   145 tokens
Llama.generate: 22 prefix-match hit, remaining 60 prompt tokens to eval


ITERATION 69


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     355.26 ms /    60 tokens (    5.92 ms per token,   168.89 tokens per second)
llama_perf_context_print:        eval time =    1107.50 ms /    99 runs   (   11.19 ms per token,    89.39 tokens per second)
llama_perf_context_print:       total time =    1481.06 ms /   159 tokens
Llama.generate: 22 prefix-match hit, remaining 60 prompt tokens to eval


ITERATION 70


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     315.40 ms /    60 tokens (    5.26 ms per token,   190.23 tokens per second)
llama_perf_context_print:        eval time =    1107.39 ms /    99 runs   (   11.19 ms per token,    89.40 tokens per second)
llama_perf_context_print:       total time =    1440.85 ms /   159 tokens
Llama.generate: 22 prefix-match hit, remaining 53 prompt tokens to eval


ITERATION 71


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     305.52 ms /    53 tokens (    5.76 ms per token,   173.48 tokens per second)
llama_perf_context_print:        eval time =    1031.66 ms /    92 runs   (   11.21 ms per token,    89.18 tokens per second)
llama_perf_context_print:       total time =    1353.88 ms /   145 tokens
Llama.generate: 22 prefix-match hit, remaining 58 prompt tokens to eval


ITERATION 72


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     305.07 ms /    58 tokens (    5.26 ms per token,   190.12 tokens per second)
llama_perf_context_print:        eval time =    1133.92 ms /    99 runs   (   11.45 ms per token,    87.31 tokens per second)
llama_perf_context_print:       total time =    1457.81 ms /   157 tokens
Llama.generate: 22 prefix-match hit, remaining 53 prompt tokens to eval


ITERATION 73


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     306.81 ms /    53 tokens (    5.79 ms per token,   172.74 tokens per second)
llama_perf_context_print:        eval time =    1118.40 ms /    99 runs   (   11.30 ms per token,    88.52 tokens per second)
llama_perf_context_print:       total time =    1443.68 ms /   152 tokens
Llama.generate: 22 prefix-match hit, remaining 131 prompt tokens to eval


ITERATION 74


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     486.73 ms /   131 tokens (    3.72 ms per token,   269.14 tokens per second)
llama_perf_context_print:        eval time =    1124.56 ms /    99 runs   (   11.36 ms per token,    88.03 tokens per second)
llama_perf_context_print:       total time =    1630.49 ms /   230 tokens
Llama.generate: 22 prefix-match hit, remaining 75 prompt tokens to eval


ITERATION 75


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     374.36 ms /    75 tokens (    4.99 ms per token,   200.34 tokens per second)
llama_perf_context_print:        eval time =    1109.26 ms /    99 runs   (   11.20 ms per token,    89.25 tokens per second)
llama_perf_context_print:       total time =    1502.06 ms /   174 tokens
Llama.generate: 22 prefix-match hit, remaining 93 prompt tokens to eval


ITERATION 76


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     373.09 ms /    93 tokens (    4.01 ms per token,   249.27 tokens per second)
llama_perf_context_print:        eval time =    1131.22 ms /    99 runs   (   11.43 ms per token,    87.52 tokens per second)
llama_perf_context_print:       total time =    1523.38 ms /   192 tokens
Llama.generate: 22 prefix-match hit, remaining 53 prompt tokens to eval


ITERATION 77


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     306.44 ms /    53 tokens (    5.78 ms per token,   172.96 tokens per second)
llama_perf_context_print:        eval time =    1111.61 ms /    99 runs   (   11.23 ms per token,    89.06 tokens per second)
llama_perf_context_print:       total time =    1436.29 ms /   152 tokens
Llama.generate: 22 prefix-match hit, remaining 51 prompt tokens to eval


ITERATION 78


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     339.32 ms /    51 tokens (    6.65 ms per token,   150.30 tokens per second)
llama_perf_context_print:        eval time =    1119.63 ms /    99 runs   (   11.31 ms per token,    88.42 tokens per second)
llama_perf_context_print:       total time =    1477.34 ms /   150 tokens
Llama.generate: 22 prefix-match hit, remaining 55 prompt tokens to eval


ITERATION 79


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     305.71 ms /    55 tokens (    5.56 ms per token,   179.91 tokens per second)
llama_perf_context_print:        eval time =    1108.71 ms /    99 runs   (   11.20 ms per token,    89.29 tokens per second)
llama_perf_context_print:       total time =    1432.55 ms /   154 tokens
Llama.generate: 22 prefix-match hit, remaining 44 prompt tokens to eval


ITERATION 80


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     329.03 ms /    44 tokens (    7.48 ms per token,   133.73 tokens per second)
llama_perf_context_print:        eval time =    1195.74 ms /    99 runs   (   12.08 ms per token,    82.79 tokens per second)
llama_perf_context_print:       total time =    1543.41 ms /   143 tokens
Llama.generate: 22 prefix-match hit, remaining 65 prompt tokens to eval


ITERATION 81


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     389.70 ms /    65 tokens (    6.00 ms per token,   166.79 tokens per second)
llama_perf_context_print:        eval time =    1111.19 ms /    99 runs   (   11.22 ms per token,    89.09 tokens per second)
llama_perf_context_print:       total time =    1519.04 ms /   164 tokens
Llama.generate: 22 prefix-match hit, remaining 38 prompt tokens to eval


ITERATION 82


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     348.13 ms /    38 tokens (    9.16 ms per token,   109.15 tokens per second)
llama_perf_context_print:        eval time =    1050.62 ms /    94 runs   (   11.18 ms per token,    89.47 tokens per second)
llama_perf_context_print:       total time =    1415.85 ms /   132 tokens
Llama.generate: 22 prefix-match hit, remaining 41 prompt tokens to eval


ITERATION 83


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     325.33 ms /    41 tokens (    7.93 ms per token,   126.02 tokens per second)
llama_perf_context_print:        eval time =    1110.54 ms /    99 runs   (   11.22 ms per token,    89.15 tokens per second)
llama_perf_context_print:       total time =    1454.30 ms /   140 tokens
Llama.generate: 22 prefix-match hit, remaining 33 prompt tokens to eval


ITERATION 84


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     362.87 ms /    33 tokens (   11.00 ms per token,    90.94 tokens per second)
llama_perf_context_print:        eval time =    1186.87 ms /    99 runs   (   11.99 ms per token,    83.41 tokens per second)
llama_perf_context_print:       total time =    1568.27 ms /   132 tokens
Llama.generate: 22 prefix-match hit, remaining 91 prompt tokens to eval


ITERATION 85


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     588.26 ms /    91 tokens (    6.46 ms per token,   154.69 tokens per second)
llama_perf_context_print:        eval time =    1121.67 ms /    99 runs   (   11.33 ms per token,    88.26 tokens per second)
llama_perf_context_print:       total time =    1728.48 ms /   190 tokens
Llama.generate: 22 prefix-match hit, remaining 98 prompt tokens to eval


ITERATION 86


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     496.44 ms /    98 tokens (    5.07 ms per token,   197.40 tokens per second)
llama_perf_context_print:        eval time =    1113.84 ms /    99 runs   (   11.25 ms per token,    88.88 tokens per second)
llama_perf_context_print:       total time =    1628.50 ms /   197 tokens
Llama.generate: 22 prefix-match hit, remaining 86 prompt tokens to eval


ITERATION 87


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     394.54 ms /    86 tokens (    4.59 ms per token,   217.98 tokens per second)
llama_perf_context_print:        eval time =    1161.62 ms /    99 runs   (   11.73 ms per token,    85.23 tokens per second)
llama_perf_context_print:       total time =    1574.84 ms /   185 tokens
Llama.generate: 22 prefix-match hit, remaining 46 prompt tokens to eval


ITERATION 88


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     299.55 ms /    46 tokens (    6.51 ms per token,   153.56 tokens per second)
llama_perf_context_print:        eval time =    1107.03 ms /    99 runs   (   11.18 ms per token,    89.43 tokens per second)
llama_perf_context_print:       total time =    1424.92 ms /   145 tokens
Llama.generate: 22 prefix-match hit, remaining 58 prompt tokens to eval


ITERATION 89


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     338.49 ms /    58 tokens (    5.84 ms per token,   171.35 tokens per second)
llama_perf_context_print:        eval time =    1107.16 ms /    99 runs   (   11.18 ms per token,    89.42 tokens per second)
llama_perf_context_print:       total time =    1463.80 ms /   157 tokens
Llama.generate: 22 prefix-match hit, remaining 94 prompt tokens to eval


ITERATION 90


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     411.28 ms /    94 tokens (    4.38 ms per token,   228.55 tokens per second)
llama_perf_context_print:        eval time =    1136.21 ms /    99 runs   (   11.48 ms per token,    87.13 tokens per second)
llama_perf_context_print:       total time =    1565.98 ms /   193 tokens
Llama.generate: 22 prefix-match hit, remaining 46 prompt tokens to eval


ITERATION 91


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     301.63 ms /    46 tokens (    6.56 ms per token,   152.51 tokens per second)
llama_perf_context_print:        eval time =    1109.99 ms /    99 runs   (   11.21 ms per token,    89.19 tokens per second)
llama_perf_context_print:       total time =    1429.55 ms /   145 tokens
Llama.generate: 22 prefix-match hit, remaining 60 prompt tokens to eval


ITERATION 92


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     321.82 ms /    60 tokens (    5.36 ms per token,   186.44 tokens per second)
llama_perf_context_print:        eval time =    1097.27 ms /    94 runs   (   11.67 ms per token,    85.67 tokens per second)
llama_perf_context_print:       total time =    1436.57 ms /   154 tokens
Llama.generate: 22 prefix-match hit, remaining 84 prompt tokens to eval


ITERATION 93


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     369.59 ms /    84 tokens (    4.40 ms per token,   227.28 tokens per second)
llama_perf_context_print:        eval time =    1111.43 ms /    99 runs   (   11.23 ms per token,    89.07 tokens per second)
llama_perf_context_print:       total time =    1499.49 ms /   183 tokens
Llama.generate: 22 prefix-match hit, remaining 60 prompt tokens to eval


ITERATION 94


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     387.40 ms /    60 tokens (    6.46 ms per token,   154.88 tokens per second)
llama_perf_context_print:        eval time =    1115.91 ms /    99 runs   (   11.27 ms per token,    88.72 tokens per second)
llama_perf_context_print:       total time =    1521.51 ms /   159 tokens
Llama.generate: 22 prefix-match hit, remaining 50 prompt tokens to eval


ITERATION 95


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     322.22 ms /    50 tokens (    6.44 ms per token,   155.17 tokens per second)
llama_perf_context_print:        eval time =    1109.79 ms /    99 runs   (   11.21 ms per token,    89.21 tokens per second)
llama_perf_context_print:       total time =    1450.04 ms /   149 tokens
Llama.generate: 22 prefix-match hit, remaining 42 prompt tokens to eval


ITERATION 96


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     315.82 ms /    42 tokens (    7.52 ms per token,   132.99 tokens per second)
llama_perf_context_print:        eval time =    1153.02 ms /    99 runs   (   11.65 ms per token,    85.86 tokens per second)
llama_perf_context_print:       total time =    1487.47 ms /   141 tokens
Llama.generate: 22 prefix-match hit, remaining 62 prompt tokens to eval


ITERATION 97


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     327.84 ms /    62 tokens (    5.29 ms per token,   189.12 tokens per second)
llama_perf_context_print:        eval time =    1135.05 ms /    99 runs   (   11.47 ms per token,    87.22 tokens per second)
llama_perf_context_print:       total time =    1481.55 ms /   161 tokens
Llama.generate: 22 prefix-match hit, remaining 101 prompt tokens to eval


ITERATION 98


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     415.30 ms /   101 tokens (    4.11 ms per token,   243.20 tokens per second)
llama_perf_context_print:        eval time =    1122.20 ms /    99 runs   (   11.34 ms per token,    88.22 tokens per second)
llama_perf_context_print:       total time =    1556.08 ms /   200 tokens
Llama.generate: 22 prefix-match hit, remaining 90 prompt tokens to eval


ITERATION 99


llama_perf_context_print:        load time =    1352.98 ms
llama_perf_context_print: prompt eval time =     392.72 ms /    90 tokens (    4.36 ms per token,   229.17 tokens per second)
llama_perf_context_print:        eval time =    1113.81 ms /    99 runs   (   11.25 ms per token,    88.88 tokens per second)
llama_perf_context_print:       total time =    1525.13 ms /   189 tokens


Average time taken: 1.620800597667694
