In [1]:
%set_env CMAKE_ARGS="-DLLAMA_CUBLAS=on"  
%pip install -U openai sentence-transformers tokenizers python-dotenv llama-cpp-python

env: CMAKE_ARGS="-DLLAMA_CUBLAS=on"
Collecting tokenizers
  Obtaining dependency information for tokenizers from https://files.pythonhosted.org/packages/57/bd/45b5ef6b088880779f70acf60027f7043ca5fa1b98f4a4345cf3aea09044/tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Using cached tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import openai
from dotenv import load_dotenv
from llama_cpp import Llama
import json

load_dotenv()

OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY

GGUF_MODEL_PATH = "./model/nous-hermes-llama2-13b-q4.gguf"
MAX_CONTEXT = 4096
# GGUF_MODEL_PATH = "./model/nous-hermes-llama-2-7b-q4.gguf"
llm = Llama(model_path=GGUF_MODEL_PATH, n_gpu_layers=999, n_ctx=MAX_CONTEXT, verbose=False)


ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6
llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from ./model/nous-hermes-llama2-13b-q4.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  5120, 32032,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q4_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q4_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q4_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q4_K     [  5120, 13824,     1,     

In [2]:
def get_response_from_model(prompt):
        return llm(
                prompt, 
                max_tokens=MAX_CONTEXT, 
                # stop=["\n"], 
                echo=False, 
                temperature=0.5
               )["choices"][0]["text"]


In [11]:
def get_information_description_prompt(information):
    example_information = "Monster: Kobold\nTrait: Sunlight Sensitivity\nWhile in sunlight, the kobold has disadvantage on attack rolls, as well as on Wisdom (Perception) checks that rely on sight."
    example_result = "The negative effects of sunlight on kobolds."
    example_information2 = "Monster: Aerosaur\nAerosaurs include several varieties of large reptiles with leathery wings, including pterodons and sunwings."
    example_result2 = "Background information about Aerosaurs and subspecies."
    return f"""
### Instruction:
You are a helpful assistant that assists with indexing information. Describe the information that is contained in the following text. Make sure to include important context.
Include any important details that might not be explicitly stated in the text. The information is about Dungeons and Dragons.
Be concise. Answer in a single sentence.

### Input:
{example_information}

### Response:
{example_result}

### Input:
{example_information2}

### Response:
{example_result2}

### Input:
{information}

### Response:

            """

def get_information_retrieval_prompt(query):
    example_query = "How does sunlight effect kobolds?"
    example_result = "Effects of sunlight on kobolds."
    example_query2 = "What is an aerosaur?"
    example_result2 = "Description of an aerosaur."

    return f"""
### Instruction:
You are a helpful assistant that assists with retrieving information. Describe the information that is required to answer the following query relating to monsters.
Do not answer the following query, just describe the information that would be required to answer the query. Do not provide an answer, only a description. Answer in a single sentence.
Be concise.

### Input:
{example_query}

### Response:
{example_result}

### Input:
{example_query2}

### Response:
{example_result2}

### Input:
{query}

### Response:

            """

info = "Hold Breath\nWhile out of water, the octopus can hold its breath for 1 hour."
prompt = get_information_description_prompt(info)
response = get_response_from_model(prompt)
print(response)

prompt = get_information_retrieval_prompt("How long can octopuses hold their breath out of water?")
response = get_response_from_model(prompt)
print(response)

Llama.generate: prefix-match hit

llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.28 ms /    18 runs   (    0.40 ms per token,  2471.51 tokens per second)
llama_print_timings: prompt eval time =   154.26 ms /   109 tokens (    1.42 ms per token,   706.59 tokens per second)
llama_print_timings:        eval time =   234.78 ms /    17 runs   (   13.81 ms per token,    72.41 tokens per second)
llama_print_timings:       total time =   417.39 ms
Llama.generate: prefix-match hit


 The ability of an octopus to stay underwater for an extended period of time.
 Time octopuses can stay out of water and hold their breath.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.06 ms /    15 runs   (    0.40 ms per token,  2475.25 tokens per second)
llama_print_timings: prompt eval time =   191.93 ms /   154 tokens (    1.25 ms per token,   802.36 tokens per second)
llama_print_timings:        eval time =   189.37 ms /    14 runs   (   13.53 ms per token,    73.93 tokens per second)
llama_print_timings:       total time =   404.04 ms


In [12]:
enriched_data = []

with open('./monster_text.json') as f:
    monster_texts = json.load(f)

for monster_text in monster_texts:
    info = ""
    if monster_text.get("monster_name") is not None:
        info += f"Monster: {monster_text.get('monster_name')}\n"
    if monster_text.get("field") is not None:
        info += f"{monster_text.get('field')}: {monster_text.get('trait_name')}\n"
    if monster_text.get("text") is not None:
        info += monster_text.get("text")
    prompt = get_information_description_prompt(info)
    response = get_response_from_model(prompt)
    monster_text["information_description"] = response
    
    print(monster_text["information_description"])

    enriched_data.append(monster_text)
    if len(enriched_data) % 100 == 0:
        print(f"Saving data at {len(enriched_data)}")
        with open('./monster_text_enriched.json', 'w') as f:
            json.dump(enriched_data, f)

 The unusual dietary habits of the Crystal Greatwyrm.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.47 ms /    16 runs   (    0.40 ms per token,  2472.19 tokens per second)
llama_print_timings: prompt eval time =   126.70 ms /    27 tokens (    4.69 ms per token,   213.11 tokens per second)
llama_print_timings:        eval time =   208.38 ms /    15 runs   (   13.89 ms per token,    71.98 tokens per second)
llama_print_timings:       total time =   360.24 ms
Llama.generate: prefix-match hit


 Attack patterns of the Crystal Greatwyrm.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.79 ms /    12 runs   (    0.40 ms per token,  2503.13 tokens per second)
llama_print_timings: prompt eval time =   128.34 ms /    31 tokens (    4.14 ms per token,   241.55 tokens per second)
llama_print_timings:        eval time =   152.66 ms /    11 runs   (   13.88 ms per token,    72.06 tokens per second)
llama_print_timings:       total time =   299.43 ms
Llama.generate: prefix-match hit


 The Crystal Greatwyrm's attack and damage statistics for its bite action.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.47 ms /    19 runs   (    0.39 ms per token,  2543.85 tokens per second)
llama_print_timings: prompt eval time =   141.30 ms /    76 tokens (    1.86 ms per token,   537.86 tokens per second)
llama_print_timings:        eval time =   253.31 ms /    18 runs   (   14.07 ms per token,    71.06 tokens per second)
llama_print_timings:       total time =   423.93 ms
Llama.generate: prefix-match hit


 Attack details of Crystal Greatwyrm's Claw.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.97 ms /    15 runs   (    0.40 ms per token,  2512.14 tokens per second)
llama_print_timings: prompt eval time =   146.13 ms /   122 tokens (    1.20 ms per token,   834.86 tokens per second)
llama_print_timings:        eval time =   199.97 ms /    14 runs   (   14.28 ms per token,    70.01 tokens per second)
llama_print_timings:       total time =   369.49 ms
Llama.generate: prefix-match hit


 The Crystal Greatwyrm's breath weapon attack that deals force damage in a 300-foot cone and knocks creatures prone upon failed saving throw.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    15.35 ms /    38 runs   (    0.40 ms per token,  2475.73 tokens per second)
llama_print_timings: prompt eval time =   198.05 ms /   140 tokens (    1.41 ms per token,   706.91 tokens per second)
llama_print_timings:        eval time =   531.92 ms /    37 runs   (   14.38 ms per token,    69.56 tokens per second)
llama_print_timings:       total time =   790.10 ms
Llama.generate: prefix-match hit


 The Crystal Greatwyrm's ability to use Mass Telekinesis, which allows it to target and move creatures and objects within a certain range, as well as



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    14.76 ms /    36 runs   (    0.41 ms per token,  2438.86 tokens per second)
llama_print_timings: prompt eval time =   235.66 ms /   246 tokens (    0.96 ms per token,  1043.85 tokens per second)
llama_print_timings:        eval time =   518.30 ms /    35 runs   (   14.81 ms per token,    67.53 tokens per second)
llama_print_timings:       total time =   809.94 ms
Llama.generate: prefix-match hit


 The legendary action of the Crystal Greatwyrm, which allows it to make a single Claw attack.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     9.61 ms /    24 runs   (    0.40 ms per token,  2497.40 tokens per second)
llama_print_timings: prompt eval time =   125.15 ms /    26 tokens (    4.81 ms per token,   207.75 tokens per second)
llama_print_timings:        eval time =   319.89 ms /    23 runs   (   13.91 ms per token,    71.90 tokens per second)
llama_print_timings:       total time =   482.61 ms
Llama.generate: prefix-match hit


 The Crystal Greatwyrm's psionic abilities and their associated actions.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.26 ms /    18 runs   (    0.40 ms per token,  2480.02 tokens per second)
llama_print_timings: prompt eval time =   129.00 ms /    36 tokens (    3.58 ms per token,   279.08 tokens per second)
llama_print_timings:        eval time =   236.72 ms /    17 runs   (   13.92 ms per token,    71.81 tokens per second)
llama_print_timings:       total time =   393.42 ms
Llama.generate: prefix-match hit


 The powerful attack of the Crystal Greatwyrm using its Psychic Beam ability.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.64 ms /    19 runs   (    0.40 ms per token,  2485.93 tokens per second)
llama_print_timings: prompt eval time =   145.45 ms /   102 tokens (    1.43 ms per token,   701.28 tokens per second)
llama_print_timings:        eval time =   254.87 ms /    18 runs   (   14.16 ms per token,    70.62 tokens per second)
llama_print_timings:       total time =   429.79 ms
Llama.generate: prefix-match hit


 The ability of Crystal Greatwyrms to change shape into other creatures.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.62 ms /    18 runs   (    0.42 ms per token,  2363.76 tokens per second)
llama_print_timings: prompt eval time =   140.96 ms /    71 tokens (    1.99 ms per token,   503.70 tokens per second)
llama_print_timings:        eval time =   239.44 ms /    17 runs   (   14.08 ms per token,    71.00 tokens per second)
llama_print_timings:       total time =   409.02 ms
Llama.generate: prefix-match hit


 The special ability of the Crystal Greatwyrm, which allows it to teleport to a nearby location.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     9.25 ms /    23 runs   (    0.40 ms per token,  2486.22 tokens per second)
llama_print_timings: prompt eval time =   128.73 ms /    39 tokens (    3.30 ms per token,   302.95 tokens per second)
llama_print_timings:        eval time =   306.94 ms /    22 runs   (   13.95 ms per token,    71.67 tokens per second)
llama_print_timings:       total time =   471.63 ms
Llama.generate: prefix-match hit


 The description and backstory of Crystal Greatwyrms and their connection to Sardior the Ruby Dragon.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     9.71 ms /    24 runs   (    0.40 ms per token,  2471.68 tokens per second)
llama_print_timings: prompt eval time =   200.98 ms /   158 tokens (    1.27 ms per token,   786.16 tokens per second)
llama_print_timings:        eval time =   331.30 ms /    23 runs   (   14.40 ms per token,    69.42 tokens per second)
llama_print_timings:       total time =   569.86 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.67 ms /    14 runs   (    0.40 ms per token,  2470.01 tokens per second)
llama_print_timings: prompt eval time =   146.72 ms /   112 tokens (    1.31 ms per token,   763.37 tokens per second)
llama_print_timings:        eval time =   185.35 ms /    13 runs   (   14.26 ms per token,    70.14 tokens per second)
llama_print_timings:       total time =   353.28 ms
Llama.generate: prefix-match hit


 The description and abilities of a Crystal Greatwyrm.
 Background information about Crystal Greatwyrms and their subspecies.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.04 ms /    15 runs   (    0.40 ms per token,  2482.21 tokens per second)
llama_print_timings: prompt eval time =   199.27 ms /   158 tokens (    1.26 ms per token,   792.89 tokens per second)
llama_print_timings:        eval time =   201.50 ms /    14 runs   (   14.39 ms per token,    69.48 tokens per second)
llama_print_timings:       total time =   424.40 ms
Llama.generate: prefix-match hit


 Attack details of Deep Dragon Wyrmling's bite action.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.77 ms /    17 runs   (    0.40 ms per token,  2509.60 tokens per second)
llama_print_timings: prompt eval time =   138.80 ms /    66 tokens (    2.10 ms per token,   475.51 tokens per second)
llama_print_timings:        eval time =   224.30 ms /    16 runs   (   14.02 ms per token,    71.33 tokens per second)
llama_print_timings:       total time =   389.32 ms
Llama.generate: prefix-match hit


 The dragon's ability to breathe nightmare spores and cause fear in creatures within its cone attack range.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    11.26 ms /    28 runs   (    0.40 ms per token,  2486.24 tokens per second)
llama_print_timings: prompt eval time =   197.55 ms /   149 tokens (    1.33 ms per token,   754.23 tokens per second)
llama_print_timings:        eval time =   388.88 ms /    27 runs   (   14.40 ms per token,    69.43 tokens per second)
llama_print_timings:       total time =   630.94 ms
Llama.generate: prefix-match hit


 The description of Deep Dragons and their lairs in the Underdark, as well as their ability to exhale magical spores.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    12.52 ms /    31 runs   (    0.40 ms per token,  2475.45 tokens per second)
llama_print_timings: prompt eval time =   141.84 ms /    81 tokens (    1.75 ms per token,   571.05 tokens per second)
llama_print_timings:        eval time =   423.99 ms /    30 runs   (   14.13 ms per token,    70.76 tokens per second)
llama_print_timings:       total time =   613.87 ms
Llama.generate: prefix-match hit


 The physical appearance and unique features of Deep Dragon Wyrmlings.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.09 ms /    15 runs   (    0.41 ms per token,  2464.67 tokens per second)
llama_print_timings: prompt eval time =   146.60 ms /    95 tokens (    1.54 ms per token,   648.03 tokens per second)
llama_print_timings:        eval time =   198.13 ms /    14 runs   (   14.15 ms per token,    70.66 tokens per second)
llama_print_timings:       total time =   368.53 ms
Llama.generate: prefix-match hit


 The description and personality traits of Deep Dragon Wyrmlings.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.50 ms /    16 runs   (    0.41 ms per token,  2460.78 tokens per second)
llama_print_timings: prompt eval time =   147.02 ms /    96 tokens (    1.53 ms per token,   652.97 tokens per second)
llama_print_timings:        eval time =   212.80 ms /    15 runs   (   14.19 ms per token,    70.49 tokens per second)
llama_print_timings:       total time =   384.32 ms
Llama.generate: prefix-match hit


 Information about Deep Dragon Wyrmlings, including personality traits, ideals, and spellcasting abilities.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    10.51 ms /    26 runs   (    0.40 ms per token,  2473.13 tokens per second)
llama_print_timings: prompt eval time =   128.71 ms /    61 tokens (    2.11 ms per token,   473.94 tokens per second)
llama_print_timings:        eval time =   351.10 ms /    25 runs   (   14.04 ms per token,    71.21 tokens per second)
llama_print_timings:       total time =   520.38 ms
Llama.generate: prefix-match hit


 The Deep Dragon Adventure Hooks table and its connection to the Deep Dragon Wyrmling.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.43 ms /    21 runs   (    0.40 ms per token,  2489.92 tokens per second)
llama_print_timings: prompt eval time =   127.32 ms /    29 tokens (    4.39 ms per token,   227.77 tokens per second)
llama_print_timings:        eval time =   278.48 ms /    20 runs   (   13.92 ms per token,    71.82 tokens per second)
llama_print_timings:       total time =   438.93 ms
Llama.generate: prefix-match hit


 The behavior and attitude of Deep Dragons towards other creatures.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.69 ms /    14 runs   (    0.41 ms per token,  2459.16 tokens per second)
llama_print_timings: prompt eval time =   128.80 ms /    61 tokens (    2.11 ms per token,   473.60 tokens per second)
llama_print_timings:        eval time =   182.27 ms /    13 runs   (   14.02 ms per token,    71.32 tokens per second)
llama_print_timings:       total time =   332.38 ms
Llama.generate: prefix-match hit


 The Dracohydra's ability to regrow lost heads and its resistance against various status effects.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     9.30 ms /    23 runs   (    0.40 ms per token,  2474.18 tokens per second)
llama_print_timings: prompt eval time =   234.29 ms /   193 tokens (    1.21 ms per token,   823.78 tokens per second)
llama_print_timings:        eval time =   319.62 ms /    22 runs   (   14.53 ms per token,    68.83 tokens per second)
llama_print_timings:       total time =   590.21 ms
Llama.generate: prefix-match hit


 The dracohydra's ability to have additional reactions for its extra heads.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.09 ms /    20 runs   (    0.40 ms per token,  2471.27 tokens per second)
llama_print_timings: prompt eval time =   129.71 ms /    42 tokens (    3.09 ms per token,   323.80 tokens per second)
llama_print_timings:        eval time =   265.34 ms /    19 runs   (   13.97 ms per token,    71.61 tokens per second)
llama_print_timings:       total time =   427.23 ms
Llama.generate: prefix-match hit


 One of its heads remains alert while it sleeps.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.84 ms /    12 runs   (    0.40 ms per token,  2478.31 tokens per second)
llama_print_timings: prompt eval time =   128.45 ms /    35 tokens (    3.67 ms per token,   272.48 tokens per second)
llama_print_timings:        eval time =   153.09 ms /    11 runs   (   13.92 ms per token,    71.86 tokens per second)
llama_print_timings:       total time =   300.22 ms
Llama.generate: prefix-match hit


 The Dracohydra's attack method.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.83 ms /    12 runs   (    0.40 ms per token,  2486.02 tokens per second)
llama_print_timings: prompt eval time =   128.28 ms /    33 tokens (    3.89 ms per token,   257.26 tokens per second)
llama_print_timings:        eval time =   152.82 ms /    11 runs   (   13.89 ms per token,    71.98 tokens per second)
llama_print_timings:       total time =   299.37 ms
Llama.generate: prefix-match hit


 The Dracohydra's bite attack and its ability to choose the type of damage it deals.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    10.03 ms /    25 runs   (    0.40 ms per token,  2493.02 tokens per second)
llama_print_timings: prompt eval time =   141.26 ms /    80 tokens (    1.77 ms per token,   566.33 tokens per second)
llama_print_timings:        eval time =   339.04 ms /    24 runs   (   14.13 ms per token,    70.79 tokens per second)
llama_print_timings:       total time =   519.58 ms
Llama.generate: prefix-match hit


 The Dracohydra's ability to breathe multicolored energy in a 60-foot cone and the effects of that breath on creatures within the area.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    15.79 ms /    39 runs   (    0.40 ms per token,  2470.70 tokens per second)
llama_print_timings: prompt eval time =   196.34 ms /   131 tokens (    1.50 ms per token,   667.22 tokens per second)
llama_print_timings:        eval time =   546.36 ms /    38 runs   (   14.38 ms per token,    69.55 tokens per second)
llama_print_timings:       total time =   803.39 ms
Llama.generate: prefix-match hit


 The origin and description of Dracohydra, including its breath weapon.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.92 ms /    17 runs   (    0.41 ms per token,  2454.87 tokens per second)
llama_print_timings: prompt eval time =   145.16 ms /   116 tokens (    1.25 ms per token,   799.14 tokens per second)
llama_print_timings:        eval time =   227.41 ms /    16 runs   (   14.21 ms per token,    70.36 tokens per second)
llama_print_timings:       total time =   399.19 ms
Llama.generate: prefix-match hit


 The destructive nature and feeding habits of Dracohydras.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.92 ms /    17 runs   (    0.41 ms per token,  2457.00 tokens per second)
llama_print_timings: prompt eval time =   141.61 ms /    79 tokens (    1.79 ms per token,   557.85 tokens per second)
llama_print_timings:        eval time =   226.50 ms /    16 runs   (   14.16 ms per token,    70.64 tokens per second)
llama_print_timings:       total time =   396.24 ms
Llama.generate: prefix-match hit


 The potential relationship between a dracohydra and its creator.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.49 ms /    16 runs   (    0.41 ms per token,  2464.95 tokens per second)
llama_print_timings: prompt eval time =   129.60 ms /    43 tokens (    3.01 ms per token,   331.79 tokens per second)
llama_print_timings:        eval time =   209.27 ms /    15 runs   (   13.95 ms per token,    71.68 tokens per second)
llama_print_timings:       total time =   363.43 ms
Llama.generate: prefix-match hit


 The effect of the Draconian's death throes on nearby creatures.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.30 ms /    18 runs   (    0.41 ms per token,  2466.09 tokens per second)
llama_print_timings: prompt eval time =   145.85 ms /   103 tokens (    1.42 ms per token,   706.20 tokens per second)
llama_print_timings:        eval time =   240.62 ms /    17 runs   (   14.15 ms per token,    70.65 tokens per second)
llama_print_timings:       total time =   415.04 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.70 ms /    14 runs   (    0.41 ms per token,  2455.71 tokens per second)
llama_print_timings: prompt eval time =   128.22 ms /    34 tokens (    3.77 ms per token,   265.17 tokens per second)
llama_print_timings:        eval time =   180.72 ms /    13 runs   (   13.90 ms per token,    71.93 tokens per second)
llama_print_timings:       total time =   330.83 ms
Llama.generate: prefix-match hit


 Attack options of the Draconian Dreadnought.
 The Draconian Dreadnought's weapon attack and its damage statistics.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.26 ms /    18 runs   (    0.40 ms per token,  2480.02 tokens per second)
llama_print_timings: prompt eval time =   128.75 ms /    60 tokens (    2.15 ms per token,   466.03 tokens per second)
llama_print_timings:        eval time =   238.50 ms /    17 runs   (   14.03 ms per token,    71.28 tokens per second)
llama_print_timings:       total time =   394.96 ms
Llama.generate: prefix-match hit


 The Draconian Dreadnought's tail attack and its effects on targets.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.69 ms /    19 runs   (    0.40 ms per token,  2472.35 tokens per second)
llama_print_timings: prompt eval time =   146.37 ms /    93 tokens (    1.57 ms per token,   635.38 tokens per second)
llama_print_timings:        eval time =   255.08 ms /    18 runs   (   14.17 ms per token,    70.57 tokens per second)
llama_print_timings:       total time =   430.52 ms
Llama.generate: prefix-match hit


 The Draconian Dreadnought's physical appearance, magical abilities, and method of death.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     9.74 ms /    24 runs   (    0.41 ms per token,  2463.31 tokens per second)
llama_print_timings: prompt eval time =   147.40 ms /   117 tokens (    1.26 ms per token,   793.77 tokens per second)
llama_print_timings:        eval time =   328.30 ms /    23 runs   (   14.27 ms per token,    70.06 tokens per second)
llama_print_timings:       total time =   515.26 ms
Llama.generate: prefix-match hit


 The origin and classification of Draconian Dreadnoughts on the world of Krynn.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.87 ms /    22 runs   (    0.40 ms per token,  2480.83 tokens per second)
llama_print_timings: prompt eval time =   129.52 ms /    39 tokens (    3.32 ms per token,   301.12 tokens per second)
llama_print_timings:        eval time =   293.56 ms /    21 runs   (   13.98 ms per token,    71.54 tokens per second)
llama_print_timings:       total time =   457.68 ms
Llama.generate: prefix-match hit


 The description and origin of Draconians.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.04 ms /    10 runs   (    0.40 ms per token,  2475.86 tokens per second)
llama_print_timings: prompt eval time =   146.73 ms /   111 tokens (    1.32 ms per token,   756.50 tokens per second)
llama_print_timings:        eval time =   127.81 ms /     9 runs   (   14.20 ms per token,    70.42 tokens per second)
llama_print_timings:       total time =   289.62 ms
Llama.generate: prefix-match hit


 The Draconian Dreadnought's ability to unleash magical violence after death.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.89 ms /    22 runs   (    0.40 ms per token,  2475.80 tokens per second)
llama_print_timings: prompt eval time =   128.84 ms /    38 tokens (    3.39 ms per token,   294.93 tokens per second)
llama_print_timings:        eval time =   293.48 ms /    21 runs   (   13.98 ms per token,    71.55 tokens per second)
llama_print_timings:       total time =   456.58 ms
Llama.generate: prefix-match hit


 The ability of a Draconian Foot Soldier to survive falls.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.47 ms /    16 runs   (    0.40 ms per token,  2472.19 tokens per second)
llama_print_timings: prompt eval time =   128.13 ms /    59 tokens (    2.17 ms per token,   460.45 tokens per second)
llama_print_timings:        eval time =   209.94 ms /    15 runs   (   14.00 ms per token,    71.45 tokens per second)
llama_print_timings:       total time =   362.93 ms
Llama.generate: prefix-match hit


 The death throes ability of a draconian foot soldier.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.64 ms /    14 runs   (    0.40 ms per token,  2480.51 tokens per second)
llama_print_timings: prompt eval time =   198.37 ms /   141 tokens (    1.41 ms per token,   710.81 tokens per second)
llama_print_timings:        eval time =   186.21 ms /    13 runs   (   14.32 ms per token,    69.81 tokens per second)
llama_print_timings:       total time =   406.80 ms
Llama.generate: prefix-match hit


 The Draconian Foot Soldier's combat actions.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.16 ms /    13 runs   (    0.40 ms per token,  2519.38 tokens per second)
llama_print_timings: prompt eval time =   125.47 ms /    27 tokens (    4.65 ms per token,   215.20 tokens per second)
llama_print_timings:        eval time =   166.54 ms /    12 runs   (   13.88 ms per token,    72.05 tokens per second)
llama_print_timings:       total time =   311.53 ms
Llama.generate: prefix-match hit


 The Draconian Foot Soldier's attack and damage statistics with a shortsword.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.89 ms /    20 runs   (    0.39 ms per token,  2533.89 tokens per second)
llama_print_timings: prompt eval time =   127.14 ms /    57 tokens (    2.23 ms per token,   448.33 tokens per second)
llama_print_timings:        eval time =   266.80 ms /    19 runs   (   14.04 ms per token,    71.21 tokens per second)
llama_print_timings:       total time =   424.54 ms
Llama.generate: prefix-match hit


 The description and abilities of Draconian Foot Soldiers.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.70 ms /    14 runs   (    0.41 ms per token,  2457.87 tokens per second)
llama_print_timings: prompt eval time =   197.85 ms /   145 tokens (    1.36 ms per token,   732.86 tokens per second)
llama_print_timings:        eval time =   186.72 ms /    13 runs   (   14.36 ms per token,    69.62 tokens per second)
llama_print_timings:       total time =   407.14 ms
Llama.generate: prefix-match hit


 The origin and classification of Draconian Foot Soldiers on the world of Krynn.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.03 ms /    20 runs   (    0.40 ms per token,  2491.28 tokens per second)
llama_print_timings: prompt eval time =   128.68 ms /    37 tokens (    3.48 ms per token,   287.54 tokens per second)
llama_print_timings:        eval time =   265.80 ms /    19 runs   (   13.99 ms per token,    71.48 tokens per second)
llama_print_timings:       total time =   425.47 ms
Llama.generate: prefix-match hit


 The origin and characteristics of Draconians.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.10 ms /    10 runs   (    0.41 ms per token,  2439.62 tokens per second)
llama_print_timings: prompt eval time =   146.30 ms /   111 tokens (    1.32 ms per token,   758.73 tokens per second)
llama_print_timings:        eval time =   128.34 ms /     9 runs   (   14.26 ms per token,    70.13 tokens per second)
llama_print_timings:       total time =   290.91 ms
Llama.generate: prefix-match hit


 The explosive death of draconians.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.03 ms /    10 runs   (    0.40 ms per token,  2482.62 tokens per second)
llama_print_timings: prompt eval time =   129.51 ms /    38 tokens (    3.41 ms per token,   293.41 tokens per second)
llama_print_timings:        eval time =   125.84 ms /     9 runs   (   13.98 ms per token,    71.52 tokens per second)
llama_print_timings:       total time =   271.01 ms
Llama.generate: prefix-match hit


 The death throes ability of a draconian when it reaches 0 hit points.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.64 ms /    19 runs   (    0.40 ms per token,  2487.24 tokens per second)
llama_print_timings: prompt eval time =   198.35 ms /   142 tokens (    1.40 ms per token,   715.90 tokens per second)
llama_print_timings:        eval time =   257.80 ms /    18 runs   (   14.32 ms per token,    69.82 tokens per second)
llama_print_timings:       total time =   486.22 ms
Llama.generate: prefix-match hit


 The draconian's ability to survive falls and move while falling.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.85 ms /    17 runs   (    0.40 ms per token,  2480.30 tokens per second)
llama_print_timings: prompt eval time =   141.19 ms /    72 tokens (    1.96 ms per token,   509.96 tokens per second)
llama_print_timings:        eval time =   224.95 ms /    16 runs   (   14.06 ms per token,    71.13 tokens per second)
llama_print_timings:       total time =   392.26 ms
Llama.generate: prefix-match hit


 The Draconian Infiltrator's special ability to poison and paralyze its target with a successful multiattack.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    11.29 ms /    28 runs   (    0.40 ms per token,  2480.95 tokens per second)
llama_print_timings: prompt eval time =   143.99 ms /    88 tokens (    1.64 ms per token,   611.16 tokens per second)
llama_print_timings:        eval time =   382.67 ms /    27 runs   (   14.17 ms per token,    70.56 tokens per second)
llama_print_timings:       total time =   569.45 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    12.50 ms /    31 runs   (    0.40 ms per token,  2479.21 tokens per second)
llama_print_timings: prompt eval time =   141.81 ms /    82 tokens (    1.73 ms per token,   578.23 tokens per second)
llama_print_timings:        eval time =   424.47 ms /    30 runs   (   14.15 ms per token,    70.68 tokens per second)
llama_print_timings:       total time =   614.08 ms
Llama.generate: prefix-match hit


 The Draconian Infiltrator's attack with a dagger, including its attack bonus, hit range, damage, and poison damage.
 The origin and abilities of Draconian Infiltrators.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.08 ms /    15 runs   (    0.41 ms per token,  2467.11 tokens per second)
llama_print_timings: prompt eval time =   146.66 ms /   113 tokens (    1.30 ms per token,   770.51 tokens per second)
llama_print_timings:        eval time =   199.71 ms /    14 runs   (   14.26 ms per token,    70.10 tokens per second)
llama_print_timings:       total time =   370.19 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.88 ms /    22 runs   (    0.40 ms per token,  2478.31 tokens per second)
llama_print_timings: prompt eval time =   129.79 ms /    39 tokens (    3.33 ms per token,   300.49 tokens per second)
llama_print_timings:        eval time =   294.35 ms /    21 runs   (   14.02 ms per token,    71.34 tokens per second)
llama_print_timings:       total time =   459.67 ms
Llama.generate: prefix-match hit


 The origin and subspecies of Draconian Infiltrators on the world of Krynn.
 The description and origin of Draconians.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.06 ms /    10 runs   (    0.41 ms per token,  2464.88 tokens per second)
llama_print_timings: prompt eval time =   146.70 ms /   111 tokens (    1.32 ms per token,   756.66 tokens per second)
llama_print_timings:        eval time =   128.35 ms /     9 runs   (   14.26 ms per token,    70.12 tokens per second)
llama_print_timings:       total time =   291.63 ms
Llama.generate: prefix-match hit


 The explosive death of draconians and the magical violence that follows.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.89 ms /    17 runs   (    0.41 ms per token,  2466.99 tokens per second)
llama_print_timings: prompt eval time =   128.52 ms /    38 tokens (    3.38 ms per token,   295.68 tokens per second)
llama_print_timings:        eval time =   223.17 ms /    16 runs   (   13.95 ms per token,    71.69 tokens per second)
llama_print_timings:       total time =   377.89 ms
Llama.generate: prefix-match hit


 The death throes ability of a draconian mage when it reaches 0 hit points.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.50 ms /    21 runs   (    0.40 ms per token,  2469.72 tokens per second)
llama_print_timings: prompt eval time =   142.65 ms /    91 tokens (    1.57 ms per token,   637.91 tokens per second)
llama_print_timings:        eval time =   282.45 ms /    20 runs   (   14.12 ms per token,    70.81 tokens per second)
llama_print_timings:       total time =   457.57 ms
Llama.generate: prefix-match hit


 The ability of a Draconian Mage to survive falls and glide.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.20 ms /    18 runs   (    0.40 ms per token,  2500.69 tokens per second)
llama_print_timings: prompt eval time =   140.84 ms /    72 tokens (    1.96 ms per token,   511.22 tokens per second)
llama_print_timings:        eval time =   239.26 ms /    17 runs   (   14.07 ms per token,    71.05 tokens per second)
llama_print_timings:       total time =   407.80 ms
Llama.generate: prefix-match hit


 Attack options for a Draconian Mage.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.83 ms /    12 runs   (    0.40 ms per token,  2481.90 tokens per second)
llama_print_timings: prompt eval time =   128.26 ms /    32 tokens (    4.01 ms per token,   249.50 tokens per second)
llama_print_timings:        eval time =   153.38 ms /    11 runs   (   13.94 ms per token,    71.72 tokens per second)
llama_print_timings:       total time =   300.92 ms
Llama.generate: prefix-match hit


 The Draconian Mage's trident attack.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.21 ms /    13 runs   (    0.40 ms per token,  2494.24 tokens per second)
llama_print_timings: prompt eval time =   143.86 ms /    98 tokens (    1.47 ms per token,   681.22 tokens per second)
llama_print_timings:        eval time =   170.23 ms /    12 runs   (   14.19 ms per token,    70.49 tokens per second)
llama_print_timings:       total time =   334.66 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.40 ms /    21 runs   (    0.40 ms per token,  2500.89 tokens per second)
llama_print_timings: prompt eval time =   130.43 ms /    59 tokens (    2.21 ms per token,   452.37 tokens per second)
llama_print_timings:        eval time =   280.41 ms /    20 runs   (   14.02 ms per token,    71.33 tokens per second)
llama_print_timings:       total time =   442.96 ms
Llama.generate: prefix-match hit


 The Draconian Mage's ranged attack that deals necrotic damage.
 The magical abilities and physical characteristics of Draconian Mages.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.50 ms /    16 runs   (    0.41 ms per token,  2463.43 tokens per second)
llama_print_timings: prompt eval time =   142.94 ms /    87 tokens (    1.64 ms per token,   608.67 tokens per second)
llama_print_timings:        eval time =   211.57 ms /    15 runs   (   14.10 ms per token,    70.90 tokens per second)
llama_print_timings:       total time =   379.45 ms
Llama.generate: prefix-match hit


 The explosion of magical splinters upon the death of a draconian mage.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.53 ms /    21 runs   (    0.41 ms per token,  2463.34 tokens per second)
llama_print_timings: prompt eval time =   130.01 ms /    45 tokens (    2.89 ms per token,   346.12 tokens per second)
llama_print_timings:        eval time =   279.67 ms /    20 runs   (   13.98 ms per token,    71.51 tokens per second)
llama_print_timings:       total time =   442.11 ms
Llama.generate: prefix-match hit


 The origin and classification of draconian mages on the fictional world of Krynn.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.44 ms /    21 runs   (    0.40 ms per token,  2489.63 tokens per second)
llama_print_timings: prompt eval time =   128.46 ms /    36 tokens (    3.57 ms per token,   280.25 tokens per second)
llama_print_timings:        eval time =   278.74 ms /    20 runs   (   13.94 ms per token,    71.75 tokens per second)
llama_print_timings:       total time =   439.90 ms
Llama.generate: prefix-match hit


 The origin and description of Draconians.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.05 ms /    10 runs   (    0.41 ms per token,  2467.31 tokens per second)
llama_print_timings: prompt eval time =   146.86 ms /   111 tokens (    1.32 ms per token,   755.81 tokens per second)
llama_print_timings:        eval time =   127.75 ms /     9 runs   (   14.19 ms per token,    70.45 tokens per second)
llama_print_timings:       total time =   290.08 ms
Llama.generate: prefix-match hit


 The fact that draconians explode when they die and release magical energy.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.30 ms /    18 runs   (    0.41 ms per token,  2465.08 tokens per second)
llama_print_timings: prompt eval time =   128.64 ms /    38 tokens (    3.39 ms per token,   295.40 tokens per second)
llama_print_timings:        eval time =   236.57 ms /    17 runs   (   13.92 ms per token,    71.86 tokens per second)
llama_print_timings:       total time =   392.92 ms
Llama.generate: prefix-match hit


 The draconian's ability to deal damage and cause the stunning effect when it dies.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.58 ms /    21 runs   (    0.41 ms per token,  2447.27 tokens per second)
llama_print_timings: prompt eval time =   200.22 ms /   158 tokens (    1.27 ms per token,   789.14 tokens per second)
llama_print_timings:        eval time =   287.36 ms /    20 runs   (   14.37 ms per token,    69.60 tokens per second)
llama_print_timings:       total time =   520.43 ms
Llama.generate: prefix-match hit


 One of the Draconian Mastermind's attack options.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     5.67 ms /    14 runs   (    0.41 ms per token,  2467.83 tokens per second)
llama_print_timings: prompt eval time =   127.58 ms /    29 tokens (    4.40 ms per token,   227.31 tokens per second)
llama_print_timings:        eval time =   180.46 ms /    13 runs   (   13.88 ms per token,    72.04 tokens per second)
llama_print_timings:       total time =   329.86 ms
Llama.generate: prefix-match hit


 The Draconian Mastermind's melee attack that deals slashing damage.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     7.69 ms /    19 runs   (    0.40 ms per token,  2472.35 tokens per second)
llama_print_timings: prompt eval time =   131.07 ms /    56 tokens (    2.34 ms per token,   427.26 tokens per second)
llama_print_timings:        eval time =   252.26 ms /    18 runs   (   14.01 ms per token,    71.35 tokens per second)
llama_print_timings:       total time =   412.20 ms
Llama.generate: prefix-match hit


 An attack made by the Draconian Mastermind that deals force damage.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.79 ms /    17 runs   (    0.40 ms per token,  2504.42 tokens per second)
llama_print_timings: prompt eval time =   131.11 ms /    56 tokens (    2.34 ms per token,   427.12 tokens per second)
llama_print_timings:        eval time =   224.00 ms /    16 runs   (   14.00 ms per token,    71.43 tokens per second)
llama_print_timings:       total time =   381.47 ms
Llama.generate: prefix-match hit


 The draconian's ability to breathe noxious gas in a 15-foot cone, causing poison damage and the level of exhaustion on a failed Constitution saving throw.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    16.67 ms /    41 runs   (    0.41 ms per token,  2459.66 tokens per second)
llama_print_timings: prompt eval time =   197.21 ms /   137 tokens (    1.44 ms per token,   694.69 tokens per second)
llama_print_timings:        eval time =   574.90 ms /    40 runs   (   14.37 ms per token,    69.58 tokens per second)
llama_print_timings:       total time =   836.85 ms
Llama.generate: prefix-match hit


 The Draconian Mastermind is a rare and powerful type of draconian that emerges from gold, red or amethyst dragon eggs. They are spellcasters and strategists who often serve as military commanders or advisors to their creators. They have a breath weapon - a poisonous gas cloud, formidable claws and teeth used in close combat, and are known for their intelligence and cunning.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    36.36 ms /    90 runs   (    0.40 ms per token,  2474.98 tokens per second)
llama_print_timings: prompt eval time =   197.57 ms /   137 tokens (    1.44 ms per token,   693.41 tokens per second)
llama_print_timings:        eval time =  1287.11 ms /    89 runs   (   14.46 ms per token,    69.15 tokens per second)
llama_print_timings:       total time =  1627.87 ms
Llama.generate: prefix-match hit


 The effect of the dying process on Draconian Masterminds and its impact on others.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.09 ms /    20 runs   (    0.40 ms per token,  2472.80 tokens per second)
llama_print_timings: prompt eval time =   130.46 ms /    47 tokens (    2.78 ms per token,   360.26 tokens per second)
llama_print_timings:        eval time =   265.97 ms /    19 runs   (   14.00 ms per token,    71.44 tokens per second)
llama_print_timings:       total time =   427.26 ms
Llama.generate: prefix-match hit


 The origin and background information about Aurak Draconians on the world of Krynn.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     8.08 ms /    20 runs   (    0.40 ms per token,  2474.94 tokens per second)
llama_print_timings: prompt eval time =   129.15 ms /    37 tokens (    3.49 ms per token,   286.49 tokens per second)
llama_print_timings:        eval time =   264.86 ms /    19 runs   (   13.94 ms per token,    71.74 tokens per second)
llama_print_timings:       total time =   424.68 ms
Llama.generate: prefix-match hit


 Background information about Draconians and their origin.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.46 ms /    11 runs   (    0.41 ms per token,  2466.92 tokens per second)
llama_print_timings: prompt eval time =   147.08 ms /   111 tokens (    1.33 ms per token,   754.71 tokens per second)
llama_print_timings:        eval time =   142.04 ms /    10 runs   (   14.20 ms per token,    70.40 tokens per second)
llama_print_timings:       total time =   306.50 ms
Llama.generate: prefix-match hit


 The violent nature of draconian deaths.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.46 ms /    11 runs   (    0.41 ms per token,  2469.14 tokens per second)
llama_print_timings: prompt eval time =   128.54 ms /    38 tokens (    3.38 ms per token,   295.63 tokens per second)
llama_print_timings:        eval time =   139.02 ms /    10 runs   (   13.90 ms per token,    71.93 tokens per second)
llama_print_timings:       total time =   284.26 ms
Llama.generate: prefix-match hit


 The draconic shard's defensive ability that grants it an AC bonus based on its Intelligence modifier.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    10.86 ms /    27 runs   (    0.40 ms per token,  2486.65 tokens per second)
llama_print_timings: prompt eval time =   128.81 ms /    33 tokens (    3.90 ms per token,   256.20 tokens per second)
llama_print_timings:        eval time =   361.43 ms /    26 runs   (   13.90 ms per token,    71.94 tokens per second)
llama_print_timings:       total time =   531.81 ms
Llama.generate: prefix-match hit


 The draconic shard's ability to pass through creatures and objects and the damage it takes when ending its turn inside an object.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    12.15 ms /    30 runs   (    0.41 ms per token,  2468.32 tokens per second)
llama_print_timings: prompt eval time =   140.12 ms /    66 tokens (    2.12 ms per token,   471.02 tokens per second)
llama_print_timings:        eval time =   408.61 ms /    29 runs   (   14.09 ms per token,    70.97 tokens per second)
llama_print_timings:       total time =   595.41 ms
Llama.generate: prefix-match hit


 The legendary resistance trait of a draconic shard and its ability to choose to succeed on a failed saving throw.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    10.46 ms /    26 runs   (    0.40 ms per token,  2484.71 tokens per second)
llama_print_timings: prompt eval time =   128.44 ms /    37 tokens (    3.47 ms per token,   288.07 tokens per second)
llama_print_timings:        eval time =   348.58 ms /    25 runs   (   13.94 ms per token,    71.72 tokens per second)
llama_print_timings:       total time =   517.36 ms
Llama.generate: prefix-match hit


 The draconic shard's ability to regenerate and leave a gemstone when it dies.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     9.24 ms /    23 runs   (    0.40 ms per token,  2488.91 tokens per second)
llama_print_timings: prompt eval time =   195.72 ms /   137 tokens (    1.43 ms per token,   700.00 tokens per second)
llama_print_timings:        eval time =   315.58 ms /    22 runs   (   14.34 ms per token,    69.71 tokens per second)
llama_print_timings:       total time =   546.62 ms
Llama.generate: prefix-match hit


 The unusual needs of a draconic shard.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     4.84 ms /    12 runs   (    0.40 ms per token,  2481.39 tokens per second)
llama_print_timings: prompt eval time =   128.09 ms /    31 tokens (    4.13 ms per token,   242.02 tokens per second)
llama_print_timings:        eval time =   152.76 ms /    11 runs   (   13.89 ms per token,    72.01 tokens per second)
llama_print_timings:       total time =   298.74 ms
Llama.generate: prefix-match hit


 The Draconic Shard's attack options and special ability.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =     6.06 ms /    15 runs   (    0.40 ms per token,  2473.61 tokens per second)
llama_print_timings: prompt eval time =   126.11 ms /    28 tokens (    4.50 ms per token,   222.02 tokens per second)
llama_print_timings:        eval time =   194.35 ms /    14 runs   (   13.88 ms per token,    72.03 tokens per second)
llama_print_timings:       total time =   343.44 ms
Llama.generate: prefix-match hit


 The Draconic Shard's telekinetic attack that deals force damage to a single target within reach or at range.



llama_print_timings:        load time =   262.94 ms
llama_print_timings:      sample time =    11.21 ms /    28 runs   (    0.40 ms per token,  2496.66 tokens per second)
llama_print_timings: prompt eval time =   140.57 ms /    70 tokens (    2.01 ms per token,   497.96 tokens per second)
llama_print_timings:        eval time =   380.13 ms /    27 runs   (   14.08 ms per token,    71.03 tokens per second)
llama_print_timings:       total time =   563.71 ms


ValueError: Requested tokens (541) exceed context window of 512