In [1]:
%set_env CMAKE_ARGS="-DLLAMA_CUBLAS=on"  
%pip install -U openai sentence-transformers tokenizers python-dotenv llama-cpp-python

env: CMAKE_ARGS="-DLLAMA_CUBLAS=on"
Collecting tokenizers
  Obtaining dependency information for tokenizers from https://files.pythonhosted.org/packages/57/bd/45b5ef6b088880779f70acf60027f7043ca5fa1b98f4a4345cf3aea09044/tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Using cached tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Note: you may need to restart the kernel to use updated packages.


In [19]:
import os
import openai
from dotenv import load_dotenv
from llama_cpp import Llama
import json

# Load variables from a local .env file (if any) before reading the environment.
load_dotenv()

# A missing OPENAI_API_KEY raises KeyError right here rather than later on.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY

# Quantized GGUF weights used by this notebook.
# Smaller alternative: "./model/nous-hermes-llama-2-7b-q4.gguf"
GGUF_MODEL_PATH = "./model/nous-hermes-llama2-13b-q4.gguf"

# Context window requested from llama.cpp; also reused as the generation cap.
MAX_CONTEXT = 4096

llm = Llama(
    model_path=GGUF_MODEL_PATH,
    # NOTE(review): 999 presumably exceeds the model's layer count so that every
    # layer is offloaded to the GPU - confirm against the llama-cpp-python docs.
    n_gpu_layers=999,
    n_ctx=MAX_CONTEXT,
    verbose=False,
    use_mmap=False,
)


llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from ./model/nous-hermes-llama2-13b-q4.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  5120, 32032,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q4_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q4_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q4_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q4_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_down.weight q5_K     [ 13824,  5120,    

In [20]:
def get_response_from_model(prompt):
    """Run `prompt` through the module-level `llm` and return the generated text.

    The completion is requested with `max_tokens=MAX_CONTEXT`, `echo=False`
    (the prompt is not repeated in the output) and `temperature=0.5`. No stop
    sequence is passed.

    Args:
        prompt: The full prompt string to complete.

    Returns:
        The `"text"` field of the first entry in the completion's `"choices"`.
    """
    completion = llm(
        prompt,
        max_tokens=MAX_CONTEXT,
        echo=False,
        temperature=0.5,
    )
    first_choice = completion["choices"][0]
    return first_choice["text"]


In [21]:
def get_information_description_prompt(information):
    """Build the few-shot prompt that asks the model to summarize `information`.

    The prompt consists of an instruction block, two worked Input/Response
    examples, and finally `information` as the last Input followed by an open
    "### Response:" header for the model to complete.

    The prompt ends with exactly "### Response:" and one newline, so the model
    continues at the same position where the example answers begin. The closing
    quotes of the template must therefore stay at column 0 on the line directly
    after the header; indenting them (or leaving a blank line) appends stray
    whitespace to every prompt.

    Args:
        information: Text to summarize, inserted verbatim as the final Input.

    Returns:
        The complete prompt string.
    """
    example_information = "Monster: Kobold\nTrait: Sunlight Sensitivity\nWhile in sunlight, the kobold has disadvantage on attack rolls, as well as on Wisdom (Perception) checks that rely on sight."
    example_result = "Information about the negative effects of sunlight on kobolds."
    example_information2 = "Monster: Aerosaur\nAerosaurs include several varieties of large reptiles with leathery wings, including pterodons and sunwings."
    example_result2 = "Background information about Aerosaurs and subspecies."
    return f"""
### Instruction:
You are a helpful assistant that assists with indexing information. Summarize the information that is contained in the following text. 
Make sure to include important context that may not be explicit in the information. The information is about Dungeons and Dragons.
Answer in a few sentences. Be concise.

### Input:
{example_information}

### Response:
{example_result}

### Input:
{example_information2}

### Response:
{example_result2}

### Input:
{information}

### Response:
"""

def get_information_retrieval_prompt(query):
    """Build the few-shot prompt that asks the model to describe what `query` needs.

    The model is instructed to describe the information required to answer the
    query, not to answer it. The prompt consists of an instruction block, two
    worked Input/Response examples, and finally `query` as the last Input
    followed by an open "### Response:" header for the model to complete.

    The prompt ends with exactly "### Response:" and one newline, so the model
    continues at the same position where the example answers begin. The closing
    quotes of the template must therefore stay at column 0 on the line directly
    after the header; indenting them (or leaving a blank line) appends stray
    whitespace to every prompt.

    Args:
        query: The user question, inserted verbatim as the final Input.

    Returns:
        The complete prompt string.
    """
    example_query = "How does sunlight effect kobolds?"
    example_result = "Effects of sunlight on kobolds."
    example_query2 = "What is an aerosaur?"
    example_result2 = "Description of an aerosaur."

    return f"""
### Instruction:
You are a helpful assistant that assists with retrieving information. Describe the information that is required to answer the following query relating to monsters.
Do not answer the following query, just describe the information that would be required to answer the query. Do not provide an answer, only a description. 
Answer in a few sentences. Be concise.

### Input:
{example_query}

### Response:
{example_result}

### Input:
{example_query2}

### Response:
{example_result2}

### Input:
{query}

### Response:
"""

# Smoke test: send one sample through each prompt builder and print what the model returns.
info = "Monster:Octopus\nTrait: Hold Breath\nWhile out of water, the octopus can hold its breath for 1 hour."

# The indexing-side prompt comes first, then the query-side prompt for a matching question.
for prompt in (
    get_information_description_prompt(info),
    get_information_retrieval_prompt("How long can octopuses hold their breath out of water?"),
):
    response = get_response_from_model(prompt)
    print(response)

 Information about an octopus' ability to hold its breath outside of water.
 Length of time octopuses can hold their breath out of water.


In [22]:
# Input records, resumable output file, and how often to checkpoint.
MONSTER_TEXT_PATH = './monster_text.json'
ENRICHED_PATH = './monster_text_enriched.json'
CHECKPOINT_EVERY = 50


def _load_enriched(path):
    """Return the records saved by a previous run, or [] when no file exists yet."""
    if not os.path.exists(path):
        # First run: nothing to resume from.
        return []
    with open(path, encoding="utf-8") as ef:
        return json.load(ef)


def _save_enriched(records, path):
    """Write `records` as JSON to `path` without ever leaving a half-written file.

    The data is dumped to a sibling temp file first and then moved into place
    with os.replace, so an interrupt during the dump cannot truncate the
    existing checkpoint.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'w', encoding="utf-8") as f:
        json.dump(records, f)
    os.replace(tmp_path, path)


with open(MONSTER_TEXT_PATH, encoding="utf-8") as f:
    monster_texts = json.load(f)

enriched_data = _load_enriched(ENRICHED_PATH)

# Resume point: the slice below assumes the saved records correspond, in order,
# to the first len(enriched_data) entries of monster_texts.
starting_from = len(enriched_data)

for index, monster_text in enumerate(monster_texts[starting_from:]):
    print(f"processing {index + starting_from}")
    monster_name = monster_text.get("monster_name")
    field = monster_text.get("field")
    trait_name = monster_text.get("trait_name")
    text = monster_text.get("text")

    # Assemble the model input from whichever parts this record provides.
    info = ""
    if monster_name:
        info += f"Monster: {monster_name}\n"
    if field and trait_name:
        info += f"{field}: {trait_name}\n"
    if text:
        info += text

    prompt = get_information_description_prompt(info)
    response = get_response_from_model(prompt)
    # The record is annotated in place and then appended to the output list.
    monster_text["information_description"] = response

    print(response)

    enriched_data.append(monster_text)
    # Periodic checkpoint so a crash loses at most CHECKPOINT_EVERY - 1 records.
    if len(enriched_data) % CHECKPOINT_EVERY == 0:
        print(f"Saving data at {len(enriched_data)}")
        _save_enriched(enriched_data, ENRICHED_PATH)

# Save the final enriched data outside of the loop
_save_enriched(enriched_data, ENRICHED_PATH)


processing 2800
 Background information about Walnut Dankgrass, her upbringing in an all-female clan of druids, healers, and rangers, and how tragedy struck her enclave.
processing 2801
 Information about Walnut Dankgrass's background and motivations.
processing 2802
 Information about Walnut's personality and beliefs.
processing 2803
 Information about Amonkhet Mummies and their background in the world of Magic: The Gathering.
processing 2804
 Information about the Amonkhet Mummy's role and the consequences of not being chosen by the God-Pharaoh.
processing 2805
 Information about the burial process of the Anointed in Amonkhet and their transformation into mummies after death.
processing 2806
 Information about the Curse of Wandering in the desert lands, the risks associated with dying in the desert, the existence of zombies, and the possibility of intelligent and magical Amonkhet Mummy Lords.
processing 2807
 Description of Amonkhet Sphinx's appearance and origin.
processing 2808
 In