In [1]:
import json
import os
import pickle
from textwrap import dedent

import lancedb
import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from tqdm.notebook import tqdm
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextStreamer,
    pipeline,
)


In [2]:
# Version of the Eberron artefact to load; it selects the `v<version>` sub-folder of the artefact root.
ARTEFACT_VERSION = '02'

In [3]:
# Root directory that holds every artefact; the ARTEFACT_ROOT_FOLDER environment
# variable overrides the '/artefact' default.
ARTEFACT_ROOT_FOLDER = os.getenv('ARTEFACT_ROOT_FOLDER', '/artefact')
# Folder of the selected artefact version: <root>/eberron/v<ARTEFACT_VERSION>.
ARTEFACT_FOLDER = os.path.join(ARTEFACT_ROOT_FOLDER, 'eberron', 'v' + ARTEFACT_VERSION)

In [4]:
# Hugging Face cache root. When HF_HOME is not set, fall back to the library's documented
# default (<XDG_CACHE_HOME or ~/.cache>/huggingface) so that later os.path.join calls
# never receive None.
HF_HOME = os.environ.get('HF_HOME') or os.path.join(
    os.environ.get('XDG_CACHE_HOME') or os.path.join(os.path.expanduser('~'), '.cache'),
    'huggingface',
)

# Load the Artefact

In [5]:
# Load the artefact metadata. The JSON file is tried first; the pickle file is only
# read when the JSON file does not exist.
try:
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(os.path.join(ARTEFACT_FOLDER, 'model_metadata.json'), 'r', encoding='utf-8') as f:
        model_metadata = json.load(f)
except FileNotFoundError:
    # SECURITY: pickle.load can execute arbitrary code. Only load artefacts you produced yourself.
    with open(os.path.join(ARTEFACT_FOLDER, 'model_metadata.pkl'), 'rb') as f:
        model_metadata = pickle.load(f)

# Later cells build a SentenceTransformer from this metadata, so fail early on anything else.
assert model_metadata['embedding_model']['str'].startswith('SentenceTransformer'), (
    f"Unsupported embedding model: {model_metadata['embedding_model']['str']!r}"
)

In [6]:
# Load the precomputed embeddings in whichever format the artefact declares.
# Note: only the 'lancedb' branch defines `table`, which retrieve_baseline searches.
if model_metadata['embedding_format'] == 'pickle':
    # SECURITY: pickle.load can execute arbitrary code. Only load artefacts you produced yourself.
    with open(os.path.join(ARTEFACT_FOLDER, 'embeddings.pkl'), 'rb') as f:
        embeddings = pickle.load(f)
elif model_metadata['embedding_format'] == 'lancedb':
    embeddings_folder = os.path.join(ARTEFACT_FOLDER, 'embeddings')
    db = lancedb.connect(embeddings_folder)
    table = db.open_table('documents')
else:
    # Fail here rather than with a confusing NameError in a later cell.
    raise ValueError(f"Unsupported embedding_format: {model_metadata['embedding_format']!r}")

In [7]:
if model_metadata['version'] == '01':
    # I used a JSON to store metadata only in artefact v01. v02 onwards, they are in lancedb fields
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(os.path.join(ARTEFACT_FOLDER, 'chunk_metadata.json'), 'r', encoding='utf-8') as f:
        chunk_metadata = json.load(f)


In [8]:
if model_metadata['version'] == '01':
    # Artefact v01 keeps each chunk in chunks/<index>.md; the numeric file stem is used
    # as the chunk's position in `chunks`.
    # NOTE(review): this assumes the stems are exactly 0..n-1 with no gaps; a gap would
    # raise IndexError below. Confirm against the artefact writer.
    chunks_folder = os.path.join(ARTEFACT_FOLDER, 'chunks')
    file_names = sorted(f for f in os.listdir(chunks_folder) if f.endswith('.md'))
    chunks = [None] * len(file_names)
    for file_name in tqdm(file_names):
        # Read as UTF-8 explicitly so decoding does not depend on the platform locale.
        with open(os.path.join(chunks_folder, file_name), 'r', encoding='utf-8') as f:
            chunks[int(file_name.split('.')[0])] = f.read()

# Load the Embedding Model

In [9]:
# Load the embedding model at the exact revision recorded in the artefact metadata.
# The device is chosen at construction time so the model is never placed on the GPU
# first and moved afterwards. Pass device="cuda" instead to run the encoder on the GPU.
embedding_model = SentenceTransformer(model_metadata['embedding_model']['name'],
                                      trust_remote_code=True,
                                      revision=model_metadata['embedding_model']['revision'],
                                      device="cpu")

2025-01-22 18:58:26.649471: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-22 18:58:26.664779: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-22 18:58:26.683735: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-22 18:58:26.689485: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-22 18:58:26.703205: I tensorflow/core/platform/cpu_feature_guar

# Load the Language Model

In [10]:
# Quantisation settings handed to from_pretrained when the language model is loaded:
# 4-bit weights, "nf4" quantisation type, double quantisation enabled.
_quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
}
bnb_config = BitsAndBytesConfig(**_quantization_options)


In [11]:
# Identity of the language model, pinned to an exact Hub commit.
model_provider = 'mistralai'
model_name = 'Mistral-7B-Instruct-v0.3'
model_revision = 'e0bc86c23ce5aae1db576c8cca6f06f1f73af2db'
model_id = f'{model_provider}/{model_name}'

# Expected location of that snapshot inside the local Hugging Face cache.
# HF_HOME may be None when the environment variable is unset; fall back to the default
# cache root so os.path.join does not raise TypeError.
_hf_home = HF_HOME or os.path.join(os.path.expanduser('~'), '.cache', 'huggingface')
model_path = os.path.join(_hf_home, 'hub', f'models--{model_provider}--{model_name}', 'snapshots', model_revision)

if os.path.exists(model_path):
    # Snapshot already cached: load straight from disk.
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, quantization_config=bnb_config, device_map="auto")
else:
    # Not cached: download from the Hub, pinned to the same revision the local path refers to
    # (the original fallback loaded whatever the default branch currently points at).
    model = AutoModelForCausalLM.from_pretrained(model_id, revision=model_revision, torch_dtype=torch.bfloat16, quantization_config=bnb_config, device_map="auto")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [12]:
# Mirror the model-loading logic: use the local snapshot when it exists, otherwise fetch the
# tokenizer from the Hub at the pinned revision (the local path would not exist in that case).
if os.path.exists(model_path):
    tokenizer = AutoTokenizer.from_pretrained(model_path)
else:
    tokenizer = AutoTokenizer.from_pretrained(model_id, revision=model_revision)

In [13]:
!nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv

memory.total [MiB], memory.used [MiB], memory.free [MiB]
15360 MiB, 4087 MiB, 11009 MiB


# Query the Language Model

In [41]:
# TODO: DO NOT CHANGE. Move to a common library.
def retrieve_baseline(q: str, model: SentenceTransformer, k: int=5, metric: str='cosine') -> pd.DataFrame:
    """
    Retrieve the top-k rows of the LanceDB table that are closest to a query string.

    The query is embedded with `model` (with `normalize_embeddings=True`) and the resulting
    vector is searched against the module-level `table`. `table` is a global, not a
    parameter, so it must be defined before this function is called.

    Args:
        q (str): The query string for which similar documents are to be retrieved.
        model (SentenceTransformer): The model used to embed the query.
            NOTE(review): presumably this must be the same model that produced the
            embeddings stored in the table. Confirm against the artefact builder.
        k (int, optional): Maximum number of rows to return. Default is 5.
        metric (str, optional): Value passed to the search's `.metric(...)`:
            cosine, l2, dot or hamming. Default is 'cosine'.

    Returns:
        pd.DataFrame: The search result limited to `k` rows, converted with `.to_pandas()`.
            Callers in this notebook read the 'text' column; the remaining columns depend
            on the table schema, which is not visible here.

    Example:
        query = "What is the history of Eberron?"
        model = SentenceTransformer('all-MiniLM-L6-v2')
        results = retrieve_baseline(query, model)
        print(results)
    """
    # Embed the query string.
    embedding = model.encode(q, normalize_embeddings=True)
    # Build the vector search against the global `table`; no rows are fetched yet.
    results = table.search(embedding).metric(metric)

    return results.limit(k).to_pandas()


In [34]:
# Required globals: model, embedding_model, tokenizer, retrieve. I might also need a score or k or other parameters.
retrieve = retrieve_baseline
def get_info(q: str) -> str:
    """
    Answer a free-form question using retrieved context and the language model.

    The 20 closest chunks (cosine metric) are fetched with the global `retrieve`, joined
    with a "-----" separator and placed in an instruction prompt together with the query.
    Generation uses sampling (`do_sample=True`), so repeated calls can give different answers.

    Args:
        q (str): The user query.

    Returns:
        str: The text generated by the model, stripped of surrounding whitespace.
            Only the newly generated tokens are returned, never the prompt.
    """
    results = retrieve(q, embedding_model, 20, 'cosine')

    retrieved_text = dedent("""
    -----
    """).join(results['text'])

    prompt = f"""

    [INST]
    Use the following information (until the final cutoff =====) to answer the user query Q below.
    Prefer information closer to the top.

    {retrieved_text}

    =====

    Q:
    {q}

    A:
    [/INST]
    """

    # Send the inputs to wherever the model actually lives instead of hard-coding "cuda".
    model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    # pad_token_id is set explicitly to the value generate() would otherwise pick with a warning.
    generated_ids = model.generate(**model_inputs,
                                   max_new_tokens=1024,
                                   do_sample=True,
                                   eos_token_id=tokenizer.eos_token_id,
                                   pad_token_id=tokenizer.eos_token_id)

    # A decoder-only model returns prompt + completion. Slice off the prompt tokens rather
    # than splitting the decoded text on 'A:', which picked the wrong segment whenever the
    # retrieved text, the query or the answer itself contained 'A:'.
    prompt_length = model_inputs['input_ids'].shape[1]
    response = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)

    return response.strip()

    

In [32]:
!nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv

memory.total [MiB], memory.used [MiB], memory.free [MiB]
15360 MiB, 9633 MiB, 5463 MiB


In [35]:
q = "Tell me about the languages of Eberron."
q = "Create a House Cannith item."
q = "Tell me about the rivers of Khorvaire."
response = get_info(q)
print(response)

[2025-01-21T01:38:29Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:38:29Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:38:29Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:38:29Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


1. Cyre River: This river separates Karrlakton from the Mournland. It played a significant role in the history of Karrnath, especially during the reign of King Kaius III, who was born on its banks.

    2. Dagger River: A significant river in south-western Khorvaire, marked by unpredictable flows and quicksands along its banks. It separates Valenar and the Talenta Plains.

    3. Eldeen River: A major waterway that runs through Eldeen Reaches, emptying into the Thunder Sea. Its source is hidden in the Wood of Shadows, making it difficult to manage the river's flow.

    4. Galifar's Straits: These straits connect the Bitter Sea and the Lhazaar Sea, with the region of Shargon's Teeth being the most common route from Sharn to Stormreach in Xen'drik. These straits are infested with sea devils.

    5. Krakos River: A river in southern Khorvaire that forms part of the border between Thrane and Aundair, leading from Lake Galifar to the Thunder Sea. It's a significant commercial route, with 

In [90]:
q = "Tell me about the rivers of Khorvaire."
response = get_info(q)
print(response)

[2025-01-20T04:19:41Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:19:41Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:19:41Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:19:41Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


"While Khorvaire is predominantly landlocked, the continental landscape contains significant waterways that are integral to civilization. The following are some prominent rivers of Khorvaire, listed by nation:\n\n    1. Aundair:\n      a. The Sovereign River—a vital watercourse flowing into the Sea of Rage, it was used as a route for transporting goods and soldiers from the city of Aundair (the capital) to the front lines during the Last War.\n\n    2. Breland:\n      a. The Mantle—It begins in Vordalay Forest, passes through the metropolis of Zeravik, and flows down to the Lhazaar Sea.\n      b. The Thorn—It arises near the border with Aundair, meanders through the Arbor in Breland's fertile plains, draining into the Lhazaar Sea.\n\n    3. Cyran Empire:\n      a. Cyre River—Though devastated by the Mournland's explosion, the Cyre River was once a significant river linking the empire of Cyre to the Lhazaar Sea. Today, its waters have become part of the Mournland.\n\n    4. Droaam:\n   

In [93]:
print(response)

While Khorvaire is predominantly landlocked, the continental landscape contains significant waterways that are integral to civilization. The following are some prominent rivers of Khorvaire, listed by nation:

    1. Aundair:
      a. The Sovereign River—a vital watercourse flowing into the Sea of Rage, it was used as a route for transporting goods and soldiers from the city of Aundair (the capital) to the front lines during the Last War.

    2. Breland:
      a. The Mantle—It begins in Vordalay Forest, passes through the metropolis of Zeravik, and flows down to the Lhazaar Sea.
      b. The Thorn—It arises near the border with Aundair, meanders through the Arbor in Breland's fertile plains, draining into the Lhazaar Sea.

    3. Cyran Empire:
      a. Cyre River—Though devastated by the Mournland's explosion, the Cyre River was once a significant river linking the empire of Cyre to the Lhazaar Sea. Today, its waters have become part of the Mournland.

    4. Droaam:
      a. The Cog—

In [94]:
q = "Tell me about fashion in the five nations."
response = get_info(q)
print(response)

[2025-01-20T04:24:22Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:24:22Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:24:22Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:24:22Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


1. During the Last War, the fashion styles of the Five Nations evolved to distinguish their soldiers and improve their tools of war. Common soldiers typically wore light, medium, or heavy armor made of leather, metal, or a combination of both.

    2. Elite forces, officers, and mercenaries used different styles and materials. Each nation had its own distinct approach to fashion, both in its armor and civilian clothing.

    3. Beyond practical considerations, adventurers might choose to wear specific clothing as a cultural expression. For example, a Cyran might wear gloves and masks, a Karrn might wear gazyrs sewn onto leather armor, an Aundairian might wear a cloak with strange proportions, a Thrane might wear clothes covered in embroidered patches, an Aundairian might wear clothes covered in embroidered patches, and a Brel might wear pointy-toed shoes and long puffy sleeves.

    4. Many contemporary fashion designers now produce lines of fashion specifically for adventurers.

    5

In [97]:
q = "Tell me about fashion in the five nations."
response = get_info(q)
print(response)

[2025-01-20T04:25:16Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:25:16Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:25:16Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:25:16Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


1. The common armor of today for the Five Nations include light, medium, and heavy armor. These typically involve a metal helmet, breastplate for medium armor, and leather or chainmail for light armor. However, each nation has its distinct approach to fashion both in its armor and civilian clothing.

    2. Adventurers from the Five Nations often represent their homeland in their clothing, although not always for practicality. Adventurers are an important part of each nation, with adventuring guilds and independents playing crucial roles in their cultures.

    3. The Church of the Silver Flame, one of the most significant religions in the Five Nations, is recognized by its delicately embroidered, silver-dyed denim robes. Their vestments are sewn with extra space for mobility, and are usually closed with toggles rather than fasteners.

    4. In Middle Tavick's Landing, one can find fusion of Brelish lapels, Thrane cloaks, and other styles, as it serves as the melting pot of cultures i

In [99]:
q = "Suggest an outfit for a dragonmarked heir."
response = get_info(q)
print(response)

[2025-01-20T04:26:31Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:26:31Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:26:31Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:26:31Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


1. Light Armor: Because the dragonmark heir does not have proficiency in heavy armor, a light armor will offer better protection while being able to move easily. Many Mror wear decorative armor that uses the statistics of light armor, but evokes the general flavor of a heavier breastplate. This will allow the heir to retain the martial aspect to their attire that the Mror generally favor, but still be able to move freely.

    2. Robe or Tunic: To balance out the armor, a long robe or tunic can be used as the top garment. This will not only provide a more elegant look, but also offer more coverage for the dragonmark on their arm/chest.

    3. Sash or Belt: A sash or belt can be worn around the waist to hold up the robe/tunic and also add a bit of color and personality to the outfit.

    4. Boots: Boots would be a practical choice for footwear, as they offer protection and support for the feet while moving.

    5. Accessories: To complete the outfit, the heir could wear various acces

In [100]:
!nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv

memory.total [MiB], memory.used [MiB], memory.free [MiB]
15360 MiB, 10967 MiB, 4129 MiB


In [36]:
def get_one_word_answer(q: str) -> str:
    """
    Ask the language model for a one-word answer grounded in retrieved context.

    The 5 closest chunks (cosine metric) are fetched with the global `retrieve`, joined
    with a "-----" separator and placed in an instruction prompt that demands a single word.
    Generation uses sampling (`do_sample=True`), so repeated calls can give different answers.
    The model is only instructed to answer in one word; the output is not truncated to one.

    Args:
        q (str): The user query.

    Returns:
        str: The text generated by the model, stripped of surrounding whitespace.
            Only the newly generated tokens are returned, never the prompt.
    """
    results = retrieve(q, embedding_model, 5, 'cosine')

    retrieved_text = dedent("""
    -----
    """).join(results['text'])

    prompt = f"""
    [INST]
    Use the following information (until the final cutoff =====) to answer the user query Q below.
    The answer has to be one word only.
    You are trying to get the most accurate answer in one shot.

    {retrieved_text}

    =====

    Q:
    {q}

    A:
    [/INST]
    """
    # Debug aid: prompt size in characters (not tokens).
    print(len(prompt))

    # Send the inputs to wherever the model actually lives instead of hard-coding "cuda".
    model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    # Only max_new_tokens is passed: the original also set max_length=50, which conflicts
    # with it and triggered the "Both `max_new_tokens` and `max_length` seem to have been
    # set" warning in the recorded output.
    generated_ids = model.generate(**model_inputs,
                                   max_new_tokens=256,
                                   do_sample=True,
                                   eos_token_id=tokenizer.eos_token_id,
                                   pad_token_id=tokenizer.eos_token_id)

    # Decode only the completion; splitting the decoded prompt + completion on 'A:\n' is
    # unreliable when the retrieved text or the query contains that marker.
    prompt_length = model_inputs['input_ids'].shape[1]
    response = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)

    return response.strip()


In [37]:
q = "What is the name of the Breland veterans community in Quickstone?"
response = get_one_word_answer(q)
print(response)

[2025-01-21T01:40:31Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:40:31Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:40:31Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:40:31Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` w

8460
Tents


In [None]:
# (question, expected answer) pairs; presumably a small evaluation set for
# get_one_word_answer. No visible cell consumes it yet.
# NOTE(review): the third question spells "Kind's" where the fourth has "King's". This looks
# like a deliberate typo-robustness probe; confirm before "fixing" it.
q_and_a = [
    ("What is the name of the prominent university in Sharn?", "Morgrave"),
    ("What type of magic does Karrnath use in its military?", "Necromancy"),
    ("Who leads Kind's Dark Lanterns?", "Vron"),
    ("Who leads King's Dark Lanterns?", "Vron"),
    ("What is the name of the Breland veterans community in Quickstone?", "Tents"),
]

In [38]:
# Required globals: model, embedding_model, tokenizer, retrieve. I might also need a score or k or other parameters.
retrieve = retrieve_baseline
def generate_character(q: str) -> str:
    """
    Generate a 5th Edition character sheet for Eberron from a short prompt.

    The 10 closest chunks (cosine metric) are fetched with the global `retrieve`, joined
    with a "-----" separator and placed in an instruction prompt that lists the required
    parts of the sheet. Generation uses sampling (`do_sample=True`), so repeated calls
    can give different characters.

    Args:
        q (str): Starting idea for the character, e.g. "War veteran 3rd level".

    Returns:
        str: The text generated by the model, stripped of surrounding whitespace.
            Only the newly generated tokens are returned, never the prompt.
    """
    results = retrieve(q, embedding_model, 10, 'cosine')

    retrieved_text = dedent("""
    -----
    """).join(results['text'])

    prompt = f"""
    [INST]
    Use the following information (until the final cutoff =====) to create a character based on the 5th Edition rules for the world of Eberron.
    Use the prompt P as a starting point.
    Give the complete character sheet.
    A complete character sheet should include name, race, class, level, attributes, proficiencies based on the class and fifth edition rules, 
    proficiency bonus based on class and level, saving throws bonus based on class, feats and proficiency , hit points based on class and level, 
    weapons and equipment, attack bonus for each weapon based on abilities, any race or class based features, class and level, attack roll, 
    and equipment, armor class based on dexterity and armor, and if the character has spellcasting ability, spells based on class and level, 
    as well as the number of spell slots. 
    Include an inventory based on their level, class, and race, with a few personal touches in the items.
    
    Also include a compelling and detailed backstory of two paragraphs related to the world of Eberron. 
    This background should include their nation.
    Include another short paragraph about their intentions.
    Include one other short paragraph about their tactics in combat.
    Finally, include one paragraph for the visuals.

    {retrieved_text}

    =====

    Prompt P:
    {q}

    Character:
    [/INST]
    """

    # Send the inputs to wherever the model actually lives instead of hard-coding "cuda".
    model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    # pad_token_id is set explicitly to the value generate() would otherwise pick with a warning.
    generated_ids = model.generate(**model_inputs,
                                   max_new_tokens=2048,
                                   do_sample=True,
                                   eos_token_id=tokenizer.eos_token_id,
                                   pad_token_id=tokenizer.eos_token_id)

    # Decode only the completion. Splitting the decoded prompt + completion on 'Character:\n'
    # is unreliable because retrieved text or the generated sheet can contain the same marker.
    prompt_length = model_inputs['input_ids'].shape[1]
    response = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)

    return response.strip()

    

In [103]:
character = generate_character("War veteran")
print(character)

[2025-01-20T04:34:02Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:34:02Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:34:02Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:34:02Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


8. Character: Caelum "Cal" Iarwain
    9. Race: Human
    10. Class: Ranger 1
    11. Attributes (5e standard array): Str 12, Dex 14, Con 10, Int 8, Wis 12, Cha 10
    11. Class Abilities:
        1. Starting Proficiencies: Perception, Survival
        2. Level 1 Fighting Style: Archery
        3. Ranger Spells: Cat's Grace, Hunter's Mark
        4. Favored Enemy: Orcs
        5. Natural Explorer: Swamp, Urban
        6. Skills: Climb, Dexterity, Intelligence, Investigation, Nature, Navigation, Survival, Persuasion
        7. Tools: None
    12. Proficiency Bonus (5e default): +2
    13. Saving Throws:
        1. Wisdom (Race)
        2. Dexterity (Class)
    14. Feats & Levels: None
    15. Hit Points: 10 (Levels: 1 Ranger) + Hit Die (next level: 1d10)
    16. Weapons & Equipment:
        1. Shortbow
        2. Quiver (20 arrows)
        3. Short Sword
        4. Leather Armor
    17. Attack Bonus (Per Stat):
        1. Shortbow: +5 to hit (1d8 damage)
        2. Short Sword: +5 to hi

In [40]:
character = generate_character("Comin relief, similar to C3p0")
print(character)

[2025-01-21T01:43:24Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:43:24Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:43:24Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:43:24Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Name: RedLight Disiuct p
     Race: Golem (Grey Iron)
     Class: Sorcerer (Harbinger of Faith)
     Level: 5
     Attributes:
       Strength: 14 (+2)
       Dexterity: 12 (+1)
       Constitution: 14 (+2)
       Intelligence: 16 (+3)
       Wisdom: 8 (-1)
       Charisma: 18 (+4)
     Proficiencies: Arcana, History, Insight, Religion
     Proficiency Bonus: +3
     Saving Throws: Wisdom (+0), Charisma (+6)
     Feats and proficiencies:
       - Mage Armor (1 level sorcerer)
       - Spellcasting (5 levels sorcerer)
     Hit Points: 37 (5d8 for Golem + 21 (5d6 + 5 from sorcerer levels))
     Weapons and equipment:
       - Dagger (attack roll +5)
       - Quarterstaff (attack roll +4)
       - Tindertwig (no attack bonus, helps with igniting objects)
       - A necklace with a charm of healing (1d4 + 2 HPs)
       - A pouch containing 2d4 sprigs of Pacifycus, used to reduce fevers
       - A small wooden effigy of Sovereign Host, used as an additional focus in spells
     Armor Class:

In [39]:
character = generate_character("War veteran 3rd level")
print(character)

[2025-01-21T01:41:52Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:41:52Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:41:52Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-21T01:41:52Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Name: Cameron Calamity Jones
     Race: Human
     Class: Wandslinger (Blaster)
     Level: 3

    Attributes:
     - Strength (Str): 10 (+0)
     - Dexterity (Dex): 14 (+2)
     - Constitution (Con): -1 (-2)
     - Intelligence (Int): 14 (+2)
     - Wisdom (Wis): 12 (+1)
     - Charisma ( Cha): 10 (+0)

    Proficiencies:
     - Armor: Light armor, medium armor, shields
     - Weapons: Simple weapons, martial weapons
     - Skills: Arcana +4, Athletics +2, Medicine +3

    Proficiency Bonus: +2
     Saving Throws Bonus: Dex +4, Con +4

    Hit Points: 39 (6d10 + 12)

    Equipment:
     - Chain Shirt
     - Heavy Crossbow with 20 bolts
     - Two Longswords
     - Studded Leather Armor
     - Dungeoneer's pack
     - Leather armor, shield, and 14 bolts (equipped)
     - A wand (your magical focus) that can cast spells from the Enchantment and Evocation spell lists

    Attack Bonus:
     - Longsword: +4
     - Heavy Crossbow: +3

    Class Features:
     - Blaster: Your choice grants 

In [108]:
character = generate_character("5th level gunslinger")
print(character)

[2025-01-20T04:42:21Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:42:21Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:42:21Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
[2025-01-20T04:42:21Z WARN  lance_core::utils::tokio] Number of CPUs is less than or equal to the number of IO core reservations. This is not a supported configuration. using 1 CPU for compute intensive tasks.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


5th-level Wandslinger (Blaster)
    Medium humanoid (human)
    Str 10, Dex 14, Con 14, Int 16, Wis 12, Cha 10,
    Saving throws Dex +5, Con +5
    Skills Arcana +6, Athletics +3, Medicine +4
    Hit Dice: 5d10
    Proficiencies: Light armor, Medium armor, Shields; Simple weapons, Martial weapons
    Senses: passive Perception 11
    Languages: Common, Dwarvish

    Attributes:
    Attack bonus (longbow): +7
    Armor Class: 17

    Special Features:
    True Shot (Gunslinger): You can take a −4 penalty to the attack roll to gain a +10 bonus to the damage roll for that attack.
    Assess Foe (Gunslinger): By taking a minute of time to examine your adversary, you can learn information about the adversary and make special moves to gain an advantage in battle.
    Magic Ammunition (Gunslinger): You learned to cast spells as a bonus action without using a spell slot and can now imbue your quiver of arrows with a magical power. When you activate this power, select one spell from the list b

# Test Langchain

In [21]:
!pip install langchain-huggingface --upgrade

[0m

In [25]:
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_core import messages

In [23]:
!nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv

memory.total [MiB], memory.used [MiB], memory.free [MiB]
15360 MiB, 4087 MiB, 11009 MiB


['AIMessage',
 'AIMessageChunk',
 'AnyMessage',
 'BaseMessage',
 'BaseMessageChunk',
 'ChatMessage',
 'ChatMessageChunk',
 'FunctionMessage',
 'FunctionMessageChunk',
 'HumanMessage',
 'HumanMessageChunk',
 'InvalidToolCall',
 'MessageLikeRepresentation',
 'RemoveMessage',
 'SystemMessage',
 'SystemMessageChunk',
 'ToolCall',
 'ToolCallChunk',
 'ToolMessage',
 'ToolMessageChunk',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_message_from_dict',
 'ai',
 'base',
 'chat',
 'convert_to_messages',
 'convert_to_openai_messages',
 'filter_messages',
 'function',
 'get_buffer_string',
 'human',
 'merge_content',
 'merge_message_runs',
 'message_chunk_to_message',
 'message_to_dict',
 'messages_from_dict',
 'messages_to_dict',
 'modifier',
 'system',
 'tool',
 'trim_messages',
 'utils']

In [None]:

# Wrap the model and tokenizer that are already loaded instead of calling
# HuggingFacePipeline.from_model_id(model_path, ...): that call loads a second copy of the
# model without the quantization config, and it fails when `model_path` is not cached locally.
character_prompter = HuggingFacePipeline(
    pipeline=pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )
)



In [None]:

# Wrap the model and tokenizer that are already loaded instead of calling
# HuggingFacePipeline.from_model_id(model_path, ...): that call loads another copy of the
# model without the quantization config, and it fails when `model_path` is not cached locally.
character_generator = HuggingFacePipeline(
    pipeline=pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )
)



In [None]:
# from langchain_huggingface.llms import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# model_id = "gpt2"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(model_id)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10)
# hf = HuggingFacePipeline(pipeline=pipe)

In [None]:
# from langchain_core.prompts import PromptTemplate

# template = """Question: {question}

# Answer: Let's think step by step."""
# prompt = PromptTemplate.from_template(template)

# chain = prompt | hf

# question = "What is electroencephalography?"

# print(chain.invoke({"question": question}))