In [1]:
pip install gradio

Collecting gradio
  Downloading gradio-5.8.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.1 (from gradio)
  Downloading gradio_client-1.5.1-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [2]:
pip install unsloth

Collecting unsloth
  Downloading unsloth-2024.12.4-py3-none-any.whl.metadata (59 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/59.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2024.11.8 (from unsloth)
  Downloading unsloth_zoo-2024.12.1-py3-none-any.whl.metadata (16 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Collecting triton>=3.0.0 (from unsloth)
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.2-py3-none-any.whl.metadata (9.4 kB)
Collecting datasets>=2.16.0 (from unsloth)
  Downloadi

In [3]:
import gradio as gr
from unsloth import FastLanguageModel
import torch

# --- Settings for the fine-tuned checkpoint ---
load_in_4bit = True
dtype = torch.float16
max_seq_length = 512
model_name = "skapl/lora_model"

# Fetch the fine-tuned Dungeon Master model together with its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Put the loaded model into Unsloth's inference mode
FastLanguageModel.for_inference(model)

def respond_to_player(input_text: str, history: list):
    """
    Generate a Dungeon Master's response based on the player's action or input.

    Args:
        input_text: The player's latest message.
        history: Previous turns as ``(player_message, dm_reply)`` pairs.
            ``None`` is treated as an empty history.

    Returns:
        A ``(reply, updated_history)`` tuple. ``updated_history`` is a new list;
        the list passed in is never mutated. When ``input_text`` is empty or
        only whitespace, no generation is run and the history is returned
        unchanged alongside a short prompt asking the player for input.
    """
    history = list(history or [])

    # Nothing to respond to: skip the (expensive) generation call entirely.
    if not input_text or not input_text.strip():
        return "Describe your action or speak to the Dungeon Master to continue.", history

    # Build the conversation history
    system_message = (
        "You are the Dungeon Master in a Dungeons & Dragons game. Guide the player through an adventure, "
        "describe environments, interact with characters, and generate the world-building narrative."
    )
    messages = [{"role": "system", "content": system_message}]

    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    # Add the player's current input
    messages.append({"role": "user", "content": input_text})

    # Tokenize the prompt and place it on whichever device the model lives on
    prompt_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Generate the Dungeon Master's response.
    # `temperature` only takes effect when sampling is enabled, so request it explicitly.
    generated_ids = model.generate(
        input_ids=prompt_ids,
        max_new_tokens=400,
        do_sample=True,
        temperature=0.8,
    )

    # The output sequence starts with the prompt tokens; decode only what was
    # generated after them. Splitting the decoded text on the word "assistant"
    # would cut off any reply that itself contains that word.
    prompt_length = prompt_ids.shape[-1]
    assistant_reply = tokenizer.decode(
        generated_ids[0][prompt_length:],
        skip_special_tokens=True,
    ).strip()

    # Return the response and updated history
    return assistant_reply, history + [(input_text, assistant_reply)]


# Create a Gradio Interface
def launch_dungeon_master_interface():
    """Assemble the Gradio UI around ``respond_to_player`` and start serving it."""
    # Player-facing input plus the state slot that carries the conversation history
    player_box = gr.Textbox(
        label="Player's Input",
        placeholder="Describe your action or speak to the Dungeon Master...",
    )
    history_in = gr.State(value=[])

    # Dungeon Master output plus the state slot that receives the updated history
    dm_box = gr.Textbox(label="Dungeon Master's Response")
    history_out = gr.State()

    app = gr.Interface(
        fn=respond_to_player,
        inputs=[player_box, history_in],
        outputs=[dm_box, history_out],
        title="Dungeon Master AI",
        description="This is a Dungeon Master assistant for your Dungeons & Dragons adventures. Provide an input to interact with the DM!",
    )

    # Start the server with debug mode switched on
    app.launch(debug=True)

# Run the interface.
# The guard starts the UI only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    launch_dungeon_master_interface()


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

Unsloth 2024.12.4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://55fd8d0d14a249eee1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Keyboard interruption in main thread... closing server.



KeyboardInterrupt

