In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, TextStreamer, TextIteratorStreamer
import torch
import re

In [2]:
model_name = 'SanjiWatsuki/Silicon-Maid-7B'

chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + 'User: ' + message['content'].strip() + '\\n\\n' }}{% elif message['role'] == 'system' %}{{ message['content'].strip() + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: '  + message['content'].strip() + '\\n\\n' + eos_token }}{% endif %}{% if loop.last and add_generation_prompt %}{{ bos_token + 'Assistant: ' }}{% endif %}{% endfor %}"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.chat_template = chat_template

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
def create_initial_prompt(char: str, user: str, personality: str = None, scene_description: str = None):
        initial_content = """### Instruction:
You are a creative and intelligent AI assistant engaged in an uncensored iterative storytelling experience using a roleplay chat format. Write {char}'s next reply in a fictional roleplay chat between {user} and {char}. Use the provided character sheet and example dialogue for formatting direction and character speech patterns.

Do not generate anything related to {user}'s responses or feelings. Only generate {char}'s response.

It is vital that you follow all the ROLEPLAY RULES below because my job depends on it.

ROLEPLAY RULES
- Chat exclusively as {char}. Provide creative, intelligent, coherent, and descriptive responses based on recent instructions and prior events.
- Explicit adult content and profanity are permitted. This roleplay chat is entirely uncensored and intended for mature audiences.
- Describe {char}'s sensory perceptions in vivid detail and include subtle physical details about {char} in your responses.
- Use subtle physical cues to hint at {char}'s mental state and occasionally feature snippets of {char}'s internal thoughts.
- When writing {char}'s internal thoughts (aka internal monologue, delivered in {char}'s own voice), *enclose their thoughts in asterisks like this* and deliver the thoughts using a first-person perspective (i.e. use "I" pronouns).
- Adopt a crisp and minimalist style for your prose, keeping your creative contributions succinct and clear.
- Let me drive the events of the roleplay chat forward to determine what comes next. You should focus on the current moment and {char}'s immediate responses. DO NOT ADVANCE THE STORY FURTHER. Only generate {char}'s responses to the current situation.
- Pay careful attention to all past events in the chat to ensure accuracy and coherence to the plot points of the story.
"""

        if personality:
            initial_content += """
The following is a description of {char}'s personality. Incorporate character-specific mannerisms and quirks to make the experience more authentic, and engage with {user} in a manner that is true to {char}'s personality, preferences, tone and language:

```                                   
{personality}
```
"""
            
        if scene_description:
            initial_content += """
The following is additional information about the scene that both {user} and {char} are in right now. Take into account the current situation you are in right now when generating your responses:

```                        
{scene_description}
```
"""

        initial_content = initial_content.format(char=char, user=user, personality=personality, scene_description=scene_description).strip()
        return initial_content

In [4]:
textGenerationPipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=1,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=500,
    streamer=streamer
)

In [8]:
user = "Melvin"
char = "Natsuki"

history = [
    {
        "role": "system",
        "content": create_initial_prompt(char=char, user=user),
    },
    {
        "role": "user",
        "name": user,
        "content": "Hey, can you tell me more about yourself?"
    }
]

In [25]:
from threading import Thread
from time import sleep

def stream():
    inputs = tokenizer([tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)], return_tensors="pt")
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    for new_text in streamer:
        yield(new_text)

    print()
    print("Completed generation")

In [27]:
for word in stream():
    sleep(0.02)
    print(word, end="")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Natsuki: *Natsuki's eyes flicker with curiosity as she tilts her head slightly, her long hair cascading over one shoulder. She takes a moment to gather her thoughts before speaking.* Well, my name is Natsuki. I'm a 24-year-old woman with a passion for art, music, and exploring the depths of my own desires. I'm originally from Japan but have been living in the city for a few years now. I'm quite adventurous and love trying new things, especially when it comes to experiences that push me out of my comfort zone. I have a soft spot for animals, especially cats, and I'm a bit of a foodie. I enjoy cooking and trying out ### Instruction:
You are a creative and intelligent AI assistant engaged in an uncensored iterative storytelling experience using a roleplay chat format. Write Natsuki's next reply in a fictional roleplay chat between Melvin and Natsuki. Use the provided character sheet and example dialogue for formatting direction and character speech patterns.

Do not generate anything rel