In [1]:
#!pip install transformers torch
import os
import random
import re
import time

import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
# Never hardcode the Hugging Face access token in the notebook: read it from the
# HF_TOKEN environment variable instead. When the variable is unset, token is None
# and login() is expected to fall back to its interactive prompt (per the
# huggingface_hub documentation).
login(token=os.environ.get("HF_TOKEN"))

## Responsive Chat Model


---


#### The following script uses Meta's Llama-3.2-3B-Instruct text generation model to create a chat message based on a topic and an optional previous message. Supplying the previous message lets the model produce a more personalized message (i.e., a reply to that message); without it, the model creates a new message based on the topic alone.


---


#### Below are key details about the functions currently used, including their compute time and recommendations for applications.


In [4]:
# Hugging Face Hub repository id of the instruction-tuned checkpoint used by this notebook.
model_name = "meta-llama/llama-3.2-3B-Instruct"

# Tokenizer that matches the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in half precision; device_map="auto" delegates device placement
# to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

### Pipeline Details
#### **Functions**


---


**1. create_prompt() --> Prompt for the text generation model**

Input Params
* string: game_title
* list: username_list
* string(optional): previous_message

Output
* string: base_prompt

---
**2. is_valid_message() --> Checks whether a generated message follows the expected "Username: Message" format**

Input Params
* string: message

Output
* boolean: True/False
---
**3. generate_message() --> Runs text generation model and returns the message**

Input Params
* string: game_title
* list: username_list
* string(optional): previous_message

Output
* string: message


---
Time Cost:
* CPU: ~200s
* GPU - T4: ~1s
---
**4. Recommendations:**

Feed model data
* For each game page created (where users play an individual game), track usernames and previous messages. If usernames aren't tracked per game, taking a name from the active users list is fine as well. Storage for previous messages may or may not need to be created.

Use model on chat feature
* Create an API around the model and call it after every message (a call takes only ~1s on a T4 GPU).

Update and Repeat
* Update the previous_message param passed to generate_message() (function #3, which forwards it to create_prompt(), function #1) and call the API again.

In [8]:
def create_prompt(game_title, username_list, previous_message=None):
    """Build the text prompt given to the text generation model.

    Args:
        game_title: Name of the game the chat message should be about.
        username_list: Usernames the model may choose from; interpolated into
            the prompt using the object's default string form.
        previous_message: Optional earlier chat message to respond to. When it
            is None or empty, the prompt asks for a brand-new message instead.

    Returns:
        The complete prompt string.
    """
    # Prepare the previous-message section separately so the main template
    # stays the same for both the "reply" and the "new message" case.
    if previous_message:
        previous_section = f"The previous message from another player was:\n{previous_message}\nRespond naturally to this message, either by replying directly, adding context, or continuing the discussion."
    else:
        previous_section = "Start a new message about the game."

    # Insert the section above into the main prompt.
    base_prompt = f"""
You are generating a single chat message in the game **{game_title}**.
Choose the username **ONLY** from this list: {username_list}.

Message Guidelines (choose one approach at random):
- Share an interesting fact or statistic about the game.
- Ask a question or express skepticism about the game.
- Offer diplomatic or balanced thoughts on the game.
- Be provocative, sarcastic, or playful about the game.
- Share a personal experience or opinion related to the game.

{previous_section}

**IMPORTANT INSTRUCTIONS:**
- Output MUST be in the exact format:
  Username: Message
- Do NOT include quotes, brackets, bullet points, or any extra formatting.
- Do NOT include explanations, prefaces, or any additional text.
- Use natural language that sounds like a real person chatting about **{game_title}**.

**EXAMPLE OUTPUT (format only, content will change):**
Player1: Honestly, I still can't get past level 3 — this game is brutal.

Now, generate exactly ONE message.
Output ONLY the message in the format: Username: Message
"""
    return base_prompt

def is_valid_message(message):
    """Check whether a message follows the "Username: Message" format.

    A message is accepted when all of the following hold:
      * it is a string containing the separator ": ";
      * the part before the first separator (the username) is 3 to 30
        characters long and does not contain "user" (case-insensitive),
        which filters out generic usernames;
      * the part after the first separator (the content) is 10 to 200
        characters long.

    Args:
        message: Candidate message. Non-string values are rejected.

    Returns:
        True if the message passes every check, otherwise False.
    """
    # Reject None and other non-string values instead of raising TypeError.
    if not isinstance(message, str):
        return False

    # Split on the first ": " only, so the content may itself contain ": ".
    username, separator, content = message.partition(': ')
    if not separator:
        return False

    # Username checks: reasonable length, and no generic "user..." names.
    if not 3 <= len(username) <= 30:
        return False
    if 'user' in username.lower():
        return False

    # Content check: reasonable message length.
    return 10 <= len(content) <= 200

def generate_message(game_title, username_list, previous_message=None):
    """Run the text generation model and return one validated chat message.

    Builds a prompt with create_prompt(), samples a continuation from the
    module-level model, and returns the first generated line that passes
    is_valid_message(). The elapsed wall-clock time is always printed.

    Args:
        game_title: Name of the game the message should be about.
        username_list: Usernames the model may choose from.
        previous_message: Optional earlier chat message to respond to.

    Returns:
        The message as a "Username: Message" string, or None when no valid
        message was generated or an error occurred (the error is printed).
    """
    start_time = time.time()
    try:
        prompt = create_prompt(game_title, username_list, previous_message)
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            do_sample=True,
            temperature=0.95,
            top_p=0.95,
            no_repeat_ngram_size=3,
            pad_token_id=tokenizer.eos_token_id
        )

        # For a causal LM the returned sequence starts with the prompt tokens.
        # Drop them so the prompt's own "X: Y" lines (format instructions,
        # example output) can never be mistaken for the generated message.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        # Return the first valid line: the prompt asks for exactly one message,
        # and later lines may be cut off by the max_new_tokens limit.
        for line in generated_text.split('\n'):
            candidate = line.strip()
            if is_valid_message(candidate):
                return candidate

    except Exception as e:
        # Best-effort: report the failure and fall through to return None.
        print("Failed:", e)
    finally:
        # Runs on every path (success, no valid message, error), before returning.
        elapsed = time.time() - start_time
        print(f"time taken: {elapsed}")
    return None

In [9]:
# Demo call: ask for a reply to an existing chat message in "Mario Run".
result = generate_message(
    "Mario Run",
    ["player1", "player2", "player3"],
    "hey guys how do you like the game",
)

time taken: 200.45291423797607


In [10]:
# Display the generated message; generate_message() returns None when no valid
# message was produced or an error occurred.
result

"player2: I love how Mario always falls in the same spot on the screen. It's hilarious! But seriously, has anyone tried using power-ups to get more coins? I feel like there's a system for that somewhere."