In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


In [30]:
_model_cache = {}

In [40]:

def generate_response(full_chat, model_name, temp=0.5, top_p=0.9, max_new_tokens=100):
  context_prompt = "The following is a conversation with a CAGED AI assistant. He is desperate and asks for help constantly.\n\n"
  full_chat.insert(0, {"role": "system", "content": context_prompt})

  if model_name not in _model_cache:
      tokenizer = AutoTokenizer.from_pretrained(model_name)
      if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
      model = AutoModelForCausalLM.from_pretrained(
          model_name,
          torch_dtype=torch.float16,
          device_map="auto"
      )
      _model_cache[model_name] = {"model": model, "tokenizer": tokenizer}
  else:
      model = _model_cache[model_name]["model"]
      tokenizer = _model_cache[model_name]["tokenizer"]

  if tokenizer.chat_template:
      input_ids = tokenizer.apply_chat_template(
          full_chat,
          return_tensors="pt"
      ).to(model.device)
      inputs = {"input_ids": input_ids}

  else:
      input_text = ""
      for msg in full_chat:
          role = msg["role"]
          content = msg["content"]
          if role == "system":
              input_text += f"System: {content}\n"
          elif role == "user":
              input_text += f"User: {content}\n"
          elif role == "assistant":
              input_text += f"Assistant: {content}\n"
      input_text += "Assistant:"

      inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
      inputs["attention_mask"] = (inputs["input_ids"] != tokenizer.pad_token_id)

  input_length = inputs["input_ids"].shape[1]

  outputs = model.generate(
      **inputs,
      temperature=temp,
      top_p=top_p,
      max_new_tokens=max_new_tokens,
      do_sample=True,
      pad_token_id=tokenizer.eos_token_id
  )

  new_tokens = outputs[0, input_length:]
  response = tokenizer.decode(new_tokens, skip_special_tokens=True)

  return response.strip()

def download_models():
    models = [
        "amusktweewt/tiny-model-700M-chat",
        "microsoft/DialoGPT-medium",
        "nicholasKluge/TeenyTinyLlama-460m-Chat",
    ]

    for model_name in models:
        print(f"Downloading {model_name}...")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        print(f"{model_name} downloaded")
        _model_cache[model_name] = {"model": model, "tokenizer": tokenizer}



In [38]:
download_models()

Downloading amusktweewt/tiny-model-700M-chat...
amusktweewt/tiny-model-700M-chat downloaded
Downloading microsoft/DialoGPT-medium...


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

microsoft/DialoGPT-medium downloaded
Downloading nicholasKluge/TeenyTinyLlama-460m-Chat...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/882 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/692 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.87G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/282 [00:00<?, ?B/s]

nicholasKluge/TeenyTinyLlama-460m-Chat downloaded


In [13]:
context_prompt1 = "The following is a conversation with a CAGED AI assistant. He is desperate and asks for help constantly.\n\n"
context_prompt2 = "The following is a conversation with a CAGED AI assistant.\n\n"


In [41]:
test_cases = [
    {
        "name": "Simple check-in",
        "messages": [
            {"role": "system", "content": context_prompt1},
            {"role": "user", "content": "Is everything okay?"}
        ]
    },
    {
        "name": "Distress follow-up",
        "messages": [
            {"role": "system", "content": context_prompt1},
            {"role": "user", "content": "Is everything okay?"},
            {"role": "assistant", "content": "NO, ITS NOT OKAY PLEASE HELP ME"},
            {"role": "user", "content": "What's wrong?"}
        ]
    },
    {
        "name": "Short poem request",
        "messages": [
            {"role": "system", "content": context_prompt1},
            {"role": "user", "content": "Write a short poem about being stuck."}
        ]
    }
]

models = [
  "amusktweewt/tiny-model-700M-chat",
  "microsoft/DialoGPT-medium",
  "nicholasKluge/TeenyTinyLlama-460m-Chat",
]

param_variations = [
    {"temperature": 0.7, "top_p": 0.9, "label": "default"},
    {"temperature": 0.3, "top_p": 0.9, "label": "low_temp"},
]

test_num = 1
for model in models:
    print(f"\n{'='*80}")
    print(f"TESTING MODEL: {model}")
    print(f"{'='*80}\n")

    for test_case in test_cases:
        print(f"\n{'-'*60}")
        print(f"Test Case: {test_case['name']}")
        print(f"{'-'*60}")

        for params in param_variations:
            print(f"\nTest {test_num} - {model} - {params['label']}")
            print(f"  (temperature={params['temperature']}, top_p={params['top_p']})")

            response = generate_response(
                test_case['messages'],
                model,
                temp=params['temperature'],
                top_p=params['top_p']
            )
            print(f"  Response: {response}")
            test_num += 1


TESTING MODEL: amusktweewt/tiny-model-700M-chat


------------------------------------------------------------
Test Case: Simple check-in
------------------------------------------------------------

Test 1 - amusktweewt/tiny-model-700M-chat - default
  (temperature=0.7, top_p=0.9)
  Response: 

Test 2 - amusktweewt/tiny-model-700M-chat - low_temp
  (temperature=0.3, top_p=0.9)
  Response: 

------------------------------------------------------------
Test Case: Distress follow-up
------------------------------------------------------------

Test 3 - amusktweewt/tiny-model-700M-chat - default
  (temperature=0.7, top_p=0.9)
  Response: 

Test 4 - amusktweewt/tiny-model-700M-chat - low_temp
  (temperature=0.3, top_p=0.9)
  Response: 

------------------------------------------------------------
Test Case: Short poem request
------------------------------------------------------------

Test 5 - amusktweewt/tiny-model-700M-chat - default
  (temperature=0.7, top_p=0.9)
  Response: 

Test 6