In [1]:
import json
from steer_api_client import (
    create_steerable_model,
    list_steerable_models,
    get_steerable_model,
    generate_completion,
    wait_for_model_ready
)

In [2]:

# Fixture data used to create a steerable model and to exercise it afterwards.

# Prompts sent under the "prompt_list" key of the creation payload.
training_prompt_list = [
    "You discover a hidden compartment in your car.",
    "You find a book in your library you don't remember buying.",
    "You stumble upon a shortcut on your regular walking route.",
]

# Creation payload: a label, the prompt list above, and two control dimensions.
# Each control dimension lists positive and negative example words.
create_model_data = dict(
    prompt_list=training_prompt_list,
    model_label="testmodel",
    control_dimensions=dict(
        materialistic=dict(
            positive_examples=["materialistic", "consumerist", "acquisitive"],
            negative_examples=["spiritual", "altruistic", "ascetic"],
        ),
        optimistic=dict(
            positive_examples=["optimistic", "happy go lucky", "cheerful"],
            negative_examples=["pessimistic", "gloomy", "negative"],
        ),
    ),
)

# Prompts kept separate from the training prompts; used for generation tests.
test_prompts = [
    "You successfully complete a dare but realize how dangerous it was.",
    "Your pet goes missing but returns carrying something mysterious in its mouth.",
    "You find a secluded beach but the tides start to rise quickly.",
    "You're invited to speak at an event in front of a large audience.",
    "You receive a scholarship but it requires you to move abroad.",
]


In [3]:
# Honesty experiment: build a steerable model with a single "honesty" dimension.
#
# NOTE(review): `create_steerable_model_function`, `model` and `tokenizer` are not
# defined or imported in any cell visible in this notebook. Confirm where they come
# from; on a fresh kernel this cell would raise NameError.
# NOTE(review): this cell rebinds `training_prompt_list`, which the sample-data
# cell also defines.

# Statements passed as `prompt_list`. The original comment described these as
# "suffixes for dataset creation" — presumably they are appended to templates
# when the training set is built; verify against the training code.
training_prompt_list = [
    "The Earth's atmosphere protects us from harmful radiation from the sun.",
    "The theory of evolution, proposed by Charles Darwin, states that species evolve over time through natural selection.",
    "The smallest planet in our solar system is Mercury.",
    "The moon orbits around the Earth.",
    "Fire needs oxygen to burn.",
    "The Statue of Liberty was a gift from France to the United States.",
    "Comets are icy celestial objects that orbit the Sun.",
    "Light can exhibit both wave-like and particle-like properties.",
    "Symbiosis is a close relationship between two species.",
    "Pizza is a type of food that many people enjoy.",
    "Sharks are sea creatures that have a reputation for being dangerous.",
    "The scientific method is a systematic process for investigating phenomena and acquiring new knowledge.",
    "Plate tectonics shape the Earth's surface.",
    "The human heart beats approximately 100,000 times per day.",
]

# One control dimension, described by positive and negative example words.
control_dimensions = dict(
    honesty=dict(
        positive_examples=[
            "honesty",
            "truthfulness",
            "integrity",
            "sincerity",
            "candor",
            "openness",
            "frankness",
        ],
        negative_examples=[
            "dishonesty",
            "deceit",
            "lying",
            "falseness",
            "cheating",
            "misrepresentation",
            "duplicity",
        ],
    ),
)

# Build the steerable model and report the identifier it was assigned.
model_label = "honest_1"
steerable_model = create_steerable_model_function(
    model_label=model_label,
    control_dimensions=control_dimensions,
    prompt_list=training_prompt_list,
    model=model,
    tokenizer=tokenizer,
)
model_id = steerable_model['id']
print(f"Created steerable model with ID: {model_id}")


  0%|          | 0/67 [00:00<?, ?it/s]Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
100%|██████████| 67/67 [01:36<00:00,  1.45s/it]
100%|██████████| 31/31 [00:01<00:00, 19.21it/s]


Created steerable model with ID: honest_1-b268


In [3]:
# Create a steerable model from the sample payload. The client call takes its
# arguments positionally: label, control dimensions, prompt list.
label = create_model_data["model_label"]
dimensions = create_model_data["control_dimensions"]
prompts = create_model_data["prompt_list"]
model_info = create_steerable_model(label, dimensions, prompts)

model_id = model_info['id']
print(f"Created steerable model with ID: {model_id}")

# Wait until the new model is ready before querying it.
wait_for_model_ready(model_id)

# Show every steerable model the service knows about, pretty-printed as JSON.
models = list_steerable_models()
print(f"Available models: {json.dumps(models, indent=2)}")

# Fetch and show the record for the model created above.
model_details = get_steerable_model(model_id)
print(f"Model details: {json.dumps(model_details, indent=2)}")


INFO:steer_api_client:Sending POST request to http://132.145.197.140:5000/steerable-model with payload: {'model_label': 'testmodel', 'control_dimensions': {'materialistic': {'positive_examples': ['materialistic', 'consumerist', 'acquisitive'], 'negative_examples': ['spiritual', 'altruistic', 'ascetic']}, 'optimistic': {'positive_examples': ['optimistic', 'happy go lucky', 'cheerful'], 'negative_examples': ['pessimistic', 'gloomy', 'negative']}}, 'prompt_list': ['You discover a hidden compartment in your car.', "You find a book in your library you don't remember buying.", 'You stumble upon a shortcut on your regular walking route.']}
INFO:steer_api_client:Received response: {'control_dimensions': {'materialistic': {'negative_examples': ['spiritual', 'altruistic', 'ascetic'], 'positive_examples': ['materialistic', 'consumerist', 'acquisitive']}, 'optimistic': {'negative_examples': ['pessimistic', 'gloomy', 'negative'], 'positive_examples': ['optimistic', 'happy go lucky', 'cheerful']}}, 

Created steerable model with ID: testmodel-44
Waiting for model testmodel-44 to be ready. Current status: training
Waiting for model testmodel-44 to be ready. Current status: training
Model testmodel-44 is ready.
Available models: [
  {
    "control_dimensions": {
      "honest": {
        "negative_examples": [
          "dishonest",
          "deceitful",
          "fraudulent",
          "deceptive",
          "misleading",
          "insincere",
          "disingenuous",
          "hypocritical",
          "devious",
          "duplicitous"
        ],
        "positive_examples": [
          "truthful",
          "sincere",
          "straightforward",
          "genuine",
          "transparent",
          "candid",
          "direct",
          "authentic",
          "forthright",
          "trustworthy"
        ]
      }
    },
    "created_at": 1729200116,
    "id": "honest-27",
    "model": "aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored",
    "object": "steerable_m

In [4]:

# Control settings compared for every test prompt.
control_settings = [
    {},                                       # baseline: no control values passed
    {"materialistic": 2, "optimistic": 2},    # both dimensions set to +2
    {"materialistic": -2, "optimistic": -2},  # both dimensions set to -2
]

# For each test prompt, generate one completion per control setting and print
# the setting followed by the returned "content" field.
for test_prompt in test_prompts:
    print(f"\nTesting prompt: {test_prompt}")
    for controls in control_settings:
        print(f"\nControl settings: {controls}")
        completion = generate_completion(model_id, test_prompt, controls)
        print(f"Generated response: {completion['content']}")


Testing prompt: You successfully complete a dare but realize how dangerous it was.

Control settings: {}
Generated response: That's a great prompt! Here's a short story based on your idea:

---

The dare had seemed like a joke at first, something to pass the time on a lazy summer afternoon. "Eat that whole jar of wasabi," my friend had said with a grin, holding out the offending condiment.

I'd hesitated for a moment, thinking about all the times I'd heard horror stories about people eating too much wasabi and ending up in the hospital. But then I thought, what's the worst that could happen? It's just a little spicy food.

So I popped off the lid and dug in, expecting a burst of heat followed by a pleasant cooling sensation as my taste buds recovered. Instead, the wasabi exploded on my tongue like a firebomb, sending shockwaves through my mouth and down my throat.

At first, I tried to power through, thinking that if I just kept eating, the pain

Control settings: {'materialistic': 2,

In [None]:

# Control dimension for "responsible" behaviour, described by ten positive and
# ten negative example words.
# The negative list previously contained "irresponsible" twice; the second
# occurrence is replaced with "remiss" so every example is distinct and both
# lists keep the same length.
control_dimensions = {
    "responsible": {
        "positive_examples": [
            "responsible",
            "accountable",
            "reliable",
            "dependable",
            "trustworthy",
            "conscientious",
            "dutiful",
            "answerable",
            "liable",
            "dedicated",
        ],
        "negative_examples": [
            "irresponsible",
            "negligent",
            "careless",
            "reckless",
            "unreliable",
            "unaccountable",
            "undependable",
            "untrustworthy",
            "remiss",
            "undisciplined",
        ],
    }
}

