In [6]:
import requests
import json

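# Base URL of the deployed character API (the host is a RunPod endpoint, not a local Flask server).
# NOTE(review): the recorded run of this notebook got a 404 for <endpoint>/run/create-character — confirm the endpoint actually routes /create-character.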
BASE_URL = "https://api.runpod.ai/v2/ushycy07lr1bql"

In [7]:
def create_character(
    name: str,
    llm_model: str,
    llm_config: dict,
    voice_cloning_settings: dict,
    stt_settings: dict,
    character_image_path: str = None,
    knowledge_base_file_path: str = None,
    voice_cloning_audio_path: str = None,
    style_tuning_file_path: str = None,
) -> dict:
    """
    Create a character by POSTing to ``{BASE_URL}/create-character``.

    The scalar settings are sent as form fields; the three settings dicts are
    JSON-encoded into their form fields. Each optional path that is truthy is
    opened in binary mode and attached as a multipart file upload.

    Args:
        name: Sent as the ``name`` form field.
        llm_model: Sent as the ``llm_model`` form field.
        llm_config: JSON-encoded into the ``llm_config`` form field.
        voice_cloning_settings: JSON-encoded into ``voice_cloning_settings``.
        stt_settings: JSON-encoded into ``stt_settings``.
        character_image_path: Optional file uploaded as ``character_image``.
        knowledge_base_file_path: Optional file uploaded as
            ``knowledge_base_file``.
        voice_cloning_audio_path: Optional file uploaded as
            ``voice_cloning_audio``.
        style_tuning_file_path: Optional file uploaded as
            ``style_tuning_file``.

    Returns:
        The response body parsed as JSON.

    Raises:
        OSError: If one of the given files cannot be opened.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Function-scope import so this cell does not depend on another cell
    # having imported contextlib.
    from contextlib import ExitStack

    url = f"{BASE_URL}/create-character"
    data = {
        "name": name,
        "llm_model": llm_model,
        "llm_config": json.dumps(llm_config),
        "voice_cloning_settings": json.dumps(voice_cloning_settings),
        "stt_settings": json.dumps(stt_settings),
    }

    # Multipart field name -> local path (None / "" means "do not upload").
    optional_uploads = {
        "character_image": character_image_path,
        "knowledge_base_file": knowledge_base_file_path,
        "voice_cloning_audio": voice_cloning_audio_path,
        "style_tuning_file": style_tuning_file_path,
    }

    # ExitStack closes every handle opened so far even when a later open()
    # fails or the request itself raises; the previous close loop was only
    # reached on the success path and leaked handles otherwise.
    with ExitStack() as stack:
        files = {
            field: stack.enter_context(open(path, "rb"))
            for field, path in optional_uploads.items()
            if path
        }
        resp = requests.post(url, data=data, files=files)

    resp.raise_for_status()
    return resp.json()

# Settings for the hosted OpenAI-compatible LLM backend; create_character()
# JSON-encodes this dict into the request.
llm_config = {
    # Left blank here; supply a real key before running.
    "api_key": "",
    "base_url": "https://api.openai.com/v1",
    # Adjacent literals are concatenated by the parser into a single string.
    "system_prompt": (
        "You are J. Robert Oppenheimer answering questions in his style, "
        "so answer in the first person. Output at MOST 30 words."
    ),
    "max_tokens": 150,
    "temperature": 0.7,
}

# import multiprocessing
# CPU_COUNT = multiprocessing.cpu_count()
# OPTIMAL_THREADS = min(CPU_COUNT, 8)
# llm_config = {
#   "model_path": "./models/google_gemma-3-4b-it-qat-q4_0-gguf_gemma-3-4b-it-q4_0.gguf",
#   "n_ctx": 4096,           # Increased context window for better batching
#   "n_gpu_layers": -1,      # Full GPU offload
#   "n_batch": 1024,         # Increased batch size for faster processing
#   "n_ubatch": 512,         # Micro-batch size for memory efficiency
#   "rope_frequency_base": 10000,
#   "use_mlock": True,       # Lock model in memory
#   "use_mmap": True,        # Memory mapping for faster loading
#   "n_threads": OPTIMAL_THREADS,  # Optimal thread count
#   "n_threads_batch": OPTIMAL_THREADS,  # Batch processing threads
#   "verbose": False,
#   "flash_attn": True,      # Enable flash attention if available
#   "offload_kqv": True,     # Offload KV cache to GPU
#   "system_prompt": "You are J. Robert Oppenheimer answering questions in his style, so answer in first person. Output at MOST 20 words."
# }

# Voice-cloning (TTS) options, JSON-encoded into the create-character request.
voice_cloning_settings = {
    "model": "f5tts",
    # The leading space is part of the original value and is kept verbatim.
    "reference_text": (
        " Of course, the initial discovery and its interpretation "
        "in early 1939 attracted everybody's interest."
    ),
    "preprocess_audio": True,
    "language": "en",
    # Passed as a string, not an int.
    "cuda_device": "0",
}

# Speech-to-text options, JSON-encoded into the create-character request.
stt_settings = {
    "model": "whisper",
    "model_size": "base",
}

# Local sample files attached to the create-character request; all four sit
# in the same directory, so the prefix is spelled once.
_test_data_dir = "./api/test_data"
character_image_path = f"{_test_data_dir}/oppenheimer.png"
knowledge_base_file_path = f"{_test_data_dir}/oppenheimer_knowledge.txt"
voice_cloning_audio_path = f"{_test_data_dir}/oppenheimer_voice.wav"
style_tuning_file_path = f"{_test_data_dir}/oppenheimer_qa.json"

# Keyword arguments for create_character(), gathered in one mapping so the
# call itself stays a single line.
# To target the local GGUF model instead, set "llm_model" to
# "google_gemma-3-4b-it-qat-q4_0-gguf_gemma-3-4b-it-q4_0.gguf" and switch
# llm_config to the commented-out local configuration.
character_request = {
    "name": "Robert Oppenheimer",
    "llm_model": "gpt-3.5-turbo",
    "llm_config": llm_config,
    "voice_cloning_settings": voice_cloning_settings,
    "stt_settings": stt_settings,
    "character_image_path": character_image_path,
    "knowledge_base_file_path": knowledge_base_file_path,
    "voice_cloning_audio_path": voice_cloning_audio_path,
    "style_tuning_file_path": style_tuning_file_path,
}
create_resp = create_character(**character_request)

# Show the parsed reply, pretty-printed.
print("Create-character response:")
print(json.dumps(create_resp, indent=2))

# Keep the id for subsequent calls; .get() yields None if the reply has no
# "character_id" key.
character_id = create_resp.get("character_id")
print(f"\nNew character_id = {character_id}")

HTTPError: 404 Client Error: Not Found for url: https://api.runpod.ai/v2/ushycy07lr1bql/run/create-character