In [21]:
import requests
import json

# Base URL for the running Flask app
BASE_URL = "https://21yutww8lv65lc-5000.proxy.runpod.net/"

In [None]:
def create_character(
    name: str,
    llm_model: str,
    llm_config: dict,
    voice_cloning_settings: dict,
    stt_settings: dict,
    character_image_path: str = None,
    knowledge_base_file_path: str = None,
    voice_cloning_audio_path: str = None,
    style_tuning_file_path: str = None,
) -> dict:
    """
    Calls /create-character with form data + optional files.
    Returns the parsed JSON response.
    """
    url = f"{BASE_URL}/create-character"
    files = {}
    data = {
        "name": name,
        "llm_model": llm_model,
        "llm_config": json.dumps(llm_config),
        "voice_cloning_settings": json.dumps(voice_cloning_settings),
        "stt_settings": json.dumps(stt_settings),
    }

    if character_image_path:
        files["character_image"] = open(character_image_path, "rb")
    if knowledge_base_file_path:
        files["knowledge_base_file"] = open(knowledge_base_file_path, "rb")
    if voice_cloning_audio_path:
        files["voice_cloning_audio"] = open(voice_cloning_audio_path, "rb")
    if style_tuning_file_path:
        files["style_tuning_file"] = open(style_tuning_file_path, "rb")

    resp = requests.post(url, data=data, files=files)
    for f in files.values():
        f.close()
    resp.raise_for_status()
    return resp.json()

llm_config = {
    "api_key": "",
    "base_url": "https://api.openai.com/v1",
    "system_prompt": "You are J. Robert Oppenheimer answering questions in his style, so answer in the first person. Output at MOST 30 words.",
    "max_tokens": 150,
    "temperature": 0.7
}

# import multiprocessing
# CPU_COUNT = multiprocessing.cpu_count()
# OPTIMAL_THREADS = min(CPU_COUNT, 8)
# llm_config = {
#   "model_path": "./models/google_gemma-3-4b-it-qat-q4_0-gguf_gemma-3-4b-it-q4_0.gguf",
#   "n_ctx": 4096,           # Increased context window for better batching
#   "n_gpu_layers": -1,      # Full GPU offload
#   "n_batch": 1024,         # Increased batch size for faster processing
#   "n_ubatch": 512,         # Micro-batch size for memory efficiency
#   "rope_frequency_base": 10000,
#   "use_mlock": True,       # Lock model in memory
#   "use_mmap": True,        # Memory mapping for faster loading
#   "n_threads": OPTIMAL_THREADS,  # Optimal thread count
#   "n_threads_batch": OPTIMAL_THREADS,  # Batch processing threads
#   "verbose": False,
#   "flash_attn": True,      # Enable flash attention if available
#   "offload_kqv": True,     # Offload KV cache to GPU
#   "system_prompt": "You are J. Robert Oppenheimer answering questions in his style, so answer in first person. Output at MOST 20 words."
# }

voice_cloning_settings = {
  "model": "f5tts",
  "reference_text": " Of course, the initial discovery and its interpretation in early 1939 attracted everybody's interest.",
  "preprocess_audio": True,
  "language": "en",
  "cuda_device": "0"
}

stt_settings = {
  "model": "whisper",
  "model_size": "base"
}

character_image_path = "./test_data/oppenheimer.png"
knowledge_base_file_path = "./test_data/oppenheimer_knowledge.txt"
voice_cloning_audio_path = "./test_data/oppenheimer_voice.wav"
style_tuning_file_path = "./test_data/oppenheimer_qa.json"

create_resp = create_character(
    name="Robert Oppenheimer 7",
    llm_model="gpt-3.5-turbo",
    llm_config=llm_config,
    voice_cloning_settings=voice_cloning_settings,
    stt_settings=stt_settings,
    character_image_path=character_image_path,
    knowledge_base_file_path=knowledge_base_file_path,
    voice_cloning_audio_path=voice_cloning_audio_path,
    style_tuning_file_path=style_tuning_file_path
)

# create_resp = create_character(
#     name="Robert Oppenheimer",
#     llm_model="google_gemma-3-4b-it-qat-q4_0-gguf_gemma-3-4b-it-q4_0.gguf",
#     llm_config=llm_config,
#     voice_cloning_settings=voice_cloning_settings,
#     stt_settings=stt_settings,
#     character_image_path=character_image_path,
#     knowledge_base_file_path=knowledge_base_file_path,
#     voice_cloning_audio_path=voice_cloning_audio_path,
#     style_tuning_file_path=style_tuning_file_path
# )

print("Create-character response:")
print(json.dumps(create_resp, indent=2))

# Extract character_id for subsequent calls
character_id = create_resp.get("character_id")
print(f"\nNew character_id = {character_id}")

HTTPError: 524 Server Error:  for url: https://21yutww8lv65lc-5000.proxy.runpod.net//create-character

In [23]:
character_id = 3

def load_character(character_id: int) -> dict:
    """
    Calls /load-character with JSON {"character_id": ...}.
    Returns the parsed JSON response.
    """
    url = f"{BASE_URL}/load-character"
    payload = {"character_id": character_id}
    resp = requests.post(url, json=payload)
    resp.raise_for_status()
    return resp.json()

load_resp = load_character(character_id)
print(f"Load-character response for ID {character_id}:")
print(json.dumps(load_resp, indent=2))

Load-character response for ID 3:
{
  "character_id": 3,
  "character_name": "Robert Oppenheimer 7",
  "loaded_models": {
    "llm": {
      "Robert Oppenheimer 7_llm": {
        "cache_key": "Robert Oppenheimer 7_llm",
        "loaded": true
      }
    },
    "stt": {
      "Robert Oppenheimer 7_stt": {
        "cache_key": "Robert Oppenheimer 7_stt",
        "loaded": true
      }
    },
    "tts": {
      "f5tts": true,
      "xtts": false
    }
  },
  "message": "Models loaded for character Robert Oppenheimer 7",
  "status": "success"
}


In [15]:
def ask_question_text(character_id: int, question: str) -> dict:
    """
    Calls /ask-question-text with JSON {"character_id": ..., "question": ...}.
    Returns the parsed JSON response.
    """
    url = f"{BASE_URL}/ask-question-text"
    payload = {"character_id": character_id, "question": question}
    resp = requests.post(url, json=payload)
    resp.raise_for_status()
    return resp.json()

if 'character_id' in locals():
    question = "What inspired you to lead the Manhattan Project?"
    text_resp = ask_question_text(character_id, question)
    print("Ask-question-text response:")
    print(json.dumps(text_resp, indent=2))
else:
    print("Error: character_id not defined. Run Cells 3 and 5 first.")

Ask-question-text response:
{
  "audio_base64": "UklGRiTsBABXQVZFZm10IBAAAAABAAEAwF0AAIC7AAACABAAZGF0YQDsBAAAAAAAAAD//wAAAAAAAP//////////////////AAAAAAAAAAAAAAAA/////////////////////wAA/////wAAAAAAAAAAAAAAAP////////////8AAAAAAAAAAAAAAAAAAAAAAAD//wAAAAAAAAAA//////////8AAAAA//////////////////////////8AAAAA/////wAAAAAAAAAAAAAAAP///////wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP////////////8AAAAAAAD//////////wAAAAD//////////wAA/////////////////////wAAAAAAAP////8AAAAAAAD//wAAAAAAAAAAAAAAAAAAAAAAAP//AAD///////////////8AAP//////////////////AAAAAAAAAAAAAAAA/////wAA////////AAD//wAAAAAAAP////////////8AAAAA////////AAAAAP////8AAP//AAAAAAAA/////wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD//wAA//////////8AAAAAAAAAAAAA////////////////AAAAAP////8AAP//////////////////////////////////AAAAAAAAAAD//wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP////8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAA//8AAAAAAAAAAAAAAAAAAP//AAD////////////////////////////////////////////////////////////////////