In [9]:
import asyncio
from pprint import pprint
from audio_utils import AudioRankerSequential, wave_read_to_wav_bytes
from judge import BASE_SYSTEM_INSTRUCTION, JudgeBasics, JudgeRanking, run_speech_llm

In [2]:
import weave

# Weave URI of the dataset holding the generated speech samples.
# Alternative dataset (generated_speech_audio, version v1):
# SPEECH_AUDIO_DATASET_URI = "weave:///wandb-voice-ai/voice-judge/object/generated_speech_audio:v1"
SPEECH_AUDIO_DATASET_URI = "weave:///wandb-voice-ai/voice-judge/object/generated_speech_audio_test"

# Number of leading dataset rows that are ranked in this notebook.
NUM_SAMPLES_TO_RANK = 3

weave_client = weave.init("wandb-voice-ai/voice-judge")
print("Downloading speech samples from Weave...")
ds_ref = weave.ref(SPEECH_AUDIO_DATASET_URI).get()
speech_samples = list(ds_ref.rows)

# Fail here, with a message naming the dataset, instead of with a bare
# IndexError when the dataset holds fewer rows than we want to rank.
if len(speech_samples) < NUM_SAMPLES_TO_RANK:
    raise ValueError(
        f"Need at least {NUM_SAMPLES_TO_RANK} speech samples to rank, but "
        f"{SPEECH_AUDIO_DATASET_URI} only contains {len(speech_samples)}."
    )

# Keep only the fields used downstream, under shorter key names.
samples_to_rank = [
    {
        "id": row["voice_instructions_id"],
        "audio": row["audio"],
        "instructions": row["voice_instructions"],
    }
    for row in speech_samples[:NUM_SAMPLES_TO_RANK]
]

[36m[1mweave[0m: Logged in as Weights & Biases user: morgan.
[36m[1mweave[0m: View Weave data at https://wandb.ai/wandb-voice-ai/voice-judge/weave


Downloading speech samples from Weave...


In [14]:
@weave.op
def rank_audio(sample_one, sample_two, sample_three):
    """Placeholder op with no logic: it ignores its arguments and returns ``None``.

    It is invoked once so that a call record exists whose id can be handed
    to the ranker as ``target_call_id``.
    """
    return None

# Log one call of the placeholder op; its id is what the ranker is pointed at.
_, call = rank_audio.call(samples_to_rank[0], samples_to_rank[1], samples_to_rank[2])
target_call_id = call.id

# Build the ranker inputs from the same three samples that were just logged,
# so the logged call and the ranked audio cannot drift apart.
# "original_input_order" is the 1-based position of the sample in that list.
ranker = AudioRankerSequential(
    [
        {
            "id": sample["id"],
            "audio": wave_read_to_wav_bytes(sample["audio"]),
            "original_input_order": position,
        }
        for position, sample in enumerate(samples_to_rank[:3], start=1)
    ],
    weave_client=weave_client,
    target_call_id=target_call_id,
)

[36m[1mweave[0m: 📦 Published to https://wandb.ai/wandb-voice-ai/voice-judge/weave/objects/AudioSequentialRanking/versions/qN25MqRp9JQ9szXs5KkkzPJacZKdIxtYmquq8hY1Wd0


In [15]:
# Show the ranker's widget.
# NOTE(review): the rankings read in the next cell presumably come from
# interacting with this widget, so rank the samples before moving on — confirm.
ranker.display_widget()

VBox(children=(VBox(children=(HTML(value='<h3>Listen to Audio Samples:</h3>'), VBox(children=(Label(value='Sam…

In [16]:
# Read the rankings back from the ranker. In the recorded output each entry
# is a dict with "rank", "id" and "original_input_order".
final_rankings = ranker.get_final_rankings()
# Bare name as the last expression so the notebook displays the value.
final_rankings

[{'rank': 1,
  'id': 'weary_georgia_mutter_20250526_2033',
  'original_input_order': 2},
 {'rank': 2,
  'id': 'hushed_georgia_rush_20250526_2033',
  'original_input_order': 3},
 {'rank': 3,
  'id': 'surprised_french_veteran_20250526_2033',
  'original_input_order': 1}]

In [None]:
# Human-assigned rank for each sample id.
rank_by_id = {ranking["id"]: ranking["rank"] for ranking in final_rankings}

# Fail early and name the offending ids: a sample without a ranking would
# otherwise be dropped silently and only surface later as an IndexError.
unranked_ids = [
    sample["id"] for sample in samples_to_rank if sample["id"] not in rank_by_id
]
if unranked_ids:
    raise ValueError(f"No human ranking found for sample id(s): {unranked_ids}")

# Attach the rank and the WAV bytes to every sample, ordered by rank so that
# index 0 holds the sample with the lowest rank number.
samples_with_rankings = sorted(
    (
        {
            "id": sample["id"],
            "audio": sample["audio"],
            "instructions": sample["instructions"],
            "rank": rank_by_id[sample["id"]],
            "audio_bytes": wave_read_to_wav_bytes(sample["audio"]),
        }
        for sample in samples_to_rank
    ),
    key=lambda entry: entry["rank"],
)

# Prompt asking the model to explain a human ranking of three voices.
task_prompt = (
    "Given a ranking of 3 voices by a human judge from most liked to least liked, "
    "describe the voice characteristics that lead to the ranking."
)

@weave.op
async def run_speech_analyzer():
    """Ask the speech LLM to explain the human ranking of the three voices.

    Sends ``task_prompt`` together with the ``audio_bytes`` of the first three
    entries of ``samples_with_rankings`` and returns the result of
    ``run_speech_llm``, requested with ``JudgeBasics`` as the response model.
    """
    ranked_audio = [samples_with_rankings[idx]["audio_bytes"] for idx in range(3)]
    return await run_speech_llm(
        system_instruction=BASE_SYSTEM_INSTRUCTION,
        prompt=task_prompt,
        model_name="gemini-2.5-pro-preview-05-06",  # alternative: "gemini-2.0-flash"
        temperature=0.1,
        response_model=JudgeBasics,
        audio_data=ranked_audio,
    )


# Run example
# result = asyncio.run(example())
builder_result = await run_speech_analyzer()
pprint(builder_result.thinking)
pprint(builder_result.how_to_judge_a_voice)

In [None]:
# System instruction used for the judging step.
BASIC_JUDGE_SYSTEM_INSTRUCTION = "Assess the generated voices provided"

# Judge prompt: one instruction line, then the criteria produced by the
# analyzer step, then a trailing blank line. Written as explicit pieces so the
# layout is visible (the earlier form relied on a backslash continuation
# inside a triple-quoted string).
judge_prompt = (
    "Based on the following criteria, your task is to rank the voices provided:\n"
    f"{builder_result.how_to_judge_a_voice!s}\n"
    "\n"
)

@weave.op
async def run_speech_judge():
    """Ask the speech LLM to rank the three voices using ``judge_prompt``.

    Sends the ``audio_bytes`` of the first three entries of
    ``samples_with_rankings`` and returns the result of ``run_speech_llm``,
    requested with ``JudgeRanking`` as the response model.

    The first audio part is labelled ``voice_1``; later parts use the
    ``voice_{input_order}`` template, which is passed through unformatted.
    NOTE(review): presumably ``run_speech_llm`` fills in ``input_order`` — confirm.
    """
    voices = [samples_with_rankings[idx]["audio_bytes"] for idx in range(3)]
    return await run_speech_llm(
        system_instruction=BASIC_JUDGE_SYSTEM_INSTRUCTION,
        prompt=judge_prompt,
        model_name="gemini-2.5-pro-preview-05-06",  # alternative: "gemini-2.0-flash"
        temperature=0.1,
        response_model=JudgeRanking,
        audio_data=voices,
        initial_audio_parts_prompt="\n\nvoice_1:\n",
        audio_parts_prompt_divider="\n\nvoice_{input_order}:\n",
    )


# Run example
# result = asyncio.run(example())
result = await run_speech_judge()
pprint(result.thinking)
pprint(result.ranking)
print()

print("Human Ranking:")
for i in range(3):
    pprint(f"Voice {i+1}: {final_rankings[i]}")

('All three voices are relatively clear and intelligible. Voice 3 seems to '
 'have a slightly more engaging tone and fewer noticeable artifacts compared '
 'to the others. Voice 1 and 2 are similar in quality, but voice 1 has a '
 'slight edge in naturalness.')
['voice_3', 'voice_1', 'voice_2']

Human Ranking:
("Voice 1: {'rank': 1, 'id': 'weary_georgia_mutter_20250526_2033', "
 "'original_input_order': 2}")
("Voice 2: {'rank': 2, 'id': 'hushed_georgia_rush_20250526_2033', "
 "'original_input_order': 3}")
("Voice 3: {'rank': 3, 'id': 'surprised_french_veteran_20250526_2033', "
 "'original_input_order': 1}")
