# Realtime pipeline demo
This notebook shows how to run the `realtime` sample pipeline with a local audio file (for example, a WAV file).

In [1]:
# Configure sys.path so we can import gotaglio from this repo
import os
import sys

# `__file__` is not defined inside a notebook, so the location is resolved
# against the kernel's working directory: two directories up is expected to be
# the repository root. Start the kernel from this notebook's folder.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))

# Guard the append so re-running this cell does not stack duplicate entries.
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

In [None]:
# Build the in-memory case list: a single case pairing a fixed UUID with an audio path
cases = [
    dict(
        uuid="00000000-0000-0000-0000-000000000001",
        audio="data/sample_audio.m4a",
    ),
]
cases

[{'uuid': '00000000-0000-0000-0000-000000000001', 'audio': '{audio_file}'}]

In [None]:
# Execute the realtime pipeline through a Gotaglio instance (same approach as the menu sample)
from gotaglio.gotag import Gotaglio
from realtime import realtime_pipeline_spec

# Register the realtime pipeline spec with a fresh Gotaglio instance
gt = Gotaglio([realtime_pipeline_spec])

# Configuration overrides, written as flat dotted keys in the same form as CLI patches
config_patch = {
    "infer.model.name": "azure-realtime",
    # Audio file to use; the first case may alternatively carry a placeholder
    "realtime.audio_file": "foobar.wav",
    # Optional settings forwarded to the model
    "infer.model.settings.sample_rate_hz": 16000,
    "infer.model.settings.timeout_s": 30,
}

# save=True stores a run log JSON in the logs/ directory
result = gt.run("realtime", cases, config_patch, save=True)

# Number of cases processed
len(result["results"])

[3m              Summary for               [0m
[3m  eb8fa702-13ae-40fd-aeb7-775d6a6869b5  [0m
┏━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━┓
┃[1m [0m[1m id[0m[1m [0m┃[1m [0m[1mstatus  [0m[1m [0m┃[1m [0m[1mevents[0m[1m [0m┃[1m [0m[1mtranscript[0m[1m [0m┃
┡━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━┩
│[36m [0m[36m000[0m[36m [0m│[35m [0m[1;32mCOMPLETE[0m[35m [0m│ 1      │            │
└─────┴──────────┴────────┴────────────┘

Total: [1;36m1[0m
Complete: [1;36m1[0m/[1;36m1[0m [1m([0m[1;36m100.00[0m%[1m)[0m
Error: [1;36m0[0m/[1;36m1[0m [1m([0m[1;36m0.00[0m%[1m)[0m
Passed: [1;36m0[0m/[1;36m1[0m [1m([0m[1;36m0.00[0m%[1m)[0m
Failed: [1;36m1[0m/[1;36m1[0m [1m([0m[1;36m100.00[0m%[1m)[0m


Results written to logs/eb8fa702-13ae-40fd-aeb7-775d6a6869b5.json


1

In [4]:
# Look at the first case: transcript length and number of realtime events
first = result["results"][0]
stages = first["stages"]

# The "infer" stage entry is used as the transcript; default to an empty string when absent
transcript = stages.get("infer", "")

# Prefer events stored on the case itself; when missing or empty, fall back to the stages mapping
events = first.get("realtime_events") or stages.get("realtime_events")

len(transcript), (len(events) if events is not None else 0)

(0, 0)

In [None]:
# Show the latest saved run log file in logs/
from gotaglio.constants import app_configuration
from gotaglio.shared import get_files_sorted_by_creation, read_json_file
import os

logs_dir = app_configuration["log_folder"]

# Scan the folder a single time so the emptiness check and the lookup below
# work from the same listing (and the directory is not read twice).
log_entries = get_files_sorted_by_creation(logs_dir) if os.path.isdir(logs_dir) else []

# NOTE(review): assumes the helper orders entries oldest-to-newest and that each
# entry's first element is the file name without its extension — confirm
# against gotaglio.shared.
latest = log_entries[-1][0] if log_entries else None

# None when the folder is missing or holds no logs yet
log_path = os.path.join(logs_dir, f"{latest}.json") if latest else None
log_path

In [None]:
# Load and peek at the saved run log (metadata + result count)
if log_path and os.path.isfile(log_path):
    runlog = read_json_file(log_path)
    log_summary = {
        "uuid": runlog.get("uuid"),
        "pipeline": runlog.get("metadata", {}).get("pipeline", {}).get("name"),
        "cases": len(runlog.get("results", [])),
        "file": log_path,
    }
else:
    # save=True is passed in the cell that runs the pipeline, so point the reader there
    log_summary = "No log found yet. Re-run the pipeline cell with save=True."

# Jupyter only renders a bare expression when it is the last top-level
# statement of the cell; expressions nested inside if/else are discarded.
log_summary