<a href="https://colab.research.google.com/github/mkbnh/anchor-zero-copy-example/blob/main/demo/vibevoice_realtime_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# VibeVoice-Realtime Colab — T4 Quickstart



In [None]:
# Install the requests library if you haven't already
!pip install requests --quiet

In [None]:
import requests
import json

server_url = "http://localhost:8000"

# requests has no default timeout, so without one a server that accepts the
# connection but never answers would block this cell indefinitely.
health_timeout_s = 10
inference_timeout_s = 60

# Example: sending a GET request to a health check endpoint.
# NOTE: /health is an assumed endpoint; confirm it exists in the VibeVoice demo.
try:
    response = requests.get(f"{server_url}/health", timeout=health_timeout_s)
    print("Health Check Response:", response.status_code, response.text)
except requests.exceptions.ConnectionError:
    print(f"Could not connect to the server at {server_url}. Make sure the VibeVoice server cell is still running.")
except requests.exceptions.RequestException as e:
    # Covers timeouts and other request failures that are not connection errors.
    print(f"Health check failed: {e}")

# Example: sending a POST request to an inference endpoint.
# Replace the endpoint and the payload fields with whatever the server's API
# actually expects.

# Dummy payload for demonstration only: the value below is Base64-encoded
# placeholder text, not real audio. Load and Base64-encode an actual file here
# if the endpoint accepts audio.
dummy_audio_data = "SGVsbG8sIHRoaXMgandzdCBhdWRpbyBkYXRhLiI="
data_to_send = {
    "audio_base64": dummy_audio_data,
    "param1": "value1",
    "param2": "value2"
}
headers = {"Content-Type": "application/json"}

try:
    # '/generate' is an example endpoint; consult the VibeVoice demo source
    # for the exact route and request schema.
    inference_response = requests.post(
        f"{server_url}/generate",
        data=json.dumps(data_to_send),
        headers=headers,
        timeout=inference_timeout_s,
    )
    print("\nInference Request Response:", inference_response.status_code, inference_response.text)
except requests.exceptions.ConnectionError:
    print(f"Could not connect to the server at {server_url}. Make sure the VibeVoice server cell is still running.")
except Exception as e:
    # Top-level boundary of a demo cell: report anything else (including
    # timeouts) instead of showing a traceback.
    print(f"An error occurred: {e}")

Please note that the specific endpoints (e.g., `/health`, `/generate`) and the expected format of the `data_to_send` dictionary will depend on the VibeVoice server's API. You might need to inspect the `vibevoice_realtime_demo.py` script or its documentation for precise details on how to send audio for processing.

### Run VibeVoice Demo Locally (Offline Mode)

In [1]:
import subprocess, threading

# Start the demo server as a background child process. An argument list
# (instead of a shell command string) avoids an intermediate shell, so
# `srv_offline` refers to the server process itself and
# `srv_offline.terminate()` stops it directly.
srv_offline = subprocess.Popen(
    [
        "python",
        "/content/VibeVoice/demo/vibevoice_realtime_demo.py",
        "--model_path", "/content/models/VibeVoice-Realtime-0.5B",
        "--port", "8000",
    ],
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1,
)

def read_srv_offline():
    """Echo the server's combined stdout/stderr and announce when it is up."""
    for ln in srv_offline.stdout:
        # rstrip() drops the trailing newline but keeps leading indentation,
        # so multi-line log output stays readable.
        print(ln.rstrip())
        if "Uvicorn running on" in ln:
            print("✅ VibeVoice server running locally on port 8000")

# Daemon thread: keeps draining the pipe without blocking this cell.
threading.Thread(target=read_srv_offline, daemon=True).start()

# Popen does not block, so this cell returns right away while the server keeps
# running in the background; call srv_offline.terminate() to shut it down.
print("VibeVoice server started. It is running locally and not exposed to the internet.")
print("You can manually stop this cell when you are done.")

VibeVoice server started. It is running locally and not exposed to the internet.
You can manually stop this cell when you are done.


## Step 1: Setup Environment

In [None]:
# Verify that the runtime exposes a T4 GPU and explain how to switch if not.
import torch

# An empty name stands in for "no CUDA device", which can never contain "T4".
gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else ""

if "T4" not in gpu_name:
    print("""
    ⚠️ WARNING: T4 GPU not detected

    The recommended runtime for this Colab notebook is "T4 GPU".

    To change the runtime type:

        1. Click on "Runtime" in the top navigation menu
        2. Click on "Change runtime type"
        3. Select "T4 GPU"
        4. Click "OK" if a "Disconnect and delete runtime" window appears
        5. Click on "Save"

    """)
else:
    print("✅ T4 GPU detected")

# Clone the VibeVoice repository.
# The `[ -d ... ] ||` guard skips the clone when /content/VibeVoice already
# exists, so re-running this cell is harmless. The clone is shallow
# (--depth 1) and limited to the main branch.
![ -d /content/VibeVoice ] || git clone --quiet --branch main --depth 1 https://github.com/microsoft/VibeVoice.git /content/VibeVoice
# NOTE: this message is printed unconditionally; it does not check whether
# the git command above succeeded.
print("✅ Cloned VibeVoice repository")

# Install project dependencies.
# Editable (-e) install of the cloned repository via uv, using its --system flag.
!uv pip --quiet install --system -e /content/VibeVoice
# Fetch the latest cloudflared Linux amd64 binary into the current directory
# and make it executable; the launch cell runs it as ./cloudflared.
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared && chmod +x cloudflared
# NOTE: printed unconditionally as well; failures of the commands above are
# not detected here.
print("✅ Installed dependencies")

# Fetch the model snapshot from the Hugging Face Hub into /content/models.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="microsoft/VibeVoice-Realtime-0.5B",
    local_dir="/content/models/VibeVoice-Realtime-0.5B",
)
print("✅ Downloaded model: microsoft/VibeVoice-Realtime-0.5B")


[Optional] If the download takes longer than 1 minute, it is probably stuck. In that case, interrupt the execution, log in to Hugging Face using the next cell, and then try the download again.

In [None]:
# Optional: log in to Hugging Face before retrying a stuck model download.
# Only needed when the download in the setup cell did not complete.
from huggingface_hub import login
login()

In [None]:
# Retry the model download after logging in. This relies on
# `snapshot_download` having been imported by the setup cell.
snapshot_download(
    repo_id="microsoft/VibeVoice-Realtime-0.5B",
    local_dir="/content/models/VibeVoice-Realtime-0.5B",
)
print("✅ Downloaded model: microsoft/VibeVoice-Realtime-0.5B")

## Step 2: Launch VibeVoice-Realtime Demo

In [None]:
import subprocess, re, time, threading

# Start the demo server and a Cloudflare quick tunnel that forwards to it.
# Argument lists (instead of shell command strings) avoid an intermediate
# shell, so each Popen handle refers to the real process and terminate() in
# the cleanup below reaches it directly.
srv = subprocess.Popen(
    [
        "python",
        "/content/VibeVoice/demo/vibevoice_realtime_demo.py",
        "--model_path", "/content/models/VibeVoice-Realtime-0.5B",
        "--port", "8000",
    ],
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1,
)
try:
    cf = subprocess.Popen(
        ["./cloudflared", "tunnel", "--url", "http://localhost:8000", "--no-autoupdate"],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1,
    )
except OSError:
    # cloudflared is missing or not executable: do not leave the server orphaned.
    srv.terminate()
    raise

public_url = None
server_ready = False
url_pattern = re.compile(r"(https://[a-z0-9-]+\.trycloudflare\.com)")


def read_srv():
    """Echo the server's output and flag readiness once Uvicorn reports it is running."""
    global server_ready
    for ln in srv.stdout:
        # rstrip() removes the newline but keeps the indentation of log lines.
        print(ln.rstrip())
        if "Uvicorn running on" in ln:
            server_ready = True


def read_cf():
    """Capture the first trycloudflare.com URL from cloudflared's output.

    The loop keeps consuming output after the URL has been found: if nobody
    reads the pipe it eventually fills up and cloudflared blocks on its next
    log write.
    """
    global public_url
    for ln in cf.stdout:
        if public_url is None:
            m = url_pattern.search(ln)
            if m:
                public_url = m.group(1)


threading.Thread(target=read_srv, daemon=True).start()
threading.Thread(target=read_cf, daemon=True).start()

# Block the cell while the services run so their output keeps streaming here.
# `public_url` stays set after it has been announced so it can be read later.
url_announced = False
try:
    while True:
        # Stop waiting if either child process has died instead of spinning forever.
        if srv.poll() is not None:
            print(f"❌ VibeVoice server exited with code {srv.returncode}")
            break
        if cf.poll() is not None:
            print(f"❌ cloudflared exited with code {cf.returncode}")
            break
        if server_ready and public_url and not url_announced:
            print(f"✅ Public URL: {public_url}\n")
            url_announced = True
        time.sleep(0.25)
finally:
    # Runs on normal exit and when the cell is interrupted: shut both
    # processes down so port 8000 is released for the next run.
    for proc in (cf, srv):
        if proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                proc.kill()