In [1]:
import pyttsx3
# Request the Windows SAPI5 driver explicitly rather than relying on
# pyttsx3's default driver selection.
engine = pyttsx3.init(driverName="sapi5")
voices = engine.getProperty("voices")

# One line per voice: its id followed by its display name.
for voice_info in voices:
    print(f"{voice_info.id} {voice_info.name}")


HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0 Microsoft David Desktop - English (United States)
HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-GB_HAZEL_11.0 Microsoft Hazel Desktop - English (Great Britain)
HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0 Microsoft Zira Desktop - English (United States)


In [5]:
import struct, sys
# Pointer size in bytes ("P" format) times eight gives the interpreter's
# bitness; it should be 64 for SAPI11 voices.
pointer_bits = struct.calcsize("P") * 8

print(f"Python: {sys.version}")
print(f"Python bitness: {pointer_bits} bit")


Python: 3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]
Python bitness: 64 bit


In [6]:
# First-time setup, if pywin32 is not installed yet:  %pip install pywin32

import win32com.client as win32

voice = win32.gencache.EnsureDispatch("SAPI.SpVoice")

# List the voices SAPI exposes and, in the same pass, remember the first one
# whose name mentions Hazel (en-GB), so the collection is only queried once.
print("Voices found:")
hazel_token = None
for i, tok in enumerate(voice.GetVoices()):
    voice_name = tok.GetAttribute("Name")
    print(f"{i:2d} | {voice_name} | {tok.Id}")
    if hazel_token is None and "hazel" in voice_name.lower():
        hazel_token = tok

# Force Hazel when she is installed. Otherwise say so explicitly: without
# this message the cell would speak with the default voice while the spoken
# sentence still claims to be Hazel.
if hazel_token is not None:
    voice.Voice = hazel_token
else:
    print("Warning: no installed voice contains 'hazel'; using the default voice.")

voice.Rate = 0       # -10 .. +10 (0 = normal)
voice.Volume = 100   # 0 .. 100

# Kept as the last expression so the cell still shows Speak's return value.
voice.Speak("Hello! This is Microsoft Hazel, English Great Britain. If you can hear me, everything works.")


Voices found:
 0 | Microsoft David Desktop | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0
 1 | Microsoft Hazel Desktop | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-GB_HAZEL_11.0
 2 | Microsoft Zira Desktop | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0


1

In [7]:
import os, time
from IPython.display import Audio, display
import win32com.client as win32
from win32com.client import constants

# Build the output path from the current user's home directory instead of a
# path tied to one specific Windows account.
SAVE_PATH = os.path.join(
    os.path.expanduser("~"), "Downloads", "Poly Glot AI", "hazel_sample.wav"
)
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)

voice  = win32.gencache.EnsureDispatch("SAPI.SpVoice")
stream = win32.gencache.EnsureDispatch("SAPI.SpFileStream")
fmt    = win32.gencache.EnsureDispatch("SAPI.SpAudioFormat")

# Select Hazel; the for/else branch runs only when no voice matched, so a
# missing Hazel voice is reported instead of going unnoticed.
for tok in voice.GetVoices():
    if "hazel" in tok.GetAttribute("Name").lower():
        voice.Voice = tok
        break
else:
    print("Warning: no installed voice contains 'hazel'; using the default voice.")

# choose WAV format (48kHz 16-bit mono is nice; pick others if you want)
fmt.Type = constants.SAFT48kHz16BitMono
stream.Format = fmt
stream.Open(SAVE_PATH, constants.SSFMCreateForWrite)

# Route the voice to the file and speak. The stream is closed in `finally`
# so a failure while speaking does not leave the output file open.
try:
    voice.AudioOutputStream = stream
    voice.Speak("This file was generated using Microsoft Hazel in a British accent.")
finally:
    stream.Close()

print("Saved:", SAVE_PATH)
# Brief pause before the file is read back for playback.
time.sleep(0.5)
display(Audio(filename=SAVE_PATH, autoplay=True))


Saved: C:\Users\sagni\Downloads\Poly Glot AI\hazel_sample.wav


In [9]:
import re
import win32com.client as win32
from win32com.client import constants

def get_sapi_voice(name_contains="hazel"):
    """Create a SAPI ``SpVoice`` and select the first voice matching a name.

    Args:
        name_contains: Substring to look for in each voice token's ``Name``
            attribute. The comparison is case-insensitive.

    Returns:
        The ``SpVoice`` object. Its ``Voice`` is set to the first matching
        token; when nothing matches, the object is returned with whatever
        voice it already had (the default voice).
    """
    speaker = win32.gencache.EnsureDispatch("SAPI.SpVoice")
    # First token whose lower-cased name contains the lower-cased needle.
    match = next(
        (
            token
            for token in speaker.GetVoices()
            if name_contains.lower() in token.GetAttribute("Name").lower()
        ),
        None,
    )
    if match is not None:
        speaker.Voice = match
    return speaker

def speak_british(text, rate=0, volume=100):
    """Speak ``text`` aloud using the voice returned for "hazel".

    Args:
        text: Text passed to the voice's ``Speak`` method.
        rate: Speaking rate; coerced with ``int()`` before being applied.
        volume: Output volume; coerced with ``int()`` before being applied.

    Returns:
        None.
    """
    speaker = get_sapi_voice("hazel")
    speaker.Rate = int(rate)
    speaker.Volume = int(volume)
    speaker.Speak(text)

def save_british_wav(text, path, rate=0, volume=100, khz=48):
    """Render ``text`` to a 16-bit mono WAV file using the "hazel" voice.

    Args:
        text: Text passed to the voice's ``Speak`` method.
        path: Destination file; the stream is opened on it with
            ``SSFMCreateForWrite``.
        rate: Speaking rate; coerced with ``int()``.
        volume: Output volume; coerced with ``int()``.
        khz: Sample-rate selector, coerced with ``int()``. Supported values
            are 8, 11, 16, 22, 32, 44 and 48; any other value falls back to
            the 48 kHz format.

    Returns:
        ``path``, unchanged.

    Raises:
        Whatever the underlying calls raise. Once the file stream has been
        opened it is closed again even if speaking fails.
    """
    v   = get_sapi_voice("hazel")
    st  = win32.gencache.EnsureDispatch("SAPI.SpFileStream")
    fmt = win32.gencache.EnsureDispatch("SAPI.SpAudioFormat")
    v.Rate = int(rate)
    v.Volume = int(volume)

    # Map the kHz selector to the matching 16-bit mono format constant.
    formats = {
        8:  constants.SAFT8kHz16BitMono,
        11: constants.SAFT11kHz16BitMono,
        16: constants.SAFT16kHz16BitMono,
        22: constants.SAFT22kHz16BitMono,
        32: constants.SAFT32kHz16BitMono,
        44: constants.SAFT44kHz16BitMono,
        48: constants.SAFT48kHz16BitMono,
    }
    fmt.Type = formats.get(int(khz), constants.SAFT48kHz16BitMono)
    st.Format = fmt

    st.Open(path, constants.SSFMCreateForWrite)
    try:
        # Route the voice into the file stream and synthesize.
        v.AudioOutputStream = st
        v.Speak(text)
    finally:
        # Always release the file, including when Speak raises; otherwise
        # the stream opened above would be left open.
        st.Close()
    return path


In [10]:
# Speak one sentence aloud with the rate set one step below the default of 0.
speak_british(text="Good evening. This should be Hazel in a British accent.", rate=-1)

# Render a second sentence to a WAV file; the returned path is the cell's output.
path = save_british_wav(
    text="Saving a British sample to a wave file.",
    path=r"C:\Users\sagni\Downloads\Poly Glot AI\british_test.wav",
)
path


'C:\\Users\\sagni\\Downloads\\Poly Glot AI\\british_test.wav'