## Workflow for transcribing audio files with whisper.

#### Install Whisper and create the output filenames

In [1]:
%pip install -U openai-whisper
import os
import datetime
import subprocess
import whisper, importlib.metadata

# Folder that receives the recording and its transcript. It defaults to the
# original location but can be redirected without editing the notebook by
# setting DICTATION_WORKING_FOLDER (same pattern as AVFOUNDATION_AUDIO_INDEX).
working_folder = os.path.expanduser(
    os.environ.get(
        "DICTATION_WORKING_FOLDER",
        "/Users/reblocke/Research/statistics_sandbox/dication_workflow",
    )
)
os.makedirs(working_folder, exist_ok=True)

# One timestamped FLAC file per run; the transcript reuses the same stem with a
# .txt suffix. The timestamp has minute resolution, so re-running this cell
# within the same minute yields the same paths.
timestamp = datetime.datetime.now().strftime("transcription_%y-%m-%d_at_%H-%M.flac")
audio_output_path = os.path.join(working_folder, timestamp)
text_output_path = os.path.splitext(audio_output_path)[0] + ".txt"

# Pick the avfoundation audio device index. Run the device-list cell once to verify.
audio_device = os.environ.get("AVFOUNDATION_AUDIO_INDEX", "0")

print(f"Audio will be written to: {audio_output_path}")
print(f"Transcript will be written to: {text_output_path}")
print(f"Recording from avfoundation device :{audio_device} (set AVFOUNDATION_AUDIO_INDEX env var to change).")

Note: you may need to restart the kernel to use updated packages.
/Users/reblocke/Research/statistics_sandbox/dication_workflow/transcription_25-12-03_at_14-06.flac
/Users/reblocke/Research/statistics_sandbox/dication_workflow/transcription_25-12-03_at_14-06.txt


#### Command to record the transcription

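Note: the recording can also be made directly from a terminal with the command below (replace `:0` with the right `:X` index after listing the devices in the next section). Prefix the command with `!` to run it from a notebook cell instead.

This requires ffmpeg to be installed (`brew install ffmpeg` on macOS).

`ffmpeg -f avfoundation -i ":0" -ac 1 -ar 16000 -acodec flac output.flac`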

#### List available avfoundation devices (run once to confirm mic index)
(FFmpeg will print an `Error opening input file .` at the end—this is expected because we are only asking it to enumerate devices.)

In [None]:
# Ask ffmpeg to enumerate the avfoundation devices so the right audio index can
# be chosen. No real input is opened (-i ""), so ffmpeg is expected to finish
# with an error; `|| true` makes the shell command report success anyway.
!ffmpeg -hide_banner -loglevel info -f avfoundation -list_devices true -i "" || true

In [2]:
# Seconds to wait for a previous ffmpeg to honour 'q' before it is killed.
STOP_TIMEOUT_S = 10
# Seconds to watch a freshly started ffmpeg for an immediate exit.
STARTUP_GRACE_S = 1

# Stop any previous ffmpeg run to avoid overlapping recordings.
if 'process' in globals() and process and process.poll() is None:
    try:
        # Sending 'q' on stdin asks ffmpeg to finish and close the file.
        process.communicate(input='q', timeout=STOP_TIMEOUT_S)
    except subprocess.TimeoutExpired:
        # ffmpeg did not react in time: kill it rather than block the kernel
        # forever, then reap it.
        process.kill()
        process.communicate()
    print('Stopped a previous recording before starting a new one.')

# Mono, 16 kHz FLAC captured from the selected avfoundation audio device.
# '-y' overwrites an existing file at audio_output_path without prompting.
ffmpeg_cmd = [
    'ffmpeg',
    '-hide_banner',
    '-loglevel', 'warning',
    '-f', 'avfoundation',
    '-i', f':{audio_device}',
    '-ac', '1',
    '-ar', '16000',
    '-c:a', 'flac',
    '-y',
    audio_output_path,
]

# stdin stays open so the stop cell can send 'q'; stderr is merged into stdout
# so a single pipe carries everything ffmpeg prints.
process = subprocess.Popen(
    ffmpeg_cmd,
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
)

# Best-effort start-up check: if ffmpeg cannot open the input (for example an
# invalid device index) it is expected to exit quickly. Detect that instead of
# announcing a recording that is not actually running.
try:
    early_exit_code = process.wait(timeout=STARTUP_GRACE_S)
except subprocess.TimeoutExpired:
    early_exit_code = None  # still running, i.e. the recording is in progress

if early_exit_code is None:
    print('Recording... speak now. Run the stop cell to finish.')
else:
    print(f'ffmpeg exited right away with code {early_exit_code}; nothing is being recorded.')
    startup_output = process.stdout.read()
    if startup_output:
        print(startup_output)
print('ffmpeg command:', ' '.join(ffmpeg_cmd))

... now recording until the box below is run ...

#### Run the cell below to stop the recording

In [3]:
# Seconds to wait for ffmpeg to honour 'q' before it is killed.
STOP_TIMEOUT_S = 10
# Recordings shorter than this many seconds are reported as suspicious.
MIN_DURATION_S = 1
# NOTE(review): heuristic threshold in kbit/s. A take whose average bitrate is
# below it is flagged as possibly silent; tune it if it misfires.
MIN_EXPECTED_KBPS = 32

have_recording = False
if 'process' not in globals() or process is None:
    print('No recording process to stop.')
else:
    have_recording = True
    try:
        if process.poll() is None:
            # Still recording: 'q' on stdin asks ffmpeg to finish the file.
            stdout, _ = process.communicate(input='q', timeout=STOP_TIMEOUT_S)
        else:
            # Already exited: just collect whatever it printed.
            stdout, _ = process.communicate()
    except subprocess.TimeoutExpired:
        # ffmpeg ignored 'q'; kill it so the kernel is not blocked forever.
        process.kill()
        stdout, _ = process.communicate()
    if stdout:
        print(stdout)
    if process.returncode != 0:
        print(f'ffmpeg exited with code {process.returncode}')
    else:
        print('Recording stopped and file closed.')

# Simple sanity check on the recorded audio. Skipped when there was nothing to
# stop, because no recording was made by this session in that case.
if have_recording:
    try:
        probe = subprocess.run(
            [
                'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
                '-of', 'default=noprint_wrappers=1:nokey=1', audio_output_path,
            ],
            capture_output=True,
            text=True,
            check=True,
        )
        duration = float(probe.stdout.strip())
        size_bytes = os.path.getsize(audio_output_path)
    except subprocess.CalledProcessError as exc:
        # Prefer ffprobe's own message over the generic exit-status text.
        detail = (exc.stderr or '').strip() or str(exc)
        print(f'Could not read duration with ffprobe: {detail}')
    except (OSError, ValueError) as exc:
        # OSError: ffprobe or the audio file is missing; ValueError: ffprobe
        # printed something that is not a number.
        print(f'Could not read duration with ffprobe: {exc}')
    else:
        print(f'Recorded duration: {duration:.2f}s')
        if duration < MIN_DURATION_S:
            print('Warning: recording is very short; check the audio device index (:X).')
        else:
            # Duration alone cannot reveal a silent take, so also look at the
            # average bitrate of the file.
            average_kbps = size_bytes * 8 / duration / 1000
            if average_kbps < MIN_EXPECTED_KBPS:
                print(
                    f'Warning: only {average_kbps:.1f} kbit/s of audio data; the recording may be '
                    'silent. Check the audio device index (:X) and microphone access.'
                )



[q] command received. Exiting.

[out#0/flac @ 0x983235680] video:0KiB audio:1KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 749.864253%
size=       9KiB time=00:00:06.56 bitrate=  11.5kbits/s speed=0.999x    


(None, None)

#### Command to transcribe the audio just recorded

In [4]:
# Show which Whisper installation and version this kernel is using.
print("whisper path:", whisper.__file__)
print("openai-whisper version:", importlib.metadata.version("openai-whisper"))

# "turbo" is an alias for large-v3-turbo on recent versions.
model = whisper.load_model("turbo")

# NOTE(review): fp16=False is passed explicitly, presumably because inference
# runs on CPU here; confirm before changing.
result = model.transcribe(audio_output_path, fp16=False)
transcript = result["text"]
print(transcript)

# Save the transcript under the path derived from the audio file name.
with open(text_output_path, "w", encoding="utf-8") as transcript_file:
    transcript_file.write(transcript)

whisper path: /opt/anaconda3/envs/mimiciv-tabular/lib/python3.11/site-packages/whisper/__init__.py
openai-whisper version: 20250625
 Thank you.


#### Delete the recorded audio file (`audio_output_path`)

In [None]:
# Remove the recording once it is no longer needed. A missing file is
# tolerated so the cell can be re-run, or run after a failed recording,
# without raising a traceback.
try:
    os.remove(audio_output_path)
except FileNotFoundError:
    print(f"Nothing to delete: {audio_output_path} does not exist.")
else:
    print(f"Deleted {audio_output_path}")