In [None]:
import numpy as np
import subprocess
import wave
import io
import contextlib

def extract_audio_and_analyze(file_name, audio_track=0, sampling=44100):
    """Decode one audio track with FFmpeg and summarize the resulting PCM data.

    The selected track is down-mixed to mono, resampled to ``sampling`` Hz and
    decoded to signed 16-bit little-endian PCM. FFmpeg writes the raw samples
    to stdout and the whole stream is held in memory.

    Args:
        file_name: Path of the media file passed to ``ffmpeg -i``.
        audio_track: Zero-based index among the input's audio streams
            (used as ``-map 0:a:<audio_track>``).
        sampling: Output sample rate in Hz (passed to ``-ar``).

    Returns:
        A ``(properties, audio_np)`` tuple.

        ``audio_np`` is a 1-D array of 16-bit little-endian integers. It is a
        read-only view over the bytes captured from FFmpeg; call ``.copy()``
        before modifying it in place.

        ``properties`` is a dict with the keys ``bitrate`` (string, bits per
        second of the decoded PCM, not of the source file), ``sampling_rate``,
        ``frame_size`` (bytes per frame), ``duration`` (``"M:SS"`` string,
        fractional seconds truncated), ``sample_count``, ``frame_count`` and
        ``bit_depth``.

    Raises:
        subprocess.CalledProcessError: FFmpeg exited with a non-zero status.
            Its captured diagnostics are on the exception's ``stderr``.
        OSError: The ``ffmpeg`` executable could not be launched.
    """
    # Fixed by the FFmpeg arguments below ("-ac 1" and "pcm_s16le").
    bit_depth = 16
    channels = 1
    sample_rate = sampling
    bytes_per_sample = bit_depth // 8

    # FFmpeg command to extract mono, 16-bit PCM raw audio to stdout
    command = [
        "ffmpeg",
        "-nostdin",
        "-threads", "0",
        "-i", file_name,
        "-map", f"0:a:{audio_track}",
        "-f", "s16le",
        "-ac", str(channels),
        "-acodec", "pcm_s16le",
        "-ar", str(sample_rate),
        "-"
    ]

    # check=True turns a failed decode into CalledProcessError instead of
    # silently analyzing an empty or partial buffer.
    result = subprocess.run(command, capture_output=True, check=True)
    raw_audio = result.stdout

    # A stream cut off mid-sample leaves a dangling byte that np.frombuffer
    # rejects; drop the incomplete trailing sample instead of failing.
    leftover = len(raw_audio) % bytes_per_sample
    if leftover:
        raw_audio = raw_audio[:-leftover]

    # "<i2" pins little-endian to match "-f s16le" regardless of host byte order.
    audio_np = np.frombuffer(raw_audio, dtype=np.dtype("<i2"))

    # Derived properties of the decoded stream
    sample_count = len(audio_np)
    frame_size = bytes_per_sample * channels
    frame_count = sample_count // channels
    bitrate = sample_rate * bit_depth * channels  # bits per second
    duration_seconds = sample_count / sample_rate
    whole_minutes, leftover_seconds = divmod(duration_seconds, 60)
    minutes = int(whole_minutes)
    seconds = int(leftover_seconds)

    properties = {
        "bitrate": f"{bitrate} bps",
        "sampling_rate": sample_rate,
        "frame_size": frame_size,
        "duration": f"{minutes}:{seconds:02d}",
        "sample_count": sample_count,
        "frame_count": frame_count,
        "bit_depth": bit_depth
    }

    return properties, audio_np


# Example usage: decode the first audio track at 16 kHz and list its properties.
# `raw` and `sr` stay at module level because the playback cell reads them.
sr = 16000
file_path = 'C:\\videos\\6230046.mxf'
audio_info, raw = extract_audio_and_analyze(file_path, audio_track=0, sampling=sr)

# One "name: value" line per property, in the order the function returns them
for key, value in audio_info.items():
    print(key, value, sep=": ")


In [None]:
import numpy as np
from IPython.display import Audio

# Show an audio player for the decoded samples, played back at `sr` Hz
Audio(data=raw, rate=sr)
