In [None]:
# Windows path of the source .mov file used by every cell below
# (raw string so the backslashes need no escaping).
file_path = r'C:\videos\TheGreat_2_2001_169_2398_ProRes422HQ_ENG20_ENG51_BPO20_BPO51_Primary_A17913780.mov'

In [None]:
import numpy as np
import subprocess
import wave
import io
import contextlib

def extract_audio_and_analyze(file_name, audio_track=0, sampling=44100):
    """Decode one audio stream of a media file via FFmpeg and summarise it.

    FFmpeg is asked to convert the selected stream to mono, signed 16-bit
    little-endian PCM at ``sampling`` Hz and to write the raw samples to its
    stdout, which is captured in memory.

    Parameters
    ----------
    file_name
        Path of the input file handed to ``ffmpeg -i``.
    audio_track
        Index of the audio stream to decode, used as ``-map 0:a:<audio_track>``.
    sampling
        Output sample rate in Hz, passed to FFmpeg as ``-ar``.

    Returns
    -------
    tuple
        ``(properties, audio_np)`` where ``properties`` is a dict with the keys
        ``bitrate``, ``sampling_rate``, ``frame_size``, ``duration``
        (``"M:SS"``, fractional seconds truncated), ``sample_count``,
        ``frame_count`` and ``bit_depth``, and ``audio_np`` is the decoded
        audio as a 1-D ``numpy.int16`` array.

    Raises
    ------
    subprocess.CalledProcessError
        If FFmpeg exits with a non-zero status (``check=True``).
    """
    # Fixed by the output format requested from FFmpeg below.
    bits_per_sample = 16  # pcm_s16le
    n_channels = 1        # -ac 1

    # Assemble the command in logical groups: global flags, input, stream
    # selection, output format, sample rate, and "-" to write to stdout.
    ffmpeg_cmd = ["ffmpeg", "-nostdin", "-threads", "0"]
    ffmpeg_cmd += ["-i", file_name]
    ffmpeg_cmd += ["-map", f"0:a:{audio_track}"]
    ffmpeg_cmd += ["-f", "s16le", "-ac", "1", "-acodec", "pcm_s16le"]
    ffmpeg_cmd += ["-ar", str(sampling), "-"]

    # check=True turns a failing FFmpeg run into CalledProcessError.
    completed = subprocess.run(ffmpeg_cmd, capture_output=True, check=True)

    # Interpret the captured bytes as 16-bit samples (no copy is made).
    audio_np = np.frombuffer(completed.stdout, dtype=np.int16)

    n_samples = len(audio_np)
    whole_minutes, leftover_seconds = divmod(n_samples / sampling, 60)

    properties = {
        "bitrate": f"{sampling * bits_per_sample * n_channels} bps",
        "sampling_rate": sampling,
        "frame_size": bits_per_sample // 8 * n_channels,  # bytes per frame
        "duration": f"{int(whole_minutes)}:{int(leftover_seconds):02d}",
        "sample_count": n_samples,
        "frame_count": n_samples // n_channels,
        "bit_depth": bits_per_sample,
    }

    return properties, audio_np


# Target sample rate in Hz; reused later when the samples are played back.
sr = 16000

# Decode the first audio stream (index 0) of the source file at `sr` Hz.
audio_info, raw = extract_audio_and_analyze(file_path, sampling=sr, audio_track=0)

# Report each computed property on its own line.
for key in audio_info:
    value = audio_info[key]
    print(f"{key}: {value}")


In [None]:
import numpy as np
from IPython.display import Audio

# Render an inline audio player for the decoded samples. `rate` is given the
# same sample rate (`sr`) that the samples in `raw` were decoded at.
Audio(raw, rate=sr)


# pydub: extracting more than one track

In [None]:
from pydub import AudioSegment
import numpy as np

In [None]:
# Load the source file into a pydub AudioSegment.
audio = AudioSegment.from_file(file_path)
# Last expression of the cell: shows how many channels the loaded segment has.
audio.channels

In [None]:



def extract_tracks(audio, audio_track, mixed=False):
    """Pull selected channels out of a pydub segment as normalised float arrays.

    Example: ``extract_tracks(audio, [0, 1], mixed=True)``.

    Parameters
    ----------
    audio
        A pydub ``AudioSegment`` (anything exposing ``channels``,
        ``array_type`` and ``split_to_mono()`` works).
    audio_track
        Iterable of zero-based channel indices to extract. Indices may repeat.
    mixed
        When true, the selected channels are combined into a single track by
        successively calling ``overlay`` on the first selected channel.

    Returns
    -------
    numpy.ndarray
        ``float32`` samples divided by the maximum value of the integer type
        named by ``audio.array_type``. Shape is ``(n_samples,)`` when
        ``mixed`` is true, otherwise ``(len(audio_track), n_samples)``.

    Raises
    ------
    ValueError
        If ``audio_track`` is empty or names a channel the segment lacks.
    """
    channel_indices = list(audio_track)
    if not channel_indices:
        raise ValueError("audio_track must contain at least one channel index.")

    # Validate every requested index against the real channel count, instead
    # of comparing the channel count with how many indices were requested.
    missing = [i for i in channel_indices if not 0 <= i < audio.channels]
    if missing:
        raise ValueError(
            f"The audio file has only {audio.channels} channel(s); "
            f"requested channel index(es) {missing} do not exist."
        )

    # Split once and index into the result, rather than re-splitting the
    # whole segment for every requested channel.
    mono_channels = audio.split_to_mono()
    tracks = [mono_channels[i] for i in channel_indices]

    # Downmix the selected channels into one track if requested.
    if mixed:
        downmixed = tracks[0]
        for track in tracks[1:]:
            downmixed = downmixed.overlay(track)
        tracks = [downmixed]

    # One float32 row per track.
    samples = np.array(
        [np.array(track.get_array_of_samples(), dtype=np.float32) for track in tracks]
    )

    # Scale by the positive full-scale value of the sample type so the data
    # lies in roughly [-1, 1] (the most negative integer maps just below -1).
    samples /= np.iinfo(audio.array_type).max

    # `samples` is already 2-D, so no further stacking is needed.
    return samples[0] if mixed else samples