In [1]:
import os
from moviepy.editor import VideoFileClip
import mimetypes
from pydub import AudioSegment
from IPython.display import Audio
from io import BytesIO
import tempfile

### Read file and get type

In [2]:
def get_file_type(file):
    """
    Classifies a file as audio, video, or image based on its guessed MIME type.

    The MIME type is guessed from the file name only (via mimetypes.guess_type);
    the file's contents are never read.

    Parameters:
    file (str, path-like, or file-like object): A path, or an object exposing
        a ``name`` attribute (such as a handle returned by ``open``).

    Returns:
    str or None: "audio", "video", or "image"; None when the MIME type is
        unknown, falls outside these categories, or when no usable file name
        can be determined from the input.
    """
    # File-like objects carry their path in .name; anything else is treated as
    # the path itself.
    file_name = getattr(file, "name", file)

    # Inputs without a usable name (e.g. an in-memory buffer, or a handle opened
    # from a file descriptor whose .name is an int) cannot be classified by
    # extension. Report "unknown" instead of letting guess_type raise TypeError.
    if not isinstance(file_name, (str, os.PathLike)):
        return None

    mime_type, _ = mimetypes.guess_type(file_name)
    if not mime_type:
        return None

    for category in ("audio", "video", "image"):
        if mime_type.startswith(category):
            return category

    # A MIME type was found but it is not one of the supported categories
    return None

In [3]:
# Sample inputs used by the rest of the notebook
audio_file_path = "audiofile.m4a"
image_file_path = "imagefile.jpeg"
video_file_path = "videofile.MOV"

# Pass the opened handles rather than the path strings, so the file-like branch
# of get_file_type (which reads the handle's .name) is actually exercised.
with open(audio_file_path, "rb") as f:
    print("audio file type:", get_file_type(f))

with open(image_file_path, "rb") as f:
    print("image file type:", get_file_type(f))

with open(video_file_path, "rb") as f:
    print("video file type:", get_file_type(f))

audio file type: audio
image file type: image
video file type: video


### Process audio

In [4]:
def process_audio_to_mp3(audio_file):
    """
    Re-encodes audio as MP3 and hands it back as an in-memory buffer.

    Failures are reported with a printed message rather than an exception:
    callers must check for a None result.

    Parameters:
    audio_file (file-like object): An opened file-like object containing the audio.

    Returns:
    BytesIO: MP3 data, positioned at the start of the buffer; None if the
        audio could not be loaded or exported.
    """
    # Step 1: decode whatever was passed in
    try:
        segment = AudioSegment.from_file(audio_file)
    except Exception as load_error:
        print(f"Error loading audio file: {load_error}")
        return None

    # Step 2: encode into a fresh in-memory buffer
    buffer = BytesIO()
    try:
        segment.export(buffer, format="mp3")
        # Rewind so the caller can read from the first byte
        buffer.seek(0)
        print("Audio converted to MP3 format in memory.")
    except Exception as export_error:
        print(f"Error exporting audio to MP3: {export_error}")
        return None

    return buffer

In [5]:
# Convert the sample audio file. mp3_data is an in-memory buffer, or None if
# process_audio_to_mp3 failed to load or export the audio.
with open(audio_file_path, "rb") as audio_file:
    mp3_data = process_audio_to_mp3(audio_file)

Audio converted to MP3 format in memory.


In [None]:
# Play the converted audio: pass the buffer's raw bytes to IPython's Audio display.
# NOTE(review): this raises AttributeError if the conversion above returned None.
Audio(mp3_data.getvalue())

### Extract audio from video

In [15]:
def extract_audio_from_video(video_file):
    """
    Extracts audio from an opened video file and returns it as a file-like object (BytesIO).

    The video is copied to a temporary file because VideoFileClip is given a
    path, and the audio is written to a second temporary file before being
    loaded into memory. Both temporary files are removed and the clip is
    closed even when extraction fails.

    Parameters:
    video_file (file-like object): An opened file-like object containing the video.
        It is read from its current position to the end.

    Returns:
    BytesIO: In-memory audio data in WAV format (pcm_s16le codec), positioned
        at the start of the buffer.

    Raises:
    ValueError: If the video has no audio track.
    """
    print("Extracting audio from video...")

    temp_video_path = None
    temp_audio_path = None
    video_clip = None
    try:
        # Save the video file-like object to a temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
            # Record the path before writing so cleanup still happens if the copy fails
            temp_video_path = temp_video_file.name
            temp_video_file.write(video_file.read())

        # Load the video clip from the temporary file path
        video_clip = VideoFileClip(temp_video_path)
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Fail with a clear message instead of an AttributeError on None
            raise ValueError("The video file does not contain an audio track.")

        # Reserve a temporary path for the audio; the handle is closed right
        # away so the clip can write to that path itself
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            temp_audio_path = temp_audio_file.name

        # Write the audio clip to the temporary audio file
        audio_clip.write_audiofile(temp_audio_path, codec="pcm_s16le")

        # Load the audio data into a BytesIO object (position starts at 0)
        with open(temp_audio_path, "rb") as temp_audio:
            audio_buffer = BytesIO(temp_audio.read())
    finally:
        # Clean up on success and on failure alike
        if video_clip is not None:
            video_clip.close()
        for temp_path in (temp_video_path, temp_audio_path):
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)

    print("Audio extracted and loaded into memory.")
    return audio_buffer

In [16]:
# Extract the audio track from the sample video. Despite its name, audio_clip
# holds the BytesIO buffer of WAV data returned by extract_audio_from_video.
with open(video_file_path, "rb") as video_file:
    audio_clip = extract_audio_from_video(video_file)

Extracting audio from video...
MoviePy - Writing audio in /var/folders/g1/vc18zw8539b9sx__b61c_r8h0000gn/T/tmp8akkevgd.wav


                                                        

MoviePy - Done.
Audio extracted and loaded into memory.




In [9]:
# Convert the WAV buffer extracted from the video (audio_clip) to MP3.
# This overwrites the mp3_data produced earlier from the audio file.
mp3_data = process_audio_to_mp3(audio_clip)

Audio converted to MP3 format in memory.


In [10]:
# Play the MP3 produced from the video's audio track.
# NOTE(review): this raises AttributeError if the conversion above returned None.
Audio(mp3_data.getvalue())

### Final function

In [11]:
def process_file_to_mp3(file):
    """
    Processes an audio or video file and returns the audio data in MP3 format.

    The file type is determined by get_file_type. Audio inputs are converted
    directly; for video inputs the audio track is extracted first and then
    converted.

    Parameters:
    file (str, path-like, or file-like object): Path to the audio or video
        file, or an opened binary file-like object with a ``name`` attribute.

    Returns:
    BytesIO: In-memory MP3 data, or None if process_audio_to_mp3 could not
        load or export the audio (it prints the error instead of raising).

    Raises:
    ValueError: If the file type is not audio or video.
    """
    file_type = get_file_type(file)

    if file_type == "audio":
        print("Processing audio file to MP3...")
        return process_audio_to_mp3(file)

    if file_type == "video":
        print("Processing video file to MP3...")
        if isinstance(file, (str, os.PathLike)):
            # extract_audio_from_video calls .read() on its argument, so a path
            # must be opened first; previously a path input crashed here.
            with open(file, "rb") as video_file:
                audio_buffer = extract_audio_from_video(video_file)
        else:
            audio_buffer = extract_audio_from_video(file)
        return process_audio_to_mp3(audio_buffer)

    raise ValueError("Unsupported file type. Only audio and video files are supported.")

In [12]:
# Run the combined entry point on the sample audio file. Pass the opened handle
# (the original passed the path string, leaving the handle unused).
with open(audio_file_path, "rb") as audio_file:
    mp3_data = process_file_to_mp3(audio_file)
Audio(mp3_data.getvalue())

Processing audio file to MP3...
Audio converted to MP3 format in memory.


In [18]:
# Run the combined entry point on the sample video file: the audio track is
# extracted and converted to MP3, then played from the in-memory buffer.
with open(video_file_path, "rb") as video_file:
    mp3_data = process_file_to_mp3(video_file)
Audio(mp3_data.getvalue())

Processing video file to MP3...
Extracting audio from video...
MoviePy - Writing audio in /var/folders/g1/vc18zw8539b9sx__b61c_r8h0000gn/T/tmp9v891_k8.wav


                                                        

MoviePy - Done.
Audio extracted and loaded into memory.
Audio converted to MP3 format in memory.
