In [50]:
from pathlib import Path

import numpy as np
from PIL import Image
from pydub import AudioSegment
from pydub.utils import get_array_type
from tqdm import tqdm

In [52]:
def split_audio(input_file, segment_duration_ms=1000):
    """Load ``<input_file>.mp3`` and cut it into consecutive fixed-length segments.

    Args:
        input_file: Path of the MP3 file *without* its ``.mp3`` extension
            (the extension is appended here).
        segment_duration_ms: Length of each segment in milliseconds; must be
            a positive integer. Defaults to 1000 (one second).

    Returns:
        A list of ``AudioSegment`` slices in playback order. The last slice
        is shorter when the track length is not a multiple of
        ``segment_duration_ms``.

    Raises:
        ValueError: If ``segment_duration_ms`` is not positive.
    """
    # A negative step would make range() empty and silently return no audio,
    # so reject bad durations up front.
    if segment_duration_ms <= 0:
        raise ValueError(
            f'segment_duration_ms must be positive, got {segment_duration_ms}'
        )

    audio = AudioSegment.from_file(f'{input_file}.mp3')
    # pydub measures len() and slice bounds of an AudioSegment in milliseconds.
    return [
        audio[start:start + segment_duration_ms]
        for start in range(0, len(audio), segment_duration_ms)
    ]


def compress_segment(audio, removal_prob):
    """Return a copy of ``audio`` with a random subset of its samples silenced.

    Every sample (each channel's sample is treated separately) is set to zero
    independently with probability ``removal_prob``. The samples are edited
    directly in their native integer format, so the bytes handed back to
    ``AudioSegment`` always match the declared ``sample_width`` regardless of
    bit depth. A float round trip that always converts back to 16-bit would
    produce mismatched data for 32-bit input.

    Args:
        audio: The ``AudioSegment`` to degrade. It is not modified.
        removal_prob: Probability that any given sample is zeroed. Values
            <= 0 leave the audio untouched, values >= 1 silence it entirely.

    Returns:
        A new ``AudioSegment`` with the same frame rate, sample width and
        channel count as ``audio``.
    """
    # Array typecode matching the segment's bit depth (sample_width is bytes).
    array_type = get_array_type(audio.sample_width * 8)
    # np.array copies the samples, so the input segment stays intact.
    samples = np.array(audio.get_array_of_samples(), dtype=array_type)

    # One uniform draw per sample; draws below removal_prob mark it for removal.
    mask = np.random.random(size=samples.shape) < removal_prob
    samples[mask] = 0

    return AudioSegment(
        samples.tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=audio.sample_width,
        channels=audio.channels,
    )

def compress_image(image_path, removal_prob):
    """Black out random pixels of an image and save the result beside it.

    Each pixel is independently set to zero with probability ``removal_prob``.
    The output is written to the same directory as the input, named
    ``compressed_<removal_prob>-<original file name>``.

    Args:
        image_path: Path of the image to read.
        removal_prob: Probability that any given pixel is zeroed.

    Returns:
        None. The modified image is written to disk.
    """
    source = Path(image_path)
    # Close the file handle as soon as the pixels are copied into an array.
    with Image.open(source) as image:
        pixels = np.array(image)

    # The mask covers only (height, width), so a selected pixel is cleared
    # across all of its channels at once.
    mask = np.random.random(size=pixels.shape[:2]) < removal_prob
    # A scalar broadcasts to any channel layout (grayscale, RGB, RGBA, ...),
    # whereas a fixed [0, 0, 0] only fits 3-channel images.
    pixels[mask] = 0

    # Prefix the file name only, so a path with directories stays valid.
    output_path = source.with_name(f'compressed_{removal_prob}-{source.name}')
    Image.fromarray(pixels.astype(np.uint8)).save(output_path)

def prob(i, tau=100):
    """Map a step index to a removal probability that saturates towards 1.

    Computes ``(2 / pi) * arctan(i / tau)``: 0 at ``i == 0``, exactly 0.5 at
    ``i == tau``, approaching 1 as ``i`` grows.

    Args:
        i: Step index (e.g. the segment number).
        tau: Index at which the probability reaches 0.5; smaller values make
            the curve rise faster. Must be positive. Defaults to 100.

    Returns:
        The probability as a NumPy float.

    Raises:
        ValueError: If ``tau`` is not positive.
    """
    if tau <= 0:
        raise ValueError(f'tau must be positive, got {tau}')
    return 2/np.pi * np.arctan(i/tau)

def compress_file(input_file, cover_path='cover.jpg'):
    """Progressively degrade a track and its cover image, segment by segment.

    The track ``<input_file>.mp3`` is split into segments; segment ``i`` has
    its samples zeroed with probability ``prob(i)``, and a copy of the cover
    image degraded with the same probability is saved for each segment. The
    degraded segments are concatenated and exported as
    ``compressed-<input_file>.mp3``.

    Args:
        input_file: Name of the MP3 file *without* the ``.mp3`` extension.
        cover_path: Path of the cover image to degrade alongside the audio.
            Defaults to ``'cover.jpg'``.

    Returns:
        None. The audio file and one image per segment are written to disk.
    """
    final_track = AudioSegment.empty()
    segments = split_audio(input_file)
    # Wrap the list itself (not the enumerate iterator) so tqdm can read its
    # length and display a real progress bar with an ETA.
    for i, segment in enumerate(tqdm(segments)):
        removal_prob = prob(i)
        final_track += compress_segment(segment, removal_prob)
        compress_image(cover_path, removal_prob)
    final_track.export(f'compressed-{input_file}.mp3', format="mp3")


# Run the whole pipeline on the track; the name is given without ".mp3"
# because the extension is appended when the file is loaded.
compress_file(input_file='Al Bowlly_ Heartaches')

211it [00:08, 23.68it/s]
