In [2]:
!pip install pydub librosa 




[notice] A new release of pip is available: 23.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
!pip install openunmix soundfile




[notice] A new release of pip is available: 23.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118



[notice] A new release of pip is available: 23.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
from pydub import AudioSegment
import librosa

In [6]:
!ffmpeg -version 

ffmpeg version 2024-05-02-git-71669f2ad5-full_build-www.gyan.dev Copyright (c) 2000-2024 the FFmpeg developers
built with gcc 13.2.0 (Rev5, Built by MSYS2 project)
configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-libsnappy --enable-zlib --enable-librist --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-libbluray --enable-libcaca --enable-sdl2 --enable-libaribb24 --enable-libaribcaption --enable-libdav1d --enable-libdavs2 --enable-libuavs3d --enable-libxevd --enable-libzvbi --enable-librav1e --enable-libsvtav1 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxavs2 --enable-libxeve --enable-libxvid --enable-libaom --enable-libjxl --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-frei0r --enable-libfreetype --enable-libfribidi --enable-libharf

In [7]:
def calculate_bpm(file_path):
    """
    Calculate the BPM of an audio file using librosa.

    Args:
    file_path (str): Path to the audio file.

    Returns:
    float: Estimated BPM of the audio.
    """
    y, sr = librosa.load(file_path, sr=None)

    onset_env = librosa.onset.onset_strength(y=y, sr=sr)

    # Estimate the tempo
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)

    return tempo

In [8]:
def change_bpm(audio, original_bpm, target_bpm):
    """
    Changes the BPM of an audio file by adjusting the speed.
    This will also change the pitch of the audio.

    Args:
    audio (AudioSegment): The audio segment to adjust.
    original_bpm (float): The original BPM of the audio.
    target_bpm (float): The target BPM to achieve.

    Returns:
    AudioSegment: The modified audio segment with new BPM.
    """
    ratio = target_bpm / original_bpm

    new_audio = audio.speedup(playback_speed=ratio)

    return new_audio

In [10]:
import torch
import librosa
import soundfile as sf
from openunmix import predict
import numpy as np

def separate_vocals(input_file, output_dir):
    # Load the audio file
    audio, rate = librosa.load(input_file, sr=44100, mono=False)
    
    # If the audio is mono, convert it to stereo
    if audio.ndim == 1:
        audio = np.repeat(audio.reshape(1, -1), 2, axis=0)
    
    # Separate the audio
    estimates = predict.separate(torch.tensor(audio[None]), rate=rate)
    
    # Save the separated tracks
    for source, estimate in estimates.items():
        sf.write(f"{output_dir}/{source}.mp3", estimate[0].T, rate)
    
    print(f"Separated tracks saved in {output_dir}")

In [11]:
def overlay(trackList: list):
    """
    Overlays multiple audio tracks together.

    Args:
    trackList (list): List of audio tracks to overlay.

    Returns:
    mixed_audio (AudioSegment): The mixed audio segment.
    """
    audio = AudioSegment.from_file(trackList[0], "mp3")

    for track in trackList[1:]:
        audio2 = AudioSegment.from_file(track, "mp3")
        audio = audio.overlay(audio2)

    return audio

In [101]:
import os 

def mix(track1, track2, track1_start, track1_duration, track2_mix_entry, track2_start, track2_duration, overlap_time, offset):
    """
    Mixes two audio tracks together with a specified overlap time.

    Args:
    track1 (str): Path to the first audio track.
    track2 (str): Path to the second audio track.
    track1_duration (float): Duration of snippet from first track (in seconds). Includes duration to be used in the overlap.
    track2_duration (float): Duration of snippet from first track (in seconds).
    overlap_time (float): Duration of the overlap in seconds.
    track1_start (float): Start time of the first track in seconds.
    track2_start (float): Start time of the second track

    Intermediary Values:
    mix: Fadeout of Song 1 + Overlay + Fadein of Song 2


    Returns:
    final_mix (AudioSegment): The mixed audio segment.
    """
    audio1 = AudioSegment.from_file(track1, "wav")
    audio2 = AudioSegment.from_file(track2, "wav")

    rampup = audio1[track1_start: track1_start + track1_duration - overlap_time]

    if not os.path.exists("t1/vocals.mp3"):
        separate_vocals(track1, "t1")
    if not os.path.exists("t2/vocals.mp3"):
        separate_vocals(track2, "t2")

    t1_vocals = AudioSegment.from_file("t1/vocals.mp3", "mp3")
    t2_music = overlay(['t2/bass.mp3', 't2/drums.mp3', 't2/other.mp3'])
    
    # 20,800 to 28,000 is the rampdown
    rampdown = audio2[track2_start: track2_start + track2_duration]

    # fade_out = audio1 fading out + 0 decibel sound until the end of overlap
    mix_vocals = t1_vocals[track1_start + track1_duration - overlap_time: track1_start + track1_duration]
    mix_music = AudioSegment.silent(duration=offset) + t2_music[track2_mix_entry: track2_mix_entry + overlap_time - offset]
    mix_audio = mix_music.overlay(mix_vocals)
    
    combined_mix = rampup + mix_audio + rampdown

    return combined_mix

In [103]:
mix("baby.mp4", "dk.mp4", track1_start = 2200, track1_duration=20100, track2_mix_entry = 6100, track2_start = 20800, track2_duration=8500, overlap_time=15450, offset=800).export("baby-dk.mp3", format="mp3")

<_io.BufferedRandom name='baby-dk.mp3'>