In [None]:
import os
from pathlib import Path

%matplotlib inline
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np
from IPython.display import Audio

In [None]:
# Root folder that is searched recursively for audio samples. The original
# machine-specific location remains the default; set the CHORALE_SAMPLES_DIR
# environment variable to run the notebook elsewhere without editing this cell.
SAMPLES_DIR = Path(os.environ.get("CHORALE_SAMPLES_DIR", "/home/kureta/Music/Chorale Samples/"))
# File extensions (without the leading dot) collected from SAMPLES_DIR.
EXTENSIONS = ["mp3", "wav"]

In [None]:
# Collect every file below SAMPLES_DIR whose extension is listed in EXTENSIONS.
# Path.glob yields entries in an arbitrary, filesystem-dependent order, so the
# result is sorted: positional uses such as waves[0] then refer to the same
# file on every run and on every machine.
AUDIO_FILES = sorted(
    path for ext in EXTENSIONS for path in SAMPLES_DIR.glob(f"**/*.{ext}")
)

In [None]:
# Sampling rate (Hz) used for loading, plotting and playback throughout the notebook.
SAMPLE_RATE = 44100

In [None]:
# Decode every collected file as a mono signal resampled to SAMPLE_RATE.
# Only the first element of what librosa.load returns (the samples) is kept.
waves = []
for audio_path in AUDIO_FILES:
    signal = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)[0]
    waves.append(signal)

In [None]:
# Draw on an explicit Axes instead of pyplot's implicit "current figure" so the
# plot does not depend on hidden state, and label it so it stands on its own.
fig, ax = plt.subplots(figsize=(14, 5))
librosa.display.waveshow(waves[0], sr=SAMPLE_RATE, ax=ax)
# The trailing semicolon keeps the return value's repr out of the cell output.
ax.set(title="Original recording (waves[0])", ylabel="Amplitude");

In [None]:
# Analysis frame of 250 ms expressed in samples, bumped to the next even
# number when odd (adding the remainder modulo 2 adds 1 exactly in that case).
frame_length = librosa.time_to_samples(0.25, sr=SAMPLE_RATE)
frame_length += frame_length % 2
# Consecutive frames start one eighth of a frame apart.
hop_length = frame_length // 8

# Slice the first recording into overlapping frames and transpose the result
# so that each row of `frames` is one frame.
frames = np.transpose(
    librosa.util.frame(waves[0], frame_length=frame_length, hop_length=hop_length)
)

In [None]:
# Symmetric (fftbins=False) Hann window with one coefficient per frame sample;
# later cells multiply it onto the frames before overlap-adding them.
window = librosa.filters.get_window("hann", frame_length, fftbins=False)

In [None]:
# Plot the window coefficients on a time axis to inspect the taper shape.
librosa.display.waveshow(window, sr=SAMPLE_RATE)

In [None]:
# Plot a single windowed frame (index 100) as an example; this requires the
# recording to produce more than 100 frames.
librosa.display.waveshow(frames[100] * window, sr=SAMPLE_RATE)

In [None]:
def overlap_add(frames, hop_size):
    """Rebuild a 1-D signal by overlap-adding equally sized frames.

    Frame ``i`` is summed into the output starting at sample ``i * hop_size``.
    Any windowing must already have been applied by the caller.

    Parameters
    ----------
    frames : sequence of 1-D array-like
        Frames to sum. Every frame is expected to have the length of
        ``frames[0]``.
    hop_size : int
        Offset in samples between the starts of consecutive frames.

    Returns
    -------
    numpy.ndarray
        float64 array of length ``hop_size * (len(frames) - 1) + frame_size``,
        or an empty array when ``frames`` contains no frames.

    Raises
    ------
    ValueError
        If ``hop_size`` is negative.
    """
    if hop_size < 0:
        raise ValueError(f"hop_size must be non-negative, got {hop_size}")
    # With no frames there is no frames[0] to take the frame size from;
    # the reconstruction of nothing is an empty signal.
    if len(frames) == 0:
        return np.zeros(0)

    frame_size = len(frames[0])
    output_signal = np.zeros(hop_size * (len(frames) - 1) + frame_size)

    for i, frame in enumerate(frames):
        start = i * hop_size
        # Overlapping regions accumulate contributions from several frames.
        output_signal[start:start + frame_size] += frame

    return output_signal

In [None]:
# Window every frame and sum the frames back at their original hop spacing.
result = overlap_add(frames * window, hop_length)

In [None]:
# Draw on an explicit Axes instead of pyplot's implicit "current figure" so the
# plot does not depend on hidden state, and label it so it stands on its own.
fig, ax = plt.subplots(figsize=(14, 5))
librosa.display.waveshow(result, sr=SAMPLE_RATE, ax=ax)
# The trailing semicolon keeps the return value's repr out of the cell output.
ax.set(title="Overlap-add reconstruction", ylabel="Amplitude");

In [None]:
# Playback of the first loaded recording, for comparison with the processed versions.
Audio(waves[0], rate=SAMPLE_RATE, normalize=True)

In [None]:
# Playback of the overlap-add reconstruction.
Audio(result, rate=SAMPLE_RATE, normalize=True)

In [None]:
def repeat_frames(frames, n):
    """Return a new array in which every frame occurs ``n`` times in a row.

    Frame order is preserved: ``[a, b]`` with ``n=2`` becomes ``[a, a, b, b]``.

    Parameters
    ----------
    frames : array-like
        Frames stacked along the first axis.
    n : int
        Number of consecutive copies of each frame.

    Returns
    -------
    numpy.ndarray
        Array whose first axis is ``n`` times as long as that of ``frames``.
    """
    # np.repeat allocates its own output, so the input only needs to be viewed
    # as an array (no defensive copy and no reshape is involved).
    return np.repeat(np.asarray(frames), n, axis=0)

In [None]:
# Duplicate each frame so that twice as many frames are laid out in sequence.
stretched_frames = repeat_frames(frames, 2)

In [None]:
# Overlap-add the duplicated, windowed frames at the unchanged hop length.
stretched = overlap_add(stretched_frames * window, hop_length)

In [None]:
# Playback of the frame-repetition stretch.
Audio(stretched, rate=SAMPLE_RATE, normalize=True)

In [None]:
def crossfade_frames(frame1, frame2, crossfade_length):
    """Join two 1-D signals with a linear crossfade over their shared edge.

    The last ``crossfade_length`` samples of ``frame1`` fade out linearly
    (1 -> 0) while the first ``crossfade_length`` samples of ``frame2`` fade in
    (0 -> 1); the two faded regions are summed and placed between the
    untouched remainder of each input.

    Parameters
    ----------
    frame1, frame2 : 1-D array-like
        Signals to join, ``frame1`` first.
    crossfade_length : int
        Number of overlapping samples. ``0`` means plain concatenation.

    Returns
    -------
    numpy.ndarray
        Array of length ``len(frame1) + len(frame2) - crossfade_length``.

    Raises
    ------
    ValueError
        If ``crossfade_length`` is negative or longer than either input.
    """
    frame1 = np.asarray(frame1)
    frame2 = np.asarray(frame2)

    if crossfade_length < 0 or crossfade_length > min(len(frame1), len(frame2)):
        raise ValueError(
            f"crossfade_length must be between 0 and the shorter input length "
            f"({min(len(frame1), len(frame2))}), got {crossfade_length}"
        )
    # Negative-index slicing cannot express "the last 0 samples": x[-0:] is the
    # whole array and x[:-0] is empty, so the zero-length case is handled here.
    if crossfade_length == 0:
        return np.concatenate((frame1, frame2))

    fade_out = np.linspace(1, 0, crossfade_length)
    fade_in = np.linspace(0, 1, crossfade_length)

    # Index of the first sample of frame1 that takes part in the crossfade.
    split = len(frame1) - crossfade_length
    overlap = frame1[split:] * fade_out + frame2[:crossfade_length] * fade_in

    return np.concatenate((frame1[:split], overlap, frame2[crossfade_length:]))

In [None]:
# Join the doubled, windowed frames one after another, blending each seam
# with a 100-sample linear crossfade.
stretched_frames = repeat_frames(frames, 2)
# Number of frames to keep: the count of hops that fit into 30 seconds.
# NOTE(review): the frames are joined almost end to end below (only 100
# samples overlap), so the output is much longer than 30 s - confirm intended.
n_kept_frames = librosa.time_to_frames(30, sr=SAMPLE_RATE, hop_length=hop_length)
smoothed_frames = stretched_frames[:n_kept_frames] * window

# Start from the first frame and append every following frame in order.
output_audio = smoothed_frames[0]
for next_frame in smoothed_frames[1:]:
    output_audio = crossfade_frames(output_audio, next_frame, 100)

In [None]:
# Playback of the crossfaded result.
Audio(output_audio, rate=SAMPLE_RATE, normalize=True)

In [None]:
# Repeat the overlap-add stretch with a symmetric Hamming window.
# NOTE(review): this rebinds the notebook-level names `window` and `stretched`,
# so earlier cells that read them give different results when re-run after
# this cell - consider distinct names.
window = librosa.filters.get_window("hamming", frame_length, fftbins=False)
stretched = overlap_add(stretched_frames * window, hop_length)

In [None]:
# Playback of the first 30 seconds of the current `stretched` signal.
Audio(stretched[:librosa.time_to_samples(30, sr=SAMPLE_RATE)], rate=SAMPLE_RATE, normalize=True)