In [2]:
import librosa
from scipy.io import wavfile
from scipy.signal import resample
import scipy
import numpy as np
import os

In [3]:
song_src = "./assets/Careless.mp3"
audio_data, sampling_rate = librosa.load(song_src, sr=None)

In [4]:
def reverse_audio(audio_data, sampling_rate):
    reversed_audio_np = np.flip(audio_data)
    wavfile.write("output.reversed.mp3", sampling_rate, reversed_audio_np)
reverse_audio(audio_data, sampling_rate)

In [5]:
def convolve_reset_kernel(audio_data, sampling_rate, kernel):

    audio_data = audio_data / np.max(np.abs(audio_data))
    
    convolved_audio = np.convolve(audio_data, kernel, mode="same")
    convolved_audio = np.clip(convolved_audio, -1, 1)
    # convolved_audio = (convolved_audio * 100).astype(np.int16)
    
    compression_r = 4 # No downsampling
    downsampled_rate = sampling_rate // compression_r  # Halve the sampling rate
    downsampled_audio = scipy.signal.resample_poly(convolved_audio, 1, compression_r)

    downsampled_audio = (downsampled_audio * 32767).astype(np.int16)
    
    # print(audio_data.shape, convolved_audio.shape)
    # print(max(convolved_audio), max(convolved_audio))
    
    wavfile.write("output.convolved.mp3", downsampled_rate, downsampled_audio)


kernels = [
    [-1,0,1],
    [-20,0,20],
    [-20, -10,0, 10, 20],
    [0,0,5],
    [-500,-200, 0,0, 0, 200, 500]
]
convolve_reset_kernel(audio_data, sampling_rate, kernels[2])

In [6]:
def pitch_shift_audio(audio_data, sampling_rate, pitch_shift=1.0):
    # Increase or decrease the sampling rate to change the pitch
    new_sampling_rate = int(sampling_rate * pitch_shift)
    shifted_audio = resample(audio_data, len(audio_data) * new_sampling_rate // sampling_rate)

    # Normalize the shifted audio to prevent clipping
    shifted_audio /= np.max(np.abs(shifted_audio))

    # Convert the shifted audio to the appropriate data type
    shifted_audio = (shifted_audio * 32767).astype(np.int16)

    print(f"Original sampling rate: {sampling_rate}, New sampling rate: {new_sampling_rate}")
    print(f"Maximum value in shifted audio: {np.max(shifted_audio)}, Minimum value: {np.min(shifted_audio)}")

    # Write the shifted audio to a WAV file
    output_file = "output.high_pitched.wav" if pitch_shift > 1.0 else "output.low_pitched.wav"
    wavfile.write(output_file, new_sampling_rate, shifted_audio)

    print(f"Pitch-shifted audio written to '{output_file}'")
pitch_shift_audio(audio_data, sampling_rate, 0.1)
pitch_shift_audio(audio_data, sampling_rate, 5)

Original sampling rate: 44100, New sampling rate: 4410
Maximum value in shifted audio: 32767, Minimum value: -30584
Pitch-shifted audio written to 'output.low_pitched.wav'
Original sampling rate: 44100, New sampling rate: 220500
Maximum value in shifted audio: 32767, Minimum value: -32709
Pitch-shifted audio written to 'output.high_pitched.wav'
