In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
import librosa
from IPython.display import Audio

In [2]:
# Load the drum recording through librosa, requesting a 16 kHz sampling rate
# (sr=16000); the rate returned by librosa is kept in `fs` for later cells.
audio, fs = librosa.load("drum.wav", sr=16000)

In [14]:
# Render an inline player (IPython.display.Audio) for the loaded clip at its sampling rate.
Audio(data=audio, rate=fs)

In [3]:

# Analysis frame and hop durations, in milliseconds.
frame_length_ms = 50
hop_length_ms = 25  # half of the frame duration (12.5 was an earlier setting)

# The same two durations converted to whole sample counts at sampling rate `fs`:
# frame_length covers 50 ms, hop_size covers 25 ms (consecutive frames overlap by 50%).
frame_length, hop_size = (
    int(np.around((duration_ms / 1000) * fs))
    for duration_ms in (frame_length_ms, hop_length_ms)
)

# Window names understood by windowing().
window_types = ('rect', 'hann', 'cosine', 'hamming')

In [22]:
def windowing(data, frame_length, hop_size, windowing_function):
    """Split a 1-D signal into overlapping frames and apply a window to each.

    Parameters
    ----------
    data : array_like
        Input samples (treated as one-dimensional).
    frame_length : int
        Number of samples in every frame.
    hop_size : int
        Number of samples between the starts of consecutive frames.
        Must be positive.
    windowing_function : str
        Window applied to every frame: 'rect' (all ones), 'hann',
        'cosine' (square root of the Hann window) or 'hamming'.

    Returns
    -------
    numpy.ndarray
        Float array of shape (frame_length, number_of_frames) holding one
        windowed frame per column. Only complete frames are produced, so
        trailing samples that do not fill a whole frame are dropped. A signal
        shorter than one frame yields an array with zero columns.

    Raises
    ------
    ValueError
        If `hop_size` is not positive or `windowing_function` is not one of
        the supported names.
    """
    data = np.array(data)

    if hop_size <= 0:
        raise ValueError(f"hop_size must be positive, got {hop_size!r}")

    if windowing_function == 'rect':
        window = np.ones(frame_length)
    elif windowing_function == 'hann':
        window = np.hanning(frame_length)
    elif windowing_function == 'cosine':
        window = np.sqrt(np.hanning(frame_length))
    elif windowing_function == 'hamming':
        window = np.hamming(frame_length)
    else:
        # Fail loudly here: carrying on without a window would only surface
        # later as an unrelated "variable referenced before assignment" error.
        raise ValueError(f"windowing not supported: {windowing_function!r}")

    # Count complete frames only; clamp at zero so a too-short signal gives an
    # empty result instead of a negative array dimension.
    number_of_frames = max(0, 1 + (len(data) - frame_length) // hop_size)

    # sample_index[r, c] is the position in `data` of row r of frame c.
    frame_starts = hop_size * np.arange(number_of_frames)
    sample_index = np.arange(frame_length)[:, np.newaxis] + frame_starts[np.newaxis, :]

    frames = data[sample_index].astype(np.float64)
    return frames * window[:, np.newaxis]

In [6]:
# Cut the audio into frames of `frame_length` samples taken every `hop_size`
# samples, each multiplied by window_types[1] (the 'hann' window).
frame_matrix = windowing(audio, frame_length, hop_size, window_types[1])# Windowing

In [8]:
# windowing() returns (frame_length, number_of_frames): rows index samples
# within a frame, columns index frames.
frame_matrix.shape

(800, 98)

In [23]:
# alpha scales the analysis hop to obtain the hop used when frames are put back together.
alpha = 1.5
hop_size_synth = int(hop_size * alpha)
print(hop_size_synth, hop_size)

# Frame the audio again, this time stepping by the scaled hop (same 'hann' window).
# NOTE(review): modified_frame_matrix is not referenced by the other visible cells - confirm it is needed.
modified_frame_matrix = windowing(
    audio,
    frame_length,
    hop_size_synth,
    window_types[1],
)

600 400


In [24]:
#import numpy as np
import soundfile as sf
from IPython.display import Audio

def reconstruct_signal(frame_matrix, hop_size):
    """Overlap-add the columns of a frame matrix into one 1-D signal.

    Column ``i`` of `frame_matrix` is added into the output starting at sample
    ``i * hop_size``; where consecutive frames overlap their samples are summed.

    Parameters
    ----------
    frame_matrix : array_like
        2-D array of shape (frame_length, number_of_frames), one frame per column.
    hop_size : int
        Number of samples between the starts of consecutive frames in the
        output. Must not be negative.

    Returns
    -------
    numpy.ndarray
        1-D float array of length
        ``hop_size * (number_of_frames - 1) + frame_length``, or an empty
        array when `frame_matrix` has no columns.

    Raises
    ------
    ValueError
        If `hop_size` is negative, or `frame_matrix` is not two-dimensional.
    """
    frame_matrix = np.asarray(frame_matrix)
    frame_length, number_of_frames = frame_matrix.shape

    if hop_size < 0:
        raise ValueError(f"hop_size must not be negative, got {hop_size!r}")

    # With no frames the length formula below would evaluate to
    # frame_length - hop_size, which is either spurious zeros or a negative size.
    if number_of_frames == 0:
        return np.zeros(0)

    output_signal = np.zeros(hop_size * (number_of_frames - 1) + frame_length)

    for i in range(number_of_frames):
        start = i * hop_size
        output_signal[start:start + frame_length] += frame_matrix[:, i]

    return output_signal

# Overlap-add the analysis frames (cut every hop_size samples) at the larger
# synthesis hop, then pass the result through librosa's normalizer so playback
# does not clip.
reconstructed_signal = librosa.util.normalize(
    reconstruct_signal(frame_matrix, hop_size_synth)
)

# Optional: write the result to disk.
#sf.write('reconstructed_drum.wav', reconstructed_signal, fs)

# Inline player for the reconstructed signal (notebook only).
Audio(data=reconstructed_signal, rate=fs)
