In [1]:
# Based on a GitHub gist (TODO: add the link to the original gist)
import numpy as np
from scipy import signal
from scipy import fftpack
from scipy.io import wavfile
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap

In [2]:
def calculate_pad_size(signal_length, window_width, window_offset):
    """
    Compute how many trailing samples must be appended so the last frame is complete.

    Matches the frame layout used by extract_overlapping_frames_from_signal: a frame
    starts at every multiple of window_offset below signal_length and spans
    window_width samples.

    :param signal_length: number of samples in the signal
    :param window_width: number of samples in each frame
    :param window_offset: number of samples between consecutive frame starts (non-zero)
    :return: non-negative int number of padding samples
    """
    # Samples available between the start of the last frame and the end of the signal.
    # A remainder of 0 means the last frame starts exactly one offset before the end.
    useful_space = signal_length % window_offset
    if useful_space == 0:
        useful_space = window_offset
    # If the window is shorter than the samples already available, the last frame fits
    # without padding; clamp so a negative size is never handed to np.zeros.
    return max(0, int(window_width - useful_space))

def extract_overlapping_frames_from_signal(signal, window_width, window_offset):
    """
    Slice a signal into frames of ``window_width`` samples taken every ``window_offset`` samples.

    The signal is extended with trailing zeros (amount given by ``calculate_pad_size``)
    so that the last frame can be read in full.

    :param signal: NumPy array of samples; it is concatenated with a 1-D array of zeros
    :param window_width: number of samples per frame
    :param window_offset: number of samples between the starts of consecutive frames
    :return: array with one row per frame and ``window_width`` columns
    """
    n_samples = signal.shape[-1]

    # Append zeros so the final frame does not run past the end of the data.
    tail = np.zeros(calculate_pad_size(n_samples, window_width, window_offset))
    padded = np.concatenate((signal, tail))

    # Frame start indexes (as a column) plus in-frame positions (as a row) broadcast
    # to the full (number of frames, window_width) table of sample indexes.
    frame_starts = np.arange(0, n_samples, window_offset)
    within_frame = np.arange(0, window_width)
    sample_indexes = frame_starts[:, np.newaxis] + within_frame[np.newaxis, :]

    return padded[sample_indexes]

def replace_zeros_with_almost_zero(signal):
    """
    Return an array in which every element of ``signal`` equal to zero is replaced by the
    float machine epsilon (``np.finfo(float).eps``); other elements are kept as they are.

    :param signal: NumPy array of numbers
    :return: array of the same shape with zeros replaced by machine epsilon
    """
    machine_epsilon = np.finfo(float).eps
    is_zero = signal == 0
    return np.where(is_zero, machine_epsilon, signal)

def safe_log(signal):
    """
    Natural logarithm that avoids log(0): zeros are first swapped for a tiny positive
    value by ``replace_zeros_with_almost_zero``, then ``np.log`` is applied.

    :param signal: NumPy array of numbers
    :return: element-wise natural logarithm of the zero-free signal
    """
    zero_free_signal = replace_zeros_with_almost_zero(signal)
    return np.log(zero_free_signal)


In [3]:
# wavfile.read returns the sample rate (samples per second) and the sample data.
# NOTE(review): the name `signal` shadows the `scipy.signal` module imported in the first
# cell; it is kept here because the following cell reads `signal`.
frequency, signal = wavfile.read("male_a_spa.wav")
# Multi-channel files are 2-D (samples, channels): keep only the first channel.
# Mono files are already 1-D, where indexing with [:, 0] would raise an IndexError.
if signal.ndim > 1:
    signal = signal[:, 0]

In [4]:
# Frame geometry in samples: 0.025 s of audio per frame, a new frame every 0.01 s.
window_width = int(0.025 * frequency)
window_offset = int(0.01 * frequency)

frames = extract_overlapping_frames_from_signal(
    signal,
    window_width=window_width,
    window_offset=window_offset,
)

# NOTE(review): despite the variable name, this applies a DCT (fftpack.dct with its
# default type 2 along the last axis), not a Fourier transform, and no window function
# is applied to the frames in this cell - confirm this is the intended processing.
windowed_frames_with_fourier_transform = fftpack.dct(frames, type=2, axis=-1)