## 10 STFT Exploration 

Setting parameters for both downsampling and iSTFTs. 

In [14]:
import numpy as np 
import torch 
import pandas as pd 
import sys 
import os 

sys.path.append('../../')
from src.spectral_ops import ISTFT

In [75]:
# Quick Param Calculations 
buffer_size = 1024
sample_rate = 48000 

ms_len = np.round(buffer_size * 1000 / sample_rate, 2)
print(f'Number of ms: {ms_len}')

Number of ms: 21.33


In [76]:
# Verify ISTFT Params 
n_fft = 1024 
hop_length = 60 # 240 -> ~200 Frames per Second of Audio
win_length = n_fft
n_frames = 48000 / hop_length + 1

delta_freq = sample_rate / n_fft 
print(f'Size of Frequency Bins: {delta_freq}Hz')
print(f'Number of Freq Bins: {sample_rate / delta_freq}')
print(f'Window Duration (ms): {np.round(win_length * 1000 / sample_rate, 2)}')
print(f'Number of Frames (~): {n_frames}')

Size of Frequency Bins: 46.875Hz
Number of Freq Bins: 1024.0
Window Duration (ms): 21.33
Number of Frames (~): 801.0


In [77]:
# Example Calculation with Fake Data
x_prev = torch.normal(0.0, 1.0, size=(sample_rate, 1)).T
x = torch.normal(0.0, 1.0, size=(sample_rate, 1)).T

window = torch.hann_window(win_length)

# -----------------------------
# STFT
# -----------------------------
X = torch.stft(
    x,
    n_fft=n_fft,
    hop_length=hop_length,
    win_length=win_length,
    window=window,
    center=True,
    return_complex=True,
)

# Magnitude spectrogram
mag = torch.abs(X)          # (freq, time)

# Phase
phase = torch.angle(X) 

print(X.shape) # Returns (B, T', n_frames)

torch.Size([1, 513, 801])


In [78]:
# Example Calculation with Fake Data
x_prev = torch.normal(0.0, 1.0, size=(buffer_size, 1)).T
x = torch.normal(0.0, 1.0, size=(buffer_size, 1)).T

window = torch.hann_window(win_length)

# -----------------------------
# STFT
# -----------------------------
X = torch.stft(
    x,
    n_fft=n_fft,
    hop_length=hop_length,
    win_length=win_length,
    window=window,
    center=True,
    return_complex=True,
)

# Magnitude spectrogram
mag = torch.abs(X)          # (freq, time)

# Phase
phase = torch.angle(X) 

print(X.shape) # Returns (B, T', n_frames)
print(f'Ms Covered by FFT Buffer: {(X.shape[-1] * 1000) / (sample_rate / hop_length)}')

torch.Size([1, 513, 18])
Ms Covered by FFT Buffer: 22.5
