In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import librosa as lb
import pywt
import wfdb
import os

In [None]:
# Load the wfdb record 'voice004' and keep the first column (channel 0)
# of its 2-D physical-signal array.
voice_1 = wfdb.rdrecord('voice004')
# Tip while exploring: wfdb.plot_wfdb(record=voice_1) draws a quick built-in
# plot, and display(voice_1.__dict__) lists every field of the record.
segment = voice_1.p_signal[:, 0]
sr = voice_1.fs

# Time axis in seconds: sample index divided by the record's `fs`.
time = np.arange(len(segment)) / sr

plt.figure(figsize=(15, 4))
plt.plot(time, segment)
plt.title("voice004 - raw signal")
plt.ylabel("Amplitude")
plt.xlabel("Time (s)")
# `plt.show` without parentheses is only a reference to the function and
# renders nothing by itself; it has to be called.
plt.show()

In [None]:
# Pre-emphasis filter: y[0] = x[0], y[n] = x[n] - a * x[n-1] for n >= 1,
# with a = pre_emph_coeff. The output has the same length as `segment`.
pre_emph_coeff = 0.90
emphasized_signal = np.append(segment[0], segment[1:] - pre_emph_coeff * segment[:-1])
new_time = np.arange(len(emphasized_signal)) / sr

plt.figure(figsize=(15, 4))
plt.plot(new_time, emphasized_signal)
plt.title(f"Pre-emphasized signal (coefficient = {pre_emph_coeff})")
plt.ylabel("Amplitude")
plt.xlabel("Time (s)")
# Call the function: a bare `plt.show` is a no-op expression.
plt.show()

In [None]:
# Framing parameters: windows of 0.476 s with 50 % overlap between neighbours.
frame_length = int(0.476 * sr)
hop_length = frame_length // 2
n_frames = 10
# STFT parameters, in samples. `stft_hop` is the hop *inside* one frame and is
# unrelated to `hop_length`, which is the distance between frame starts.
n_fft = 2048
stft_hop = 512

# Keep only frames that lie completely inside the signal, so a short recording
# cannot hand an empty or truncated slice to the STFT.
frame_starts = [i * hop_length for i in range(n_frames)]
frames = [
    emphasized_signal[start:start + frame_length]
    for start in frame_starts
    if start + frame_length <= len(emphasized_signal)
]
if not frames:
    raise ValueError("signal is shorter than one frame; nothing to plot")

# squeeze=False keeps `axs` an array even when a single frame is available;
# ravel() then restores the 1-D indexing used below.
fig, axs = plt.subplots(len(frames), 1, figsize=(15, 2 * len(frames)), squeeze=False)
axs = axs.ravel()
for i, frame in enumerate(frames):
    # amplitude_to_db expects magnitudes; feeding it the complex STFT makes
    # librosa warn that the phase is being discarded.
    D = np.abs(lb.stft(frame, n_fft=n_fft, hop_length=stft_hop))
    DB = lb.amplitude_to_db(D, ref=np.max)
    img = lb.display.specshow(
        DB, sr=sr, hop_length=stft_hop, x_axis="time", y_axis="hz", ax=axs[i]
    )
    axs[i].set_title(f"Frame {i+1}")
    fig.colorbar(img, ax=axs[i], format="%+2.f dB")
plt.tight_layout()
plt.show()

In [2]:
def plot_spectogram(audio_folder, image_dim=(64,64), output_folder="spectograms",
                    record_ids=None, num_frames=10):
    """Save log-frequency STFT spectrogram images for slices of wfdb records.

    Each record ``voice{id:03d}`` found in ``audio_folder`` is read with wfdb,
    its first channel is cut into ``num_frames`` equal, non-overlapping slices
    (any remainder samples at the end are dropped), and the dB-scaled STFT
    magnitude of every slice is written to
    ``<audio_folder>/<output_folder>/voice{id:03d}_frame_{k}.png``.

    Parameters
    ----------
    audio_folder : str
        Folder that holds the wfdb records; the output folder is created
        inside it.
    image_dim : tuple of int
        ``(height, width)`` of each saved image in pixels.
    output_folder : str
        Name of the sub-folder of ``audio_folder`` that receives the images.
    record_ids : iterable of int, optional
        Record numbers to process. Defaults to ``range(1, 209)``.
    num_frames : int
        Number of slices (and therefore images) per record.

    Raises
    ------
    ValueError
        If ``num_frames`` is smaller than 1.
    """
    import warnings

    if num_frames < 1:
        raise ValueError("num_frames must be at least 1")
    if record_ids is None:
        record_ids = range(1, 209)

    n_fft = 2048
    stft_hop = 512
    # Rendering resolution; together with figsize it fixes the pixel size.
    dpi = 100

    output_dir = os.path.join(audio_folder, output_folder)
    os.makedirs(output_dir, exist_ok=True)

    for i in record_ids:
        # Read from audio_folder rather than from whatever the current
        # working directory happens to be.
        voice = wfdb.rdrecord(os.path.join(audio_folder, f"voice{i:03d}"))
        segment = voice.p_signal[:, 0]
        sr = voice.fs

        frame_len = len(segment) // num_frames
        if frame_len == 0:
            # Fewer samples than requested slices: every slice would be empty.
            warnings.warn(f"voice{i:03d}: signal too short for {num_frames} frames, skipped")
            continue

        for j in range(num_frames):
            frame = segment[j * frame_len:(j + 1) * frame_len]
            D_frame = np.abs(lb.stft(frame, n_fft=n_fft, hop_length=stft_hop))

            # A single axes object that fills the whole canvas, saved at a
            # fixed dpi, yields an image of image_dim pixels; cropping with
            # bbox_inches='tight' would shrink it to the axes area.
            fig = plt.figure(figsize=(image_dim[1] / dpi, image_dim[0] / dpi), dpi=dpi)
            ax = fig.add_axes([0, 0, 1, 1])
            # hop_length must be the STFT hop so the time axis matches D_frame.
            lb.display.specshow(
                lb.amplitude_to_db(D_frame, ref=np.max),
                sr=sr, hop_length=stft_hop, x_axis='time', y_axis='log', ax=ax,
            )
            ax.set_axis_off()
            fig.savefig(os.path.join(output_dir, f'voice{i:03d}_frame_{j+1}.png'), dpi=dpi)
            # Close explicitly: thousands of open figures would exhaust memory.
            plt.close(fig)

In [3]:
# Work in the current directory; the 256x256 spectrogram images end up in
# ./stft_spectograms.
audio_folder = '.'
plot_spectogram(
    audio_folder,
    image_dim=(256, 256),
    output_folder="stft_spectograms",
)