In [1]:
# import packages

import os # read system path 
import csv

import matplotlib as mpl
import matplotlib.pyplot as plt

import pandas as pd
from scipy import signal
import soundfile as sf
from gudhi.point_cloud import timedelay
import numpy as np
from numpy import argmax
import math
from ripser import ripser
from persim import plot_diagrams

import umap
from sklearn.preprocessing import StandardScaler
%matplotlib qt5

# Directories that hold the voiced and the voiceless wav segments.
# Both are relative to the notebook's working directory. The trailing "/" is
# kept so that a file name can be concatenated directly onto either prefix.
voicedPath="data/audio_segment/voiced/"
voicedlessPath="data/audio_segment/voiceless/"

In [2]:
# function define

def wav_fraction_finder(start_time, end_time, sig, sample_rate=None):
    """Return the part of ``sig`` that lies between two time stamps.

    Parameters
    ----------
    start_time, end_time : number
        Bounds of the interval, expressed in the time unit matching
        ``sample_rate`` (seconds when the rate is in samples per second).
    sig : sliceable sequence
        The sampled signal (list, numpy array, ...).
    sample_rate : number, optional
        Number of samples per time unit. When omitted, the module-level
        ``samplerate`` variable (assigned by the ``sf.read`` calls in this
        notebook) is used, which is what the function always did before this
        parameter existed.

    Returns
    -------
    The slice ``sig[int(start_time * rate):int(end_time * rate)]``; both
    bounds are truncated towards zero by ``int``.
    """
    if sample_rate is None:
        # Backward-compatible fallback on the notebook-global sample rate.
        sample_rate = samplerate
    first = int(start_time * sample_rate)
    last = int(end_time * sample_rate)
    return sig[first:last]

def head_tail_scissor(sig, threshold=0.03, min_length=500):
    """Trim the quiet head and tail of a signal and report whether it is usable.

    The signal is cut so that it starts at the first sample whose value is
    strictly greater than ``threshold`` and ends at the last such sample.

    Parameters
    ----------
    sig : 1-D sequence or numpy array
        The sampled signal.
    threshold : float, optional
        Samples must exceed this value to count as "loud". The comparison is
        on the signed sample value (no absolute value is taken), exactly as in
        the original implementation.
        NOTE(review): negative excursions are therefore ignored - confirm
        this is intended.
    min_length : int, optional
        The trimmed signal is rejected when ``tail - head < min_length``,
        i.e. when it holds ``min_length`` samples or fewer.

    Returns
    -------
    (status, sig) : (bool, same type as the input)
        ``(False, sig)`` with the untouched input when no sample exceeds the
        threshold; ``(False, trimmed)`` when the trimmed signal is too short;
        ``(True, trimmed)`` otherwise.

    Raises
    ------
    ValueError
        If ``sig`` is not one-dimensional (e.g. a multi-channel recording).
    """
    samples = np.asarray(sig)
    if samples.ndim != 1:
        raise ValueError("head_tail_scissor expects a 1-D (mono) signal")

    # Indices of every sample above the threshold, found in one vectorised pass.
    loud = np.flatnonzero(samples > threshold)
    if loud.size == 0:
        return False, sig

    head = int(loud[0])
    tail = int(loud[-1])
    # Slice the original object so the caller gets back the type it passed in.
    sig = sig[head:tail + 1]
    if tail - head < min_length:
        return False, sig
    return True, sig

In [53]:
# Run the STFT on one voiced recording and extract its dominant frequency per
# time frame; the results (f, t, Zxx, dominant_frequencies) feed the plot below.

# Only real wav files are considered, and the listing is sorted so that the
# chosen example does not depend on the arbitrary order of os.listdir.
wav_names = sorted(name for name in os.listdir(voicedPath)
                   if os.path.splitext(name)[1].lower() == ".wav")
if not wav_names:
    raise FileNotFoundError("no .wav file found in " + voicedPath)

# Read wav file as "sig"
fn = wav_names[0]
fileName, ext = os.path.splitext(fn)
wavFile = voicedPath + fn
sig, samplerate = sf.read(wavFile)

# STFT with a window length of one fifth of the recording
f, t, Zxx = signal.stft(sig, samplerate, nperseg=len(sig) // 5)

# Zero frequency is meaningless, so it is excluded before looking for the
# strongest bin of each time frame. The masking is done on a copy of the
# magnitudes: Zxx itself stays untouched and the spectrogram shows the real STFT.
magnitude = np.abs(Zxx)
magnitude[f == 0, :] = 0
max_magnitude_indices = np.argmax(magnitude, axis=0)

# Get the corresponding frequencies
dominant_frequencies = f[max_magnitude_indices]


# Upper panel: STFT magnitude as a time/frequency image
spec_ax = plt.subplot(211)
spec_mesh = spec_ax.pcolormesh(t, f, np.abs(Zxx), shading='gouraud', cmap='viridis')
spec_ax.set_ylabel('Frequency (Hz)')
spec_ax.set_title('STFT Spectrogram')
plt.colorbar(spec_mesh, ax=spec_ax, label='Magnitude')

# Lower panel: strongest frequency of every time frame
track_ax = plt.subplot(212)
track_ax.plot(t, dominant_frequencies, marker='o', linestyle='-')
track_ax.set_xlabel('Time (s)')
track_ax.set_ylabel('Dominant Frequency (Hz)')
track_ax.set_title('Dominant Frequency over Time')

plt.tight_layout()
plt.show()

In [55]:
# Retrieve features from the STFT of every recording and append them to a csv
# file: one row per recording, holding the dominant frequency of the first
# 11 time frames followed by a label (0 = voiced, 1 = voicedless).
#
# The file is opened in append mode, so running this cell again adds new rows
# to whatever "STFT_Diag.csv" already contains.


def _dominant_frequencies(sig, samplerate, n_frames=11):
    """Return the dominant non-zero frequency of the first ``n_frames`` STFT frames.

    The STFT window length is one fifth of the signal. Zero frequency is
    meaningless here, so that bin is masked out before the strongest bin of
    each time frame is selected. Only the first ``n_frames`` values are kept;
    a further frame, when present, is mainly 0 or NaN.
    """
    f, t, Zxx = signal.stft(sig, samplerate, nperseg=len(sig) // 5)
    magnitude = np.abs(Zxx)
    magnitude[f == 0, :] = 0
    return f[np.argmax(magnitude, axis=0)][:n_frames]


with open("STFT_Diag.csv", "a", newline="") as csvfile:
    # One writer for both classes so every row uses the same csv dialect
    # (previously voiced rows were fully quoted and voicedless rows were not).
    writer = csv.writer(csvfile)

    # 0 indicates the phone is voiced, 1 indicates it is voicedless
    for wav_dir, label in ((voicedPath, 0), (voicedlessPath, 1)):
        for fn in os.listdir(wav_dir):
            # Ignore anything in the directory that is not a wav file
            fileName, ext = os.path.splitext(fn)
            if ext.lower() != ".wav":
                continue

            # Subsample dataset, retrieve 1 in 10 among dataset
            randNum = np.random.randint(10)
            if randNum != 0:
                continue

            # Read wav file as "sig"
            wavFile = wav_dir + fn
            sig, samplerate = sf.read(wavFile)

            # Cut head and tail of the wav file; skip it when what is left
            # is not reliable
            status, sig = head_tail_scissor(sig)
            if not status:
                continue

            dominant_frequencies = _dominant_frequencies(sig, samplerate)

            # Add last feature to indicate if it is voiced / voicedless
            data = np.append(dominant_frequencies, label)
            writer.writerow(data)


In [2]:
# Read the csv file into a DataFrame. header=None tells pandas the file has no
# header row, so the columns are labelled with integers.
# NOTE(review): the extraction cell appends to "STFT_Diag.csv" whereas this
# cell reads "STFT_Diag2.csv" - confirm which file is the intended input.
df=pd.read_csv('STFT_Diag2.csv', header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,408.333333,408.333333,408.333333,175.000000,175.000000,175.000000,175.000000,175.000000,175.000000,175.000000,175.000000,0.0
1,286.363636,286.363636,286.363636,286.363636,286.363636,286.363636,286.363636,286.363636,357.954545,644.318182,644.318182,0.0
2,250.568182,250.568182,250.568182,250.568182,250.568182,250.568182,250.568182,250.568182,250.568182,375.852273,375.852273,0.0
3,324.264706,324.264706,324.264706,324.264706,324.264706,216.176471,324.264706,324.264706,324.264706,324.264706,108.088235,0.0
4,231.617647,185.294118,185.294118,231.617647,231.617647,231.617647,231.617647,231.617647,231.617647,231.617647,463.235294,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
25928,212.019231,212.019231,4770.432692,3339.302885,3339.302885,4717.427885,4982.451923,4452.403846,4505.408654,1908.173077,7208.653846,1.0
25929,5398.672566,5073.451327,5203.539823,7870.353982,7024.778761,6309.292035,6374.336283,6504.424779,6569.469027,5073.451327,5008.407080,1.0
25930,462.881679,168.320611,547.041985,294.561069,589.122137,420.801527,757.442748,4123.854962,4797.137405,4755.057252,3955.534351,1.0
25931,4255.851064,7472.872340,9885.638298,6367.021277,6903.191489,6802.659574,6970.212766,7070.744681,7104.255319,6802.659574,335.106383,1.0


In [3]:
# Use UMAP to reduce the feature dimension to 2.
# UMAP is stochastic: a fixed random_state makes the embedding (and therefore
# every figure derived from it) reproducible from one run to the next.
reducer = umap.UMAP(random_state=42)

# Columns 0..10 hold the features; they are standardised before being embedded.
data = df.iloc[:, 0:11]
scaled_data = StandardScaler().fit_transform(data)
embedding = reducer.fit_transform(scaled_data)
embedding.shape

(25933, 2)

In [4]:
# Wrap the 2-D embedding in a DataFrame and attach the class label
# (column 11 of the raw feature table) as the "type" column
df_feature = (
    pd.DataFrame(embedding, columns=['feature1', 'feature2'])
    .assign(type=df[11])
)
df_feature

Unnamed: 0,feature1,feature2,type
0,17.782047,3.552762,0.0
1,13.617786,7.950894,0.0
2,15.723985,8.132825,0.0
3,15.645306,3.268811,0.0
4,17.518698,8.253714,0.0
...,...,...,...
25928,5.809585,-1.135514,1.0
25929,0.738934,0.658945,1.0
25930,5.533201,9.769709,1.0
25931,-0.667545,3.078579,1.0


In [None]:
# Font sizes shared by the figures below
SMALL_SIZE = 10
MEDIUM_SIZE = 12
BIGGER_SIZE = 15

plt.rcParams.update({
    'font.size': SMALL_SIZE,          # default text size
    'axes.titlesize': SMALL_SIZE,     # axes title
    'axes.labelsize': MEDIUM_SIZE,    # x and y axis labels
    'xtick.labelsize': MEDIUM_SIZE,   # x tick labels
    'ytick.labelsize': MEDIUM_SIZE,   # y tick labels
})

# Group the data based on voiced/ voicedless
groups = df_feature.groupby('type')

# Plot both classes in one scatter plot of the UMAP embedding
fig, ax = plt.subplots(figsize=(6, 6))
ax.margins(0.05)
typeDict= {1:'voicedless',0:'voiced'}
typeColor = {1: '#4d4dff', 0: '#ff5c33'}
# The loop variable is deliberately not called "type": at notebook level that
# would overwrite the builtin for every cell executed afterwards.
for label_id, group in groups:
    if label_id not in typeColor:
        # Labels other than 0 / 1 are not drawn
        continue
    ax.plot(group.feature1, group.feature2, marker='o', linestyle='', ms=2,
            label=typeDict[label_id], alpha=0.5, color=typeColor[label_id])
legend=ax.legend(fontsize=15,markerscale=4,loc='upper left')
ax.set_xlabel('UMAP_1',fontsize=15)
ax.set_ylabel('UMAP_2',fontsize=15)

## Save figure as pdf file
#plt.savefig("figure/featureAna_stft1.pdf", format="pdf", bbox_inches="tight")

In [None]:
# Plot each class in its own panel, side by side, with identical axis limits
plt.figure(figsize=(6, 3))

# Per-class panel settings: subplot position, legend text, legend location, colour
panels = {
    0: {'position': 1, 'legend': 'voiced', 'loc': 'upper left', 'color': '#ff5c33'},
    1: {'position': 2, 'legend': 'voicedless', 'loc': 'lower right', 'color': '#4d4dff'},
}

# The loop variable is deliberately not called "type": at notebook level that
# would overwrite the builtin for every cell executed afterwards.
for label_id, group in groups:
    panel = panels.get(label_id)
    if panel is None:
        # Labels other than 0 / 1 are not drawn
        continue
    plt.subplot(1, 2, panel['position'])
    plt.plot(group.feature1, group.feature2, marker='o', linestyle='', ms=2,
             label=typeDict[label_id], alpha=0.5, color=panel['color'])
    plt.legend([panel['legend']], fontsize=10, markerscale=4, loc=panel['loc'])
    plt.xlabel('UMAP_1')
    plt.ylabel('UMAP_2')
    # NOTE(review): fixed limits, presumably tuned to one particular embedding -
    # re-check them whenever the embedding is recomputed.
    plt.xlim([-7,29])
    plt.ylim([-9,18])

plt.tight_layout()
plt.show()

## Save figure as pdf file
#plt.savefig("figure/featureAna_stft2.pdf", format="pdf", bbox_inches="tight")