In [1]:
from scipy import signal
import os 
import csv

import numpy as np
from numpy import argmax
from praatio import textgrid
import matplotlib.pyplot as plt
import soundfile as sf
from gudhi.point_cloud import timedelay
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from ripser import ripser
from persim import plot_diagrams
import plotly.io as pio
import time
%matplotlib qt5

In [2]:
# Phone labels (IPA) that select intervals from the 'phones' tier of each TextGrid.
voiced_phones=['v','l','ŋ','m','n','j','ʒ']
voiceless_phones=['f','θ','t','s','k','tʃ']

# NOTE(review): M and max_edge_length are not used by the cells visible here;
# presumably they parameterise a later embedding / persistence step - confirm.
M=100
max_edge_length=1
# Default sampling rate in Hz. The export loop rebinds this name to the rate
# returned by sf.read for each wav file, and wav_fraction_finder reads it.
samplerate=16000
# Directories are built with os.path.join so the separator matches the host OS
# (the values are unchanged on Windows). The empty last component keeps the
# trailing separator that the "path + fileName" concatenations rely on.
inputPath=os.path.join("data","article_TDA_phonetic_input","")
outputPath=os.path.join("data","article_TDA_phonetic_output","")

# wav_fraction_finder returns the part of a sampled signal that lies inside a time interval
def wav_fraction_finder(start_time, end_time, sig, sr=None):
    """Return the samples of ``sig`` between ``start_time`` and ``end_time``.

    Parameters
    ----------
    start_time, end_time : float
        Interval bounds in seconds.
    sig : sequence
        Sampled signal; anything that supports slicing (list, numpy array, ...).
    sr : number, optional
        Sampling rate in Hz used to turn seconds into sample indices. When it
        is omitted the module-level ``samplerate`` is read at call time, which
        is what this function did before the parameter existed.

    Returns
    -------
    The slice ``sig[int(start_time*sr):int(end_time*sr)]``, with both indices
    clamped so the slice never starts before sample 0 and never runs backwards.
    """
    if sr is None:
        sr=samplerate
    # A negative index would silently wrap around to the end of the signal.
    first=max(0,int(start_time*sr))
    last=max(first,int(end_time*sr))
    return sig[first:last]

# head_tail_scissor trims the quiet head and tail of a signal and reports
# whether what remains is long enough to be used
def head_tail_scissor(sig, threshold=0.03, min_span=500):
    """Trim leading/trailing samples that do not exceed ``threshold``.

    Parameters
    ----------
    sig : sequence
        Sampled signal. NOTE(review): assumed to be one-dimensional (mono) - confirm.
    threshold : float, optional
        A sample counts as "loud" when ``sample > threshold``. Default 0.03.
        NOTE(review): the comparison is signed, so only positive excursions
        count; if amplitude (absolute value) was intended, compare
        ``abs(sample)`` instead - confirm before changing, it alters results.
    min_span : int, optional
        Minimum distance, in samples, between the first and the last loud
        sample. Default 500, i.e. the trimmed signal must be longer than
        500 samples.

    Returns
    -------
    (bool, sequence)
        ``(False, sig)`` with the untouched input when no sample exceeds the
        threshold; ``(False, trimmed)`` when the trimmed signal is too short;
        ``(True, trimmed)`` otherwise. ``trimmed`` runs from the first to the
        last loud sample, both included.
    """
    # Vectorised search for the loud samples instead of a per-sample Python loop.
    loud=np.flatnonzero(np.asarray(sig)>threshold)
    if loud.size==0:
        return False,sig
    head=int(loud[0])
    tail=int(loud[-1])
    sig=sig[head:tail+1]
    if tail-head<min_span:
        return False,sig
    return True,sig

# principle_frequency_finder estimates the period (in samples) of a speech signal
def principle_frequency_finder(sig):
    """Estimate the dominant period of ``sig`` from a normalised correlation.

    For every lag ``k`` in ``range(len(sig)//2)`` a window of ``sig`` centred
    on the middle sample is compared with the same window taken from ``sig``
    shifted by ``k`` samples. The window half-width is ``(len(sig)//2 - k)/2``,
    so it shrinks as the lag grows. The score stored for the lag is
    ``2*r/m``, where ``r`` is the sum of products of the two windows and ``m``
    the sum of their energies; a window pair with zero energy scores 0.

    Parameters
    ----------
    sig : array-like
        One-dimensional signal. It is converted to a float numpy array, so
        plain lists are accepted.

    Returns
    -------
    (max_index, corr)
        ``corr`` is the array of scores, one per lag. ``max_index`` is the lag
        of the highest strict interior local maximum of ``corr`` (the first
        one on ties), or 0 when ``corr`` has no such maximum.
    """
    sig=np.asarray(sig,dtype=float)
    t=len(sig)//2
    corr=np.zeros(t)

    for lag in range(t):
        half_width=(t-lag)/2
        lo=int(t-half_width)
        hi=int(t+half_width+1)
        # Slice once per lag; the shifted window is the same index range
        # taken from the signal with its first `lag` samples dropped.
        reference=sig[lo:hi]
        shifted=sig[lag:][lo:hi]
        energy=np.sum(reference**2)+np.sum(shifted**2)
        product=np.sum(reference*shifted)
        # "!= 0" rather than "> 0" so that NaN input still propagates as NaN,
        # while an all-zero window no longer divides by zero.
        corr[lag]=2*product/energy if energy!=0 else 0.0

    # Strict local maxima among the interior points (first and last lag excluded).
    interior=corr[1:-1]
    is_peak=(interior>corr[:-2])&(interior>corr[2:])
    peak_lags=np.flatnonzero(is_peak)+1

    max_index=0
    if peak_lags.size>0:
        max_index=peak_lags[np.argmax(corr[peak_lags])]

    return (max_index, corr)

In [10]:
# Prepare the label file: start it from scratch with only the header row.
# The file is opened with "w" rather than "a" so that re-running this cell
# does not add a second header below labels left over from an earlier run.
with open("stft_label.csv","w",newline="") as csvfile:
    writer=csv.writer(csvfile)
    writer.writerow(["label"])

In [11]:
counter=1 # index used to name the next image file (shared by both classes)
voiced_counter=0 # number of voiced spectrograms written so far
voiceless_counter=0 # number of voiceless spectrograms written so far

# A class stops receiving samples once its counter is greater than this value.
# The comparison is the original "> 800", which lets 801 samples per class through.
# NOTE(review): if exactly 800 per class was intended the test should be ">=" - confirm.
max_per_class=800
# NOTE(review): machine-specific absolute directory; consider moving it to the config cell.
plotPath="D:\\article\\Topological_analysis_time_series_speech\\file\\sft_plot_refine\\"
labelFile="stft_label.csv"


def _valid_segments(entries, sig):
    """Cut each (start, end, label) entry out of ``sig`` and keep the trimmed
    segments that head_tail_scissor accepts. Each interval is processed once."""
    kept=[]
    for ele in entries:
        ok,trimmed=head_tail_scissor(wav_fraction_finder(ele[0],ele[1],sig))
        if ok:
            kept.append(trimmed)
    return kept


def _save_stft_plot(element, sr, png_path):
    """Draw the STFT magnitude of ``element`` without ticks or labels and save it to ``png_path``."""
    f, t, Zxx = signal.stft(element, sr, nperseg=256)
    plt.figure()
    try:
        # The colour scale is capped at 10% of the segment's peak absolute sample value.
        plt.pcolormesh(t, f, np.abs(Zxx), vmin=0, vmax=0.1*np.max(np.abs(element)), shading='gouraud')
        plt.tick_params(left = False, right = False , labelleft = False , labelbottom = False, bottom = False)
        plt.savefig(png_path,bbox_inches='tight',pad_inches = 0)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close()


def _export_segments(segments, label, class_count, image_index, sr, writer):
    """Save one image and one label row per segment until the class cap is hit.

    Returns the updated (class_count, image_index) pair."""
    for element in segments:
        if class_count>max_per_class:
            break
        _save_stft_plot(element,sr,plotPath+str(image_index)+".png")
        # writerow expects a sequence of fields, so the label is wrapped in a list.
        writer.writerow([label])
        image_index=image_index+1
        class_count=class_count+1
    return class_count,image_index


# plot stft and save them to path according to their order;
# the label file is opened once and row k of the labels belongs to image k
with open(labelFile,"a",newline="") as csvfile:
    writer=csv.writer(csvfile)

    # sorted() makes the image numbering reproducible: os.listdir order is unspecified.
    for fn in sorted(os.listdir(inputPath)):
        # Nothing more can be written once both classes are full, so stop reading files.
        if voiced_counter>max_per_class and voiceless_counter>max_per_class:
            break

        fileName,ext=os.path.splitext(fn)
        # Case-insensitive so that ".TextGrid" and ".Textgrid" are both picked up.
        if ext.lower()!=".textgrid":
            continue

        # The file is discovered in inputPath but the TextGrid that is opened is the
        # one with the same base name in outputPath.
        # NOTE(review): presumably outputPath holds the aligned grids with the 'phones' tier - confirm.
        textgridFile=outputPath+fileName+".Textgrid"
        tg=textgrid.openTextgrid(textgridFile,includeEmptyIntervals=False)
        phoneTier=tg.getTier('phones')
        wavFile=inputPath+fileName+".wav"
        # Rebinding the module-level samplerate is deliberate: wav_fraction_finder
        # reads it to convert interval times into sample indices for this file.
        sig,samplerate=sf.read(wavFile)
        voiced_list=[ele for ele in phoneTier.entries if ele[2] in voiced_phones]
        voiceless_list=[ele for ele in phoneTier.entries if ele[2] in voiceless_phones]

        valid_voiced_list=_valid_segments(voiced_list,sig)
        valid_voiceless_list=_valid_segments(voiceless_list,sig)

        # Label "0" marks voiced segments, label "1" voiceless ones.
        voiced_counter,counter=_export_segments(valid_voiced_list,"0",voiced_counter,counter,samplerate,writer)
        voiceless_counter,counter=_export_segments(valid_voiceless_list,"1",voiceless_counter,counter,samplerate,writer)
