In [None]:
import plotly.graph_objects as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.io as pio

import numpy as np
from numpy import argmax
import matplotlib.pyplot as plt
%matplotlib qt5

from gudhi.point_cloud import timedelay
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from ripser import ripser
from persim import plot_diagrams

import time

from praatio import textgrid

import soundfile as sf

# head_tail_scissor trims the leading and trailing part of a signal that does
# not rise above a threshold, and reports whether what is left is long enough.
def head_tail_scissor(sig, threshold=0.03, min_length=500):
    """Cut the quiet head and tail off ``sig`` and check the remaining length.

    A sample counts as "loud" when its value is strictly greater than
    ``threshold``. The comparison is on the signed value, so negative
    excursions never count as loud.

    Parameters
    ----------
    sig : sequence or 1-D array of numbers
        The signal to trim.
    threshold : float, optional
        Level a sample must exceed to be kept as head/tail boundary
        (default 0.03).
    min_length : int, optional
        The trimmed signal is accepted only if the distance between the first
        and the last loud sample is at least ``min_length`` (default 500).

    Returns
    -------
    (bool, signal)
        ``(False, sig)`` with the untouched input when no sample exceeds the
        threshold; ``(False, trimmed)`` when the trimmed signal is too short;
        ``(True, trimmed)`` otherwise. ``trimmed`` is a slice of the input, so
        it has the same container type as ``sig``.
    """
    loud=np.flatnonzero(np.asarray(sig)>threshold)
    if loud.size==0:
        return False,sig
    # flatnonzero returns ascending indices: first/last entry are head/tail.
    head=int(loud[0])
    tail=int(loud[-1])
    sig=sig[head:tail+1]
    if tail-head<min_length:
        return False,sig
    return True,sig

# principle_frequency_finder estimates the principal period of a speech signal
# from the strongest peak of a normalised autocorrelation-like function.
def principle_frequency_finder(sig):
    """Locate the lag with the strongest interior autocorrelation peak.

    For every lag ``index`` in ``0 .. len(sig)//2 - 1`` a window centred on
    ``t = len(sig)//2`` is compared with the same window taken from the signal
    shifted by ``index`` samples. The window half-width ``(t-index)/2`` shrinks
    as the lag grows. The score is ``2*r/m`` where ``r`` is the sum of products
    and ``m`` the sum of both windows' energies.

    Parameters
    ----------
    sig : sequence or 1-D array of numbers
        The signal; it is converted to a float array before processing.

    Returns
    -------
    (max_index, corr)
        ``corr`` is the array of scores, one per lag. ``max_index`` is the lag
        of the largest strict interior local maximum of ``corr``, or ``0`` when
        there is none (including signals shorter than two samples, for which
        ``corr`` is empty).
    """
    sig=np.asarray(sig,dtype=float)
    t=int(len(sig)/2)
    corr=np.zeros(t)

    for index in range(t):
        ACF_delay=sig[index:]
        L=(t-index)/2
        lo,hi=int(t-L),int(t+L+1)
        window=sig[lo:hi]
        delayed=ACF_delay[lo:hi]
        m=np.sum(window**2)+np.sum(delayed**2)
        r=np.sum(window*delayed)
        # If both windows are all zero this is 0/0 -> NaN; NaN never compares
        # greater than a neighbour, so such lags cannot be selected as peaks.
        corr[index]=2*r/m

    # Strict local maxima among interior points (the two end lags are excluded).
    is_peak=(corr[1:-1]>corr[:-2])&(corr[1:-1]>corr[2:])
    maxidx=np.flatnonzero(is_peak)+1
    max_index=0
    if maxidx.size>0:
        max_index=maxidx[np.argmax(corr[maxidx])]

    return (max_index, corr)

In [None]:
# ---- settings -------------------------------------------------------------
# Annotation file and the matching recording.
inputFile="data/ALL_049_F_ENG_ENG_HT1.Textgrid"
wavFile='data/ALL_049_F_ENG_ENG_HT1.wav'
# Labels of the phones to analyse.
specified_vowel=['ŋ']
# Embedding dimension handed to TimeDelayEmbedding further down.
M=1280
# Not referenced by any of the cells visible here.
max_edge_length=1

# ---- load the annotation ----------------------------------------------------
tg=textgrid.openTextgrid(inputFile,includeEmptyIntervals=False)
phoneTier=tg.tierDict['phones']
print('dimension = ',M)

# ---- load the audio and pick the annotated intervals ------------------------
sig,samplerate=sf.read(wavFile)
# Keep the entries whose third field is one of the requested labels
# (presumably entries are (start, end, label) tuples, as the later use of
# fields 0 and 1 as time stamps suggests).
specified_vowel_list=[
    entry
    for entry in phoneTier.entryList
    if entry[2] in specified_vowel
]

# wav_fraction_finder returns the part of the loaded recording between two time stamps
def wav_fraction_finder(start_time, end_time):
    """Slice the module-level ``sig`` between ``start_time`` and ``end_time``.

    Both time stamps are multiplied by the module-level ``samplerate`` and
    truncated to integers to obtain the slice bounds; the end bound is
    exclusive.
    """
    first_sample=int(start_time*samplerate)
    stop_sample=int(end_time*samplerate)
    return sig[first_sample:stop_sample]

# Trim every selected interval exactly once, then keep the segments that pass
# the head_tail_scissor validity check.
scissor_results=[head_tail_scissor(wav_fraction_finder(ele[0],ele[1])) for ele in specified_vowel_list]
specified_valid_vowel_list=[trimmed for is_valid,trimmed in scissor_results if is_valid]
print('There are ',str(len(specified_valid_vowel_list)),' phones in the specified list.')

# T[i]: lag of the strongest autocorrelation peak of segment i, converted from
# samples to time by dividing by the sample rate.
T=[0]*len(specified_valid_vowel_list)
for i in range(len(specified_valid_vowel_list)):
    T[i],corr=principle_frequency_finder(np.array(specified_valid_vowel_list[i]))
    T[i]=T[i]/samplerate

# Embedding delay per segment, chosen so that M*delay is about six times the
# detected lag; a delay of 0 is not usable, so it is raised to 1.
delay=[max(1,round(ele*samplerate*6/M)) for ele in T]

# Index of the segment that is analysed in detail below.
element=4
if element>=len(specified_valid_vowel_list):
    raise IndexError('element = '+str(element)+' is out of range: only '
                     +str(len(specified_valid_vowel_list))+' valid segments were found')
print('delay = ',delay[element])

data=specified_valid_vowel_list[element]
#plt.plot(data)

# Time-delay embedding (dimension M, third argument 1) followed by per-coordinate
# standardisation and a whitened 3-component PCA for visualisation.
point_Cloud=timedelay.TimeDelayEmbedding(M, delay[element], 1)
Points=point_Cloud(data)
X=StandardScaler().fit_transform(Points)
pca=PCA(n_components=3,whiten=True)
X_PCA=pca.fit_transform(X)
print(pca.explained_variance_ratio_)

# Second embedding with third argument 5 (the skip parameter per the GUDHI
# documentation), which yields fewer points for the persistence computation.
point_Cloud=timedelay.TimeDelayEmbedding(M, delay[element],5)
Points=point_Cloud(data)
print(len(Points))
dgms = ripser(Points,maxdim=1)['dgms']
# Open a fresh figure so the diagram is not drawn on top of an earlier plot.
plt.figure()
plot_diagrams(dgms,lifetime=True)

# From here on dgms holds only the 1-dimensional diagram.
dgms=dgms[1]
if len(dgms)==0:
    raise ValueError('the 1-dimensional persistence diagram is empty; no maximal lifetime to report')
persistent_time=[ele[1]-ele[0] for ele in dgms]
index=argmax(persistent_time)
birth_date=dgms[index][0]
lifetime=persistent_time[index]
print('maximal lifetime = ', lifetime)


In [None]:
# plot for various M
# Sweep the embedding dimension and record, for each value, the delay used and
# the birth/lifetime of the most persistent 1-dimensional class.
# The sweep uses its own names (M_values, M_sweep, sweep_*) so that it neither
# shadows the builtin `list` nor overwrites M, index, Points and dgms that were
# set by the preceding analysis.
AutoT=T[element]
M_values=np.arange(100,1280,10)
AutoDelay=np.zeros(M_values.size,int)
MP=np.zeros(M_values.size)
BD=np.zeros(M_values.size)
for sweep_pos,M_sweep in enumerate(M_values):
    # A delay of 0 would collapse the embedding, so use at least 1 (the same
    # rule the single-segment analysis applies to its delays).
    AutoDelay[sweep_pos]=max(1,round(AutoT*samplerate*2*np.pi/M_sweep))
    sweep_embedding=timedelay.TimeDelayEmbedding(int(M_sweep), int(AutoDelay[sweep_pos]),5)
    sweep_points=sweep_embedding(data)
    sweep_h1=ripser(sweep_points,maxdim=1)['dgms'][1]
    if len(sweep_h1)==0:
        # No 1-dimensional class at all: zero persistence, undefined birth.
        BD[sweep_pos]=np.nan
        MP[sweep_pos]=0.0
        continue
    sweep_lifetimes=[ele[1]-ele[0] for ele in sweep_h1]
    MaxEle=argmax(sweep_lifetimes)
    BD[sweep_pos]=sweep_h1[MaxEle][0]
    MP[sweep_pos]=sweep_lifetimes[MaxEle]

# Each curve gets its own figure, plotted against the embedding dimension.
plt.figure()
plt.plot(M_values,MP)
plt.xlabel('embedding dimension M')
plt.ylabel('maximal lifetime')
plt.figure()
plt.plot(M_values,AutoDelay)
plt.xlabel('embedding dimension M')
plt.ylabel('delay')

In [None]:
# Waveform of the selected segment: amplitude against sample index.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=np.arange(len(data)),
        y=data,
        marker={'size': 2, 'colorscale': 'Viridis'},
        line={'color': '#0000b3', 'width': 1},
    )
)
# The title is left empty on purpose; only the axes are labelled.
fig.update_layout(title={}, xaxis_title="sample point", yaxis_title="amplitude")
fig.show()
#pio.write_image(fig, 'E:/Topological_Analysis_of_Time_Series/pictures/pdf_graph_raw/discussion_dim_phone_plot.pdf',scale=6, width=1080, height=500)


In [None]:
# 3-D scatter of the first three PCA components of the embedded segment;
# points are coloured by their third component.
fig = go.Figure()
fig.add_trace(
    go.Scatter3d(
        x=X_PCA[:,0],
        y=X_PCA[:,1],
        z=X_PCA[:,2],
        marker={
            'size': 4,
            'color': X_PCA[:,2],
            'colorscale': 'Viridis',  # alternatives tried: 'sunset', 'thermal'
        },
        line={'color': '#0000b3', 'width': 1},
    )
)

# Fixed 500x500 canvas with a manually set camera and aspect ratio.
fig.update_layout(
    width=500,
    height=500,
    template='simple_white',
    autosize=False,
    scene={
        'camera': {
            'up': {'x': 0, 'y': 0, 'z': 1},
            'eye': {'x': 1.2, 'y': 1.2, 'z': 1.2},
        },
        'aspectratio': {'x': 1, 'y': 1, 'z': 0.8},
        'aspectmode': 'manual',
    },
)
fig.show()
#pio.write_image(fig, 'E:/Topological_Analysis_of_Time_Series/pictures/pdf_graph_raw/PCA_M'+str(M)+'.pdf',scale=6, width=1080, height=1080)
#py.plot(fig, filename='E:/Topological_Analysis_of_Time_Series/pictures/vector_graph/M_'+str(M), image='svg')