In [1]:
# import packages

import os # read system path 
import csv

import matplotlib as mpl
import matplotlib.pyplot as plt

import pandas as pd
import soundfile as sf
from gudhi.point_cloud import timedelay
import numpy as np
from numpy import argmax
import math
from ripser import ripser
from persim import plot_diagrams
%matplotlib qt5

from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [45]:
# function define

def wav_fraction_finder(start_time, end_time, sig, sr=None):
    """Return the slice of `sig` lying between two time stamps.

    Parameters
    ----------
    start_time, end_time : float
        Interval boundaries. They are multiplied by the sample rate and
        truncated with int() to obtain sample indices (so they are
        presumably expressed in seconds when the rate is in Hz).
    sig : sequence
        The sampled signal; anything that supports slicing.
    sr : number, optional
        Sample rate used to convert times to sample indices. When omitted,
        the notebook-level variable `samplerate` (set by `sf.read`) is used,
        which is what the function always did before this argument existed.

    Returns
    -------
    The slice ``sig[int(start_time*sr):int(end_time*sr)]``.
    """
    if sr is None:
        # Backward-compatible fallback on the global set by the loading cell;
        # raises NameError if no file has been read yet.
        sr = samplerate
    first_sample = int(start_time * sr)
    last_sample = int(end_time * sr)
    return sig[first_sample:last_sample]

def head_tail_scissor(sig, threshold=0.03, min_length=500):
    """Trim the quiet head and tail of a signal and check what is left is long enough.

    The signal is cut to the span running from the first sample whose value
    exceeds `threshold` to the last such sample (both included).

    Note that the comparison is on the signed sample value (``sig > threshold``),
    not on its absolute value, so negative excursions never count as "loud".
    NOTE(review): confirm this is intended rather than ``abs(sig) > threshold``.

    Parameters
    ----------
    sig : 1-D array-like
        The sampled signal.
    threshold : float, optional
        Samples must be strictly greater than this value to mark the start or
        end of the kept span. Defaults to 0.03.
    min_length : int, optional
        Minimum required distance (in samples) between the first and the last
        loud sample. Defaults to 500.

    Returns
    -------
    (status, sig) : (bool, same type as the input)
        `status` is False when no sample exceeds the threshold (the signal is
        then returned untouched) or when the last and first loud samples are
        fewer than `min_length` samples apart (the trimmed signal is still
        returned). Otherwise `status` is True and `sig` is the trimmed signal.

    Raises
    ------
    ValueError
        If `sig` is not one-dimensional (e.g. a multi-channel recording).
    """
    samples = np.asarray(sig)
    if samples.ndim != 1:
        raise ValueError(
            "head_tail_scissor expects a 1-D (mono) signal, got shape %s"
            % (samples.shape,)
        )

    # Vectorised scan for loud samples instead of a per-sample Python loop.
    loud = np.flatnonzero(samples > threshold)
    if loud.size == 0:
        return False, sig

    head = int(loud[0])
    tail = int(loud[-1])
    sig = sig[head:tail + 1]
    return (tail - head) >= min_length, sig

def principle_frequency_finder(sig):
    """Estimate the period (in samples) of a signal from a normalised autocorrelation.

    For every lag ``index`` in ``[0, len(sig)//2)`` a window centred on the
    middle of the signal is compared with the same window taken from the
    signal shifted by ``index`` samples. The window half-width is
    ``(t - index) / 2`` with ``t = len(sig)//2``, so it shrinks as the lag
    grows. The score is ``2 * sum(a*b) / (sum(a**2) + sum(b**2))``, which is
    at most 1 and equals 1 when both windows are identical.

    The returned period is the lag of the highest strict local maximum of that
    score (end points are never considered local maxima).

    Parameters
    ----------
    sig : 1-D array-like of numbers
        The sampled signal. It is converted to a float array so integer input
        cannot overflow when squared.

    Returns
    -------
    (max_index, corr)
        `max_index` is the lag of the highest local maximum, or 0 when the
        score has no local maximum (including inputs shorter than 2 samples).
        `corr` is the array of scores, one per lag, of length ``len(sig)//2``.
    """
    sig = np.asarray(sig, dtype=float)
    t = int(len(sig) / 2)
    corr = np.zeros(t)

    for index in range(t):
        half_width = (t - index) / 2
        lo = int(t - half_width)
        hi = int(t + half_width + 1)
        reference = sig[lo:hi]
        delayed = sig[index:][lo:hi]
        energy = np.sum(reference ** 2) + np.sum(delayed ** 2)
        cross = np.sum(reference * delayed)
        # If both windows are all zero, energy is 0 and the score becomes NaN
        # (numpy emits a RuntimeWarning); NaN never compares greater than its
        # neighbours, so such lags are ignored by the peak search below.
        corr[index] = 2 * cross / energy

    # Strict local maxima among interior lags: greater than both neighbours.
    interior = corr[1:-1]
    is_peak = (interior > corr[:-2]) & (interior > corr[2:])
    peak_lags = np.flatnonzero(is_peak) + 1

    max_index = 0
    if peak_lags.size > 0:
        max_index = peak_lags[np.argmax(corr[peak_lags])]

    return (max_index, corr)

In [46]:
# Directories holding the voiced / voiceless wav segments.
# The base directory can be overridden with the TOPCAP_AUDIO_DIR environment
# variable so the notebook runs on other machines; when it is not set the
# original location is used. Both paths keep a trailing separator because the
# rest of the notebook builds file names by plain string concatenation.
AUDIO_SEGMENT_DIR = os.environ.get(
    "TOPCAP_AUDIO_DIR",
    "/Users/pfeng3/Documents/research/TopCap/revise2/audio_segment",
)
voicedPath = os.path.join(AUDIO_SEGMENT_DIR, "voiced", "")
voicedlessPath = os.path.join(AUDIO_SEGMENT_DIR, "voiceless", "")

# Parameter for embedding
M = 100  # embedding dimension

In [48]:
# Retrieve features from persistence diagrams.
# For a random ~1-in-10 subsample of the wav files in each directory, embed the
# trimmed signal with a time-delay embedding, compute its 1-dimensional
# persistence diagram and append one row
#     (birth time, lifetime, label)
# for the most persistent point to "Persistent_Diag.csv".
# label 0 indicates voiced data, label 1 indicates voicedless data.
# Rows are appended, so re-running this cell adds to whatever the file
# already contains.

# (directory, class label) pairs processed with exactly the same pipeline
labelled_dirs = ((voicedPath, 0), (voicedlessPath, 1))

# Open the csv once for the whole run instead of once per wav file
with open("Persistent_Diag.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)

    for wav_dir, label in labelled_dirs:
        for fn in os.listdir(wav_dir):
            # Only consider wav files; other directory entries (for example
            # ".DS_Store") used to be turned into a non-existent "<name>.wav"
            # path and made sf.read fail.
            fileName, ext = os.path.splitext(fn)
            if ext.lower() != ".wav":
                continue

            # Subsample dataset, retrieve 1 in 10 among dataset
            randNum = np.random.randint(10)
            if randNum != 0:
                continue

            # Read wav file as "sig"
            wavFile = os.path.join(wav_dir, fn)
            sig, samplerate = sf.read(wavFile)

            # Cut head and tail of the wav file; skip it if what remains is
            # empty or too short
            status, sig = head_tail_scissor(sig)
            if not status:
                continue

            # Find the period of sig and derive the embedding delay from it:
            # M * delay is roughly 6 periods, and delay is at least 1
            period, corr = principle_frequency_finder(sig)
            delay = round(period * 6 / M)
            if delay == 0:
                delay = 1

            # The embedding window (M * delay samples) must fit in the signal
            if delay * M > len(sig):
                delay = int(np.floor(len(sig) / M))

            # Time-delay embedding of the signal
            # (arguments: dimension M, delay, and a skip of 5)
            point_Cloud = timedelay.TimeDelayEmbedding(M, delay, 5)
            Points = point_Cloud(sig)
            if len(Points) < 40:
                continue

            # Compute the persistence diagram of the point cloud and keep its
            # 1-dimensional part
            dgms = ripser(Points, maxdim=1)['dgms'][1]
            if dgms.size == 0:
                continue

            # Lifetime (death - birth) of every point; pick the longest-lived
            persistent_time = dgms[:, 1] - dgms[:, 0]
            index = argmax(persistent_time)

            # Write birth time, lifetime and class label into the csv file
            birth_date = dgms[index][0]
            lifetime = persistent_time[index]
            writer.writerow((birth_date, lifetime, label))

    


    



In [None]:
# Load the extracted features into a DataFrame. The file has no header row,
# so the three column names are supplied here.
# NOTE(review): this reads 'Persistent_Diag2.csv' while the extraction cell
# appends to "Persistent_Diag.csv" -- confirm which file is intended.
df = pd.read_csv(
    'Persistent_Diag2.csv',
    header=None,
    names=['birth_date', 'lifetime', 'type'],
)
df

Unnamed: 0,birth_date,lifetime,type
0,0.529858,0.622106,0
1,0.211372,0.562659,0
2,0.198676,0.292560,0
3,0.167390,0.674918,0
4,0.950687,0.740677,0
...,...,...,...
14827,0.125428,0.011910,1
14828,0.272550,0.012513,1
14829,0.054631,0.005890,1
14830,0.210238,0.017549,1


In [3]:
# Set up plot configuration
SMALL_SIZE = 10
MEDIUM_SIZE = 12
BIGGER_SIZE = 15

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)   # fontsize of the x tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)   # fontsize of the y tick labels

# Group the data based on voiced/ voicedless
groups = df.groupby('type')

# Legend text and marker colour for each class label
typeDict = {1: 'voicedless', 0: 'voiced'}
typeColors = {1: '#4d4dff', 0: '#ff5c33'}

# Plot lifetime against birth time, one colour per class
fig, ax = plt.subplots(figsize=(6, 6))
ax.margins(0.05)
# The loop variable is deliberately not called `type`, which would shadow the
# builtin for every later cell of the notebook.
for label, group in groups:
    if label not in typeColors:
        # Only the two known classes are drawn
        continue
    ax.plot(group.birth_date, group.lifetime, marker='o', linestyle='', ms=2,
            label=typeDict[label], alpha=0.5, color=typeColors[label])
legend = ax.legend(fontsize=BIGGER_SIZE, markerscale=4)
ax.set_xlabel('birth time', fontsize=BIGGER_SIZE)
ax.set_ylabel('lifetime', fontsize=BIGGER_SIZE)

## Save figure as pdf file
#plt.savefig("figure/featureAna_pd1.pdf", format="pdf", bbox_inches="tight")

Text(0, 0.5, 'lifetime')

2025-03-17 14:36:57.621 python[2735:71585] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-17 14:36:57.621 python[2735:71585] +[IMKInputSession subclass]: chose IMKInputSession_Modern


In [55]:
# Train a linear model to classify the data, and plot the decision boundary
# Split the data into training and testing sets
X = df[['birth_date', 'lifetime']]
# Select the target as a 1-D Series: passing the one-column DataFrame
# df[['type']] makes scikit-learn emit a DataConversionWarning
# ("y = column_or_1d(y, warn=True)").
y = df['type']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a RidgeClassifier object
ridge = RidgeClassifier(alpha=1.0)

# Fit the model to the training data
ridge.fit(X_train, y_train)

# Predict the labels for the test data
y_pred = ridge.predict(X_test)

# Evaluate the performance of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Create a meshgrid (step 0.02, one unit of margin around the data) on which
# the decision boundary is evaluated
x_min, x_max = X['birth_date'].min() - 1, X['birth_date'].max() + 1
y_min, y_max = X['lifetime'].min() - 1, X['lifetime'].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# Predict the class labels for each point in the grid. The grid is wrapped in
# a DataFrame carrying the training column names because the model was fitted
# on a DataFrame.
grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X.columns)
Z = ridge.predict(grid)
Z = Z.reshape(xx.shape)

# Plot the data points
fig, ax = plt.subplots()
ax.margins(0.05)
typeDict = {1: 'voicedless', 0: 'voiced'}
# The loop variable is not called `type`, to avoid shadowing the builtin
for label, group in groups:
    ax.plot(group.birth_date, group.lifetime, marker='o', linestyle='', ms=2, label=typeDict[label])
ax.legend()
ax.set_xlabel('birth time')
ax.set_ylabel('lifetime')

# Plot the decision boundary
ax.contourf(xx, yy, Z, alpha=0.8)

Accuracy: 0.9389956184698348


  y = column_or_1d(y, warn=True)


<matplotlib.contour.QuadContourSet at 0x3465a2210>

In [None]:
# Plot each class in its own panel (voiced on the left, voicedless on the
# right), both with the same axis limits so the panels are directly comparable
panelFig = plt.figure(figsize=(6, 3))

# class label -> (subplot position, marker colour)
panelStyle = {0: (1, '#ff5c33'), 1: (2, '#4d4dff')}

# The loop variable is not called `type`, to avoid shadowing the builtin
for label, group in groups:
    if label not in panelStyle:
        # Only the two known classes get a panel
        continue
    position, color = panelStyle[label]
    panel = panelFig.add_subplot(1, 2, position)
    panel.plot(group.birth_date, group.lifetime, marker='o', linestyle='', ms=2,
               label=typeDict[label], alpha=0.5, color=color)
    panel.legend([typeDict[label]], fontsize=10, markerscale=4, loc='upper right')
    panel.set_xlabel('birth time')
    panel.set_ylabel('lifetime')
    panel.set_xlim([-0.1, 3.1])
    panel.set_ylim([-0.1, 2.6])

panelFig.tight_layout()

## Save figure as pdf file
## (kept before plt.show(): on backends that release the figure when it is
## shown, saving afterwards can write an empty file)
#panelFig.savefig("figure/featureAna_pd2.pdf", format="pdf", bbox_inches="tight")

plt.show()