In [None]:
# import packages

import csv
import math
import os # read system path 

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import soundfile as sf
from gudhi.point_cloud import timedelay
from numpy import argmax
from ripser import ripser
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib qt5

In [None]:
# Function definitions

# principle_frequency_finder estimates the dominant period (as a lag in samples) of a speech signal
def principle_frequency_finder(sig):
    """Estimate the dominant period of a signal from a normalized autocorrelation.

    For every lag ``k`` in ``[0, len(sig) // 2)`` a window around the middle
    of the signal (the window shrinks as the lag grows) is compared with the
    same window taken from the signal shifted by ``k`` samples. The score is
    ``2 * r / m`` where ``r`` is the dot product of the two windows and ``m``
    is the sum of their energies, so identical windows score exactly 1.

    Parameters
    ----------
    sig : array-like
        Signal samples (converted to a float array).
        NOTE(review): written with a 1-D mono signal in mind -- confirm
        callers never pass multi-channel audio.

    Returns
    -------
    max_index : int
        Lag of the highest strict interior local maximum of the score, or 0
        when no such maximum exists (e.g. silent or very short signals).
    corr : numpy.ndarray
        Score for every lag, length ``len(sig) // 2``. Lags whose windows
        carry no energy score 0 instead of NaN.
    """
    sig = np.asarray(sig, dtype=float)
    t = int(len(sig) / 2)
    corr = np.zeros(t)

    for lag in range(t):
        half_width = (t - lag) / 2
        lo, hi = int(t - half_width), int(t + half_width + 1)
        window = sig[lo:hi]
        shifted = sig[lag:][lo:hi]
        energy = np.sum(window ** 2) + np.sum(shifted ** 2)
        # A silent window has zero energy; leave the score at 0 rather than
        # dividing by zero. NaN energy still propagates (NaN != 0 is True).
        if energy != 0:
            corr[lag] = 2 * np.sum(window * shifted) / energy

    # Strict local maxima among interior lags; the end points are never peaks.
    interior = corr[1:-1]
    is_peak = (interior > corr[:-2]) & (interior > corr[2:])
    peak_lags = np.flatnonzero(is_peak) + 1

    max_index = 0
    if peak_lags.size > 0:
        max_index = peak_lags[np.argmax(corr[peak_lags])]

    return (max_index, corr)

In [None]:
# Directories that contain the voiced / voiceless wav files
voicedPath = ".."
voicedlessPath = ".."

# Time-delay embedding parameter
M = 100  # embedding dimension

In [None]:
# Retrieve features from persistence diagrams.
# For every sampled wav file, the birth time and lifetime of the most
# persistent dimension-1 feature of its time-delay embedding are appended to
# a csv file together with a class label (0 = voiced, 1 = voiceless).

def _embedding_delay(period, n_samples, dim):
    """Choose the delay so that `dim` delayed samples cover about six periods.

    The delay is capped so that `delay * dim` does not exceed the signal
    length, and is never smaller than 1 (the cap alone could yield 0 for
    signals shorter than `dim`).
    """
    delay = int(round(period * 6 / dim))
    if delay * dim > n_samples:
        delay = n_samples // dim
    return max(delay, 1)


def _most_persistent_h1(points):
    """Return (birth, lifetime) of the longest-lived entry of the dimension-1
    diagram computed by ripser, or None when that diagram is empty."""
    h1 = ripser(points, maxdim=1)['dgms'][1]
    if h1.size == 0:
        return None
    lifetimes = h1[:, 1] - h1[:, 0]
    best = int(np.argmax(lifetimes))
    return h1[best, 0], lifetimes[best]


def extract_features(wav_dir, label, dim, out_csv="Persistent_Diag.csv",
                     keep_one_in=10, skip=5, min_points=40):
    """Append (birth, lifetime, label) rows for a random subset of wav files.

    Parameters
    ----------
    wav_dir : str
        Directory that is scanned for ``.wav`` files.
    label : int
        Class label written as the third csv column.
    dim : int
        Time-delay embedding dimension.
    out_csv : str
        Output file. It is opened in append mode, so re-running adds rows to
        whatever the file already contains.
    keep_one_in : int
        Each file is kept with probability ``1 / keep_one_in``.
    skip : int
        ``skip`` argument of the time-delay embedding.
    min_points : int
        Embeddings with fewer points than this are discarded.
    """
    with open(out_csv, "a", newline="") as csvfile:
        writer = csv.writer(csvfile)

        for fn in os.listdir(wav_dir):
            # Only wav files can be read; skip anything else in the directory.
            if os.path.splitext(fn)[1].lower() != ".wav":
                continue

            # Subsample dataset, retrieve 1 in `keep_one_in` among dataset
            if np.random.randint(keep_one_in) != 0:
                continue

            # Join with the directory that was listed, so the path is valid
            # whether or not `wav_dir` ends with a separator.
            sig, samplerate = sf.read(os.path.join(wav_dir, fn))

            # Find the principal period and derive the embedding delay
            period, _ = principle_frequency_finder(sig)
            delay = _embedding_delay(period, len(sig), dim)

            # Time-delay embedding of the signal
            points = timedelay.TimeDelayEmbedding(dim, delay, skip)(sig)
            if len(points) < min_points:
                continue

            # Most persistent dimension-1 feature of the point cloud
            feature = _most_persistent_h1(points)
            if feature is None:
                continue

            birth_date, lifetime = feature
            writer.writerow((birth_date, lifetime, label))


# 0 indicates voiced data
extract_features(voicedPath, label=0, dim=M)

# 1 indicates voiceless data
extract_features(voicedlessPath, label=1, dim=M)

In [None]:
# Load the extracted features; the csv has no header row, so the column
# names are supplied explicitly.
feature_columns = ['birth_date', 'lifetime', 'type']
df = pd.read_csv('Persistent_Diag.csv', header=None, names=feature_columns)
df

In [None]:
# Draw a balanced random sample: up to 9000 rows from each class
SAMPLES_PER_CLASS = 9000

voiced_rows = df[df['type'] == 0]
voiceless_rows = df[df['type'] == 1]

# DataFrame.sample raises if asked for more rows than exist, so cap the
# request at the size of the smaller class (keeps the classes balanced).
n_per_class = min(SAMPLES_PER_CLASS, len(voiced_rows), len(voiceless_rows))
if n_per_class < SAMPLES_PER_CLASS:
    print(f'Only {n_per_class} rows per class available (requested {SAMPLES_PER_CLASS})')

df_0 = voiced_rows.sample(n=n_per_class, random_state=42)
df_1 = voiceless_rows.sample(n=n_per_class, random_state=42)

# Combine the two classes and shuffle the rows
df_feature = pd.concat([df_0, df_1]).sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Min-max normalization of the two feature columns
feature_cols = ['birth_date', 'lifetime']
raw_features = df_feature[feature_cols]
col_min = raw_features.min()
col_range = raw_features.max() - col_min

normalized_df = (raw_features - col_min) / col_range
normalized_df['type'] = df_feature['type']

# Save / reload the normalized features if needed
#normalized_df.to_pickle('topcap_df_feature_normalized')
#normalized_df=pd.read_pickle('topcap_df_feature_normalized')

# Number of voiced (type 0) samples
(normalized_df['type'] == 0).sum()

In [None]:
# Plot the normalized result
# Set up plot configuration
SMALL_SIZE = 10
MEDIUM_SIZE = 12
BIGGER_SIZE = 15

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)   # fontsize of the x tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)   # fontsize of the y tick labels

# Group the data by class label
groups = normalized_df.groupby('type')

# Legend text and marker colour per class label
typeDict = {1:'voicedless',0:'voiced'}
colorDict = {1: '#4d4dff', 0: '#ff5c33'}

# Plot both classes on one set of axes
fig, ax = plt.subplots(figsize=(6,6))
ax.margins(0.05)
# The loop variable is deliberately not called `type`: cell variables are
# global, so that name would replace the builtin for the rest of the notebook.
for label_id, group in groups:
    if label_id not in typeDict:
        continue
    ax.plot(group.birth_date, group.lifetime, marker='o', linestyle='', ms=2,
            label=typeDict[label_id], alpha=0.5, color=colorDict[label_id])
ax.legend(fontsize=BIGGER_SIZE, markerscale=4)
ax.set_xlabel('Birth Time')
ax.set_ylabel('Lifetime')

## Save figure as pdf file
#plt.savefig("..", format="pdf", bbox_inches="tight")

In [None]:
# Plot each class in its own panel
fig_panels, panel_axes = plt.subplots(1, 2, figsize=(6, 3))

# class label -> (axes, legend text, marker colour)
panels = {
    0: (panel_axes[0], 'voiced', '#ff5c33'),
    1: (panel_axes[1], 'voiceless', '#4d4dff'),
}

# The loop variable is deliberately not called `type`, which would replace
# the builtin in the notebook's global namespace.
for label_id, group in groups:
    if label_id not in panels:
        continue
    panel_ax, legend_text, color = panels[label_id]
    panel_ax.plot(group.birth_date, group.lifetime, marker='o', linestyle='', ms=2,
                  label=typeDict[label_id], alpha=0.5, color=color)
    panel_ax.legend([legend_text], fontsize=10, markerscale=4, loc='upper right')
    panel_ax.set_xlabel('Birth Time')
    panel_ax.set_ylabel('Lifetime')
    panel_ax.set_xlim([-0.1, 1.1])
    panel_ax.set_ylim([-0.1, 1.1])

fig_panels.tight_layout()

## Save figure as pdf file
## NOTE(review): saving before plt.show() is the safer order; with some
## backends the figure is gone once show() returns -- confirm for qt5.
#fig_panels.savefig("..", format="pdf", bbox_inches="tight")

plt.show()

In [None]:
# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    normalized_df[['birth_date', 'lifetime']],
    normalized_df['type'],
    test_size=0.2,
    random_state=42,
)

# Logistic classification (LogisticRegression is imported in the import cell)
model = LogisticRegression()

# Train the model using the training data; y_train is already a 1-D Series
model.fit(X_train, y_train)

# Predict class labels for the test data
y_pred = model.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')