In [None]:
# Install disvoice package using pip3 if not already installed
!pip3 install disvoice

In [None]:
# Clone the official Kaldi repository (kaldi-asr/kaldi on GitHub) into the local folder ./kaldi.
# `--origin upstream` names the remote "upstream" instead of git's default "origin".
# NOTE(review): git refuses to clone into an existing non-empty directory, so re-running this
# cell after a successful clone will presumably report an error — confirm whether it should be guarded.
!git clone https://github.com/kaldi-asr/kaldi.git kaldi --origin upstream

In [None]:
# Importing necessary libraries and modules
import warnings   # To manage warnings during runtime
import os         # For operating system related functions
import pandas as pd  # For data manipulation and analysis
import numpy as np   # For numerical operations

# Importing modules from disvoice package for voice analysis
from disvoice.phonation import Phonation      # For phonation analysis
from disvoice.prosody import Prosody          # For prosody analysis
from disvoice.replearning import RepLearning  # For representation learning
from disvoice.glottal import Glottal          # For glottal analysis
from disvoice.phonological import Phonological  # For phonological analysis


In [None]:
# Define the main folder and the directory that holds the split recordings
# for the heart failure voice analysis. Both paths keep their trailing "/"
# because later code builds file paths by plain string concatenation.
mainFolderPath = "Heart Failure Voice Analysis/"
splitRecordsPath = mainFolderPath + "HeartFailure/" + "Split Records/"

# List every entry in the split-records directory.
# os.listdir() returns names in arbitrary order, so the result is sorted to keep
# everything derived from it reproducible between runs and machines.
audioFilesNameList = sorted(os.listdir(splitRecordsPath))


In [None]:
# Extract the unique patient names: the patient name is the part of each
# file name that precedes the first "_".
audioFilesPatientNameSet = {name.split("_")[0] for name in audioFilesNameList}
# Sorted so the list has a stable order across runs (iteration order of a set of
# strings is not guaranteed to be the same from one interpreter session to the next).
audioFilesPatientNameList = sorted(audioFilesPatientNameSet)

In [None]:
# Build one row of static DisVoice features per patient and save the table as CSV.

# Feature families extracted for each recording section, in output column order.
#   Section1: six sentences specifically crafted around different phonetic contexts
#   Section2: sustained vowel "a" for 5 seconds
#   Section3: sustained vowel "i" for 5 seconds
#   Section4: conversational speech (responses to at least two of three daily questions)
# NOTE(review): the earlier comment for Section4 mentioned prosody as well, but only
# phonation was ever extracted; that behaviour is kept — confirm which one is intended.
sectionExtractors = {
    "Section1": (("prosody", Prosody), ("phonation", Phonation)),
    "Section2": (("prosody", Prosody), ("phonation", Phonation), ("glottal", Glottal)),
    "Section3": (("prosody", Prosody), ("phonation", Phonation), ("glottal", Glottal)),
    "Section4": (("phonation", Phonation),),
}


def extractSectionFeatures(audioPath, sectionName, extractors):
    """Extract every requested feature family from one audio file.

    Parameters
    ----------
    audioPath : str
        Path of the audio file to analyse.
    sectionName : str
        Section token taken from the file name; used verbatim in the column prefix.
    extractors : iterable of (str, class)
        (family name, DisVoice extractor class) pairs, applied in the given order.

    Returns
    -------
    pandas.DataFrame
        The per-family frames placed side by side, with columns named
        "<sectionName>/<family>/<original column>".
    """
    familyFrames = []
    for familyName, extractorClass in extractors:
        # A fresh extractor instance is created for every file, as before.
        familyFeatures = extractorClass().extract_features_file(
            audioPath, static=True, plots=False, fmt="dataframe"
        )
        familyFrames.append(familyFeatures.add_prefix(f"{sectionName}/{familyName}/"))
    return pd.concat(familyFrames, axis=1)


patientFrames = []

# Sorted iteration keeps the row order of the CSV stable across runs.
for patientName in sorted(audioFilesPatientNameSet):
    # Compare the whole patient token: startswith() would also pick up recordings of
    # other patients whose name merely begins with this one (e.g. "P1" vs "P10").
    voiceSplitsforPatient = sorted(
        section for section in audioFilesNameList
        if section.split("_")[0] == patientName
    )

    sectionFrames = []
    for audioSection in voiceSplitsforPatient:
        nameParts = audioSection.split("_")
        if len(nameParts) < 2:
            # No "_" in the name, so there is no section token to read.
            print("Section name not found!", audioSection)
            continue

        # sectionName is the raw second token and may still carry the file extension;
        # it is used unchanged in the column prefix so existing column names are preserved.
        sectionName = nameParts[1]
        sectionOrderName = sectionName.split(".")[0]

        extractors = sectionExtractors.get(sectionOrderName)
        if extractors is None:
            print("Section name not found!", audioSection)
            continue

        fileAudioPath = splitRecordsPath + audioSection
        sectionFrames.append(extractSectionFeatures(fileAudioPath, sectionName, extractors))

    if not sectionFrames:
        # Nothing usable for this patient; it contributes no row to the table.
        continue

    # Concatenate once per patient instead of growing a frame inside the loop.
    features = pd.concat(sectionFrames, axis=1)
    features["Patient Name"] = patientName
    patientFrames.append(features.reset_index(drop=True))

# Stack all patients in a single concat; pd.concat raises on an empty list, so
# fall back to an empty frame when no features were extracted at all.
featuresdf = (
    pd.concat(patientFrames, axis=0, ignore_index=True)
    if patientFrames
    else pd.DataFrame()
)

featuresdf.to_csv(mainFolderPath + "staticFeaturesForTheNewPatients.csv", index=False)