In [1]:
from typing import List
from senselab.audio.data_structures import Audio
from senselab.audio.tasks.preprocessing import downmix_audios_to_mono, resample_audios

In [2]:
# Load the two sample recordings (one mono, one stereo, per their filenames).
mono_path = "../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav"
stereo_path = "../../src/tests/data_for_testing/audio_48khz_stereo_16bits.wav"
audio1 = Audio.from_filepath(mono_path)
audio2 = Audio.from_filepath(stereo_path)

# The downmix helper works on a list of audios, so wrap the stereo clip
# and take the single converted item back out.
mono_versions = downmix_audios_to_mono([audio2])
audio2 = mono_versions[0]

# Bring both clips to a common 16 kHz sampling rate.
target_rate = 16000
audios = resample_audios([audio1, audio2], target_rate)

In [3]:
# Inspect the resampled clips; the bare expression is rendered as the cell's output.
audios

[Audio(waveform=tensor([[ 1.4590e-06, -7.7387e-06, -5.3665e-06,  ...,  1.4245e-04,
           1.4647e-04,  1.0976e-04]]), sampling_rate=16000, orig_path_or_id='../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav', metadata={}),
 Audio(waveform=tensor([[ 1.1528e-05, -5.4035e-06,  3.5290e-06,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00]]), sampling_rate=16000, orig_path_or_id='../../src/tests/data_for_testing/audio_48khz_stereo_16bits.wav', metadata={})]

In [4]:
from senselab.audio.tasks.features_extraction.praat_parselmouth import extract_features_from_audios, extract_intensity_descriptors

In [5]:
# Run the Praat/Parselmouth feature extraction on the preprocessed clips
# and keep the return value for inspection.
result = extract_features_from_audios(audios=audios)

[Result(output=Output(speech_rate_out={'speaking_rate': 4.063956515665282, 'articulation_rate': 4.063956515665282, 'phonation_ratio': 1.0, 'pause_rate': 0.0, 'mean_pause_dur': 0.0}, pitch_values_out={'pitch_floor': 60.0, 'pitch_ceiling': 250.0}, pitch_out={'mean_f0_hertz': 118.5991613436674, 'stdev_f0_hertz': 30.232956155880277}, intensity_out={'mean_db': 69.97371343701798, 'range_db_ratio': 1.550705537854896}, harmonicity_out={'hnr_db_mean': 3.328562825913734, 'hnr_db_std_dev': 3.3649067832457247}, slope_tilt_out={'spc_slope': -13.992213991490878, 'spc_tilt': -0.004453109703050689}, cpp_out={'mean_cpp': 7.038229172332017}, formants_out={'F1_mean': 613.4814343368618, 'F1_Std': 303.9953720107834, 'B1_mean': 401.9346611426706, 'B1_Std': 400.72077921852315, 'F2_mean': 1701.7936201026444, 'F2_Std': 325.4474082112356, 'B2_mean': 434.5022203696791, 'B2_Std': 380.6260425874624}, spectral_moments_out={'spc_gravity': 579.565685472785, 'spc_std_dev': 651.3074579097976, 'spc_skewness': 3.58796292

In [6]:
# Show the extracted features.
result

{'praat_parselmouth': [{'duration': 4.9213125,
   'speaking_rate': 4.063956515665282,
   'articulation_rate': 4.063956515665282,
   'phonation_ratio': 1.0,
   'pause_rate': 0.0,
   'mean_pause_duration': 0.0,
   'mean_f0_hertz': 118.5991613436674,
   'stdev_f0_hertz': 30.232956155880277,
   'mean_db': 69.97371343701798,
   'range_ratio_db': 1.550705537854896,
   'hnr_db': 3.328562825913734,
   'spectral_slope': -13.992213991490878,
   'spectral_tilt': -0.004453109703050689,
   'cepstral_peak_prominence': 7.038229172332017,
   'mean_f1_loc': 613.4814343368618,
   'std_f1_loc': 303.9953720107834,
   'mean_b1_loc': 401.9346611426706,
   'std_b1_loc': 400.72077921852315,
   'mean_f2_loc': 1701.7936201026444,
   'std_f2_loc': 325.4474082112356,
   'mean_b2_loc': 434.5022203696791,
   'std_b2_loc': 380.6260425874624,
   'spectral_gravity': 579.565685472785,
   'spectral_std_dev': 651.3074579097976,
   'spectral_skewness': 3.587962927611105,
   'spectral_kurtosis': 19.99120845874353},
  {'dur

In [7]:
# TODO:
# - Change some of the names
# - Add some parameters
# - Cache folder? The default should maybe be None
# - Docs
# - Tests
# - Tutorials
# - Children!
#
# - Larger speech-voice vector descriptor