In [1]:
import os
import random
import opensmile
import soundfile as sf
import pandas as pd
from tqdm import tqdm

In [2]:
# Path to directory with audio files
audio_fpath = "Androids-Corpus/Reading-Task/audio/PT/"

# Pick a random audio file
files = [f for f in os.listdir(audio_fpath) if f.endswith(".wav")]
# rand_file = random.choice(files)
rand_file = files[42]
audio_file = os.path.join(audio_fpath, rand_file)
print(f"Selected file: {audio_file}")

# Load audio
signal, sr = sf.read(audio_file)

# Define openSMILE extractors
smile_egemaps = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)

smile_compare = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Extract features
features_egemaps = smile_egemaps.process_signal(signal, sr)
features_compare = smile_compare.process_signal(signal, sr)


Selected file: Androids-Corpus/Reading-Task/audio/PT/21_PM33_3.wav


In [23]:
features_compare.columns[100:110]

Index(['pcm_zcr_sma_iqr2-3', 'pcm_zcr_sma_iqr1-3', 'pcm_zcr_sma_percentile1.0',
       'pcm_zcr_sma_percentile99.0', 'pcm_zcr_sma_pctlrange0-1',
       'pcm_zcr_sma_stddev', 'pcm_zcr_sma_skewness', 'pcm_zcr_sma_kurtosis',
       'pcm_zcr_sma_meanSegLen', 'pcm_zcr_sma_maxSegLen'],
      dtype='object')

In [10]:
len(features_egemaps.columns)

88

In [9]:
len(features_compare.columns)

6373

In [11]:
smile_egemaps_lld = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
)

features_egemaps_lld = smile_egemaps_lld.process_signal(signal, sr)

In [20]:
print(features_egemaps.columns)

Index(['F0semitoneFrom27.5Hz_sma3nz_amean',
       'F0semitoneFrom27.5Hz_sma3nz_stddevNorm',
       'F0semitoneFrom27.5Hz_sma3nz_percentile20.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile50.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile80.0',
       'F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2',
       'F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope', 'loudness_sma3_amean',
       'loudness_sma3_stddevNorm', 'loudness_sma3_percentile20.0',
       'loudness_sma3_percentile50.0', 'loudness_sma3_percentile80.0',
       'loudness_sma3_pctlrange0-2', 'loudness_sma3_meanRisingSlope',
       'loudness_sma3_stddevRisingSlope', 'loudness_sma3_meanFallingSlope',
       'loudness_sma3_stddevFallingSlope', 'spectralFlux_sma3_amean',
       'spectralFlux_sma3_stddevNorm', 'mfcc1_sma3_amean',
       'mfcc1_sma3_stddevNorm', 'mfcc2_

In [19]:
print(features_egemaps_lld.columns)

Index(['Loudness_sma3', 'alphaRatio_sma3', 'hammarbergIndex_sma3',
       'slope0-500_sma3', 'slope500-1500_sma3', 'spectralFlux_sma3',
       'mfcc1_sma3', 'mfcc2_sma3', 'mfcc3_sma3', 'mfcc4_sma3',
       'F0semitoneFrom27.5Hz_sma3nz', 'jitterLocal_sma3nz',
       'shimmerLocaldB_sma3nz', 'HNRdBACF_sma3nz', 'logRelF0-H1-H2_sma3nz',
       'logRelF0-H1-A3_sma3nz', 'F1frequency_sma3nz', 'F1bandwidth_sma3nz',
       'F1amplitudeLogRelF0_sma3nz', 'F2frequency_sma3nz',
       'F2bandwidth_sma3nz', 'F2amplitudeLogRelF0_sma3nz',
       'F3frequency_sma3nz', 'F3bandwidth_sma3nz',
       'F3amplitudeLogRelF0_sma3nz'],
      dtype='object')


In [13]:
features_egemaps.shape

(1, 88)

In [12]:
features_egemaps_lld.shape

(5155, 25)

In [16]:
features_egemaps.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
start,end,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0 days,0 days 00:00:51.595691610,24.842369,0.097686,23.68504,24.603333,26.284668,2.599628,153.326202,205.401535,50.320881,128.666885,...,-0.015402,-0.005375,0.068034,3.198915,1.707743,0.408068,0.511468,0.18473,0.260147,-36.667397


In [15]:
features_egemaps_lld.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Loudness_sma3,alphaRatio_sma3,hammarbergIndex_sma3,slope0-500_sma3,slope500-1500_sma3,spectralFlux_sma3,mfcc1_sma3,mfcc2_sma3,mfcc3_sma3,mfcc4_sma3,...,logRelF0-H1-A3_sma3nz,F1frequency_sma3nz,F1bandwidth_sma3nz,F1amplitudeLogRelF0_sma3nz,F2frequency_sma3nz,F2bandwidth_sma3nz,F2amplitudeLogRelF0_sma3nz,F3frequency_sma3nz,F3bandwidth_sma3nz,F3amplitudeLogRelF0_sma3nz
start,end,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0 days 00:00:00,0 days 00:00:00.020000,0.171682,-8.549108,17.937178,-0.044203,-0.004924,0.007501,8.089675,3.684633,10.559636,2.022642,...,0.0,515.777344,998.036316,-140.662384,1562.565063,1092.897827,-139.921829,2245.683594,816.377747,-140.248398
0 days 00:00:00.010000,0 days 00:00:00.030000,0.174262,-7.213514,14.113811,-0.021557,-0.004548,0.020852,7.815128,3.322604,8.408792,0.962938,...,2.524562,511.435455,984.341614,-80.21167,1522.374634,1076.443726,-79.247849,2301.574463,763.513184,-81.463966
0 days 00:00:00.020000,0 days 00:00:00.040000,0.177049,-6.060549,11.679684,-0.00804,-0.003126,0.030875,8.038446,3.144581,8.04735,1.714168,...,7.840576,516.194519,994.886475,-21.763206,1511.36084,1159.270996,-21.066263,2296.432861,765.887207,-24.833033
0 days 00:00:00.030000,0 days 00:00:00.050000,0.177995,-7.191372,11.244885,-0.02457,-0.000249,0.043502,9.689507,5.715604,8.120708,4.563063,...,17.773783,683.552185,987.040527,-24.686514,1782.07605,1003.147949,-25.3862,2611.919434,879.448059,-30.161133
0 days 00:00:00.040000,0 days 00:00:00.060000,0.176964,-10.843346,18.030664,-0.045253,0.000909,0.049436,11.64788,10.249892,10.948712,10.241356,...,23.584673,903.97821,978.66925,-27.896708,2041.84375,874.502869,-27.78698,2930.892334,885.3255,-32.680462


## Corpus level

### Reading

In [None]:
# Paths to subcorpora
base_dir = "Androids-Corpus/Reading-Task/audio"
groups = ["PT", "HC"]  # PT = patient / depressed group, HC = healthy control

# Define feature extractors
smile_egemaps = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# smile_compare = opensmile.Smile(
#     feature_set=opensmile.FeatureSet.ComParE_2016,
#     feature_level=opensmile.FeatureLevel.Functionals,
# )

# Loop over groups
for group in groups:
    audio_fpath = os.path.join(base_dir, group)
    files = [f for f in os.listdir(audio_fpath) if f.endswith(".wav")]
    target = 0 if group == "HC" else 1
    
    all_features = []
    print(f"Extracting features for {group} ({len(files)} files)...")

    for f in tqdm(files):
        audio_file = os.path.join(audio_fpath, f)
        try:
            signal, sr = sf.read(audio_file)

            # Extract features (you can choose one or both)
            feat_egemaps = smile_egemaps.process_signal(signal, sr)

            # Combine and add metadata
            # combined = pd.concat([feat_egemaps, feat_compare], axis=1)
            # combined["file"] = f
            feat_egemaps["file"] = f
            feat_egemaps["target"] = target
            all_features.append(feat_egemaps)
        except Exception as e:
            print(f"Error processing {f}: {e}")

    # Concatenate all participants
    df = pd.concat(all_features, ignore_index=True)

    # Save per group
    output_path = f"{group}_features_reading.csv"
    df.to_csv(output_path, index=False)
    print(f"Saved {group} features to {output_path}")


Extracting features for PT (58 files)...


  0%|          | 0/58 [00:00<?, ?it/s]

100%|██████████| 58/58 [02:19<00:00,  2.41s/it]


Saved PT features to PT_features_reading.csv
Extracting features for HC (54 files)...


100%|██████████| 54/54 [01:51<00:00,  2.07s/it]

Saved HC features to HC_features_reading.csv





### Interview

In [7]:
inter_dir = "Androids-Corpus/Interview-Task/audio_clip"
subdirs = [d for d in os.listdir(inter_dir) if os.path.isdir(os.path.join(inter_dir, d))]

In [8]:
subdirs[:5]

['29_PF42_3', '68_PF43_3', '05_CF41_3', '32_PF51_2', '57_CF25_3']

In [9]:
len(subdirs)

116

In [13]:
subdirs[0].split("_")[1][0]

'P'

In [4]:
smile_egemaps = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)

In [None]:
interdata = []
for guy in subdirs:
    audio_fpath = os.path.join(inter_dir, guy)
    files = [f for f in os.listdir(audio_fpath) if f.endswith(".wav")]
    target = 0 if guy.split("_")[1][0] == "C" else 1

    all_features = []
    print(f"Extracting features for {guy} ({len(files)} files)...")

    for f in tqdm(files):
        audio_file = os.path.join(audio_fpath, f)
        try:
            signal, sr = sf.read(audio_file)

            # Extract features (you can choose one or both)
            feat_egemaps = smile_egemaps.process_signal(signal, sr)

            # Combine and add metadata
            # combined = pd.concat([feat_egemaps, feat_compare], axis=1)
            # combined["file"] = f
            # feat_egemaps["file"] = f
            feat_egemaps["target"] = target
            all_features.append(feat_egemaps)
        except Exception as e:
            print(f"Error processing {f}: {e}")

    # Concatenate all participants
    df = pd.concat(all_features, ignore_index=True)
    app = pd.DataFrame(df.mean()).transpose()
    app['file'] = guy
    interdata.append(app)

Extracting features for 29_PF42_3 (1 files)...


100%|██████████| 1/1 [00:06<00:00,  6.11s/it]


Extracting features for 68_PF43_3 (6 files)...


100%|██████████| 6/6 [00:04<00:00,  1.26it/s]


Extracting features for 05_CF41_3 (13 files)...


100%|██████████| 13/13 [00:09<00:00,  1.44it/s]


Extracting features for 32_PF51_2 (3 files)...


100%|██████████| 3/3 [00:02<00:00,  1.03it/s]


Extracting features for 57_CF25_3 (7 files)...


100%|██████████| 7/7 [00:11<00:00,  1.61s/it]


Extracting features for 31_CF55_2 (19 files)...


100%|██████████| 19/19 [00:05<00:00,  3.41it/s]


Extracting features for 06_PM64_2 (2 files)...


100%|██████████| 2/2 [00:16<00:00,  8.04s/it]


Extracting features for 40_CF59_1 (8 files)...


100%|██████████| 8/8 [00:12<00:00,  1.51s/it]


Extracting features for 33_CF46_3 (7 files)...


100%|██████████| 7/7 [00:10<00:00,  1.52s/it]


Extracting features for 56_PM57_4 (21 files)...


100%|██████████| 21/21 [00:07<00:00,  2.82it/s]


Extracting features for 44_PF29_3 (4 files)...


100%|██████████| 4/4 [00:04<00:00,  1.24s/it]


Extracting features for 27_PF26_3 (2 files)...


100%|██████████| 2/2 [00:07<00:00,  3.67s/it]


Extracting features for 36_PF38_2 (7 files)...


100%|██████████| 7/7 [00:04<00:00,  1.47it/s]


Extracting features for 48_CF34_3 (22 files)...


100%|██████████| 22/22 [00:06<00:00,  3.54it/s]


Extracting features for 15_CF53_3 (3 files)...


100%|██████████| 3/3 [00:11<00:00,  3.76s/it]


Extracting features for 64_PF54_2 (6 files)...


100%|██████████| 6/6 [00:08<00:00,  1.38s/it]


Extracting features for 18_PF33_2 (6 files)...


100%|██████████| 6/6 [00:04<00:00,  1.47it/s]


Extracting features for 04_PF50_2 (7 files)...


100%|██████████| 7/7 [00:10<00:00,  1.43s/it]


Extracting features for 46_CF39_3 (13 files)...


100%|██████████| 13/13 [00:07<00:00,  1.65it/s]


Extracting features for 62_PF56_3 (3 files)...


100%|██████████| 3/3 [00:13<00:00,  4.40s/it]


Extracting features for 54_CM48_2 (13 files)...


100%|██████████| 13/13 [00:07<00:00,  1.84it/s]


Extracting features for 67_PF54_1 (7 files)...


100%|██████████| 7/7 [00:06<00:00,  1.14it/s]


Extracting features for 38_CM27_3 (14 files)...


100%|██████████| 14/14 [00:12<00:00,  1.16it/s]


Extracting features for 65_PF41_3 (4 files)...


100%|██████████| 4/4 [00:05<00:00,  1.35s/it]


Extracting features for 09_PM60_2 (9 files)...


100%|██████████| 9/9 [00:11<00:00,  1.23s/it]


Extracting features for 31_PF68_4 (1 files)...


100%|██████████| 1/1 [00:08<00:00,  8.07s/it]


Extracting features for 14_PF35_3 (4 files)...


100%|██████████| 4/4 [00:05<00:00,  1.25s/it]


Extracting features for 26_PM54_3 (3 files)...


100%|██████████| 3/3 [00:08<00:00,  2.84s/it]


Extracting features for 23_PF55_2 (7 files)...


100%|██████████| 7/7 [00:02<00:00,  3.22it/s]


Extracting features for 58_PM43_3 (5 files)...


100%|██████████| 5/5 [00:07<00:00,  1.44s/it]


Extracting features for 71_PM68_1 (1 files)...


100%|██████████| 1/1 [00:11<00:00, 11.81s/it]


Extracting features for 26_CM31_3 (1 files)...


100%|██████████| 1/1 [00:08<00:00,  8.77s/it]


Extracting features for 20_PM43_2 (7 files)...


100%|██████████| 7/7 [00:03<00:00,  1.96it/s]


Extracting features for 53_PM50_2 (3 files)...


100%|██████████| 3/3 [00:09<00:00,  3.08s/it]


Extracting features for 07_PM39_4 (5 files)...


100%|██████████| 5/5 [00:14<00:00,  3.00s/it]


Extracting features for 37_CF69_1 (21 files)...


100%|██████████| 21/21 [00:09<00:00,  2.10it/s]


Extracting features for 16_CF33_4 (3 files)...


100%|██████████| 3/3 [00:10<00:00,  3.37s/it]


Extracting features for 57_PM56_2 (7 files)...


100%|██████████| 7/7 [00:05<00:00,  1.26it/s]


Extracting features for 49_CM54_4 (9 files)...


100%|██████████| 9/9 [00:09<00:00,  1.01s/it]


Extracting features for 01_CF56_1 (10 files)...


100%|██████████| 10/10 [00:07<00:00,  1.27it/s]


Extracting features for 18_CM64_3 (3 files)...


100%|██████████| 3/3 [00:12<00:00,  4.08s/it]


Extracting features for 13_CF45_2 (23 files)...


100%|██████████| 23/23 [00:08<00:00,  2.81it/s]


Extracting features for 10_PF51_1 (5 files)...


100%|██████████| 5/5 [00:06<00:00,  1.40s/it]


Extracting features for 20_CM51_3 (4 files)...


100%|██████████| 4/4 [00:10<00:00,  2.60s/it]


Extracting features for 19_CF62_4 (3 files)...


100%|██████████| 3/3 [00:09<00:00,  3.01s/it]


Extracting features for 47_CF61_2 (16 files)...


100%|██████████| 16/16 [00:08<00:00,  1.85it/s]


Extracting features for 29_CF34_3 (3 files)...


100%|██████████| 3/3 [00:08<00:00,  2.85s/it]


Extracting features for 49_PM31_3 (5 files)...


100%|██████████| 5/5 [00:05<00:00,  1.05s/it]


Extracting features for 41_PF32_4 (4 files)...


100%|██████████| 4/4 [00:04<00:00,  1.21s/it]


Extracting features for 44_CF37_3 (11 files)...


100%|██████████| 11/11 [00:09<00:00,  1.15it/s]


Extracting features for 24_PF56_2 (1 files)...


100%|██████████| 1/1 [00:07<00:00,  7.25s/it]


Extracting features for 51_CF52_2 (11 files)...


100%|██████████| 11/11 [00:07<00:00,  1.49it/s]


Extracting features for 37_PF40_3 (6 files)...


100%|██████████| 6/6 [00:01<00:00,  3.30it/s]


Extracting features for 39_CM27_3 (20 files)...


100%|██████████| 20/20 [00:08<00:00,  2.30it/s]


Extracting features for 02_PM65_2 (5 files)...


100%|██████████| 5/5 [00:12<00:00,  2.49s/it]


Extracting features for 09_CF56_3 (4 files)...


100%|██████████| 4/4 [00:11<00:00,  2.80s/it]


Extracting features for 51_PF58_2 (8 files)...


100%|██████████| 8/8 [00:03<00:00,  2.02it/s]


Extracting features for 69_PF48_3 (2 files)...


100%|██████████| 2/2 [00:29<00:00, 14.97s/it]


Extracting features for 46_PF44_2 (8 files)...


100%|██████████| 8/8 [00:01<00:00,  4.76it/s]


Extracting features for 32_CF22_3 (9 files)...


100%|██████████| 9/9 [00:10<00:00,  1.14s/it]


Extracting features for 07_CF50_2 (8 files)...


100%|██████████| 8/8 [00:09<00:00,  1.16s/it]


Extracting features for 72_PM53_1 (4 files)...


100%|██████████| 4/4 [00:09<00:00,  2.34s/it]


Extracting features for 52_PM53_4 (7 files)...


100%|██████████| 7/7 [00:05<00:00,  1.28it/s]


Extracting features for 50_CF46_2 (16 files)...


100%|██████████| 16/16 [00:06<00:00,  2.48it/s]


Extracting features for 06_CF44_2 (26 files)...


100%|██████████| 26/26 [00:09<00:00,  2.85it/s]


Extracting features for 11_CF44_2 (6 files)...


100%|██████████| 6/6 [00:13<00:00,  2.29s/it]


Extracting features for 28_CF34_3 (6 files)...


100%|██████████| 6/6 [00:07<00:00,  1.26s/it]


Extracting features for 12_PF68_1 (4 files)...


100%|██████████| 4/4 [00:06<00:00,  1.53s/it]


Extracting features for 22_CF50_3 (13 files)...


100%|██████████| 13/13 [00:09<00:00,  1.40it/s]


Extracting features for 11_PF34_4 (3 files)...


100%|██████████| 3/3 [00:13<00:00,  4.64s/it]


Extracting features for 38_PF31_2 (5 files)...


100%|██████████| 5/5 [00:04<00:00,  1.12it/s]


Extracting features for 60_PF56_x (5 files)...


100%|██████████| 5/5 [00:03<00:00,  1.64it/s]


Extracting features for 19_PF43_2 (2 files)...


100%|██████████| 2/2 [00:07<00:00,  3.75s/it]


Extracting features for 59_PF36_x (3 files)...


100%|██████████| 3/3 [00:11<00:00,  3.69s/it]


Extracting features for 41_CM71_2 (17 files)...


100%|██████████| 17/17 [00:11<00:00,  1.49it/s]


Extracting features for 48_PF43_3 (3 files)...


100%|██████████| 3/3 [00:05<00:00,  1.73s/it]


Extracting features for 25_PF36_4 (2 files)...


100%|██████████| 2/2 [00:04<00:00,  2.44s/it]


Extracting features for 02_CM57_2 (12 files)...


100%|██████████| 12/12 [00:08<00:00,  1.42it/s]


Extracting features for 04_CF57_3 (7 files)...


100%|██████████| 7/7 [00:08<00:00,  1.27s/it]


Extracting features for 01_PM58_2 (7 files)...


100%|██████████| 7/7 [00:10<00:00,  1.46s/it]


Extracting features for 03_CF30_3 (13 files)...


100%|██████████| 13/13 [00:06<00:00,  1.98it/s]


Extracting features for 35_PF43_2 (1 files)...


100%|██████████| 1/1 [00:12<00:00, 12.34s/it]


Extracting features for 36_CF59_2 (11 files)...


100%|██████████| 11/11 [00:12<00:00,  1.16s/it]


Extracting features for 47_PF42_2 (6 files)...


100%|██████████| 6/6 [00:09<00:00,  1.66s/it]


Extracting features for 56_CM23_3 (16 files)...


100%|██████████| 16/16 [00:09<00:00,  1.75it/s]


Extracting features for 53_CF52_1 (33 files)...


100%|██████████| 33/33 [00:04<00:00,  6.82it/s]


Extracting features for 08_CF42_2 (5 files)...


100%|██████████| 5/5 [00:15<00:00,  3.14s/it]


Extracting features for 70_PF51_1 (7 files)...


100%|██████████| 7/7 [00:02<00:00,  2.63it/s]


Extracting features for 43_PF53_4 (2 files)...


100%|██████████| 2/2 [00:04<00:00,  2.09s/it]


Extracting features for 21_PM33_3 (7 files)...


100%|██████████| 7/7 [00:03<00:00,  2.10it/s]


Extracting features for 25_CF59_3 (5 files)...


100%|██████████| 5/5 [00:07<00:00,  1.60s/it]


Extracting features for 61_PM59_3 (4 files)...


100%|██████████| 4/4 [00:08<00:00,  2.00s/it]


Extracting features for 05_PM53_4 (1 files)...


100%|██████████| 1/1 [00:25<00:00, 25.53s/it]


Extracting features for 30_CF62_3 (4 files)...


100%|██████████| 4/4 [00:08<00:00,  2.17s/it]


Extracting features for 23_CF55_3 (1 files)...


100%|██████████| 1/1 [00:10<00:00, 10.36s/it]


Extracting features for 22_PF40_3 (6 files)...


100%|██████████| 6/6 [00:06<00:00,  1.10s/it]


Extracting features for 17_PF44_2 (7 files)...


100%|██████████| 7/7 [00:04<00:00,  1.41it/s]


Extracting features for 14_CF49_3 (12 files)...


100%|██████████| 12/12 [00:07<00:00,  1.54it/s]


Extracting features for 13_PF58_2 (5 files)...


100%|██████████| 5/5 [00:05<00:00,  1.17s/it]


Extracting features for 50_PF35_3 (3 files)...


100%|██████████| 3/3 [00:04<00:00,  1.60s/it]


Extracting features for 54_PF48_3 (12 files)...


100%|██████████| 12/12 [00:08<00:00,  1.36it/s]


Extracting features for 12_CF36_1 (24 files)...


100%|██████████| 24/24 [00:08<00:00,  2.88it/s]


Extracting features for 33_PM19_3 (8 files)...


100%|██████████| 8/8 [00:05<00:00,  1.34it/s]


Extracting features for 10_CF51_2 (15 files)...


100%|██████████| 15/15 [00:09<00:00,  1.66it/s]


Extracting features for 17_CF55_3 (3 files)...


100%|██████████| 3/3 [00:09<00:00,  3.25s/it]


Extracting features for 03_PF66_3 (4 files)...


100%|██████████| 4/4 [00:14<00:00,  3.72s/it]


Extracting features for 42_PF63_4 (2 files)...


100%|██████████| 2/2 [00:05<00:00,  2.64s/it]


Extracting features for 15_PM63_4 (3 files)...


100%|██████████| 3/3 [00:07<00:00,  2.50s/it]


Extracting features for 45_CF40_3 (9 files)...


100%|██████████| 9/9 [00:09<00:00,  1.11s/it]


Extracting features for 21_CF58_3 (7 files)...


100%|██████████| 7/7 [00:06<00:00,  1.04it/s]


Extracting features for 27_CF63_4 (4 files)...


100%|██████████| 4/4 [00:09<00:00,  2.31s/it]


Extracting features for 52_CF29_3 (23 files)...


100%|██████████| 23/23 [00:07<00:00,  3.16it/s]


Extracting features for 66_PM33_3 (2 files)...


100%|██████████| 2/2 [00:07<00:00,  3.78s/it]


Extracting features for 16_PF52_3 (3 files)...


100%|██████████| 3/3 [00:03<00:00,  1.25s/it]


Extracting features for 24_CM63_3 (3 files)...


100%|██████████| 3/3 [00:09<00:00,  3.18s/it]


Extracting features for 08_PF37_2 (2 files)...


100%|██████████| 2/2 [00:06<00:00,  3.42s/it]


In [24]:
interdata[0]

Unnamed: 0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp,target,file
0,33.36076,0.113016,32.255665,33.328674,35.014645,2.75898,351.979126,570.98114,78.741867,149.342133,...,1.309442,3.72637,2.503231,0.275408,0.28466,0.118527,0.176662,-21.46962,1.0,29_PF42_3


In [25]:
merged_df_inter = pd.concat(interdata, ignore_index=True)

In [27]:
merged_df_inter

Unnamed: 0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp,target,file
0,33.360760,0.113016,32.255665,33.328674,35.014645,2.758980,351.979126,570.981140,78.741867,149.342133,...,1.309442,3.726370,2.503231,0.275408,0.284660,0.118527,0.176662,-21.469620,1.0,29_PF42_3
1,28.141464,0.197084,26.210714,29.375952,31.138563,4.927848,286.377350,329.608276,163.863953,257.176788,...,0.085129,3.070338,3.017196,0.170990,0.159955,0.157874,0.200705,-41.911461,1.0,68_PF43_3
2,32.723759,0.107326,30.775631,32.544403,35.322155,4.546522,157.087357,218.234512,78.323013,138.313095,...,0.214557,4.598979,1.973078,0.415761,0.338462,0.138481,0.148790,-30.098064,0.0,05_CF41_3
3,27.331697,0.127157,25.527407,26.935419,28.603676,3.076268,167.702225,184.724060,60.824116,139.489334,...,0.150410,2.660018,1.671053,0.433269,0.409655,0.187644,0.323758,-34.492558,1.0,32_PF51_2
4,29.801727,0.105786,27.886662,29.715998,31.878279,3.991617,101.348854,164.944778,47.292782,77.142075,...,0.202380,4.848176,3.549651,0.210282,0.161527,0.109442,0.128517,-36.320538,0.0,57_CF25_3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,31.389324,0.145669,29.975641,31.891922,33.860661,3.885019,226.296341,243.557037,89.818230,103.441055,...,0.174709,4.358105,2.627234,0.251070,0.232751,0.127381,0.115455,-34.114819,0.0,52_CF29_3
112,22.966356,0.136990,21.622726,23.050695,24.560778,2.938052,205.717468,400.888824,64.319138,101.022491,...,0.118613,2.560018,1.780949,0.363900,0.399798,0.200111,0.344861,-35.208801,1.0,66_PM33_3
113,28.152618,0.216538,22.538826,30.339205,32.421719,9.882892,293.280273,395.356842,140.559937,198.592545,...,0.242853,3.761033,2.405857,0.264600,0.281190,0.156339,0.236051,-32.710659,1.0,16_PF52_3
114,28.735914,0.121195,27.066780,28.642897,30.745932,3.679153,250.115173,420.797028,77.854729,229.702454,...,0.314341,3.369453,1.260306,0.690255,0.746179,0.170092,0.207236,-22.151117,0.0,24_CM63_3


In [None]:
# merged_df_inter['gender'] = merged_df_inter['file'].str.split('_').str[1].str[1]
# merged_df_inter['age'] = merged_df_inter['file'].str.split('_').str[1].str[2:]
# merged_df_inter['edu'] = merged_df_inter['file'].str.split('_').str[2]
# merged_df_inter['age'] = merged_df_inter['age'].astype(pd.Int64Dtype())

In [30]:
merged_df_inter.to_csv("features_egemaps_spont.csv", index=False)

In [16]:
type(pd.DataFrame(df.mean()).transpose())

pandas.core.frame.DataFrame

In [None]:
# - nn_XGmm_t.wav,
# - nn is a unique integer identifier suchthat, in a given group, files with the same nn contain the voiceof the same speaker (there is a trailing 0 for numbers lower than 10),
# - X is an alphabetic character corresponding to the speaker’s
# condition (P for depression patient and C for control),
# - G is analphabetic character that stands for the speaker’s gender (M for male and F for female),
# - mm is a two-digits integer number corresponding to the speaker’s age,
# - t is an integer number between 1 and 4accounting for the education level (1 corresponds to primary school and 4 corresponds to university).
# - The letter Xwas used for the 2 participants who did not provide informationabout this aspect.