In [3]:
import os
import random
import opensmile
import soundfile as sf
import pandas as pd
from tqdm import tqdm

In [2]:
# Path to directory with audio files
audio_fpath = "Androids-Corpus/Reading-Task/audio/PT/"

# Pick a random audio file
files = [f for f in os.listdir(audio_fpath) if f.endswith(".wav")]
# rand_file = random.choice(files)
rand_file = files[42]
audio_file = os.path.join(audio_fpath, rand_file)
print(f"Selected file: {audio_file}")

# Load audio
signal, sr = sf.read(audio_file)

# Define openSMILE extractors
smile_egemaps = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)

smile_compare = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Extract features
features_egemaps = smile_egemaps.process_signal(signal, sr)
features_compare = smile_compare.process_signal(signal, sr)


Selected file: Androids-Corpus/Reading-Task/audio/PT/21_PM33_3.wav


In [23]:
features_compare.columns[100:110]

Index(['pcm_zcr_sma_iqr2-3', 'pcm_zcr_sma_iqr1-3', 'pcm_zcr_sma_percentile1.0',
       'pcm_zcr_sma_percentile99.0', 'pcm_zcr_sma_pctlrange0-1',
       'pcm_zcr_sma_stddev', 'pcm_zcr_sma_skewness', 'pcm_zcr_sma_kurtosis',
       'pcm_zcr_sma_meanSegLen', 'pcm_zcr_sma_maxSegLen'],
      dtype='object')

In [10]:
len(features_egemaps.columns)

88

In [9]:
len(features_compare.columns)

6373

In [11]:
smile_egemaps_lld = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
)

features_egemaps_lld = smile_egemaps_lld.process_signal(signal, sr)

In [20]:
print(features_egemaps.columns)

Index(['F0semitoneFrom27.5Hz_sma3nz_amean',
       'F0semitoneFrom27.5Hz_sma3nz_stddevNorm',
       'F0semitoneFrom27.5Hz_sma3nz_percentile20.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile50.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile80.0',
       'F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2',
       'F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope', 'loudness_sma3_amean',
       'loudness_sma3_stddevNorm', 'loudness_sma3_percentile20.0',
       'loudness_sma3_percentile50.0', 'loudness_sma3_percentile80.0',
       'loudness_sma3_pctlrange0-2', 'loudness_sma3_meanRisingSlope',
       'loudness_sma3_stddevRisingSlope', 'loudness_sma3_meanFallingSlope',
       'loudness_sma3_stddevFallingSlope', 'spectralFlux_sma3_amean',
       'spectralFlux_sma3_stddevNorm', 'mfcc1_sma3_amean',
       'mfcc1_sma3_stddevNorm', 'mfcc2_

In [19]:
print(features_egemaps_lld.columns)

Index(['Loudness_sma3', 'alphaRatio_sma3', 'hammarbergIndex_sma3',
       'slope0-500_sma3', 'slope500-1500_sma3', 'spectralFlux_sma3',
       'mfcc1_sma3', 'mfcc2_sma3', 'mfcc3_sma3', 'mfcc4_sma3',
       'F0semitoneFrom27.5Hz_sma3nz', 'jitterLocal_sma3nz',
       'shimmerLocaldB_sma3nz', 'HNRdBACF_sma3nz', 'logRelF0-H1-H2_sma3nz',
       'logRelF0-H1-A3_sma3nz', 'F1frequency_sma3nz', 'F1bandwidth_sma3nz',
       'F1amplitudeLogRelF0_sma3nz', 'F2frequency_sma3nz',
       'F2bandwidth_sma3nz', 'F2amplitudeLogRelF0_sma3nz',
       'F3frequency_sma3nz', 'F3bandwidth_sma3nz',
       'F3amplitudeLogRelF0_sma3nz'],
      dtype='object')


In [13]:
features_egemaps.shape

(1, 88)

In [12]:
features_egemaps_lld.shape

(5155, 25)

In [16]:
features_egemaps.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
start,end,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0 days,0 days 00:00:51.595691610,24.842369,0.097686,23.68504,24.603333,26.284668,2.599628,153.326202,205.401535,50.320881,128.666885,...,-0.015402,-0.005375,0.068034,3.198915,1.707743,0.408068,0.511468,0.18473,0.260147,-36.667397


In [15]:
features_egemaps_lld.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Loudness_sma3,alphaRatio_sma3,hammarbergIndex_sma3,slope0-500_sma3,slope500-1500_sma3,spectralFlux_sma3,mfcc1_sma3,mfcc2_sma3,mfcc3_sma3,mfcc4_sma3,...,logRelF0-H1-A3_sma3nz,F1frequency_sma3nz,F1bandwidth_sma3nz,F1amplitudeLogRelF0_sma3nz,F2frequency_sma3nz,F2bandwidth_sma3nz,F2amplitudeLogRelF0_sma3nz,F3frequency_sma3nz,F3bandwidth_sma3nz,F3amplitudeLogRelF0_sma3nz
start,end,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0 days 00:00:00,0 days 00:00:00.020000,0.171682,-8.549108,17.937178,-0.044203,-0.004924,0.007501,8.089675,3.684633,10.559636,2.022642,...,0.0,515.777344,998.036316,-140.662384,1562.565063,1092.897827,-139.921829,2245.683594,816.377747,-140.248398
0 days 00:00:00.010000,0 days 00:00:00.030000,0.174262,-7.213514,14.113811,-0.021557,-0.004548,0.020852,7.815128,3.322604,8.408792,0.962938,...,2.524562,511.435455,984.341614,-80.21167,1522.374634,1076.443726,-79.247849,2301.574463,763.513184,-81.463966
0 days 00:00:00.020000,0 days 00:00:00.040000,0.177049,-6.060549,11.679684,-0.00804,-0.003126,0.030875,8.038446,3.144581,8.04735,1.714168,...,7.840576,516.194519,994.886475,-21.763206,1511.36084,1159.270996,-21.066263,2296.432861,765.887207,-24.833033
0 days 00:00:00.030000,0 days 00:00:00.050000,0.177995,-7.191372,11.244885,-0.02457,-0.000249,0.043502,9.689507,5.715604,8.120708,4.563063,...,17.773783,683.552185,987.040527,-24.686514,1782.07605,1003.147949,-25.3862,2611.919434,879.448059,-30.161133
0 days 00:00:00.040000,0 days 00:00:00.060000,0.176964,-10.843346,18.030664,-0.045253,0.000909,0.049436,11.64788,10.249892,10.948712,10.241356,...,23.584673,903.97821,978.66925,-27.896708,2041.84375,874.502869,-27.78698,2930.892334,885.3255,-32.680462


## Corpus level

In [4]:
# Paths to subcorpora
base_dir = "Androids-Corpus/Reading-Task/audio"
groups = ["PT", "HC"]  # PT = patient / depressed group, HC = healthy control

# Define feature extractors
smile_egemaps = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# smile_compare = opensmile.Smile(
#     feature_set=opensmile.FeatureSet.ComParE_2016,
#     feature_level=opensmile.FeatureLevel.Functionals,
# )

# Loop over groups
for group in groups:
    audio_fpath = os.path.join(base_dir, group)
    files = [f for f in os.listdir(audio_fpath) if f.endswith(".wav")]
    target = 0 if group == "HC" else 1
    
    all_features = []
    print(f"Extracting features for {group} ({len(files)} files)...")

    for f in tqdm(files):
        audio_file = os.path.join(audio_fpath, f)
        try:
            signal, sr = sf.read(audio_file)

            # Extract features (you can choose one or both)
            feat_egemaps = smile_egemaps.process_signal(signal, sr)

            # Combine and add metadata
            # combined = pd.concat([feat_egemaps, feat_compare], axis=1)
            # combined["file"] = f
            feat_egemaps["file"] = f
            feat_egemaps["target"] = target
            all_features.append(feat_egemaps)
        except Exception as e:
            print(f"Error processing {f}: {e}")

    # Concatenate all participants
    df = pd.concat(all_features, ignore_index=True)

    # Save per group
    output_path = f"{group}_features_reading.csv"
    df.to_csv(output_path, index=False)
    print(f"Saved {group} features to {output_path}")


Extracting features for PT (58 files)...


  0%|          | 0/58 [00:00<?, ?it/s]

100%|██████████| 58/58 [02:19<00:00,  2.41s/it]


Saved PT features to PT_features_reading.csv
Extracting features for HC (54 files)...


100%|██████████| 54/54 [01:51<00:00,  2.07s/it]

Saved HC features to HC_features_reading.csv



