# Speech Feature Extraction using OpenSMILE (GeMAPSv01b + ComParE configs)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path
from typing import List, Optional

import pandas as pd
from IPython.display import display
from tqdm.auto import tqdm

sys.path.append("..")
# opensmile
import opensmile

from sgs_utils.path_conf import loc_data_dir, speech_data_session_dir

# Extracting features

useful links:
* [opensmile config folder](https://github.com/audeering/opensmile/tree/v3.0.0/config)
* differences between the GeMAPS versions: see [CHANGES.txt](https://github.com/audeering/opensmile/blob/v3.0.0/config/gemaps/CHANGES.txt)

**note**: `eGeMAPS` is an _extended_ version of GeMAPS

`feature-level`:
* `Functionals`: global segment based features (1 feature per segment)
* `LowLevelDescriptor`: sliding window features (1 feature per window)

In [3]:
# Load `df_session_uuid.parquet` from the local data directory.
# NOTE(review): `df_session` is not referenced by any later cell visible in
# this notebook — confirm it is still needed.
df_session = pd.read_parquet(loc_data_dir.joinpath("df_session_uuid.parquet"))

In [4]:
# openSMILE extractors used by the extraction cells below.
# GeMAPSv01b is needed at both feature levels: `Functionals` (segment-level
# features) and `LowLevelDescriptors` (sliding-window features).
func_gemaps, lld_gemaps = (
    opensmile.Smile(
        feature_level=level,
        feature_set=opensmile.FeatureSet.GeMAPSv01b,
    )
    for level in (
        opensmile.FeatureLevel.Functionals,
        opensmile.FeatureLevel.LowLevelDescriptors,
    )
)

# ComParE 2016 is only used at LLD level: frequency-based features are
# calculated on its `F0final_sma` signal.
lld_compare = opensmile.Smile(
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
    feature_set=opensmile.FeatureSet.ComParE_2016,
)

In [5]:
from multiprocessing import Pool
import traceback
from typing import Tuple

In [6]:
def _extract_parse_smile_df(s: opensmile.Smile, f: Path) -> pd.DataFrame:
    """Run extractor `s` on file `f` and attach metadata parsed from the path.

    The index returned by `process_file` is moved into regular columns and the
    `file` column is cast to `str`. Four columns are then added:

    * `pic_name`: the part of the file name before the first `__`
    * `time_str`: the part between the first and second `__`, cut at the
      first `.`
    * `DB`: the name of the parent directory
    * `ID`: the last `__`-separated token of the grandparent directory name
    """
    name_tokens = f.name.split("__")

    features = s.process_file(f).reset_index(drop=False)
    features["file"] = features["file"].astype("str")

    features["pic_name"] = name_tokens[0]
    features["time_str"] = name_tokens[1].split(".")[0]
    features["DB"] = f.parent.name
    features["ID"] = f.parent.parent.name.split("__")[-1]
    return features

def _extract_opensmile_f(file: Path) -> Tuple[pd.DataFrame, ...]:
    """Extract all three feature tables for a single file.

    Returns a 3-tuple in this fixed order: GeMAPS functionals, GeMAPS LLDs,
    ComParE LLDs.
    """
    extractors = (func_gemaps, lld_gemaps, lld_compare)
    return tuple(_extract_parse_smile_df(smile, f=file) for smile in extractors)

# Extract the features of every wav file in parallel.
# `out` becomes a list with one (functionals, GeMAPS LLD, ComParE LLD) tuple
# per file; `imap_unordered` yields them in completion order, not file order.
out = None
wav_files = list(speech_data_session_dir.glob("*/*/*.wav"))
with Pool(processes=16) as pool:
    results = pool.imap_unordered(_extract_opensmile_f, wav_files)
    results = tqdm(results, total=len(wav_files))
    try:
        out = list(results)
    except BaseException:
        # Stop the workers right away (this also covers a keyboard interrupt)
        # and let the error propagate. Swallowing it would leave `out = None`
        # and make the concatenation below fail with an unrelated TypeError.
        pool.terminate()
        raise
    finally:
        # No-op after `terminate()`; on success this waits for the workers.
        pool.close()
        pool.join()

def _parse_concat_df(df_conc: pd.DataFrame) -> pd.DataFrame:
    df_conc["DB"] = df_conc["DB"].astype("category")
    df_conc["pic_name"] = df_conc["pic_name"].astype("category")
    df_conc["ID"] = df_conc["ID"].astype("category")
    return df_conc


# Each entry of `out` is a (functionals, GeMAPS LLD, ComParE LLD) tuple.
# Stack every tuple position over all files into one frame and cast the
# metadata columns to categoricals.
df_gemaps_func, df_gemaps_lld, df_compare_lld = (
    _parse_concat_df(pd.concat([per_file[pos] for per_file in out], ignore_index=True))
    for pos in range(3)
)

# The per-file results and the extraction helpers are no longer needed.
del out, _extract_opensmile_f, _parse_concat_df, _extract_parse_smile_df

  0%|          | 0/1273 [00:00<?, ?it/s]



## Extract `F0`-range from the LLDs

In [7]:
import numpy as np
from tsflex.features import FuncWrapper
from tsflex.features.utils import make_robust

In [8]:
def quantile_nz(a: np.ndarray, q: List[float]) -> List[Optional[float]]:
    """Compute quantiles over the strictly positive entries of `a`.

    Args:
        a: Numeric array. Only entries for which `a > 0` holds are kept; the
            boolean mask also flattens multi-dimensional input.
        q: Quantile levels, passed on to `np.quantile`.

    Returns:
        One value per entry of `q`: the `np.quantile` result (an array) when
        `a` has at least one positive entry, otherwise a list of `None`.
    """
    a_nz = a[a > 0]
    if a_nz.size == 0:
        return [None] * len(q)
    return np.quantile(a_nz, q=q)


def nonzero_count(a: np.ndarray) -> int:
    """Return the number of strictly positive entries in `a`.

    Counting is done over the whole array, so the result is a plain `int`
    whatever the shape of `a`. Builtin `sum` would return a 1-element array
    for the (n, 1) column input this function receives from the groupby cells.
    """
    return int(np.count_nonzero(a > 0))


def return_func_series_list(a: np.ndarray, f_list: "List[FuncWrapper]") -> pd.Series:
    """Apply every function in `f_list` to `a` and collect the results.

    Args:
        a: Array handed unchanged to each function.
        f_list: Callables that expose an `output_names` attribute; the names
            become the index labels of that function's output.

    Returns:
        A single Series with all outputs, sorted by index label. An empty
        float64 Series when `f_list` is empty.
    """
    parts = [pd.Series(data=f(a), index=f.output_names) for f in f_list]
    if not parts:
        return pd.Series(dtype="float64")
    # Concatenate once: growing a Series inside the loop is quadratic, and
    # seeding it with an empty Series is deprecated in recent pandas.
    return pd.concat(parts).sort_index()


# Quantile levels: mirrored pairs (1 - q, q) for each tail level, then the median.
qs = [
    level
    for q in (0, 0.01, 0.02, 0.03, 0.05, 0.1, 0.15, 0.2)
    for level in (1 - q, q)
] + [0.5]
display(str(qs))

'[1, 0, 0.99, 0.01, 0.98, 0.02, 0.97, 0.03, 0.95, 0.05, 0.9, 0.1, 0.85, 0.15, 0.8, 0.2, 0.5]'

### GeMAPS LLD

In [10]:
# List the GeMAPS LLD columns whose name contains "F0".
df_gemaps_lld.filter(like="F0").columns
# Naming used in these columns:
# logRelF0-H1-H2_sma3nz -> log freq difference between the harmonics
# sma -> moving average window
# nz -> non-zero

Index(['F0semitoneFrom27.5Hz_sma3nz', 'logRelF0-H1-H2_sma3nz',
       'logRelF0-H1-A3_sma3nz', 'F1amplitudeLogRelF0_sma3nz',
       'F2amplitudeLogRelF0_sma3nz', 'F3amplitudeLogRelF0_sma3nz'],
      dtype='object')

In [22]:
# Signal to summarise per file, and the summary functions applied to it.
s_name = "F0semitoneFrom27.5Hz_sma3nz"

f_gemaps_lld_funcs: List[FuncWrapper] = [
    # Quantiles over the positive samples, one output column per level in `qs`.
    make_robust(
        FuncWrapper(
            quantile_nz,
            output_names=[f"{s_name}_q={level}" for level in qs],
            q=qs,
        ),
        min_nb_samples=3,
        passthrough_nans=False,
    ),
    # Number of positive samples.
    make_robust(
        FuncWrapper(nonzero_count, output_names=[s_name + "_nzcount"]),
    ),
]

# Group the LLD rows by file and reduce each group to one row of summaries.
df_gemaps_lld_F0 = (
    df_gemaps_lld.groupby(by=["file"])[[s_name]]
    .apply(
        lambda grp: return_func_series_list(grp.values, f_list=f_gemaps_lld_funcs)
    )
    .reset_index()
)
display(df_gemaps_lld_F0)

Unnamed: 0,file,F0semitoneFrom27.5Hz_sma3nz_nzcount,F0semitoneFrom27.5Hz_sma3nz_q=0,F0semitoneFrom27.5Hz_sma3nz_q=0.01,F0semitoneFrom27.5Hz_sma3nz_q=0.02,F0semitoneFrom27.5Hz_sma3nz_q=0.03,F0semitoneFrom27.5Hz_sma3nz_q=0.05,F0semitoneFrom27.5Hz_sma3nz_q=0.1,F0semitoneFrom27.5Hz_sma3nz_q=0.15,F0semitoneFrom27.5Hz_sma3nz_q=0.2,F0semitoneFrom27.5Hz_sma3nz_q=0.5,F0semitoneFrom27.5Hz_sma3nz_q=0.8,F0semitoneFrom27.5Hz_sma3nz_q=0.85,F0semitoneFrom27.5Hz_sma3nz_q=0.9,F0semitoneFrom27.5Hz_sma3nz_q=0.95,F0semitoneFrom27.5Hz_sma3nz_q=0.97,F0semitoneFrom27.5Hz_sma3nz_q=0.98,F0semitoneFrom27.5Hz_sma3nz_q=0.99,F0semitoneFrom27.5Hz_sma3nz_q=1
0,/users/jonvdrdo/jonas/data/speech_webapp/backu...,17.0,12.191752,12.564453,12.937154,13.309855,14.055257,14.597499,15.503935,16.841757,18.356476,22.650394,23.989383,25.952409,27.560105,27.563536,27.565251,27.566966,27.568682
1,/users/jonvdrdo/jonas/data/speech_webapp/backu...,67.0,19.313572,19.344458,19.384260,19.433537,19.518947,21.738563,22.768820,23.518655,24.328886,25.999571,26.468599,26.608040,27.338765,27.916453,28.126937,28.286391,28.397816
2,/users/jonvdrdo/jonas/data/speech_webapp/backu...,21.0,14.463905,14.499279,14.534653,14.570027,14.640775,14.791085,15.602757,15.967938,16.963837,26.684515,30.657898,30.667315,30.709490,30.748100,30.767405,30.786710,30.806015
3,/users/jonvdrdo/jonas/data/speech_webapp/backu...,17.0,16.558184,16.584490,16.610795,16.637101,16.689713,17.795068,18.917401,19.784034,22.725342,26.955467,27.570501,28.193248,28.618507,28.676063,28.704840,28.733618,28.762396
4,/users/jonvdrdo/jonas/data/speech_webapp/backu...,27.0,14.005994,14.008942,14.011890,14.014838,14.448310,16.329633,18.086482,18.519012,20.931238,23.018240,24.202085,25.636305,28.272351,28.671990,28.691290,28.710590,28.729891
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1268,/users/jonvdrdo/jonas/data/speech_webapp/backu...,1199.0,12.570896,13.313062,14.335479,15.269173,16.487911,18.307598,22.233826,25.979074,31.035788,34.579975,35.089474,35.515230,36.604401,37.413639,38.357111,39.787693,44.039238
1269,/users/jonvdrdo/jonas/data/speech_webapp/backu...,1188.0,12.300839,13.698286,14.767228,15.824807,17.012822,19.628085,23.533434,28.540396,31.828205,35.547111,36.412104,37.641628,39.604219,42.019275,45.242463,54.351222,62.062374
1270,/users/jonvdrdo/jonas/data/speech_webapp/backu...,1137.0,12.281521,13.911784,15.411904,16.003989,16.920245,20.776879,25.578695,29.259179,32.053028,35.785120,36.438109,37.365578,39.496005,40.791048,43.667561,54.074749,61.839996
1271,/users/jonvdrdo/jonas/data/speech_webapp/backu...,1223.0,12.379935,13.091617,14.496284,16.090282,17.033836,20.512587,25.444006,29.228053,32.149685,35.785662,36.603656,37.703198,38.872800,40.580891,41.968274,45.099729,57.680775


### ComParE LLD

In [24]:
df_compare_lld.filter(like="F0").columns

Index(['F0final_sma'], dtype='object')

In [26]:
# Same per-file summary as for GeMAPS, now on the ComParE F0 signal.
s_name = "F0final_sma"

f_compare_lld_funcs: List[FuncWrapper] = [
    # Quantiles over the positive samples, one output column per level in `qs`.
    make_robust(
        FuncWrapper(
            quantile_nz,
            output_names=[f"{s_name}_q={level}" for level in qs],
            q=qs,
        ),
        min_nb_samples=3,
        passthrough_nans=False,
    ),
    # Number of positive samples.
    make_robust(
        FuncWrapper(nonzero_count, output_names=[s_name + "_nzcount"]),
    ),
]

# Group the LLD rows by file and reduce each group to one row of summaries.
df_compare_lld_F0 = (
    df_compare_lld.groupby(by=["file"])[[s_name]]
    .apply(
        lambda grp: return_func_series_list(grp.values, f_list=f_compare_lld_funcs)
    )
    .reset_index()
)
display(df_compare_lld_F0)

Unnamed: 0,file,F0final_sma_nzcount,F0final_sma_q=0,F0final_sma_q=0.01,F0final_sma_q=0.02,F0final_sma_q=0.03,F0final_sma_q=0.05,F0final_sma_q=0.1,F0final_sma_q=0.15,F0final_sma_q=0.2,F0final_sma_q=0.5,F0final_sma_q=0.8,F0final_sma_q=0.85,F0final_sma_q=0.9,F0final_sma_q=0.95,F0final_sma_q=0.97,F0final_sma_q=0.98,F0final_sma_q=0.99,F0final_sma_q=1
0,/users/jonvdrdo/jonas/data/speech_webapp/backu...,17.0,55.612980,57.090217,58.567454,60.044691,62.999165,65.177296,68.756898,74.045857,79.432457,101.827782,111.786874,125.094739,135.114194,135.140995,135.154396,135.167797,135.181198
1,/users/jonvdrdo/jonas/data/speech_webapp/backu...,70.0,53.277576,53.480272,53.794782,54.412597,61.195617,83.779074,84.921798,102.376797,111.343956,123.411115,126.672525,127.810335,132.949007,137.651314,139.480018,140.874490,141.815491
2,/users/jonvdrdo/jonas/data/speech_webapp/backu...,23.0,53.979771,55.826598,57.673425,59.520252,62.415413,62.910513,63.611592,65.590999,73.359673,120.637674,153.746248,161.660815,162.035667,162.382710,162.581571,162.780432,162.979294
3,/users/jonvdrdo/jonas/data/speech_webapp/backu...,17.0,73.260132,73.408948,73.557764,73.706580,74.004211,78.605843,83.735320,87.931921,102.206047,131.045206,135.416949,140.260162,143.639478,144.117256,144.356145,144.595034,144.833923
4,/users/jonvdrdo/jonas/data/speech_webapp/backu...,30.0,59.417526,60.060518,60.703510,61.346501,61.689878,61.793563,65.841770,69.700331,82.889271,104.110809,110.023577,116.045589,139.767745,144.029853,144.209152,144.388450,144.567749
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1268,/users/jonvdrdo/jonas/data/speech_webapp/backu...,1206.0,52.383827,58.273582,61.022183,63.578596,70.104742,77.450943,94.469307,122.695564,164.764435,202.531342,208.707863,213.945419,227.765541,238.715387,251.911862,273.795459,350.020721
1269,/users/jonvdrdo/jonas/data/speech_webapp/backu...,1181.0,53.923107,56.381485,60.222797,62.632336,68.851265,78.749413,102.513641,133.167755,171.011169,209.047165,222.004578,235.592178,257.474152,270.830170,303.068921,318.950470,544.965149
1270,/users/jonvdrdo/jonas/data/speech_webapp/backu...,1131.0,53.102188,56.499269,59.127883,61.796724,69.368813,82.242065,118.778240,148.485703,174.298172,216.621231,223.966690,234.692062,259.108734,281.267737,290.138116,338.574295,582.968689
1271,/users/jonvdrdo/jonas/data/speech_webapp/backu...,1224.0,52.902222,58.055949,60.250278,64.277085,72.089671,81.195032,112.265381,146.217807,175.744591,216.095883,226.166822,241.386131,255.828107,278.341906,301.550966,321.880326,604.734070


In [27]:
# Drop the window-level LLD frames; the cells below only use the per-file
# F0 summaries derived from them.
del df_compare_lld, df_gemaps_lld

# Join into one big dataframe

In [46]:
# Combine the GeMAPS functionals with both F0 summaries, matching rows on
# `file`. These are default (inner) merges, so only files present in all
# three frames are kept.
df_feat_tot = pd.merge(
    pd.merge(df_gemaps_func, df_gemaps_lld_F0, on=["file"]),
    df_compare_lld_F0,
    on=["file"],
)

display(df_feat_tot)

# Persist the combined feature table.
df_feat_tot.to_parquet(
    loc_data_dir.joinpath("df_speech_feat_tot.parquet"),
    engine="fastparquet",
)

Unnamed: 0,file,start,end,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,...,F0final_sma_q=0.2,F0final_sma_q=0.5,F0final_sma_q=0.8,F0final_sma_q=0.85,F0final_sma_q=0.9,F0final_sma_q=0.95,F0final_sma_q=0.97,F0final_sma_q=0.98,F0final_sma_q=0.99,F0final_sma_q=1
0,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:00:19.114666667,25.490253,0.377710,21.393423,22.749063,25.733017,4.339594,714.380615,...,94.199889,101.599884,117.742972,122.572626,132.948477,159.191696,319.512184,468.750630,480.069237,567.618042
1,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:00:22.442666667,25.295950,0.337538,21.988764,23.498669,25.115431,3.126667,339.231567,...,97.276611,106.305206,115.486900,117.776329,121.268616,130.701828,134.064529,136.431412,201.735916,589.890503
2,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:00:22.869333333,24.299351,0.323387,21.091982,22.897926,24.773689,3.681707,364.202515,...,92.464499,102.536163,113.433928,116.004996,120.970467,129.682268,133.559573,137.425247,151.650980,538.822998
3,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:00:22.698666667,25.043055,0.363119,21.044415,23.554974,25.340881,4.296467,317.010315,...,89.458710,105.272217,117.076057,119.361813,122.974945,132.629784,147.893730,198.690878,376.116025,568.420776
4,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:00:24.320000,24.814177,0.302032,21.689808,23.346060,25.622778,3.932970,345.970367,...,95.918501,105.102257,118.631383,122.340250,128.151599,137.596556,159.349033,164.926558,315.656856,613.824158
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1268,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:01:00.416000,30.592422,0.413754,23.543167,25.860874,37.002468,13.459301,424.827148,...,106.411546,121.280540,145.063663,217.161178,412.725775,516.720990,541.207266,554.061957,589.469916,617.425537
1269,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:00:43.093333333,30.133505,0.411654,23.194771,25.437296,36.387650,13.192879,372.059265,...,104.442857,117.835598,209.940866,251.882718,366.071762,465.922661,533.979252,562.364019,588.079594,616.100708
1270,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:00:40.362666667,29.553082,0.386342,23.864059,25.420221,34.920620,11.056561,310.785370,...,108.591864,118.200844,145.694055,215.574872,334.325372,431.512537,496.645176,543.841362,572.581180,610.554321
1271,/users/jonvdrdo/jonas/data/speech_webapp/backu...,0 days,0 days 00:00:43.776000,32.525726,0.407258,24.019632,26.189028,50.059898,26.040266,385.977692,...,109.192059,123.261086,304.804712,415.476280,501.119281,550.881372,566.037034,576.618112,586.937399,619.093384


---

In [44]:
# Show only the last three components of each file path.
df_feat_tot["file"].map(lambda wav_path: "/".join(wav_path.split("/")[-3:]))

0       2020-11-27__12:06:00+01:00__716dc108-5588-49ed...
1       2020-11-27__12:06:00+01:00__716dc108-5588-49ed...
2       2020-11-27__12:06:00+01:00__716dc108-5588-49ed...
3       2020-11-27__12:06:00+01:00__716dc108-5588-49ed...
4       2020-11-27__12:06:00+01:00__716dc108-5588-49ed...
                              ...                        
1268    2020-12-09__20:06:00+01:00__82caae94-6b1a-4a4f...
1269    2020-12-09__20:06:00+01:00__82caae94-6b1a-4a4f...
1270    2020-12-09__20:06:00+01:00__82caae94-6b1a-4a4f...
1271    2020-12-09__20:06:00+01:00__82caae94-6b1a-4a4f...
1272    2020-12-09__20:06:00+01:00__82caae94-6b1a-4a4f...
Name: file, Length: 1273, dtype: object

In [43]:
# Inspect the metadata columns that were parsed from the file paths.
df_feat_tot.loc[:, ["file", "DB", "pic_name", "time_str", "ID"]]

Unnamed: 0,file,DB,pic_name,time_str,ID
0,/users/jonvdrdo/jonas/data/speech_webapp/backu...,marloes,marloes,13:16:46,716dc108-5588-49ed-bf3c-04fb87212e2b
1,/users/jonvdrdo/jonas/data/speech_webapp/backu...,marloes,marloes,13:11:01,716dc108-5588-49ed-bf3c-04fb87212e2b
2,/users/jonvdrdo/jonas/data/speech_webapp/backu...,marloes,marloes,12:54:53,716dc108-5588-49ed-bf3c-04fb87212e2b
3,/users/jonvdrdo/jonas/data/speech_webapp/backu...,marloes,marloes,12:31:45,716dc108-5588-49ed-bf3c-04fb87212e2b
4,/users/jonvdrdo/jonas/data/speech_webapp/backu...,marloes,marloes,12:44:31,716dc108-5588-49ed-bf3c-04fb87212e2b
...,...,...,...,...,...
1268,/users/jonvdrdo/jonas/data/speech_webapp/backu...,Radboud,Rafd090_04_Caucasian_female_neutral_frontal,20:32:25,82caae94-6b1a-4a4f-8643-3827eee25fed
1269,/users/jonvdrdo/jonas/data/speech_webapp/backu...,Radboud,Rafd090_32_Caucasian_female_neutral_frontal,20:27:37,82caae94-6b1a-4a4f-8643-3827eee25fed
1270,/users/jonvdrdo/jonas/data/speech_webapp/backu...,Radboud,Rafd090_49_Caucasian_male_neutral_frontal,20:14:24,82caae94-6b1a-4a4f-8643-3827eee25fed
1271,/users/jonvdrdo/jonas/data/speech_webapp/backu...,Radboud,Rafd090_33_Caucasian_male_neutral_frontal,20:30:08,82caae94-6b1a-4a4f-8643-3827eee25fed
