## Feature Generation
In this file, we generate features for all four languages.
Input: DataFrame with file paths.
Output: DataFrame with features for all four languages.

### Introduction to eGeMAPS (extended Geneva Minimalistic Acoustic Parameter Set)

- eGeMAPS, the extended version of GeMAPS, is a minimalistic parameter set for acoustic analysis that focuses on the features most related to emotion.  
- It consists of 88 features covering energy, frequency, cepstral, spectral, and dynamic information.  
- We used all 88 features, as done in the paper.  
- We used the openSMILE toolkit for feature extraction.  
- We used the latest version of the set, eGeMAPSv02 (`opensmile.FeatureSet.eGeMAPSv02`).

In [22]:
# dependencies
import opensmile
import pandas as pd

In [23]:
# data
# Load the per-language tables that list the audio files to process.
# Forward-slash relative paths resolve on both Windows and POSIX systems and
# match the separators used by the save cell at the end of this notebook.
# NOTE(review): the head() outputs show an 'Unnamed: 0' column, which suggests
# these CSVs were written with their index included -- confirm whether
# index_col=0 is wanted here.
df_german = pd.read_csv('../Data/German/data_path_german.csv')
df_urdu = pd.read_csv('../Data/Urdu/data_path_urdu.csv')
df_english = pd.read_csv('../Data/English/data_path_english.csv')
df_italian = pd.read_csv('../Data/Italian/data_path_italian.csv')

In [24]:
def extract_features(df, column="filename"):
    """Append openSMILE eGeMAPSv02 functionals to a table of audio file paths.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per audio file. It is not modified.
    column : str, default "filename"
        Name of the column in ``df`` that holds the audio file paths.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns of ``df`` followed by the extracted
        feature columns, carrying the same index as ``df``.

    Raises
    ------
    ValueError
        If the number of extracted feature rows differs from the number of
        rows in ``df`` (features could then not be matched to their files).
    """
    # openSMILE initialization (eGeMAPS configuration, functionals level)
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=opensmile.FeatureLevel.Functionals,
    )

    paths = df[column]

    if len(paths) == 0:
        # pd.concat() raises on an empty list, so build an empty feature
        # table with the extractor's column names instead.
        features_df = pd.DataFrame(
            columns=smile.feature_names, index=df.index, dtype=float
        )
    else:
        # Feature extraction: one result frame per file, stacked in row order.
        per_file = [smile.process_file(path) for path in paths]
        features_df = pd.concat(per_file).reset_index(drop=True)

        # The positional pairing below is only valid with exactly one feature
        # row per input row; fail loudly rather than shift features silently.
        if len(features_df) != len(df):
            raise ValueError(
                f"Expected {len(df)} feature rows (one per file) "
                f"but extracted {len(features_df)}."
            )

        # concat(axis=1) aligns on index labels, so reuse df's own index;
        # otherwise a filtered or re-indexed df would yield NaN-padded rows.
        features_df.index = df.index

    # Combine features with the dataframe
    return pd.concat([df, features_df], axis=1)


In [14]:
# German: extract features for every path in the "filename" column.
df_features_german = extract_features(df_german, column="filename")

In [15]:
# Urdu: extract features for every path in the "filename" column.
df_features_urdu = extract_features(df_urdu, column="filename")

In [16]:
# English: extract features for every path in the "filename" column.
df_features_english = extract_features(df_english, column="filename")

In [25]:
# Italian: extract features for every path in the "filename" column.
df_features_italian = extract_features(df_italian, column="filename")

In [17]:
# Preview the first five rows of the German feature table.
df_features_german.head(n=5)

Unnamed: 0,filename,emotion,speaker_id,valence,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,..\Data\German\wav\03a01Fa.wav,happiness,3,1,31.188166,0.15256,27.766502,31.412098,36.340343,8.573841,...,-0.012351,0.010897,0.218922,4.255319,3.278689,0.156667,0.084591,0.108571,0.036422,-21.647932
1,..\Data\German\wav\03a01Nc.wav,neutral,3,1,25.022938,0.14854,20.637066,25.118477,28.660353,8.023287,...,-0.020058,0.007849,0.450747,6.25,3.870968,0.101667,0.055202,0.115714,0.049816,-18.010019
2,..\Data\German\wav\03a01Wa.wav,anger,3,0,34.29232,0.102067,30.504089,33.584766,37.343616,6.839527,...,0.014669,0.012394,0.388366,4.83871,3.314917,0.15,0.07303,0.111429,0.060339,-17.85531
3,..\Data\German\wav\03a02Fc.wav,happiness,3,1,37.576572,0.108607,34.083504,37.371193,41.233379,7.149876,...,-0.007576,0.015585,0.243682,4.522613,2.57732,0.246,0.21096,0.1,0.042032,-18.062223
4,..\Data\German\wav\03a02Nc.wav,neutral,3,1,25.04019,0.116655,21.225212,26.198669,27.725224,6.500011,...,-0.035085,0.005547,0.444158,6.338028,3.649635,0.148,0.130138,0.086667,0.026247,-16.109831


In [18]:
# Preview the first five rows of the Urdu feature table.
df_features_urdu.head(n=5)

Unnamed: 0,filename,emotion,speaker_id,valence,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,..\Data\Urdu\Angry\SM1_F10_A010.wav,Angry,12,0,37.416851,0.162711,36.100449,39.646458,41.276707,5.176258,...,-0.020775,-0.025433,0.560624,4.347826,2.389079,0.325714,0.353548,0.163333,0.168589,-21.443512
1,..\Data\Urdu\Angry\SM1_F11_A011.wav,Angry,12,0,39.29084,0.048453,38.296627,39.4426,40.628967,2.33234,...,-0.053453,-0.029727,1.589942,7.035176,1.554404,0.613333,0.318573,0.03,0.0,-18.693861
2,..\Data\Urdu\Angry\SM1_F12_A012.wav,Angry,12,0,36.559757,0.159693,31.33247,39.228329,40.891193,9.558723,...,-0.0634,-0.029255,0.695361,5.016722,2.730376,0.27875,0.293106,0.106,0.124354,-19.787762
3,..\Data\Urdu\Angry\SM1_F13_A013.wav,Angry,12,0,39.653111,0.099355,38.884323,40.339897,41.908745,3.024422,...,-0.051385,-0.025643,0.648632,4.522613,2.072539,0.3875,0.192273,0.075,0.048218,-19.527483
4,..\Data\Urdu\Angry\SM1_F14_A014.wav,Angry,12,0,40.303036,0.095656,39.875011,40.965897,42.031673,2.156662,...,-0.05839,-0.02301,0.73193,4.682274,3.071672,0.252222,0.218316,0.05625,0.077611,-19.405691


In [19]:
# Preview the first five rows of the English feature table.
df_features_english.head(n=5)

Unnamed: 0,filename,emotion,speaker_id,valence,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,..\Data\English\DC\a01.wav,anger,1,0,32.28838,0.253543,28.019905,31.009491,34.035744,6.015839,...,-0.130033,-0.008058,0.175102,3.581267,2.234637,0.29625,0.255389,0.2475,0.217529,-15.69136
1,..\Data\English\DC\a02.wav,anger,1,0,28.624495,0.083322,27.068924,28.368698,29.564381,2.495457,...,-0.127977,-0.0088,0.123429,3.508772,0.890208,0.666667,0.507631,0.325,0.268375,-15.941064
2,..\Data\English\DC\a03.wav,anger,1,0,32.756123,0.0739,30.584631,33.15353,34.469784,3.885153,...,-0.128269,-0.007824,0.087312,2.427185,0.995025,0.495,0.265,0.323333,0.19754,-13.958084
3,..\Data\English\DC\a04.wav,anger,1,0,30.76403,0.126925,28.082649,31.094275,33.430504,5.347855,...,-0.113702,-0.010014,0.08804,3.690037,1.12782,0.553333,0.402851,0.2325,0.181435,-13.02
4,..\Data\English\DC\a05.wav,anger,1,0,32.762295,0.264996,28.170233,30.551094,34.771942,6.601709,...,-0.104462,-0.01039,0.08296,3.691275,1.706485,0.374,0.653226,0.188,0.185515,-13.182361


In [26]:
# Preview the first five rows of the Italian feature table.
df_features_italian.head(n=5)

Unnamed: 0,filename,emotion,speaker_id,valence,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,..\Data\Italian\f1\dis-f1-b1.wav,disgust,1,0,38.041195,0.062761,36.693428,38.990891,39.75058,3.057152,...,-0.054321,-0.002343,0.195009,2.816901,1.792115,0.186,0.05748,0.291667,0.316934,-20.434135
1,..\Data\Italian\f1\dis-f1-b2.wav,disgust,1,0,40.034466,0.079689,37.188023,40.978489,43.000435,5.812412,...,-0.041974,0.000791,0.143104,4.577465,2.150538,0.215,0.112953,0.195714,0.241238,-22.372519
2,..\Data\Italian\f1\dis-f1-b3.wav,disgust,1,0,40.346146,0.06286,37.921173,41.041718,42.420361,4.499187,...,-0.042082,0.000804,0.195658,3.797468,2.155173,0.244,0.233461,0.2425,0.151554,-21.750406
3,..\Data\Italian\f1\dis-f1-d1.wav,disgust,1,0,40.030762,0.142646,35.758671,42.093567,44.378151,8.61948,...,-0.031066,-0.00276,0.264424,3.535354,3.108808,0.108333,0.052731,0.158571,0.18122,-24.157581
4,..\Data\Italian\f1\dis-f1-d2.wav,disgust,1,0,34.73925,0.244674,31.76965,37.416992,39.738838,7.969189,...,-0.05439,-0.005514,0.16228,2.888087,2.941176,0.1325,0.06057,0.185,0.20025,-25.894457


In [27]:
# Write each language's feature table to its data folder, without the index.
df_features_german.to_csv(path_or_buf="../Data/German/features_german.csv", index=False)
df_features_urdu.to_csv(path_or_buf="../Data/Urdu/features_urdu.csv", index=False)
df_features_english.to_csv(path_or_buf="../Data/English/features_english.csv", index=False)
df_features_italian.to_csv(path_or_buf="../Data/Italian/features_italian.csv", index=False)