## 1. Data Preparation

Prerequisites:

- installed PRAAT
- installed openSMILE

In [None]:
import glob
import os
import shutil
import sys

import numpy as np
import pandas as pd

Change these paths:

1. openSMILE installation
2. path to the PRAAT application (win: "C:\Program Files\Praat.exe", mac: /Applications/Praat.app/Contents/MacOS/Praat, linux:  /usr/bin/praat)
3. the directory of the audio files
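4. an (empty) data directory folder where you want to store anonymized files such as textgrids or dataframes with the audio features
5. the directory with the group files: one .txt file per group, each listing the subject IDs of the participants in that group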

In [None]:
# Absolute form of the path separator, i.e. the filesystem root ('/' on POSIX).
# Every path below is built from it; replace the '' placeholders with your own folders.
os_sep = os.path.abspath(os.sep)
opensmile_dir = os.path.join(os_sep, '') #openSMILE installation location
praat_path = os.path.join(os_sep, 'Applications','Praat.app', 'Contents', 'MacOS', 'Praat') # PRAAT executable
audio_dir = os.path.join(os_sep, '') # audio data
data_dir = os.path.join(os_sep, '') #csv output
#directory with the txt files that contain the subject id's of the participants in that group
#(one file per group; the group comparison tables are also written here)
dem_dir = os.path.join(os_sep, '')

In [None]:
def checkDirs(dir_list):
    """Make sure every directory in ``dir_list`` exists, creating missing ones.

    Missing parent directories are created as well, so nested output folders
    (e.g. ``<data_dir>/opensmile/egemaps_summary_turns``) can be passed directly.
    Directories that already exist are left untouched.

    Parameters
    ----------
    dir_list : iterable of str
        Paths of the directories that have to exist.

    Raises
    ------
    OSError
        If a directory cannot be created (e.g. missing permissions, or the path
        exists but is a regular file). Errors are no longer silently swallowed.
    """
    import os

    for path in dir_list:
        # exist_ok avoids the stat-then-create race and the bare except of the old version
        os.makedirs(path, exist_ok=True)

##### 1.1. Split audio data with PRAAT

Audio files contain two channels for the interviewer (ch1) and participant (ch2). We split those channels and save them separately to extract features from each channel.

In [None]:
import subprocess

#define and create directories
split_ch_output = os.path.join(audio_dir, 'split_channels')
temp_dir = os.path.join(audio_dir, 'tmp')

checkDirs([split_ch_output, temp_dir])


#praat script to split files
split_script = os.path.join(os.getcwd(), 'PRAATScripts', 'separate_channels.praat')

#praat script doesn't handle many files easily, to prevent memory overflowing we process
#the files in batches of at most 10 at a time
file_list = sorted(glob.glob(os.path.join(audio_dir, '*.wav')))

#round up: with fewer than 10 files the old int(len / 10.) gave 0 sections (np.array_split
#raises) and e.g. 19 files ended up in one single batch of 19
n_batches = int(np.ceil(len(file_list) / 10.))
batches = np.array_split(file_list, n_batches) if n_batches else []

In [None]:
for i, batch in enumerate(batches):

    try:
        #move the files of this batch into the tmp folder, the praat script processes
        #every file it finds in its input directory
        #shutil.move instead of a shell 'mv': works with spaces in paths and on Windows
        for file in batch:
            new_path = os.path.join(temp_dir, os.path.basename(file))
            shutil.move(str(file), new_path)

        #run praat script on batch with arguments
        subprocess.call([praat_path,
                    '--run',
                    split_script,     #path to script
                    temp_dir + os.sep, #input dir + praat needs the slash at the end of a path
                    split_ch_output + os.sep]) #output dir

    finally:
        #always move the files back to the audio folder, also if PRAAT could not be run,
        #otherwise they would be processed again together with the next batch
        for file in batch:
            tmp_path = os.path.join(temp_dir, os.path.basename(file))
            if os.path.exists(tmp_path):
                shutil.move(tmp_path, str(file))

    print(f'finished batch {i + 1} out of {n_batches}')

##### 1.2. Concatenate and annotate the audio files with PRAAT

To automatically annotate the turns of each speaker, a PRAAT script detects the silences in the interviewer track (ch1) and annotates these silences in a TextGrid file. If the interviewer is silent (i.e., between asking questions), it is assumed that the participant is speaking. For each channel, all speaking turns are then concatenated into one audio track.

In [None]:
#directories to save concatenated audio and generated textgrids
concat_ch_output = os.path.join(audio_dir, 'concatenated_channels')
textgrid_dir = os.path.join(data_dir, 'textgrids')
turn_textgrids = os.path.join(data_dir, 'textgrids', 'turn_textgrids')

#textgrid_dir is listed before turn_textgrids so that the parent folder exists first
checkDirs([concat_ch_output, textgrid_dir, turn_textgrids, temp_dir])

#PRAAT script
concat_script = os.path.join(os.getcwd(), 'PRAATScripts', 'praat_splitsen.praat')

#again in batches of at most 10 to prevent PRAAT from crashing
file_list = sorted(glob.glob(os.path.join(audio_dir, '*.wav')))

#round up: with fewer than 10 files the old int(len / 10.) gave 0 sections (np.array_split
#raises) and e.g. 19 files ended up in one single batch of 19
n_batches = int(np.ceil(len(file_list) / 10.))
batches = np.array_split(file_list, n_batches) if n_batches else []

In [None]:
for i, batch in enumerate(batches):

    try:
        #move the files of this batch into the tmp folder, the praat script processes
        #every file it finds in its input directory
        #shutil.move instead of a shell 'mv': works with spaces in paths and on Windows
        for file in batch:
            new_path = os.path.join(temp_dir, os.path.basename(file))
            shutil.move(str(file), new_path)

        #run praat script on batch
        subprocess.call([praat_path,
                    '--run',
                    concat_script,    #path to script
                    temp_dir + os.sep, #praat needs the slash at the end of a path
                    concat_ch_output + os.sep, #output audio
                    turn_textgrids + os.sep])  #output textgrids

    finally:
        #always move the files back to the audio folder, also if PRAAT could not be run,
        #otherwise they would be processed again together with the next batch
        for file in batch:
            tmp_path = os.path.join(temp_dir, os.path.basename(file))
            if os.path.exists(tmp_path):
                shutil.move(tmp_path, str(file))

    print(f'finished batch {i + 1}')

## 2. Feature extraction

##### 2.1. Feature extraction with eGeMAPS in openSMILE

openSMILE has different configuration files and arguments to extract features. Most of the audio features (i.e., pitch or loudness) are computed every 10ms and then summarized (mean, std) over a given period of time. We extract the start and end time for each turn from the TextGrid files we just created and get the summarized features for each turn.

In [None]:
from praatio import tgio
import subprocess

#define and create input and output directories
audio = os.path.join(audio_dir, 'split_channels')
textgrid_dir = os.path.join(data_dir, 'textgrids', 'turn_textgrids')
config_file = os.path.join(opensmile_dir, 'config', 'gemaps', 'eGeMAPSv01a.conf')

egemaps_output = os.path.join(data_dir, 'opensmile', 'egemaps_summary_turns')

#the parent folder is listed first so that the nested output folder can be created
checkDirs([os.path.join(data_dir, 'opensmile'), egemaps_output])

#label in the 'silences' tier that marks the turns of the speaker recorded on each channel:
#the interviewer (ch1) talks while 'interviewer_speaks', the participant (ch2) is assumed
#to talk while the interviewer is silent
turn_label = {'ch1': 'interviewer_speaks',
              'ch2': 'interviewer_silent'}

for file in sorted(glob.glob(os.path.join(audio, '*.wav'))):

    filename = os.path.basename(os.path.normpath(file))
    sub_id = filename[:4]

    #decide on the channel from the file name only, a 'ch1' somewhere in the
    #directory path must not count; files of neither channel are not processed
    channel = next((ch for ch in ('ch1', 'ch2') if ch in filename), None)
    if channel is None:
        continue

    #load textgrid with turn annotations
    textgrid = sorted(glob.glob(os.path.join(textgrid_dir, sub_id + '*.TextGrid')))
    if not textgrid:
        #previously this ended the whole run with an IndexError
        print(f'no TextGrid found for {filename}, skipping file')
        continue

    output_file = os.path.join(egemaps_output, filename[:-4] + '.csv')

    #read textgrid using praatio, extract entries of the annotated tier
    tg = tgio.openTextgrid(textgrid[0])
    entryList = tg.tierDict['silences'].entryList

    #(start, stop) of every turn of the speaker on this channel; entries are (start, stop, label)
    intervals = [(entry[0], entry[1]) for entry in entryList
                 if entry[2] == turn_label[channel]]

    for start, stop in intervals:

        #name that's displayed in column of output file
        instname = str(start) + '-' + str(stop)

        #run openSMILE extraction with arguments
        subprocess.run(['SMILExtract',
                        '-C', config_file,   #egemaps configuration
                        '-I', file,          #audio file
                        '-csvoutput', output_file,  #csv summary file
                        '-start', str(start),    #time interval from which features are extracted
                        '-end', str(stop),
                        '-instname', instname])  #start and end for each turn

## 3. Analysis preparation

In [None]:
#eGeMAPS column names of the features of interest, in the order:
#pause duration, syllable rate, mean pitch, mean loudness, pitch variability
features = [
    'MeanUnvoicedSegmentLength',
    'VoicedSegmentsPerSec',
    'F0semitoneFrom27.5Hz_sma3nz_amean',
    'loudness_sma3_amean',
    'F0semitoneFrom27.5Hz_sma3nz_stddevNorm',
]

#one name per feature for convenient access
pauses, syll_rate, pitch, loudness, pitch_var = features

##### 3.2. Pre-process the data

Since the turns of each speaker are annotated automatically, the process is error-prone. Specifically, the speaking turns are annotated based on the interviewer track: any time the interviewer is pausing, it is assumed that the participant is speaking. That is obviously not always true, leading to trailing silences in the participant track and to false positives, where an entire interval is falsely labeled as speech. To exclude such false positives, only sounding intervals (F0 > 0) are used for analysis. OpenSMILE already thresholds the pitch data, so all pitch values that are 0.0 are replaced by NaN values. At any point where pitch = 0, loudness, syllable rate and pitch variability are also set to 0 and replaced by NaN, as these can only be computed from speaking intervals.

In [None]:
#input: per-turn eGeMAPS summaries; output: the same tables reduced to the features of interest
egemaps_output = os.path.join(data_dir, 'opensmile', 'egemaps_summary_turns')
filtered_output = os.path.join(data_dir, 'opensmile', 'egemaps_summary_turns_zero_filtered')

#only the output folder has to be created here
checkDirs([filtered_output])

In [None]:
#sorted lists of the summary files per channel; the lists are paired element by element
#later on, which assumes that every subject has exactly one file per channel
ch1_files = []
ch2_files = []

for file in sorted(glob.glob(os.path.join(egemaps_output, '*.csv'))):
    #look at the file name only: a 'ch1' somewhere in the directory path would
    #otherwise put every file into the ch1 list
    if 'ch1' in os.path.basename(file):
        ch1_files.append(file)
    else:
        ch2_files.append(file)

In [None]:
pd.options.mode.chained_assignment = None  # default='warn'

#mean pitch is the voicing indicator: 0.0 means no voiced speech in that turn
pitch_col = 'F0semitoneFrom27.5Hz_sma3nz_amean'
#features that can only be computed from voiced speech
voiced_cols = ['loudness_sma3_amean', 'VoicedSegmentsPerSec', 'F0semitoneFrom27.5Hz_sma3nz_stddevNorm']


def _filter_unvoiced_turns(summary_file):
    """Load one eGeMAPS summary file and blank out the turns without voiced speech.

    Keeps the 'name' column plus the columns listed in `features`. Wherever the mean
    pitch is 0.0, the voicing dependent features are set to 0.0 as well; afterwards
    every 0.0 in the pitch column and in those features is replaced by NaN.
    """
    df = pd.read_csv(summary_file, sep = ';')

    #explicit copy, so that the assignments below work on an independent dataframe
    df_filt = df[['name'] + features].copy()

    #set remaining columns to zero wherever pitch is zero
    df_filt.loc[df_filt[pitch_col] == 0.0, voiced_cols] = 0.0

    #replace zeros with nans; replace() returns a new object, so the result has to be
    #assigned back (it was discarded before and the zeros stayed in the data)
    nan_cols = [pitch_col] + voiced_cols
    df_filt[nan_cols] = df_filt[nan_cols].replace(0.0, np.nan)

    return df_filt


for ch1, ch2 in zip(ch1_files, ch2_files):

    #keep track of sub_id for double checking (taken from the ch1 file for both channels)
    sub_id = os.path.basename(ch1)[:4]

    for summary_file in (ch1, ch2):

        df_filt = _filter_unvoiced_turns(summary_file)
        df_filt['sub_id'] = sub_id

        filename = os.path.basename(summary_file)[:-4]
        df_filt.to_csv(os.path.join(filtered_output, filename + '_zero_drop.csv'), sep = ';')

In [None]:
#sorted lists of the zero-filtered files per channel; the lists are paired element by
#element later on, which assumes that every subject has exactly one file per channel
ch1_files_filt = []
ch2_files_filt = []

for file in sorted(glob.glob(os.path.join(filtered_output, '*.csv'))):
    #look at the file name only: a 'ch1' somewhere in the directory path would
    #otherwise put every file into the ch1 list
    if 'ch1' in os.path.basename(file):
        ch1_files_filt.append(file)
    else:
        ch2_files_filt.append(file)

In [None]:
def calculateSynchrony(ch1_files, ch2_files, features):
    """Correlate the turn-wise features of both speakers for every recording.

    The files are paired element by element (``zip``). For every pair and every
    feature the two series are cut to the same number of turns, turns that are NaN
    for either speaker are dropped, and the remaining turns are correlated with
    Spearman's rank correlation.

    Parameters
    ----------
    ch1_files, ch2_files : list of str
        Paths to the ';'-separated summary files of channel 1 and channel 2. The
        first column is read as index. The first four characters of the ch1 file
        name are stored as subject id.
    features : list of str
        Column names to correlate; every name has to be a column in both files.

    Returns
    -------
    dict
        Maps each feature name to a list with one dict per file pair, with the keys
        'soundname' (subject id), 'r' (Spearman correlation), 'p' (its p-value) and
        'r_z' (Fisher z-transformed r, ``arctanh(r)``).
    """
    import pandas as pd
    import scipy.stats as stats

    #one result list per requested feature (the keys were hard-coded before, so any
    #other feature name ended in a KeyError)
    feature_rows = {feature: [] for feature in features}

    for ch1, ch2 in zip(ch1_files, ch2_files):

        ch1_df = pd.read_csv(ch1, sep = ';', index_col= [0])
        ch2_df = pd.read_csv(ch2, sep = ';', index_col= [0])

        sub_id = os.path.basename(ch1)[:4]

        for feature in features:

            speaker_1 = ch1_df[feature].to_numpy()
            speaker_2 = ch2_df[feature].to_numpy()

            #sometimes the number of turns is unequal, in that case drop the surplus turns
            #at the end (dropping only one turn broke the masks below for larger differences)
            n_turns = min(len(speaker_1), len(speaker_2))
            speaker_1 = speaker_1[:n_turns]
            speaker_2 = speaker_2[:n_turns]

            #keep only the turns that are valid for both speakers
            valid = ~(np.isnan(speaker_1) | np.isnan(speaker_2))
            x = speaker_1[valid]
            y = speaker_2[valid]

            #calculate synchrony using spearman r
            r, p = stats.spearmanr(x, y)

            #Fisher z-transformation of r
            r_z = np.arctanh(r)

            feature_rows[feature].append({'soundname': sub_id,
                                          'r': r,
                                          'p': p,
                                          'r_z': r_z})

    return feature_rows

In [None]:
#per feature: one row (subject id, Spearman r, p, Fisher z) for every pair of zero-filtered files
feature_rows = calculateSynchrony(ch1_files_filt, ch2_files_filt, features)

In [None]:
#folder for the group level tables, one synchrony summary per feature
summary_dir = os.path.join(data_dir, 'group_level')
checkDirs([summary_dir])

for feature in feature_rows:
    summary_path = os.path.join(summary_dir, feature + '_summary.csv')

    #one row per subject with soundname, r, p and r_z
    pd.DataFrame(feature_rows[feature]).to_csv(summary_path, sep = ';')

##### 3.4. make group comparisons

In [None]:
#folder that holds the '<feature>_summary.csv' synchrony tables
summary_dir = os.path.join(data_dir, 'group_level')

In [None]:
# load summary dataframes
pitch = pd.read_csv(os.path.join(summary_dir, 'F0semitoneFrom27.5Hz_sma3nz_amean_summary.csv'), sep = ';', index_col = [0])
loudness = pd.read_csv(os.path.join(summary_dir, 'loudness_sma3_amean_summary.csv'), sep = ';', index_col = [0])
syll = pd.read_csv(os.path.join(summary_dir, 'VoicedSegmentsPerSec_summary.csv'), sep = ';', index_col = [0])
pause = pd.read_csv(os.path.join(summary_dir, 'MeanUnvoicedSegmentLength_summary.csv'), sep = ';', index_col = [0])
pitch_var = pd.read_csv(os.path.join(summary_dir, 'F0semitoneFrom27.5Hz_sma3nz_stddevNorm_summary.csv'), sep = ';', index_col = [0])

Some functions to make looping easier

In [None]:
def getIndices(df, group):
    """Return the 'soundname' values of ``df`` that belong to ``group``.

    A soundname belongs to the group if its first four characters are contained in
    ``group``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'soundname' column.
    group : container of str
        Subject ids of the group.

    Returns
    -------
    list
        The matching 'soundname' values, unchanged and in the order of ``df``.
    """
    #str(): purely numeric ids are parsed as integers by read_csv and cannot be sliced
    group_indices = [k for k in df['soundname'] if str(k)[:4] in group]

    return group_indices

In [None]:
#group name (file name without extension) -> array with the subject ids of that group
groups = {}

#os.path.join builds the search pattern with the correct separator on every platform
for file in sorted(glob.glob(os.path.join(dem_dir, '*.txt'))):
    groupname = os.path.splitext(os.path.basename(file))[0]

    #np.atleast_1d: a file with a single id is loaded as a 0-d array otherwise
    groups[groupname] = np.atleast_1d(np.loadtxt(file, dtype= str))

The pairs of groups that are compared with t-tests

In [None]:
#pairs of groups that are compared with each other; every name is used as a key
#of `groups`, i.e. it has to match a .txt file name (without extension) in dem_dir
tests = [('control_subs', 'patient_subs'), 
         ('high_panss_subs', 'low_panss_subs'),
         ('control_subs', 'high_panss_subs'), 
         ('controls_m', 'controls_f'),
         ('sz_m', 'sz_f'),
         ('controls_same', 'controls_diff'),
         ('sz_same', 'sz_diff')]

In [None]:
#statistics reported per test (NOTE(review): `columns` is not referenced by the other cells shown here)
columns = ['T', 'p']
#short labels for the rows of the result tables, in the same order as the dataframes in `dfs`
row_labels = ['f0', 'loudness', 'art_rate', 'avg_pause_dur', 'pitch_var']
dfs = [pitch, loudness, syll, pause, pitch_var]

In [None]:
import scipy.stats as stats

#'<group 1> <group 2>' -> table with one row per feature and the columns T and p
test_dfs = {}

for name_g1, name_g2 in tests:

    subs_g1 = groups[name_g1]
    subs_g2 = groups[name_g2]

    rows = {}

    for row_label, df in zip(row_labels, dfs):

        #rows of the subjects that belong to each group
        in_g1 = df['soundname'].isin(getIndices(df, subs_g1))
        in_g2 = df['soundname'].isin(getIndices(df, subs_g2))

        #compare the converted r values, equal_var = False --> Welch's t-test
        t, p = stats.ttest_ind(df[in_g1]['r_z'], df[in_g2]['r_z'], equal_var = False)

        rows[row_label] = {'T': t, 'p': p}

    #transpose so that the features are the rows
    test_dfs[name_g1 + ' ' + name_g2] = pd.DataFrame(rows).T

In [None]:
#stack the tables of all comparisons, the compared group names become the outer index level
ttest_df = pd.concat(test_dfs.values(), keys = test_dfs.keys())

In [None]:
#display the combined table of the group comparisons
ttest_df

In [None]:
#save the group comparisons in dem_dir (comma-separated, pandas default)
ttest_df.to_csv(os.path.join(dem_dir, 'ttest_groups.csv'))

Test the synchrony of each group against 0 (one-sample t-test)

In [None]:
import scipy.stats as stats

#group name -> table with one row per feature and the columns T and p
one_test_dfs = {}
ttest_groups = ['control_subs', 'patient_subs']

for group in ttest_groups:

    group_subs = groups[group]
    rows = {}

    for row_label, df in zip(row_labels, dfs):

        #rows of the subjects that belong to this group
        in_group = df['soundname'].isin(getIndices(df, group_subs))

        #test the converted r values of the group against a mean of 0
        t, p = stats.ttest_1samp(df[in_group]['r_z'], 0.0)

        rows[row_label] = {'T': t, 'p': p}

    #transpose so that the features are the rows
    one_test_dfs[group] = pd.DataFrame(rows).T

In [None]:
#stack the tables of both groups, the group names become the outer index level
one_test_df = pd.concat(one_test_dfs.values(), keys = one_test_dfs.keys())

In [None]:
#print floats with 10 decimals (presumably so that small p-values remain readable);
#the option is global and also affects every dataframe displayed afterwards
pd.set_option('display.float_format', lambda x: '%.10f' % x)

one_test_df

In [None]:
#save the tests against 0 in dem_dir (comma-separated, pandas default)
#NOTE(review): the values come from stats.ttest_1samp, i.e. a one-sample test that is
#two-sided by default in scipy, although the file is called 'one_sided' - confirm the naming
one_test_df.to_csv(os.path.join(dem_dir, 'one_sided_groups.csv'))