# In [1]:
import pandas as pd
import numpy as np
import os
from scipy.stats import skew, kurtosis

def summarize_features(df):
    """Collapse every column of *df* into six descriptive statistics.

    The statistics are, in order: mean, std, min, max (pandas column-wise
    reductions with their default settings), followed by skew and kurtosis
    (``scipy.stats`` functions applied to each column with their defaults).

    Returns a single Series formed by stacking the per-statistic results;
    its index has two levels, ``(statistic name, column label)``.
    """
    per_stat = {}

    # Reductions that DataFrame provides directly as methods.
    for stat_name in ('mean', 'std', 'min', 'max'):
        per_stat[stat_name] = getattr(df, stat_name)()

    # Shape statistics come from SciPy and are applied column by column.
    for stat_name, stat_fn in (('skew', skew), ('kurtosis', kurtosis)):
        per_stat[stat_name] = df.apply(stat_fn)

    return pd.concat(per_stat, axis=0)

def process_utterance(start, end, covarep_df, formant_df, sample_rate=100):
    """Summarize the COVAREP and formant frames spanned by one utterance.

    Args:
        start: Utterance start time (presumably seconds -- confirm against
            the transcript format).
        end: Utterance end time, in the same unit as ``start``.
        covarep_df: Frame-level COVAREP features, one row per frame.
        formant_df: Frame-level formant features, one row per frame.
        sample_rate: Number of feature frames per time unit; times are
            multiplied by this to obtain positional row indices.

    Returns:
        ``None`` when the ``[start, end)`` frame range selects no rows from
        either frame. Otherwise a Series of summary statistics whose index
        has three levels: ``(source, statistic, column label)`` with
        ``source`` being ``'covarep'`` or ``'formant'``.
    """
    # Convert time to frame index (int() truncates toward zero).
    start_idx = int(start * sample_rate)
    end_idx = int(end * sample_rate)

    covarep_segment = covarep_df.iloc[start_idx:end_idx]
    formant_segment = formant_df.iloc[start_idx:end_idx]

    if covarep_segment.empty or formant_segment.empty:
        return None

    # Key each summary by its source. Both frames are typically loaded
    # without a header and therefore share integer column labels; without
    # the extra level the combined index would contain duplicate
    # (statistic, column) labels, which are ambiguous and cannot be aligned
    # when the per-utterance Series are later assembled into a DataFrame.
    return pd.concat(
        {
            'covarep': summarize_features(covarep_segment),
            'formant': summarize_features(formant_segment),
        },
        axis=0,
    )

def preprocess_participant(transcript_path, covarep_path, formant_path):
    """Build per-utterance feature rows and texts for one participant.

    Reads a tab-separated transcript (using its ``start_time``,
    ``stop_time`` and ``value`` columns) plus two header-less CSV files of
    frame-level COVAREP and formant features. Each transcript row is passed
    to ``process_utterance``; rows for which it returns ``None`` are
    dropped.

    Returns:
        A ``(features, texts)`` tuple: a DataFrame with one row per kept
        utterance, and the list of the matching ``value`` entries in the
        same order.
    """
    utterances = pd.read_csv(transcript_path, sep="\t")
    covarep_frames = pd.read_csv(covarep_path, header=None)
    formant_frames = pd.read_csv(formant_path, header=None)

    # Collect (summary, text) pairs so the two outputs cannot drift apart.
    kept = []
    for _, utterance in utterances.iterrows():
        summary = process_utterance(
            utterance['start_time'],
            utterance['stop_time'],
            covarep_frames,
            formant_frames,
        )
        if summary is None:
            continue
        kept.append((summary, utterance['value']))

    feature_rows = [summary for summary, _ in kept]
    texts = [text for _, text in kept]
    return pd.DataFrame(feature_rows), texts
