<a href="https://colab.research.google.com/github/luzalbaposse/audio-process/blob/main/Entrainment_Metrics_Full_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Entrainment Metrics: análisis de conversaciones

Para usar este notebook, vas a necesitar seguir [estas instrucciones](https://docs.google.com/document/d/12kiauHlXamN-WvpGU1nBTOL9hAh1OJpdU6fqn3_3ITc/edit?usp=sharing) para preparar los archivos. Una vez que tengas eso, vamos a organizar esos archivos de audio en una carpeta `dyads` que contenga las conversaciones en carpetas llamadas `A{numero}`. Dentro de cada carpeta, vamos a necesitar tener nuestro `.TextGrid` y nuestros respectivos `.wav`. Además, antes de correr este notebook, vas a necesitar obtener los `.phrases` para esos wavs. Lo podés hacer utilizando este código:

```
#!/usr/bin/perl

# Agustin Gravano - Columbia University - November 2005

# Converts a Praat TextGrid file into a set of WaveSurfer annotation
# files, one for each tier. Note: WaveSurfer makes no distinction between
# point and interval tiers, so everything is stored as intervals.

# Usage: praat2wavesurfer.pl STEM
# That will read STEM.TextGrid and create STEM.words, STEM.tones, etc.

# The stem comes from the first command-line argument. A full
# "STEM.TextGrid" name is accepted too and reduced to its stem.
$stem = shift;

if (defined $stem && $stem =~ m/(.+)\.TextGrid$/i) {
	$stem = $1;
}

if (!$stem) {
    die "Usage: praat2wavesurfer.pl STEM\nThat will read STEM.TextGrid and create STEM.words, STEM.tones, etc.\n";  
} elsif (! -e ($stem.".TextGrid")) {
    die "File not found: $stem.TextGrid\n";    
}

# Read the source file into the @lines array. Three-argument open with a
# lexical handle, and an explicit failure check: the file can exist and still
# be unreadable, which previously produced an empty @lines and a misleading
# "Wrong format" error later on.
open(my $source_fh, '<', $stem.".TextGrid")
    or die "Cannot open $stem.TextGrid: $!\n";
@lines = <$source_fh>;
close $source_fh;

# Remove the trailing \n from each line, plus the \r left behind when the
# TextGrid was saved with Windows (CRLF) line endings.
chomp @lines;
s/\r$// for @lines;

# parse the header, upto this line: "item []:"

$i = 0;
my $starting_time = 0;
my $finishing_time = 0;
my $number_of_tiers = 0;

$done = 0;

# Consume header lines one at a time, remembering the global time span and
# the tier count, until the "item []:" marker has been read. On exit $i
# points at the line right after that marker.
until ($done) {
	# Running past the last line means the marker never showed up.
	if ($i >= @lines) {
		die "Wrong format in $stem.TextGrid: Reached the end of file while expecting \"item []:\"\n";
	}

	my $line = $lines[$i];
	$i++;

	if    ($line =~ m/xmin =\s*([0-9\.]+)/) { $starting_time   = $1; }
	elsif ($line =~ m/xmax =\s*([0-9\.]+)/) { $finishing_time  = $1; }
	elsif ($line =~ m/size =\s*([0-9]+)/)   { $number_of_tiers = $1; }
	elsif ($line =~ m/item \[\]:/)          { $done = 1; }
}

# Convert the tiers one by one. For each tier: read its header (the name and
# the "points: size" / "intervals: size" line), turn every entry into a
# "START END LABEL" line, and write the result to STEM.TIERNAME.
#
# Fixes relative to the previous version:
#   * The "points [N]:" / "intervals [N]:" sanity checks were written as
#     `! $lines[$i] =~ "..."`. `!` binds tighter than `=~`, so the negated
#     line was matched and the check could never fire. The pattern was also
#     a double-quoted string, which drops the backslashes and turns "[N]"
#     into a character class. Both are replaced by `!~ m/.../`.
#   * The `mark` pattern required a trailing space; it now tolerates any
#     trailing whitespace, like the `text` pattern.
#   * The output file is opened with a checked three-argument open.
#   * Error messages report 1-based line numbers.
for my $tier_number (1 .. $number_of_tiers) {
	my $target_file_text = "";
	my $tier_name = "";
	my $tier_number_of_points = 0;
	my $tier_number_of_intervals = 0;

	# Tier header: skip forward until the entry count of the tier is found.
	my $done = 0;
	while (!$done) {
		if ($i >= @lines) {
			die "Wrong format in $stem.TextGrid: Reached the end of file while expecting \"points: size = INTEGER\" or \"intervals: size = INTEGER\"\n";
		}
		my $line = $lines[$i];

		if ($line =~ m/name =\s*"([^"]+)"/) {
			$tier_name = $1;
		}
		elsif ($line =~ m/points: size =\s*([0-9]+)/) {
			$tier_number_of_points = $1;
			$done = 1;
		}
		elsif ($line =~ m/intervals: size =\s*([0-9]+)/) {
			$tier_number_of_intervals = $1;
			$done = 1;
		}

		$i++;
	}

	if ($tier_number_of_points) {
		# WaveSurfer has no point tiers, so each point becomes the interval
		# that starts at the previous point (or 0) and ends at this point.
		my $previous_time = 0;
		for my $j (1 .. $tier_number_of_points) {
			my $time = 0;
			my $mark = "";

			if (!defined $lines[$i] || $lines[$i] !~ m/points \[$j\]:/) {
				wrong_format("\"points [$j]:\"");
			}
			$i++;

			if (defined $lines[$i] && $lines[$i] =~ m/number =\s*([0-9\.]+)/) {
				$time = $1;
			} else {
				wrong_format("\"number = FLOAT\"");
			}
			$i++;

			if (defined $lines[$i] && $lines[$i] =~ m/mark =\s*"(.*)"\s*$/) {
				$mark = $1;
			} else {
				wrong_format("\"mark = STRING\"");
			}
			$i++;

			# format the times
			$previous_time = f($previous_time);
			$time = f($time);

			$target_file_text .= "$previous_time $time $mark\n";
			$previous_time = $time;
		}
	}

	elsif ($tier_number_of_intervals) {
		for my $j (1 .. $tier_number_of_intervals) {
			my $xmin = 0;
			my $xmax = 0;
			my $text = "";

			if (!defined $lines[$i] || $lines[$i] !~ m/intervals \[$j\]:/) {
				wrong_format("\"intervals [$j]:\"");
			}
			$i++;

			if (defined $lines[$i] && $lines[$i] =~ m/xmin =\s*([0-9\.]+)/) {
				$xmin = $1;
			} else {
				wrong_format("\"xmin = FLOAT\"");
			}
			$i++;

			if (defined $lines[$i] && $lines[$i] =~ m/xmax =\s*([0-9\.]+)/) {
				$xmax = $1;
			} else {
				wrong_format("\"xmax = FLOAT\"");
			}
			$i++;

			if (defined $lines[$i] && $lines[$i] =~ m/text =\s*"(.*)"\s*$/) {
				$text = $1;
			} else {
				wrong_format("\"text = STRING\"");
			}
			$i++;

			# format the times
			$xmin = f($xmin);
			$xmax = f($xmax);

			$target_file_text .= "$xmin $xmax $text\n";
		}
	}

	# One output file per tier, named after the tier.
	my $target_name = $stem.".".$tier_name;
	open(my $target_fh, '>', $target_name)
		or die "Cannot write $target_name: $!\n";
	print {$target_fh} $target_file_text;
	close($target_fh)
		or die "Cannot close $target_name: $!\n";
}

# Abort with a "Wrong format" message naming what was expected at the current
# input position ($i is a 0-based index, reported here as a 1-based line).
sub wrong_format {
	my ($expected) = @_;
	die "Wrong format in $stem.TextGrid: Expected: $expected at line " . ($i + 1) . ".\n";
}

# number format
# Render a time value with exactly six digits after the decimal point.
sub f {
	my ($value) = @_;
	my $formatted = sprintf("%.6f", $value);
	return $formatted;
}
```

In [None]:
#@markdown ## Instalar dependencias
!pip install fastapi
!pip install kaleido
!pip install python-multipart
!pip install uvicorn
!pip install entrainment_metrics
!pip install pyannote

In [None]:
#@markdown ## Importar dependencias
from typing import List
from entrainment_metrics import InterPausalUnit
from pathlib import Path
import glob
import os
import csv
from scipy.io import wavfile
from math import isnan
from entrainment_metrics.continuous import plot_time_series
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shutil
import re


In [None]:
#@markdown ##Montar Drive
from google.colab import drive
# Mount Google Drive at /content/drive; the cells below read and write under
# /content/drive/MyDrive/dyads.
drive.mount('/content/drive')

## Dividir IPUS

In [None]:
# Every sub-folder of the Drive "dyads" folder is one conversation; keep only
# the folder name (the instructions above call these folders "A{numero}").
# The previous `path.split("/")[1]` returned "content" for every entry because
# the globbed paths are absolute ("/content/drive/...").
conversations = sorted(
    os.path.basename(path)
    for path in glob.glob("/content/drive/MyDrive/dyads/*")
    if os.path.isdir(path)
)

In [None]:
len(conversations)

In [None]:
# For every conversation that has no .phrases files yet: find the interval
# labelled "debate" in the conversation's TextGrid and write a
# "<name>_cropped.wav" copy of each wav restricted to that interval.
for conversation in conversations:
    print(conversation)
    # Accept the conversation id with or without its leading "A", so the
    # folder is never built as "AA44".
    folder_name = conversation if str(conversation).startswith("A") else f"A{conversation}"
    conversation_path = Path("/content/drive/MyDrive/dyads") / folder_name
    if any(conversation_path.glob('*.phrases')):
        print("Phrases already calculated")
        continue

    # open() does not expand wildcards, so resolve the TextGrid file first.
    textgrid_files = sorted(conversation_path.glob("*.TextGrid"))
    if not textgrid_files:
        raise FileNotFoundError(f"No .TextGrid file found in {conversation_path}")
    task_fn = textgrid_files[0]
    with open(task_fn, "r") as f:
        lines = f.readlines()

    debate_start, debate_end = None, None
    # Start at 2: the two lines before the label are read, and a negative
    # index would silently wrap around to the end of the file.
    for i in range(2, len(lines)):
        if "debate" not in lines[i]:
            continue
        # Only accept a label that is preceded by its xmin / xmax lines; this
        # skips other mentions of "debate" (e.g. a tier name).
        if "xmin = " in lines[i - 2] and "xmax = " in lines[i - 1]:
            debate_start = float(lines[i - 2].split("xmin = ")[1])
            debate_end = float(lines[i - 1].split("xmax = ")[1])

    if debate_start is None or debate_end is None:
        raise ValueError(f"Could not find debate line in {task_fn}")

    # Materialise the listing before writing new files into the same folder.
    for wav_file in list(conversation_path.glob('*.wav')):
        if "_cropped" in wav_file.stem:
            continue
        samplerate, data = wavfile.read(wav_file)

        # Convert the interval from seconds to sample indices.
        start, end = int(debate_start * samplerate), int(debate_end * samplerate)
        cutted_data = data[start : end]

        cropped_wav_path = conversation_path / (wav_file.stem + "_cropped.wav")
        wavfile.write(str(cropped_wav_path), samplerate, cutted_data)

## Obtener IPUS de los Wavs usando Voice Activity Detection

In [None]:
from pyannote.audio import Pipeline
from entrainment_metrics import InterPausalUnit

In [None]:
# Load the pretrained voice-activity-detection pipeline.
# SECURITY: an access token used to be hard-coded here. It has been removed;
# because it was published with the notebook it should be revoked. The token
# is now read from the HF_TOKEN environment variable, or asked for
# interactively when that variable is not set.
import os
from getpass import getpass

hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection",
                                    use_auth_token=hf_token)

In [None]:
ipus = {}

In [None]:
# Run voice activity detection on every cropped wav of every conversation
# that has no .phrases files yet, and store the detected speech segments as
# InterPausalUnits in ipus[conversation][speaker].
for conversation in conversations:
    print(conversation)
    # NOTE(review): this path is relative to the working directory, while the
    # other cells use /content/drive/MyDrive/dyads - confirm which is intended.
    conversation_path = Path(f"./dyads/{conversation}")
    if any(conversation_path.glob('*.phrases')):
        print("Phrases already calculated")
        continue

    conversation_ipus = ipus.setdefault(conversation, {})
    for wav_file in conversation_path.glob('*_cropped.wav'):
        # The speaker label is the first character left after removing the
        # boilerplate parts of the file name.
        label = wav_file.stem.lower().replace("participante", "").replace("_cropped", "").replace(" ", "").replace("audio", "")
        if not label:
            print(f"Cannot derive a speaker label from {wav_file}")
            continue
        speaker = label[0].upper()
        print(wav_file)
        print(speaker)
        if speaker in conversation_ipus:
            continue

        print(f"Processing {speaker}")
        try:
            output = pipeline(wav_file)
            speaker_ipus = [
                InterPausalUnit(start=speech.start, end=speech.end)
                for speech in output.get_timeline().support()
            ]
        except Exception as err:
            # Report the cause and leave the speaker out of `ipus`, so the
            # speaker-count checks in the next cells flag the conversation and
            # a re-run of this cell retries it. (A bare `except:` used to keep
            # an empty or partial entry and hide the reason.)
            print(f"Failed {conversation}.{speaker}: {err!r}")
            continue
        conversation_ipus[speaker] = speaker_ipus

In [None]:
# A conversation is complete only when its speakers are exactly "A" and "Z".
# Comparing the key set covers all three original conditions (wrong count,
# missing "A", missing "Z") and lists each conversation once; the two
# separate `if`s used to append the same conversation twice.
conversations_to_fix = [
    conversation
    for conversation, conversation_dict in ipus.items()
    if set(conversation_dict) != {"A", "Z"}
]
if conversations_to_fix:
    print(f"FIX: {conversations_to_fix}")

In [None]:
# Stop at the first conversation that does not have exactly two speakers.
for conversation in ipus:
    speaker_ids = list(ipus[conversation].keys())
    if len(speaker_ids) == 2:
        continue
    raise ValueError(f"Wrong amount of speakers {conversation}: {speaker_ids}")

# Crear .phrases para cada speaker

In [None]:
def write_phrases_file(conversation_id, speaker_id, ipus_list, base_path="/content/drive/MyDrive/dyads"):
    """
    Write the `.phrases` file of one speaker.

    Each line is "START END LABEL". Speech segments are labelled "IPU" and the
    gaps between them "#"; a leading "#" line is added when the first IPU does
    not start at 0.

    Parameters
    ----------
    conversation_id:
        Name of the conversation folder under `base_path`.
    speaker_id:
        Speaker label; the file is written as "<speaker_id>.phrases".
    ipus_list:
        Objects with `start` and `end` attributes, in chronological order.
    base_path:
        Folder that holds the conversation folders. Defaults to the Drive
        location used by the rest of the notebook.

    Returns
    -------
    None. When `ipus_list` is empty nothing is written and a message is
    printed (this used to fail with IndexError).
    """
    if not ipus_list:
        print(f"No IPUs for {conversation_id}.{speaker_id}: .phrases file not written")
        return

    lines = []
    if ipus_list[0].start > 0:
        lines.append(f"0.0 {ipus_list[0].start} #")
    # Every IPU but the last is followed by the silence up to the next IPU.
    for current_ipu, next_ipu in zip(ipus_list, ipus_list[1:]):
        lines.append(f"{current_ipu.start} {current_ipu.end} IPU")
        lines.append(f"{current_ipu.end} {next_ipu.start} #")

    lines.append(f"{ipus_list[-1].start} {ipus_list[-1].end} IPU")

    with open(f'{base_path}/{conversation_id}/{speaker_id}.phrases', 'w') as f:
        f.writelines("%s\n" % l for l in lines)

In [None]:
# Rename, inside one fixed folder, the files whose name ends with
# "<conversation>.phrases" to "<conversation>_.phrases".
# NOTE(review): the folder is always A44, whichever conversation is being
# iterated, and `speaker` / `speaker_ipus` are never used - this looks like a
# one-off fix for a single conversation; confirm before re-running.
for conversation in ipus:
    conversation_ipus = ipus[conversation]
    for speaker, speaker_ipus in conversation_ipus.items():
        # Path of the conversation directory (hard-coded).
        conversation_path = "/content/drive/MyDrive/dyads/A44"

        # List the files in that directory.
        files_in_directory = os.listdir(conversation_path)

        # Keep the files whose name ends with "<conversation>.phrases".
        mono_phrases_files = [file for file in files_in_directory if file.endswith(f"{conversation}.phrases")]

        # Rename them; every match gets the same target name, so with several
        # matches each rename replaces the previous result.
        for mono_phrases_file in mono_phrases_files:
            new_file_name = f"{conversation}_.phrases"
            os.rename(os.path.join(conversation_path, mono_phrases_file), os.path.join(conversation_path, new_file_name))


In [None]:
# Write one .phrases file per speaker of every conversation held in `ipus`.
for conversation, conversation_ipus in ipus.items():
    for speaker in conversation_ipus:
        write_phrases_file(conversation, speaker, conversation_ipus[speaker])

# Conseguir InterPausalUnits de los .phrases

In [None]:
def get_interpausal_units_from_phrases(phrases_fname: Path) -> List[InterPausalUnit]:
    """
    Return a list of IPUs given a Path to a .phrases file

    The format of the file must be:
        - For each line
            f'{starting_time} {ending_time} {ipu's transcription}'
        Where starting_time and ending_time are floats

    Lines whose transcription consists only of one or two non-speech marks
    (silence "#", laughter, cough, noise, ...) are not turned into IPUs.
    Blank lines are ignored, and lines with fewer than three fields are
    reported and skipped. (Reading used to stop at the first blank line, and
    a short line raised IndexError.)

    Note: the next cell of the notebook defines this function again, so that
    later definition is the one in effect once both cells have run.

    Parameters
    ----------
    phrases_fname: Path
        The path to the .phrases file

    Returns
    -------
    List[InterPausalUnit]
        The InterPausalUnits from the .phrases file.
    """
    interpausal_units: List[InterPausalUnit] = []
    skip_words = {"<risa>", "<tos>", "#", "<chasquido>", "<ruido>", "<risas>", "<missing>", "<chasquidos>", "<suspiro>", "<silbando?>"}
    with open(phrases_fname, "r") as word_file:
        for line in word_file:
            line_splitted = line.split()
            if not line_splitted:
                continue
            if len(line_splitted) < 3:
                print(f"Línea no válida (menos de 3 elementos): {line.rstrip()}")
                continue

            start, end, first_word = line_splitted[0], line_splitted[1], line_splitted[2]

            only_mark = len(line_splitted) == 3 and first_word in skip_words
            two_marks = len(line_splitted) == 4 and first_word in skip_words and line_splitted[3] in skip_words
            if only_mark or two_marks:
                continue
            interpausal_units.append(InterPausalUnit(float(start), float(end)))
    return interpausal_units

In [None]:
def get_interpausal_units_from_phrases(phrases_fname: Path) -> List[InterPausalUnit]:
    """
    Build the InterPausalUnits described by a .phrases file.

    Every line is expected to hold at least "START END LABEL". Lines with
    fewer than three fields are reported and skipped. Lines whose label part
    is a single non-speech mark, or exactly two of them, are skipped as well.
    Every other line becomes InterPausalUnit(float(START), float(END)).
    """
    non_speech_marks = ["<risa>", "<tos>", "#", "<chasquido>", "<ruido>", "<risas>", "<missing>", "<chasquidos>", "<suspiro>", "<silbando?>", "<silbando?>"]
    units: List[InterPausalUnit] = []
    with open(phrases_fname, "r") as phrases_file:
        for raw_line in phrases_file:
            stripped = raw_line.rstrip()
            tokens = stripped.split()

            # A usable line needs a start, an end and at least one label token.
            if len(tokens) < 3:
                print(f"Línea no válida (menos de 3 elementos): {stripped}")
                continue

            begin, finish, label = tokens[:3]

            single_mark = len(tokens) == 3 and label in non_speech_marks
            double_mark = len(tokens) == 4 and label in non_speech_marks and tokens[3] in non_speech_marks
            if not (single_mark or double_mark):
                units.append(InterPausalUnit(float(begin), float(finish)))
    return units


In [None]:
ipus = {}

In [None]:
base_path = "/content/drive/MyDrive/dyads"

# Visit the candidate folders A1 ... A52 under base_path and load the IPUs of
# every .phrases file found, as ipus[number][speaker].
for number in range(1, 53):
    folder_name = f"A{number}"
    folder_path = os.path.join(base_path, folder_name)

    # Folders that do not exist are skipped without a message.
    if not os.path.exists(folder_path):
        continue

    print(f"\nEstoy en la carpeta '{folder_name}':")

    phrases_pattern = os.path.join(Path(folder_path), "*.phrases")
    for filename in glob.glob(phrases_pattern):
        # The speaker label is the file name without ".phrases".
        speaker = filename.split("/")[-1].replace(".phrases", "")
        print(speaker)
        speakers_of_conversation = ipus.setdefault(number, {})
        speakers_of_conversation[speaker] = get_interpausal_units_from_phrases(filename)

print("\nListo :D")

In [None]:
# Consecutive IPUs of a speaker must be at least 100 ms apart; stop at the
# first pair that is closer than that.
for conversation, speaker_dict in ipus.items():
    for speaker, speaker_ipus in speaker_dict.items():
        for earlier_ipu, later_ipu in zip(speaker_ipus, speaker_ipus[1:]):
            silence = later_ipu.start - earlier_ipu.end
            if silence < 0.1:
                raise ValueError(f"Found diff < 100ms: {conversation}.{speaker} {earlier_ipu} <-> {later_ipu} ")

# Calcular Feature Values

Para esta sección, vamos a necesitar los géneros de las personas que hayan participado en las conversaciones. Este `Genders.csv` debería tener 3 columnas: `id`, siendo este el código de participante, `pareja`, que es el código de la conversación, y `gender` que es el género de la persona en cuestión.

In [None]:
# Load the gender table described above (columns `id`, `pareja`, `gender`).
df_genders = pd.read_csv("/content/Genders.csv")
# Index by (id, pareja), transpose and convert to a dict, so that
# genders[(id, pareja)] maps the remaining column names to their values;
# the feature-extraction cell reads genders[key]['gender'].
genders = df_genders.set_index(['id', 'pareja']).T.to_dict('dict')

In [None]:
def extract_ipus_features(ipus_list, wav_fname, gender):
    """
    Compute the features of every IPU in `ipus_list`, in place.

    For each IPU, `calculate_features` is called twice on `wav_fname`: first
    with the "speech-rate" extractor, then with the "praat" extractor (passing
    `gender` as `pitch_gender`).

    Errors that only mean the IPU is too short are ignored, so that IPU is
    simply left without the corresponding features:
      - a ValueError containing 'is too small to resample from' (speech-rate)
      - any error containing 'Audio file contains 0 samples' (praat)
    Every other error is re-raised.

    Parameters
    ----------
    ipus_list:
        Objects exposing `calculate_features(audio_file=..., extractor=..., ...)`.
    wav_fname:
        Audio file the IPUs refer to.
    gender:
        Value forwarded as `pitch_gender` to the praat extractor.

    Returns
    -------
    None
    """
    for ipu in ipus_list:
        try:
            ipu.calculate_features(
                audio_file=wav_fname,
                extractor="speech-rate",
            )
        except ValueError as err:
            ipu_too_small = 'is too small to resample from'

            if ipu_too_small not in str(err):
                raise
        try:
            ipu.calculate_features(
                audio_file=wav_fname,
                extractor="praat",
                pitch_gender=gender,
            )
        # BaseException is kept from the original code; anything that is not
        # the "0 samples" case is re-raised unchanged.
        except BaseException as err:
            ipu_too_small = 'Audio file contains 0 samples'

            if ipu_too_small not in str(err):
                raise

In [None]:
# Compute the acoustic features of every speaker's IPUs, for the conversations
# that have exactly two speakers.
for conversation, speaker_dict in ipus.items():
    speaker_count = len(speaker_dict)
    if speaker_count != 2:
        print(f"Unexpected number of speakers ({speaker_count}) in conversation A{conversation}. Skipping.")
        continue

    cropped_pattern = os.path.join(Path(f"/content/drive/MyDrive/dyads/A{conversation}"), "*_cropped.wav")
    conversation_wavs = glob.glob(cropped_pattern)

    for speaker, speaker_ipus in speaker_dict.items():
        print(f"A{conversation}.{speaker}")

        # The gender table is keyed by (participant number, conversation
        # folder); the number is made of the digits found in the first four
        # characters of the speaker name.
        participant_digits = ''.join(char for char in speaker[:4] if char.isdigit())
        key = (int(participant_digits), f"A{conversation}")
        print("Key:", key)

        # Features already present on the first IPU: nothing to do.
        if speaker_ipus and speaker_ipus[0].features_values:
            continue

        if key not in genders:
            print(f"Key {key} not found in genders")
            continue

        speaker_gender = genders[key]['gender']
        print("Gender:", speaker_gender)

        # A wav belongs to the speaker when the first three characters of
        # both names agree (case-insensitively).
        speaker_prefix = speaker.lower()[:3]
        for wav_path in conversation_wavs:
            if speaker_prefix != os.path.basename(wav_path).lower()[:3]:
                continue
            if not wav_path.endswith("_cropped.wav"):
                continue
            extract_ipus_features(speaker_ipus, wav_path, speaker_gender)

# Guardar en un CSV

In [None]:
features = [
 'SECONDS',
 'F0_MAX',
 'F0_MIN',
 'F0_MEAN',
 'F0_MEDIAN',
 'F0_STDV',
 'F0_MAS',
 'ENG_MAX',
 'ENG_MIN',
 'ENG_MEAN',
 'ENG_STDV',
 'VCD2TOT_FRAMES',
 'speech_rate',
]

In [None]:
# Flatten `ipus` into one row per IPU: conversation, speaker, start, end,
# followed by one value per entry of `features` (None when the IPU has no
# value for that feature).
rows = []
for conversation, speaker_dict in ipus.items():
    for speaker, speaker_ipus in speaker_dict.items():
        for ipu in speaker_ipus:
            row_prefix = [conversation, speaker, ipu.start, ipu.end]

            feature_cells = []
            for feature in features:
                try:
                    feature_cells.append(ipu.feature_value(feature))
                except ValueError:
                    feature_cells.append(None)

            rows.append(row_prefix + feature_cells)
print(len(rows))

In [None]:
# Save the IPU table: a header line followed by one line per row.
with open('ipus.csv', 'w', newline ='') as csvfile:
    header = ["conversation", "speaker", "start", "end", *features]
    ipus_writer = csv.writer(csvfile)
    ipus_writer.writerow(header)
    ipus_writer.writerows(rows)

# Calcular Entrainment Metrics

## Importar el archivo ipus.csv

In [None]:
import pandas as pd
from entrainment_metrics.continuous import calculate_metric, TimeSeries

In [None]:
df_ipus = pd.read_csv("./ipus.csv")
display(df_ipus)

## Obtener los IPUs

In [None]:
"""
Getting IPUs from DataFrame
"""
def get_ipus_from_conversation(df_ipus, conversation_id):
    """
    Return the IPUs of the two speakers of one conversation.

    Parameters
    ----------
    df_ipus:
        DataFrame with (at least) the columns "conversation" and "speaker",
        plus whatever `get_ipus_from_speaker` reads.
    conversation_id:
        Value of the "conversation" column to select.

    Returns
    -------
    tuple
        (ipus_speaker_1, ipus_speaker_2), in order of first appearance of the
        speakers. When the conversation does not have exactly two speakers a
        warning is printed; extra speakers are ignored and a missing speaker
        yields an empty list (this used to fail with IndexError).
    """
    # Keep only the requested conversation.
    df_conversation = df_ipus[df_ipus["conversation"] == conversation_id]

    speakers = list(df_conversation["speaker"].unique())

    if len(speakers) != 2:
        print(f"Unexpected number of speakers ({len(speakers)}) found in conversation {conversation_id}")

    ipus_per_speaker = [
        get_ipus_from_speaker(df_conversation[df_conversation["speaker"] == speaker])
        for speaker in speakers[:2]
    ]
    # Pad so that the function always returns two lists.
    while len(ipus_per_speaker) < 2:
        ipus_per_speaker.append([])

    return ipus_per_speaker[0], ipus_per_speaker[1]

def _truncate_to_milliseconds(value):
    """Return `value` as a float cut (not rounded) to three decimal places.

    Accepts numbers as well as their string form. Values without a decimal
    point, or in exponent notation, are converted as they are. Raises
    ValueError for anything that is not a number, including NaN.
    """
    from math import isnan

    text = str(value).strip()
    whole, dot, fraction = text.partition('.')
    if not dot or 'e' in text.lower():
        number = float(text)
    else:
        number = float(f"{whole}.{fraction[:3]}")
    if isnan(number):
        raise ValueError(f"not a valid time: {value!r}")
    return number


def get_ipus_from_speaker(df_speaker):
    """
    Build one InterPausalUnit per row of `df_speaker`.

    `start` and `end` are truncated to millisecond precision and the feature
    columns listed below are passed as `features_values`. Rows whose start or
    end cannot be read as a number are reported and skipped.

    The times are converted through `str()` first: `pd.read_csv` yields floats
    for these columns, and the former `row["start"].split('.')` only worked on
    strings.
    """
    features = [
     'F0_MAX', 'F0_MIN', 'F0_MEAN', 'F0_MEDIAN', 'F0_STDV', 'F0_MAS',
     'ENG_MAX', 'ENG_MIN', 'ENG_MEAN', 'ENG_STDV', 'VCD2TOT_FRAMES', 'speech_rate',
    ]
    ipus = []
    for _, row in df_speaker.iterrows():
        ipu_feature_values = {feature: row[feature] for feature in features}
        try:
            start = _truncate_to_milliseconds(row["start"])
            end = _truncate_to_milliseconds(row["end"])
        except ValueError:

            print(f"Error converting start/end to float: start={row['start']}, end={row['end']}")
            continue

        ipu = InterPausalUnit(
            start=start,
            end=end,
            features_values=ipu_feature_values
        )
        ipus.append(ipu)
    return ipus

## Calcular las métricas

In [None]:
def get_metrics_from_conversation(df_ipus, conversation_id):
    """
    Compute synchrony, convergence and proximity for every feature of one
    conversation.

    Returns a dict keyed "<feature>_<metric>". The three metrics of a feature
    are None when either speaker has no IPU with a usable (non-None, non-NaN)
    value for it, or when the library reports that k is bigger than the number
    of interpausal units. Any other ValueError is re-raised after printing the
    feature values of both speakers. The two time series of "speech_rate" are
    also plotted.
    """
    features = [
        'F0_MAX', 'F0_MIN', 'F0_MEAN', 'F0_MEDIAN', 'F0_STDV', 'F0_MAS',
        'ENG_MAX', 'ENG_MIN', 'ENG_MEAN', 'ENG_STDV',
        'VCD2TOT_FRAMES', 'speech_rate',
    ]
    metrics = ["synchrony", "convergence", "proximity"]
    k_too_big = 'k cannot be bigger than the amount of interpausal units, default k is 7'

    ipus_a, ipus_b = get_ipus_from_conversation(df_ipus, conversation_id)
    results = {}
    for feature in features:
        try:
            # Keep only the IPUs that actually carry a value for this feature.
            ipus_a_w_feature = [
                ipu for ipu in ipus_a
                if ipu.feature_value(feature) is not None and not isnan(ipu.feature_value(feature))
            ]
            ipus_b_w_feature = [
                ipu for ipu in ipus_b
                if ipu.feature_value(feature) is not None and not isnan(ipu.feature_value(feature))
            ]

            if ipus_a_w_feature and ipus_b_w_feature:
                series_a = TimeSeries(
                    feature=feature,
                    interpausal_units=ipus_a_w_feature,
                    method="knn",
                )
                series_b = TimeSeries(
                    feature=feature,
                    interpausal_units=ipus_b_w_feature,
                    method="knn",
                )
                if feature == "speech_rate":
                    plot_time_series(series_a, series_b)
                for metric in metrics:
                    results[f"{feature}_{metric}"] = calculate_metric(
                        metric=metric,
                        time_series_a=series_a,
                        time_series_b=series_b,
                    )
            else:
                for metric in metrics:
                    results[f"{feature}_{metric}"] = None
        except ValueError as err:
            if k_too_big not in str(err):
                # Unknown failure: show the data involved, then propagate.
                print([ipu.features_values[feature] for ipu in ipus_a_w_feature])
                print([ipu.features_values[feature] for ipu in ipus_b_w_feature])
                raise err

            for metric in metrics:
                results[f"{feature}_{metric}"] = None
            print(k_too_big)
    return results

def get_metrics_for_all_conversations(df_ipus):
    """Map every distinct value of the "conversation" column to its metrics dict."""
    conversation_ids = set(df_ipus["conversation"].tolist())
    return {
        conversation_id: get_metrics_from_conversation(df_ipus, conversation_id)
        for conversation_id in conversation_ids
    }

In [None]:
results = get_metrics_for_all_conversations(df_ipus)

## Guardar en un csv

In [None]:
metrics_names = [
 'F0_MAX_synchrony',
 'F0_MAX_convergence',
 'F0_MAX_proximity',
 'F0_MIN_synchrony',
 'F0_MIN_convergence',
 'F0_MIN_proximity',
 'F0_MEAN_synchrony',
 'F0_MEAN_convergence',
 'F0_MEAN_proximity',
 'F0_MEDIAN_synchrony',
 'F0_MEDIAN_convergence',
 'F0_MEDIAN_proximity',
 'F0_STDV_synchrony',
 'F0_STDV_convergence',
 'F0_STDV_proximity',
 'F0_MAS_synchrony',
 'F0_MAS_convergence',
 'F0_MAS_proximity',
 'ENG_MAX_synchrony',
 'ENG_MAX_convergence',
 'ENG_MAX_proximity',
 'ENG_MIN_synchrony',
 'ENG_MIN_convergence',
 'ENG_MIN_proximity',
 'ENG_MEAN_synchrony',
 'ENG_MEAN_convergence',
 'ENG_MEAN_proximity',
 'ENG_STDV_synchrony',
 'ENG_STDV_convergence',
 'ENG_STDV_proximity',
 'VCD2TOT_FRAMES_synchrony',
 'VCD2TOT_FRAMES_convergence',
 'VCD2TOT_FRAMES_proximity',
 'speech_rate_synchrony',
 'speech_rate_convergence',
 'speech_rate_proximity'
]

In [None]:
# One row per conversation: its id followed by every metric, in the order of
# `metrics_names`. Values are looked up by name so the columns always line up
# with the header.
rows = []
for conversation, conversation_metrics in results.items():
    row = [conversation]
    row += [conversation_metrics.get(metric_name) for metric_name in metrics_names]

    rows.append(row)

# Save the table: the next cell loads ./metrics.csv, which no cell was writing.
with open('metrics.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([["conversation"] + metrics_names] + rows)

In [None]:
df_metrics = pd.read_csv("./metrics.csv")
display(df_metrics)

# Troubleshooting

Al hacer el análisis me encontré con varios errores y problemas. Dejo acá anotados la mayoría y la forma en que los solucioné.