In [None]:
import os
import pandas as pd
from datetime import datetime

# Speaker tokens; L_TOKEN is stripped from listener utterances before the
# edit-distance comparison in compute_dialog_pred_similarity.
L_TOKEN, C_TOKEN = "<|listener|>", "<|client|>"
# Sentinel codes for the hide / show toggles. compute_toggle_spans matches
# click.pred_index against -1 (hide) and -2 (show).
HIDE, SHOW = -1, -2

# Directory that holds the per-chat "<prefix>_dialog.csv" / "_click.csv" / "_pred.csv" logs.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
log_path = "/Users/shanglinghsu/backup_logs/flask_outputs_20221202"
# Timestamp layouts accepted by f_optional_strptime: without and with
# fractional seconds.
DATETIME_FORMAT = "%Y-%m-%d_%H-%M-%S"
DATE_MICROSEC_FORMAT = "%Y-%m-%d_%H-%M-%S.%f"
# Column lists for the three log types. Only DIALOG_COLUMNS is applied when
# loading (see load_logs); the click and pred frames keep every column.
DIALOG_COLUMNS = ['user_id', 'is_listener', 'utterance', 'time']
PRED_COLUMNS = ['code', 'score', 'last_utterance_index', 'pred_index', 'text', 'time']
CLICK_COLUMNS = ['last_utterance_index', 'pred_index', 'time']

# Log files timestamped before EXP_START are skipped when sessions are collected.
# NOTE(review): the value uses second=51 while the trailing note says 13:28:50 —
# confirm which one is intended.
EXP_START = datetime(year=2022, month=11, day=13, hour=13, minute=28, second=51)  # 2022-11-13 13:28:50

def f_optional_strptime(dtstr):
    """Parse a log timestamp string with or without fractional seconds.

    A "." anywhere in ``dtstr`` selects DATE_MICROSEC_FORMAT; otherwise
    DATETIME_FORMAT is used. The string is handed to ``datetime.strptime``
    and its result is returned unchanged.
    """
    if "." in dtstr:
        return datetime.strptime(dtstr, DATE_MICROSEC_FORMAT)
    return datetime.strptime(dtstr, DATETIME_FORMAT)

def parse_df_time(df):
    """Replace the string "time" column of ``df`` with parsed datetimes.

    The frame is modified in place and nothing is returned. Each value is
    parsed with f_optional_strptime.
    """
    parsed_times = df["time"].apply(f_optional_strptime)
    df["time"] = parsed_times

In [None]:
# Build one row per chat session from the file names in the log directory.
# File names are expected to look like "<date>_<time>_<chat_id>_<kind>.csv".
data = {"chat_id":[], "datetime":[], "filename_prefix": [], "num_utterances": []}
# Sorted so that the de-duplication below does not depend on directory order.
filenames = sorted(os.listdir(log_path))
unique_datetime = set()
for filename in filenames:
    name_parts = filename.split('_')
    if len(name_parts) < 3:
        continue  # no "<date>_<time>_<chat_id>" prefix, e.g. a stray system file
    try:
        # Deliberately not named `datetime`: rebinding the imported class to an
        # instance only keeps later strptime calls working because strptime is
        # a classmethod.
        session_time = f_optional_strptime('_'.join(name_parts[:2]))
    except ValueError:
        continue  # prefix is not a timestamp, so this is not a chat log
    # Several files share a session timestamp; keep one row per timestamp
    # and drop everything logged before the experiment started.
    if session_time in unique_datetime or session_time < EXP_START:
        continue
    unique_datetime.add(session_time)

    chat_id = name_parts[2]
    filename_prefix = "_".join(name_parts[:3])

    fname = "{}_dialog.csv".format(filename_prefix)
    dialog_df = pd.read_csv(os.path.join(log_path, fname), header=0)
    data["num_utterances"].append(len(dialog_df["utterance"]))

    data["chat_id"].append(chat_id)
    data["datetime"].append(session_time)
    data["filename_prefix"].append(filename_prefix)

# Chronological order with a fresh 0..n-1 index, built without in-place
# mutation so that re-running the cell is idempotent.
sessions = (
    pd.DataFrame(data=data)
    .sort_values("datetime")
    .reset_index(drop=True)
)

print("Number of chats:", len(sessions))

In [None]:
# Summary of the number of utterances per chat (min, max, mean, std only).
sessions["num_utterances"].describe()[["min", "max", "mean", "std"]]

In [None]:
def load_logs(path_prefix):
    """Read the dialog, click and prediction logs that share ``path_prefix``.

    The files ``<path_prefix>_dialog.csv``, ``_click.csv`` and ``_pred.csv``
    are loaded with their first row as header. The dialog frame is reduced
    to DIALOG_COLUMNS; the click and pred frames keep all of their columns.
    Every frame that has a "time" column gets it parsed by parse_df_time.

    Returns the tuple ``(dialog, click, pred)``.
    """
    dialog = pd.read_csv(f"{path_prefix}_dialog.csv", header=0)[DIALOG_COLUMNS]
    click = pd.read_csv(f"{path_prefix}_click.csv", header=0)
    pred = pd.read_csv(f"{path_prefix}_pred.csv", header=0)

    frames = (dialog, click, pred)
    for frame in frames:
        if "time" not in frame.columns:
            continue
        parse_df_time(frame)
    return frames

def compute_response_times(dialog, mask):
    """Return the time elapsed since the previous utterance for masked rows.

    ``dialog["time"]`` is differenced against itself shifted down by one
    row, so the first row has no predecessor and yields a missing value.
    ``mask`` then selects which rows of that difference are returned.
    """
    timestamps = dialog["time"]
    previous = timestamps.shift(1)
    elapsed = timestamps - previous
    return elapsed[mask]

def compute_dialog_stats(dialog):
    """Summarise one chat: duration, utterance counts and response times.

    Rows are split by ``is_listener`` (True -> "L", False -> "M"). Response
    times come from compute_response_times and are reported as mean and
    median, in seconds. "span" is the number of seconds between the
    earliest and latest utterance time.
    """
    listener_mask = dialog.is_listener == True
    other_mask = dialog.is_listener == False
    listener_rt = compute_response_times(dialog, listener_mask)
    other_rt = compute_response_times(dialog, other_mask)

    times = dialog.time
    stats = {"span": (times.max() - times.min()).total_seconds()}
    stats["# utterances"] = len(dialog)
    stats["# L utterances"] = sum(listener_mask)
    stats["# M utterances"] = sum(other_mask)
    stats["avg_l_response_time"] = listener_rt.mean().total_seconds()
    stats["med_l_response_time"] = listener_rt.median().total_seconds()
    stats["avg_m_response_time"] = other_rt.mean().total_seconds()
    stats["med_m_response_time"] = other_rt.median().total_seconds()
    return stats

def compute_pred_stats(pred):
    """Count the predictions and the distinct utterance indices they refer to.

    Returns a dict with the number of rows in ``pred`` and the number of
    distinct values in its ``last_utterance_index`` column.
    """
    distinct_utterances = set(pred.last_utterance_index)
    stats = {}
    stats["# predictions"] = len(pred)
    stats["# utterances with predictions"] = len(distinct_utterances)
    return stats

def compute_click_stats(click):
    """Count clicks on example predictions.

    A click counts as an example click when its ``pred_index`` is
    non-negative. Returns the number of such clicks and the number of
    distinct ``last_utterance_index`` values among them.
    """
    is_example = click.pred_index >= 0
    clicked_utterances = click.loc[is_example, "last_utterance_index"]
    stats = {"# example clicks": is_example.sum()}
    stats["# utterances with example clicks"] = len(set(clicked_utterances))
    return stats

## Compute hide / show tool spans

In [None]:
def compute_toggle_spans(click, pred, hide_code=-1, show_code=-2):
    """Measure how long the prediction tool was shown and hidden in one chat.

    The tool is treated as shown from the earliest prediction time. Walking
    the clicks in row order, a hide click closes a shown interval and the
    next show click closes the hidden interval that followed it. Clicks that
    do not change the state (example clicks, a hide while already hidden, a
    show while already shown) are ignored. The interval still open after the
    last toggle runs until the latest prediction time.

    Parameters
    ----------
    click : DataFrame with ``pred_index`` and ``time`` columns.
    pred : DataFrame with a ``time`` column.
    hide_code, show_code : ``pred_index`` values that mark a hide / show
        toggle. The defaults match the values this notebook has always used.

    Returns
    -------
    dict with "show tool span", "hide tool span" and their sum "tool span",
    all in seconds.
    """
    def _total_seconds(spans):
        # An empty list would sum to a plain 0 without total_seconds().
        return pd.Series(spans).sum().total_seconds() if len(spans) > 0 else 0

    show_spans, hide_spans = [], []
    shown = True
    last_time = pred["time"].min()
    # Iterate positionally so a non-default row index on `click` is harmless.
    for code, clicked_at in zip(click["pred_index"], click["time"]):
        if shown and code == hide_code:
            # The tool was visible up to this hide click.
            show_spans.append(clicked_at - last_time)
            shown = False
        elif not shown and code == show_code:
            # The tool was hidden up to this show click.
            hide_spans.append(clicked_at - last_time)
            shown = True
        else:
            continue
        last_time = clicked_at

    # Close the interval that is still open at the last prediction.
    open_spans = show_spans if shown else hide_spans
    open_spans.append(pred["time"].max() - last_time)

    spans = {
        "show tool span": _total_seconds(show_spans),
        "hide tool span": _total_seconds(hide_spans),
    }
    spans["tool span"] = spans["show tool span"] + spans["hide tool span"]
    return spans

# Similarity between predictions and listener responses
- Text similarity (edit distance)
- Strategy similarity (TODO: not implemented yet)

In [None]:
import editdistance
def compute_text_similarity(truth, predictions):
    """Score every predicted text against the true utterance by edit distance.

    Returns a dict with the list of distances, one per prediction and in
    input order, plus the smallest of them. When there are no predictions
    the minimum is reported as ``len(truth)``.
    """
    edit_distances = [editdistance.eval(truth, candidate) for candidate in predictions]
    if len(edit_distances) > 0:
        closest = min(edit_distances)
    else:
        closest = len(truth)
    return {
        "Edit Distances": edit_distances,
        "Min Edit Distance": closest,
    }
    
def compute_dialog_pred_similarity(dialog, pred):
    """Compare each listener utterance with the predictions offered before it.

    For every row of ``dialog`` where ``is_listener`` is true, the rows of
    ``pred`` whose ``last_utterance_index`` equals that row's label minus one
    are scored against the utterance (with L_TOKEN removed) by
    compute_text_similarity.

    Returns a dict of lists: "Edit Distances" holds every distance from every
    listener utterance flattened into one list, and "Min Edit Distance" holds
    one value per listener utterance. Both lists are empty when the dialog
    has no listener utterances.
    """
    scores = []
    for index, drow in dialog[dialog["is_listener"]].iterrows():
        last_prows = pred[pred["last_utterance_index"] == index - 1]

        utterance_true = drow["utterance"].replace(L_TOKEN, "")
        utterance_preds = last_prows["text"].tolist()
        score = compute_text_similarity(utterance_true, utterance_preds)

        # TODO: tag true codes to compare
        # codes_pred = last_prows["code"].tolist()
        scores.append(score)

    if not scores:
        # No listener turns: there is no scores[0] to take the keys from.
        return {"Edit Distances": [], "Min Edit Distance": []}

    score_dict = {
        key: [s[key] for s in scores]
        for key in scores[0].keys()
    }
    # Each entry is a per-utterance list; flatten into one list of distances.
    score_dict["Edit Distances"] = [y for x in score_dict["Edit Distances"] for y in x]
    return score_dict

In [None]:
def compute_relative_stats(result):
    """Add percentage columns derived from the absolute counts and spans.

    ``result`` is updated in place and nothing is returned. Each percentage
    is ``numerator / denominator * 100``. When a denominator is zero (for
    example a chat whose predictions all share one timestamp, giving a tool
    span of 0) the percentage is undefined and stored as NaN instead of
    raising ZeroDivisionError.
    """
    def _percent(numerator_key, denominator_key):
        denominator = result[denominator_key]
        if denominator == 0:
            return float("nan")
        return result[numerator_key] / denominator * 100

    result[r"% of utterances with predictions"] = _percent("# utterances with predictions", "# utterances")
    result[r"% of utterances with example clicks"] = _percent("# utterances with example clicks", "# utterances")
    result[r"% of L utterances with example clicks"] = _percent("# utterances with example clicks", "# L utterances")
    result[r"% tool span"] = _percent("tool span", "span")
    result[r"% show tool span"] = _percent("show tool span", "tool span")
    result[r"% hide tool span"] = _percent("hide tool span", "tool span")

def compute_stats_wrapper(dialog, click, pred):
    """Compute every per-chat statistic and merge them into one dict.

    The partial results are merged in this order: dialog stats, prediction
    stats, click stats, toggle spans, dialog/prediction similarity. The
    percentage columns are then added by compute_relative_stats.
    """
    partial_stats = (
        compute_dialog_stats(dialog),
        compute_pred_stats(pred),
        compute_click_stats(click),
        compute_toggle_spans(click, pred),
        compute_dialog_pred_similarity(dialog, pred),
    )
    result = {}
    for part in partial_stats:
        result.update(part)
    compute_relative_stats(result)
    return result

In [None]:
# Compute the statistics of every session, in the row order of `sessions`.
all_stats = []
for prefix in sessions["filename_prefix"]:
    dialog, click, pred = load_logs(os.path.join(log_path, prefix))
    stats = compute_stats_wrapper(dialog, click, pred)
    all_stats.append(stats)
# axis=1 joins on the row index, so this relies on `sessions` having a
# 0..n-1 index that lines up with the positions in all_stats.
stats_df = pd.concat([sessions, pd.DataFrame(data=all_stats)], axis=1)
# Saved next to the log directory as "<log_path>.csv"; index=0 is falsy,
# so the row index is not written.
stats_df.to_csv(log_path + ".csv", index=0)
stats_df

In [None]:
# List every column of stats_df, one name per line.
for x in stats_df.columns:
    print(x)

In [None]:
# Distribution across chats of the share of utterances that have predictions.
stats_df["% of utterances with predictions"].describe()