In [1]:
import os
import pandas as pd
from datetime import datetime

# Speaker tokens embedded in logged utterances; they are stripped before text comparison/export.
L_TOKEN, C_TOKEN = "<|listener|>", "<|client|>"
# Special `pred_index` codes in the click log: a click on the hide / show toggle
# (non-negative `pred_index` values are treated as clicks on an example).
HIDE, SHOW = -1, -2

# Directory holding the per-chat "<prefix>_dialog.csv", "<prefix>_click.csv" and "<prefix>_pred.csv" logs.
# NOTE(review): absolute, user-specific path — the notebook only runs on this machine as written.
log_path = "/Users/shanglinghsu/backup_logs/flask_outputs_20221202"
# Timestamp layouts accepted by f_optional_strptime (without / with fractional seconds).
DATETIME_FORMAT = "%Y-%m-%d_%H-%M-%S"
DATE_MICROSEC_FORMAT = "%Y-%m-%d_%H-%M-%S.%f"
# Columns kept from the dialog log by load_logs.
DIALOG_COLUMNS = ['user_id', 'is_listener', 'utterance', 'time']
# NOTE(review): PRED_COLUMNS and CLICK_COLUMNS are only referenced from commented-out code in load_logs.
PRED_COLUMNS = ['code', 'score', 'last_utterance_index', 'pred_index', 'text', 'time']
CLICK_COLUMNS = ['last_utterance_index', 'pred_index', 'time']

# Log files whose timestamp is earlier than this are skipped when the sessions table is built.
EXP_START = datetime(year=2022, month=11, day=13, hour=13, minute=28, second=51)  # NOTE(review): this comment used to read "2022-11-13 13:28:50" while the value is 13:28:51 — confirm which is intended

def f_optional_strptime(dtstr):
    """Parse a log timestamp string, with or without fractional seconds.

    A "." in ``dtstr`` selects DATE_MICROSEC_FORMAT; otherwise
    DATETIME_FORMAT is used. Returns the parsed datetime.
    """
    if "." in dtstr:
        return datetime.strptime(dtstr, DATE_MICROSEC_FORMAT)
    return datetime.strptime(dtstr, DATETIME_FORMAT)

def parse_df_time(df):
    """Replace the string values of ``df["time"]`` with parsed datetimes, in place.

    Nothing is returned; the frame passed in is modified.
    """
    parsed_times = df["time"].apply(lambda raw: f_optional_strptime(raw))
    df["time"] = parsed_times

In [2]:
# Build one row per chat session from the files found under `log_path`.
# File names are split on "_": the first two parts form the timestamp, the third the chat id,
# e.g. "2022-11-13_18-12-02_hbu_dialog.csv".
data = {"chat_id": [], "datetime": [], "filename_prefix": [], "num_utterances": []}
filenames = os.listdir(log_path)
unique_datetime = set()
for x in filenames:
    x_split = x.split('_')
    # Deliberately NOT named `datetime`: rebinding that name at cell level would replace the
    # imported `datetime` class for the rest of the kernel session.
    chat_datetime = f_optional_strptime('_'.join(x_split[:2]))
    # Each chat produces several files sharing one timestamp; keep the first one seen and
    # drop anything logged before the experiment started.
    if chat_datetime in unique_datetime or chat_datetime < EXP_START: continue
    unique_datetime.add(chat_datetime)

    chat_id = x_split[2]
    filename_prefix = "_".join(x_split[:3])

    # The utterance count always comes from the dialog log, whichever file was listed first.
    fname = "{}_dialog.csv".format(filename_prefix)
    dialog_df = pd.read_csv(os.path.join(log_path, fname), header=0)
    data["num_utterances"].append(len(dialog_df["utterance"]))

    data["chat_id"].append(chat_id)
    data["datetime"].append(chat_datetime)
    data["filename_prefix"].append(filename_prefix)

# Chronological order with a fresh 0..n-1 index; later cells rely on that index for alignment.
sessions = pd.DataFrame(data=data)
sessions.sort_values("datetime", inplace=True)
sessions.reset_index(drop=True, inplace=True)

print("Number of chats:", len(sessions))

Number of chats: 30


In [3]:
# Summary (min / max / mean / std) of the number of utterances per chat.
sessions["num_utterances"].describe()[["min", "max", "mean", "std"]]

min     21.000000
max     38.000000
mean    30.866667
std      2.763473
Name: num_utterances, dtype: float64

In [4]:
def load_logs(path_prefix):
    """Read the dialog, click and prediction logs that share ``path_prefix``.

    The dialog log is restricted to DIALOG_COLUMNS; the click and prediction
    logs keep all of their columns. Any frame that has a "time" column gets it
    parsed into datetimes. Returns ``(dialog, click, pred)``.
    """
    frames = (
        pd.read_csv(path_prefix + "_dialog.csv", header=0)[DIALOG_COLUMNS],
        pd.read_csv(path_prefix + "_click.csv", header=0),
        pd.read_csv(path_prefix + "_pred.csv", header=0),
    )
    for frame in frames:
        if "time" not in frame.columns:
            continue
        parse_df_time(frame)

    dialog, click, pred = frames
    return dialog, click, pred

def compute_response_times(dialog, mask):
    """Return the time elapsed since the previous utterance, for the masked rows.

    The gap is taken between consecutive rows of ``dialog.time``, so the first
    row has no predecessor and its gap is missing. ``mask`` is a boolean
    selector with one entry per row.
    """
    sent_at = dialog.time
    previous_sent_at = sent_at.shift(1)
    gaps = sent_at - previous_sent_at
    return gaps[mask]

def compute_dialog_stats(dialog):
    """Summarise one chat: duration, utterance counts and response times.

    "L" rows are those with ``is_listener == True`` and "M" rows those with
    ``is_listener == False``. All durations are reported in seconds.
    """
    from_listener = dialog.is_listener == True
    from_other = dialog.is_listener == False

    listener_gaps = compute_response_times(dialog, from_listener)
    other_gaps = compute_response_times(dialog, from_other)

    first_sent, last_sent = dialog.time.min(), dialog.time.max()

    stats = {}
    stats["span"] = (last_sent - first_sent).total_seconds()
    stats["# utterances"] = len(dialog)
    stats["# L utterances"] = sum(from_listener)
    stats["# M utterances"] = sum(from_other)
    stats["avg_l_response_time"] = listener_gaps.mean().total_seconds()
    stats["med_l_response_time"] = listener_gaps.median().total_seconds()
    stats["avg_m_response_time"] = other_gaps.mean().total_seconds()
    stats["med_m_response_time"] = other_gaps.median().total_seconds()
    return stats

def compute_pred_stats(pred):
    """Count the prediction rows and the distinct utterance indices they follow."""
    distinct_utterances = set(pred.last_utterance_index)
    stats = {}
    stats["# predictions"] = len(pred)
    stats["# utterances with predictions"] = len(distinct_utterances)
    return stats

def compute_click_stats(click):
    """Summarise the clicks on examples (rows whose ``pred_index`` is >= 0).

    Returns the number of such clicks, the number of distinct utterance
    indices they are attached to, and a Counter mapping "clicks on one
    utterance index" -> "how many utterance indices had that many clicks".
    """
    from collections import Counter

    is_example_click = click.pred_index >= 0
    clicked_utterances = click.loc[is_example_click, "last_utterance_index"]

    # First count clicks per utterance index, then histogram those counts.
    clicks_per_utterance = Counter(clicked_utterances)
    count_histogram = Counter(clicks_per_utterance.values())

    stats = {}
    stats["# example clicks"] = is_example_click.sum()
    stats["# utterances with example clicks"] = len(set(clicked_utterances))
    stats["per index click counts"] = count_histogram
    return stats

## Compute hide / show tool spans

In [5]:
def compute_toggle_spans(click, pred, hide_code=-1, show_code=-2):
    """Measure how long the tool was shown vs. hidden during one chat.

    The observation window runs from the earliest to the latest prediction
    time. The tool starts out shown; a click whose ``pred_index`` equals
    ``hide_code`` hides it and one equal to ``show_code`` shows it again.
    Toggle clicks that would not change the state (a second hide while already
    hidden, a show while already shown) and all other clicks are ignored.

    Time elapsed while the tool is shown is credited to "show tool span" and
    time while it is hidden to "hide tool span". Previously the segments
    closed by a click inside the loop were credited to the opposite bucket,
    which contradicted how the final segment was labelled.

    Args:
        click: click log with ``pred_index`` and ``time`` columns, in
            chronological order.
        pred: prediction log with a ``time`` column.
        hide_code: ``pred_index`` value that marks a hide click.
        show_code: ``pred_index`` value that marks a show click.

    Returns:
        dict with "show tool span", "hide tool span" and their sum
        "tool span", all in seconds.
    """
    window_start, window_end = pred.time.min(), pred.time.max()

    shown = True
    last_time = window_start
    shown_total = pd.Timedelta(0)
    hidden_total = pd.Timedelta(0)

    # Iterate by position so the result does not depend on the frame's index labels.
    for code, clicked_at in zip(click.pred_index, click.time):
        if shown and code == hide_code:
            # The segment that just ended was spent with the tool visible.
            shown_total += clicked_at - last_time
            shown, last_time = False, clicked_at
        elif not shown and code == show_code:
            # The segment that just ended was spent with the tool hidden.
            hidden_total += clicked_at - last_time
            shown, last_time = True, clicked_at

    # Close the final segment with whatever state the tool was left in.
    if shown:
        shown_total += window_end - last_time
    else:
        hidden_total += window_end - last_time

    spans = {
        "show tool span": shown_total.total_seconds(),
        "hide tool span": hidden_total.total_seconds(),
    }
    spans["tool span"] = spans["show tool span"] + spans["hide tool span"]
    return spans

# Between predictions and responses
- Text similarity
- Strategy similarity
- Time difference

In [6]:
import editdistance
def compute_text_similarity(truth, predictions):
    """Compare the sent text with each predicted text by edit distance.

    Returns every distance (in the order of ``predictions``) plus the smallest
    one; with no predictions the minimum falls back to ``len(truth)``.
    """
    distances = [editdistance.eval(truth, candidate) for candidate in predictions]
    if len(distances) > 0:
        closest = min(distances)
    else:
        closest = len(truth)
    return {
        "Edit Distances": distances,
        "Min Edit Distance": closest,
    }
    
def compute_dialog_pred_similarity(dialog, pred):
    """Compare every listener utterance with the predictions made just before it.

    For the listener utterance at row label ``index``, the candidate
    predictions are the ``pred`` rows whose ``last_utterance_index`` equals
    ``index - 1``. The listener token is removed from the utterance before
    comparison.

    Returns:
        dict with
        "Edit Distances": one flat list of all utterance/prediction distances,
        "Min Edit Distance": one value per listener utterance.
        Both lists are empty when the dialog has no listener utterances
        (this used to raise IndexError).
    """
    scores = []
    for index, drow in dialog[dialog["is_listener"]].iterrows():
        last_prows = pred[pred["last_utterance_index"] == index - 1]

        utterance_true = drow["utterance"].replace(L_TOKEN, "")
        utterance_preds = last_prows["text"].tolist()
        scores.append(compute_text_similarity(utterance_true, utterance_preds))

        # TODO: tag true codes to compare
        # codes_pred = last_prows["code"].tolist()

    if not scores:
        # Nothing to transpose; keep the same keys so downstream columns still exist.
        return {"Edit Distances": [], "Min Edit Distance": []}

    # Transpose the list of per-utterance dicts into a dict of lists.
    score_dict = {key: [s[key] for s in scores] for key in scores[0]}
    # Each entry of "Edit Distances" is itself a list; flatten into one list.
    score_dict["Edit Distances"] = [
        distance
        for per_utterance in score_dict["Edit Distances"]
        for distance in per_utterance
    ]
    return score_dict

# Load one chat for ad-hoc inspection: row label 11 of `sessions`
# (its index was reset after sorting by datetime).
prefix = sessions["filename_prefix"][11]
dialog, click, pred = load_logs(os.path.join(log_path, prefix))
print(len(dialog), len(click), len(pred))

# To prove that the response time isn't any longer when predictions were shown or clicked
# TODO def compute_response_times_with_pred(dialog, pred):
# NOTE(review): `pred_mask` and `uindex_pred` are only consumed by the commented-out draft below.
pred_mask = pd.Series([False] * sum(dialog.is_listener))
uindex_pred = list(set(pred.last_utterance_index.tolist()))
# pred_mask[pred.last_utterance_index.astype(int)] = True
# # utterances time difference when pred is between
# l_time_pred = dialog[dialog.is_listener].time[pred_mask]
# # otherwise
# l_time_no_pred = dialog[dialog.is_listener].time[~pred_mask]
# print(l_time_pred.mean(), l_time_no_pred.mean())

31 7 53


In [7]:
def compute_inference_times(dialog, pred):
    """Measure the delay between an utterance and the last prediction logged for it.

    For every dialog row that has at least one ``pred`` row with a matching
    ``last_utterance_index``, the delay is the latest such prediction time
    minus the utterance time. Rows without predictions are skipped.
    """
    delays = []
    for utterance_index, sent_at in dialog['time'].items():
        matching_times = pred.loc[pred["last_utterance_index"] == utterance_index, 'time']
        if len(matching_times) == 0:
            continue
        delays.append(matching_times.max() - sent_at)
    return {'Inference Times': delays}

# def compare_clickthrough(dialog, pred, click):
# Draft: split listener response times by whether an example was clicked for that utterance index.
# Works on whichever `dialog`, `click`, `pred` were loaded last.
clicked, not_clicked = [], []
example_clicks = click[click.pred_index >= 0]
# `value in series` tests the Series *index labels*, not its values, so membership is
# checked against explicit sets of the column values instead.
pred_utterance_indices = set(pred["last_utterance_index"])
clicked_utterance_indices = set(example_clicks["last_utterance_index"])
# NOTE(review): this keys predictions/clicks on `index` itself, as the draft did; other cells
# pair an utterance with `last_utterance_index == index - 1` — confirm which is intended here.
for index, drow in dialog[dialog["is_listener"]].iterrows():
    if index == 0: continue
    if index not in pred_utterance_indices: continue
    # Response time = time since the previous utterance (row lookup via .loc; `dialog[index - 1]`
    # would have been a column lookup and raised KeyError).
    response_time = drow["time"] - dialog.loc[index - 1, "time"]
    if index in clicked_utterance_indices:
        clicked.append(response_time)
    else:
        not_clicked.append(response_time)
# listener_utterances = 
# listener_utterances[]

# clicked L response times
# no clicked L response times

In [9]:
def compute_relative_stats(result):
    """Add percentage columns derived from the absolute statistics, in place.

    Each new entry is ``numerator / denominator * 100`` over keys that must
    already be present in ``result``. Nothing is returned.
    """
    # (new key, numerator key, denominator key) — kept in output-column order.
    percentage_specs = (
        (r"% of utterances with predictions", "# utterances with predictions", "# utterances"),
        (r"% of utterances with example clicks", "# utterances with example clicks", "# utterances"),
        (r"% of L utterances with example clicks", "# utterances with example clicks", "# L utterances"),
        (r"% tool span", "tool span", "span"),
        (r"% show tool span", "show tool span", "tool span"),
        (r"% hide tool span", "hide tool span", "tool span"),
    )
    for target_key, numerator_key, denominator_key in percentage_specs:
        result[target_key] = result[numerator_key] / result[denominator_key] * 100

def compute_stats_wrapper(dialog, click, pred):
    """Collect every per-chat statistic into a single flat dict.

    The individual results are merged in a fixed order (dialog, prediction,
    click, toggle spans, text similarity, inference times) and the derived
    percentages are added last.
    """
    result = {
        **compute_dialog_stats(dialog),
        **compute_pred_stats(pred),
        **compute_click_stats(click),
        **compute_toggle_spans(click, pred),
        **compute_dialog_pred_similarity(dialog, pred),
        **compute_inference_times(dialog, pred),
    }
    compute_relative_stats(result)
    return result

In [10]:
# Compute the statistics of every chat listed in `sessions` and store them next to the
# session metadata.
all_stats = []
for prefix in sessions["filename_prefix"]:
    dialog, click, pred = load_logs(os.path.join(log_path, prefix))
    stats = compute_stats_wrapper(dialog, click, pred)
    all_stats.append(stats)
# Column-wise concat aligns on the index: `sessions` was re-indexed 0..n-1 and `all_stats`
# was filled in the same row order.
stats_df = pd.concat([sessions, pd.DataFrame(data=all_stats)], axis=1)
# Written beside the log directory as "<log_path>.csv"; index=0 is falsy, so the row index is omitted.
stats_df.to_csv(log_path + ".csv", index=0)
stats_df

Unnamed: 0,chat_id,datetime,filename_prefix,num_utterances,span,# utterances,# L utterances,# M utterances,avg_l_response_time,med_l_response_time,...,tool span,Edit Distances,Min Edit Distance,Inference Times,% of utterances with predictions,% of utterances with example clicks,% of L utterances with example clicks,% tool span,% show tool span,% hide tool span
0,hbu,2022-11-13 18:12:02,2022-11-13_18-12-02_hbu,21,274.856804,21,11,10,17.969445,18.888637,...,231.302666,"[67, 81, 50, 109, 174, 100, 99, 94, 57, 50, 26...","[50, 46, 63, 67, 50, 109, 94, 26, 27, 42, 44]","[0 days 00:00:00.306180, 0 days 00:00:00.56921...",76.190476,0.0,0.0,84.15388,100.0,0.0
1,xpp,2022-11-13 18:26:31,2022-11-13_18-26-31_xpp,32,553.437371,32,17,15,21.854503,21.98408,...,515.898835,"[66, 84, 71, 45, 75, 55, 51, 52, 46, 29, 31, 4...","[60, 16, 66, 45, 75, 51, 46, 29, 40, 101, 20, ...","[0 days 00:00:00.579043, 0 days 00:00:00.32882...",75.0,0.0,0.0,93.217203,100.0,0.0
2,itr,2022-11-13 20:14:17,2022-11-13_20-14-17_itr,32,509.468653,32,16,16,18.281596,17.184917,...,419.845107,"[38, 37, 58, 39, 48, 45, 37, 26, 73, 73, 77, 4...","[65, 29, 37, 39, 26, 73, 42, 46, 32, 13, 43, 4...","[0 days 00:00:00.578816, 0 days 00:00:00.59205...",75.0,0.0,0.0,82.408428,100.0,0.0
3,qmd,2022-11-13 20:36:01,2022-11-13_20-36-01_qmd,30,1220.482239,30,17,13,63.430161,26.76164,...,655.131019,"[30, 19, 46, 44, 43, 52, 55, 136, 127, 0, 0, 9...","[35, 62, 71, 19, 44, 43, 0, 0, 55, 100, 40, 30...","[0 days 00:00:00.373207, 0 days 00:00:00.31383...",76.666667,13.333333,23.529412,53.678046,100.0,0.0
4,ljq,2022-11-14 14:31:04,2022-11-14_14-31-04_ljq,30,556.741366,30,17,13,25.241583,22.170505,...,534.703674,"[28, 24, 22, 0, 32, 21, 0, 60, 75, 72, 41, 46,...","[28, 29, 24, 0, 0, 60, 0, 0, 73, 55, 0, 75, 11...","[0 days 00:00:00.297234, 0 days 00:00:00.39806...",83.333333,23.333333,41.176471,96.041664,100.0,0.0
5,ugq,2022-11-14 14:44:20,2022-11-14_14-44-20_ugq,30,547.67664,30,16,14,24.575902,22.326323,...,519.454191,"[35, 36, 39, 80, 75, 64, 51, 50, 52, 37, 30, 4...","[28, 53, 35, 64, 64, 50, 30, 49, 67, 84, 32, 1...","[0 days 00:00:00.597608, 0 days 00:00:00.26994...",76.666667,0.0,0.0,94.846877,100.0,0.0
6,teb,2022-11-15 13:52:28,2022-11-15_13-52-28_teb,38,1087.081487,38,23,15,36.569737,33.243044,...,1031.17511,"[108, 0, 0, 21, 17, 60, 63, 56, 38, 41, 0, 21,...","[35, 89, 0, 0, 56, 0, 0, 0, 63, 0, 49, 0, 67, ...","[0 days 00:00:00.345997, 0 days 00:00:00.45873...",86.842105,28.947368,47.826087,94.857205,100.0,0.0
7,oev,2022-11-15 14:14:19,2022-11-15_14-14-19_oev,31,803.994797,31,14,17,33.908372,27.649079,...,736.722033,"[94, 44, 49, 44, 74, 82, 76, 78, 80, 79, 51, 2...","[40, 88, 94, 38, 44, 74, 78, 29, 99, 105, 89, ...","[0 days 00:00:00.510131, 0 days 00:00:00.37291...",80.645161,0.0,0.0,91.632687,100.0,0.0
8,dxo,2022-11-16 15:19:20,2022-11-16_15-19-20_dxo,30,875.277835,30,17,13,18.845029,14.684387,...,651.323877,"[31, 0, 37, 19, 0, 0, 56, 56, 0, 70, 46, 38, 4...","[31, 27, 29, 0, 0, 0, 0, 38, 68, 107, 87, 45, ...","[0 days 00:00:00.220464, 0 days 00:00:00.43439...",86.666667,36.666667,64.705882,74.413386,100.0,0.0
9,btv,2022-11-16 15:32:26,2022-11-16_15-32-26_btv,27,522.261547,27,12,15,25.402791,17.733492,...,492.521791,"[10, 17, 58, 31, 30, 61, 58, 49, 33, 20, 53, 4...","[31, 41, 10, 30, 49, 20, 44, 69, 118, 87, 310,...","[0 days 00:00:00.566035, 0 days 00:00:00.24949...",85.185185,0.0,0.0,94.305582,100.0,0.0


In [11]:
# List the columns of the merged per-chat statistics table.
stats_df.columns

Index(['chat_id', 'datetime', 'filename_prefix', 'num_utterances', 'span',
       '# utterances', '# L utterances', '# M utterances',
       'avg_l_response_time', 'med_l_response_time', 'avg_m_response_time',
       'med_m_response_time', '# predictions', '# utterances with predictions',
       '# example clicks', '# utterances with example clicks',
       'per index click counts', 'show tool span', 'hide tool span',
       'tool span', 'Edit Distances', 'Min Edit Distance', 'Inference Times',
       '% of utterances with predictions',
       '% of utterances with example clicks',
       '% of L utterances with example clicks', '% tool span',
       '% show tool span', '% hide tool span'],
      dtype='object')

In [12]:
# Distribution across chats of the share of utterances that have predictions.
stats_df["% of utterances with predictions"].describe()

count    30.000000
mean     82.538493
std       4.681492
min      75.000000
25%      78.064516
50%      84.079392
75%      86.666667
max      87.500000
Name: % of utterances with predictions, dtype: float64

# Utterance table

In [142]:
# Show the click log currently bound to `click`, i.e. whichever chat an earlier cell loaded last.
# NOTE(review): the execution count of this cell is out of sequence with its neighbours, so the
# displayed chat depends on the order the cells were run in.
click

Unnamed: 0,last_utterance_index,pred_index,time
0,12,-1,2022-11-18 14:31:12.198250
1,14,-2,2022-11-18 14:32:12.729224
2,18,-1,2022-11-18 14:33:35.585050
3,18,-1,2022-11-18 14:33:37.481979
4,18,-2,2022-11-18 14:33:38.332956
5,23,-2,2022-11-18 14:35:29.955236
6,23,-1,2022-11-18 14:35:31.990520


In [158]:
# Build one table with a row per utterance across all chats, annotated with the tool state,
# the predictions made right before the utterance and the example clicks on them.
dialogs = []
for _, row in stats_df.iterrows():
    # row = stats_df.iloc[11]  # 13 is interesting

    dialog, click, pred = load_logs(os.path.join(log_path, row['filename_prefix']))
    dialog["chat_id"] = row["chat_id"]
    dialog["response_time"] = compute_response_times(dialog, [True] * len(dialog))
    dialog["utterance"] = dialog["utterance"].apply(lambda x: x.replace(C_TOKEN, "").replace(L_TOKEN, ""))

    # Label whether the tool was hidden right before this utterance was sent.
    # The first INPUT_LEN rows default to NA, every later row to "not hidden".
    INPUT_LEN = 5
    is_tool_hidden = [pd.NA] * INPUT_LEN + [False] * (len(dialog) - INPUT_LEN)
    c = len(click) - 1
    for d in range(len(dialog) - 1, -1, -1):
        # Find the latest show/hide click that is not after this utterance
        # (clicks on examples, pred_index >= 0, are skipped).
        while c >= 0 and (click["time"].iloc[c] > dialog["time"].iloc[d] or click["pred_index"].iloc[c] >= 0):
            c -= 1
        if c < 0: break
        # Apply that click's state to this utterance and every earlier one sent after the click;
        # earlier toggles overwrite their own range on later iterations.
        cur_state = bool(click["pred_index"].iloc[c] == HIDE)
        k = d  # separate cursor: the loop variable itself is left untouched
        while k >= 0 and (dialog["time"].iloc[k] > click["time"].iloc[c]):
            is_tool_hidden[k] = cur_state
            k -= 1
    # Only chat 11, 25 have non-0 is_tool_hidden
    dialog["is_tool_hidden"] = is_tool_hidden

    # Mark the predictions right before this utterance: row i gets the predictions whose
    # last_utterance_index is i - 1 (row 0 has none).
    preds = [[]] + [pred.loc[pred["last_utterance_index"] == (i - 1), "text"].tolist() for i in range(1, len(dialog))]
    dialog["num_preds"] = [len(x) for x in preds]
    dialog["preds"] = preds

    # Mark the clicks right before this utterance, aligned exactly like `preds`
    # (last_utterance_index == i - 1). They were previously taken at last_utterance_index == i,
    # so the click counts were off by one row and the clicked pred_index was looked up in the
    # wrong batch of predictions.
    clicks = [[]] + [click.loc[click["last_utterance_index"] == (i - 1), "pred_index"].tolist() for i in range(1, len(dialog))]
    pred_clicks = [[y for y in x if y >= 0] for x in clicks]
    dialog["num_pred_clicks"] = [len(x) for x in pred_clicks]

    # Text of the last example clicked before this utterance ("" when none was clicked).
    # NOTE(review): assumes pred_index is the position of the prediction within its batch — confirm.
    last_clicks = [x[-1] if len(x) > 0 else None for x in pred_clicks]
    last_clicked_pred = [(preds[i][last_clicks[i]] if last_clicks[i] is not None else "") for i in range(len(dialog))]
    dialog["last_clicked_pred"] = last_clicked_pred

    dialogs.append(dialog)

utterance_df = pd.concat(dialogs, ignore_index=True)

# Export response times as plain seconds and drop the columns that are not exported.
utterance_df["response_time_s"] = utterance_df["response_time"].dt.total_seconds()
utterance_df = utterance_df.drop(columns=["user_id", "time", "response_time"], errors="ignore")

utterance_df.to_csv(log_path + "_utter.csv", index=False)

In [83]:
# Columns of the prediction log currently bound to `pred`.
# NOTE(review): the execution count of this cell is out of sequence with its neighbours.
pred.columns

Index(['code', 'score', 'last_utterance_index', 'pred_index', 'text', 'time'], dtype='object')