In [40]:
import os
import pandas as pd
from datetime import datetime, timedelta

# Directory whose entries are listed and read as per-session log CSV files.
log_path = "/home/ubuntu/models/test_flask_outputs"

# Timestamp layouts used in the logs: whole seconds, or with a fractional part.
DATETIME_FORMAT = "%Y-%m-%d_%H-%M-%S"
DATE_MICROSEC_FORMAT = "%Y-%m-%d_%H-%M-%S.%f"

# Column names for the dialog / prediction / click logs. Only DIALOG_COLUMNS is
# applied by the visible cells; the other two are referenced in commented-out code.
DIALOG_COLUMNS = ["user_id", "is_listener", "utterance", "time"]
PRED_COLUMNS = [
    "code",
    "score",
    "last_utterance_index",
    "pred_index",
    "text",
    "time",
]
CLICK_COLUMNS = ["last_utterance_index", "pred_index", "time"]

def f_optional_strptime(dtstr):
    """Parse a log timestamp string, with or without fractional seconds.

    A "." anywhere in ``dtstr`` selects DATE_MICROSEC_FORMAT; otherwise
    DATETIME_FORMAT is used.

    Returns:
        The parsed ``datetime``.

    Raises:
        ValueError: from ``datetime.strptime`` when ``dtstr`` does not match
            the selected layout.
    """
    if "." in dtstr:
        return datetime.strptime(dtstr, DATE_MICROSEC_FORMAT)
    return datetime.strptime(dtstr, DATETIME_FORMAT)

def parse_df_time(df):
    """Convert the ``time`` column of ``df`` from strings to datetimes, in place.

    Every value is passed through ``f_optional_strptime``. The frame is
    modified directly and nothing is returned.
    """
    raw_times = df["time"]
    df["time"] = raw_times.apply(f_optional_strptime)

In [41]:
# Build one row per chat session from the log file names. Names are expected to
# look like "<date>_<time>_<chat_id>_<kind>.csv"; several files can share the
# same "<date>_<time>_<chat_id>" prefix and must collapse into a single row.
data = {"chat_id": [], "date_time": []}
filenames = os.listdir(log_path)
seen_sessions = set()
for x in filenames:
    x_split = x.split('_')
    chat_id = x_split[2]
    date_time = f_optional_strptime('_'.join(x_split[:2]))

    # Key on (start time, chat id). Keying on the start time alone would silently
    # drop a chat that began in the same second as another one with a different id.
    session_key = (date_time, chat_id)
    if session_key in seen_sessions:
        continue
    seen_sessions.add(session_key)

    data["chat_id"].append(chat_id)
    data["date_time"].append(date_time)

# Chronological order, with chat_id as a deterministic tie-breaker. Chained calls
# build a fresh frame instead of mutating one in place, so the cell is idempotent.
sessions = (
    pd.DataFrame(data=data)
    .sort_values(["date_time", "chat_id"])
    .reset_index(drop=True)
)
print("Number of sessions:", len(sessions))
sessions.head()

Number of sessions: 39


Unnamed: 0,chat_id,date_time
0,mock,2022-10-30 21:21:11
1,mock,2022-10-30 21:21:12
2,mock,2022-11-02 14:23:00
3,mock,2022-11-02 14:43:38
4,mock,2022-11-02 19:40:24


In [43]:
# See dialogues one at a time.
# Each session's dialog log is loaded in turn; nothing is displayed inside the
# loop, and only the last frame loaded stays bound to `dialog_df` afterwards.
# NOTE(review): the file name is rebuilt with DATETIME_FORMAT, which has no
# fractional-seconds field — confirm no log file name carries microseconds.
for i, row in sessions.iterrows():
    stamp = datetime.strftime(row.date_time, DATETIME_FORMAT)
    fname = "{}_{}_dialog.csv".format(stamp, row.chat_id)
    dialog_path = os.path.join(log_path, fname)
    dialog_df = pd.read_csv(dialog_path, header=0)[DIALOG_COLUMNS]
sessions.head()

Unnamed: 0,chat_id,date_time
0,mock,2022-10-30 21:21:11
1,mock,2022-10-30 21:21:12
2,mock,2022-11-02 14:23:00
3,mock,2022-11-02 14:43:38
4,mock,2022-11-02 19:40:24


In [95]:
def get_logs(path_prefix):
    """Load the dialog, click and prediction logs that share ``path_prefix``.

    The three files read are ``path_prefix`` followed by "dialog.csv",
    "click.csv" and "pred.csv". The dialog frame is restricted to
    DIALOG_COLUMNS; the click and prediction frames keep every column found in
    their files. Any frame that has a ``time`` column gets it parsed into
    datetimes via ``parse_df_time``.

    Returns:
        A ``(dialog, click, pred)`` tuple of DataFrames.
    """
    def _read(suffix):
        # All three logs are read the same way: first row is the header.
        return pd.read_csv(path_prefix + suffix, header=0)

    dialog = _read("dialog.csv")[DIALOG_COLUMNS]
    click = _read("click.csv")
    pred = _read("pred.csv")

    logs = (dialog, click, pred)
    for frame in logs:
        if "time" not in frame.columns:
            continue
        parse_df_time(frame)
    return logs

In [99]:
# response times
def compute_response_times(dialog, mask):
    """Return the gaps between consecutive timestamps of the rows picked by ``mask``.

    Only the rows of ``dialog`` selected by ``mask`` are considered. Each of
    them gets its ``time`` minus the ``time`` of the previous selected row; the
    first selected row has no predecessor, so its entry is a missing value.
    The result keeps the index labels of the selected rows.
    """
    selected_times = dialog[mask].time
    previous_times = selected_times.shift(1)
    return selected_times - previous_times

def compute_stats(dialog, click):
    """Summarise one chat session from its dialog and click logs.

    "L" entries are computed over rows where ``is_listener == True`` and "M"
    entries over rows where ``is_listener == False``. The response-time entries
    are the mean and median of ``compute_response_times`` for each group.

    Returns:
        A dict with the keys "span" (latest minus earliest ``time``),
        "# L utterances", "# M utterances", "# clicks" (row count of
        ``click``), "avg_l_response_time", "med_l_response_time",
        "avg_m_response_time" and "med_m_response_time", in that order.
    """
    listener_rows = dialog.is_listener == True
    non_listener_rows = dialog.is_listener == False

    lrt = compute_response_times(dialog, listener_rows)
    mrt = compute_response_times(dialog, non_listener_rows)

    stats = {}
    stats["span"] = dialog.time.max() - dialog.time.min()
    stats["# L utterances"] = sum(listener_rows)
    stats["# M utterances"] = sum(non_listener_rows)
    stats["# clicks"] = len(click)
    stats["avg_l_response_time"] = lrt.mean()
    stats["med_l_response_time"] = lrt.median()
    stats["avg_m_response_time"] = mrt.mean()
    stats["med_m_response_time"] = mrt.median()
    return stats

In [100]:
# Qianyi chat 1.
# The prefix is derived from log_path so the log directory is configured in one
# place instead of being repeated as an absolute path in every cell.
path_prefix = os.path.join(log_path, "2022-11-02_20-30-22_mock_")

dialog, click, pred = get_logs(path_prefix)
compute_stats(dialog, click)

{'span': Timedelta('0 days 00:11:26'),
 '# L utterances': 7,
 '# M utterances': 7,
 '# clicks': 7,
 'avg_l_response_time': Timedelta('0 days 00:01:51.500000'),
 'med_l_response_time': Timedelta('0 days 00:01:38.500000'),
 'avg_m_response_time': Timedelta('0 days 00:01:35'),
 'med_m_response_time': Timedelta('0 days 00:01:17')}

In [101]:
# Qianyi chat 2.
# The prefix is derived from log_path so the log directory is configured in one
# place instead of being repeated as an absolute path in every cell.
path_prefix = os.path.join(log_path, "2022-11-02_20-41-53_mock_")

dialog, click, pred = get_logs(path_prefix)
compute_stats(dialog, click)

{'span': Timedelta('0 days 00:05:35'),
 '# L utterances': 9,
 '# M utterances': 9,
 '# clicks': 0,
 'avg_l_response_time': Timedelta('0 days 00:00:41.875000'),
 'med_l_response_time': Timedelta('0 days 00:00:44.500000'),
 'avg_m_response_time': Timedelta('0 days 00:00:39.625000'),
 'med_m_response_time': Timedelta('0 days 00:00:37')}

In [102]:
# Print each utterance from rows where is_listener is False, with the
# "<|client|>" marker removed from the text.
print("Client's utterances:")
client_utterances = dialog.utterance[dialog.is_listener == False]
for x in client_utterances:
    print(x.replace("<|client|>", ""))

Client's utterances:
Hi
i'll take the exam for medical school admission next month
but I don't think I can get into any at all
what school on earth will admit me anyways??
yeah definitely--I don't have time to enjoy my college life either
study, study, and study
no friends no life. I can't stand with it anymore
i have no choice. If I don't get into a med school, I don't know what my value is
i have claimed that I'll be a surgeon literally to everyone
