In [27]:
import numpy as np
import pandas as pd
import random
from convokit import Corpus, download

In [28]:
# Which corpus to run the random baseline on. The cells below handle two values:
# "wikiconv" and "cmv".
corpus_name = "cmv"

In [29]:
# Name of the conversation-level metadata field that is used as the label
# (whether the conversation goes awry); it differs between the two corpora.
if corpus_name == "wikiconv":
    label_metadata = "conversation_has_personal_attack"
else:
    label_metadata = "has_removed_comment"

In [30]:
# Download (or reuse the cached copy of) the selected corpus and load it.
if corpus_name == "wikiconv":
    corpus = Corpus(filename=download("conversations-gone-awry-corpus"))
elif corpus_name == "cmv":
    corpus = Corpus(filename=download("conversations-gone-awry-cmv-corpus"))
else:
    # Fail loudly: otherwise `corpus` would stay undefined (or, worse, keep a
    # stale value from an earlier run of this cell) and later cells would
    # silently evaluate the wrong data.
    raise ValueError(
        "Unsupported corpus_name {!r}; expected 'wikiconv' or 'cmv'".format(corpus_name)
    )

Dataset already exists at /home/jonathan/.convokit/downloads/conversations-gone-awry-cmv-corpus


In [31]:
# Compute the probability of randomly predicting derailment as the utterance-level
# class probability on the training split:
#   p_awry = (# utts followed by a toxic comment) / (# utts a forecast is made for)
#
# The denominator uses the same eligibility rule as the forecasting cell below:
# in wikiconv the section header and the toxic comment itself are part of the
# conversation but never receive a forecast, so they are not counted. For cmv
# every utterance is counted.
train_convos = list(corpus.iter_conversations(lambda c: c.meta['split'] == "train"))
n_utts = sum(
    1
    for convo in train_convos
    for utt in convo.iter_utterances()
    if not (
        corpus_name == "wikiconv"
        and (utt.meta['is_section_header'] or utt.meta['comment_has_personal_attack'])
    )
)
# 1 toxic utt per awry convo, so the number of awry conversations is the numerator
n_awry = sum(1 for convo in train_convos if convo.meta[label_metadata])
p_awry = n_awry / n_utts

In [32]:
# Display the estimated probability used for the random forecasts below.
p_awry

0.07931234305582384

In [33]:
# Fix the seed so that re-running this cell reproduces the same coin flips.
random.seed(2024)

# Forecasts are only generated for test set conversations (no predictions are
# made for the other splits).
for test_convo in corpus.iter_conversations(lambda c: c.meta['split'] == "test"):
    for utterance in test_convo.iter_utterances():
        # in wikiconv, the section header and the actual toxic comment get no forecast
        skip_utterance = corpus_name == "wikiconv" and (
            utterance.meta['is_section_header'] or utterance.meta['comment_has_personal_attack']
        )
        if not skip_utterance:
            # biased coin: predict derailment (1) with probability p_awry, else 0
            utterance.meta["forecast"] = int(random.random() < p_awry)

In [34]:
# Collapse the utterance-level forecasts into one prediction per test conversation:
# the prediction is the maximum forecast over the conversation's utterances, so a
# single positive forecast flags the whole conversation. Utterances without a
# forecast contribute -1.
conversational_forecasts_df = pd.DataFrame(
    [
        (
            convo.id,
            int(convo.meta[label_metadata]),
            np.max([u.meta.get('forecast', -1) for u in convo.iter_utterances()]),
        )
        for convo in corpus.iter_conversations()
        if convo.meta['split'] == "test"
    ],
    columns=["convo_id", "label", "prediction"],
).set_index("convo_id")

# conversation-level accuracy: fraction of conversations whose prediction equals the label
print(conversational_forecasts_df["label"].eq(conversational_forecasts_df["prediction"]).mean())

0.506578947368421


In [35]:
# in addition to accuracy, we can also consider applying other metrics at the conversation level, such as precision/recall
def get_pr_stats(preds, labels):
    """Print precision, recall, false positive rate and F1 for binary predictions.

    Parameters
    ----------
    preds, labels
        Objects supporting elementwise ``==`` and ``&`` followed by ``.sum()``
        (the notebook passes pandas Series). Entries equal to 1 are treated as
        positive and entries equal to 0 as negative; an entry with any other
        value is not counted in any cell of the confusion matrix.

    Returns
    -------
    None
        The statistics are printed, not returned.

    Notes
    -----
    A ratio with a zero denominator is undefined and is printed as ``nan``.
    F1 is printed as 0.0 when there are no true positives but at least one
    false positive or false negative, and as ``nan`` only when tp, fp and fn
    are all zero.
    """
    # plain Python ints, so the divisions below never emit numpy RuntimeWarnings
    tp = int(((labels==1)&(preds==1)).sum())
    fp = int(((labels==0)&(preds==1)).sum())
    tn = int(((labels==0)&(preds==0)).sum())
    fn = int(((labels==1)&(preds==0)).sum())

    def _ratio(numerator, denominator):
        # undefined ratios are reported as nan instead of dividing by zero
        return numerator / denominator if denominator else float("nan")

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    false_positive_rate = _ratio(fp, fp + tn)

    if tp:
        # harmonic mean of precision and recall, written via their reciprocals
        f1 = 2 / (((tp + fp) / tp) + ((tp + fn) / tp))
    elif fp or fn:
        # no true positives but some errors: F1 is well-defined and equals 0
        f1 = 0.0
    else:
        f1 = float("nan")

    print("Precision = {0:.4f}, recall = {1:.4f}".format(precision, recall))
    print("False positive rate =", false_positive_rate)
    print("F1 =", f1)

# Report the conversation-level metrics for the random-baseline predictions.
get_pr_stats(conversational_forecasts_df.prediction, conversational_forecasts_df.label)

Precision = 0.5093, recall = 0.3596
False positive rate = 0.34649122807017546
F1 = 0.42159383033419023
