In [None]:
import ast
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_curve, auc, ConfusionMatrixDisplay, precision_recall_curve
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np
from format import format_table, rename_detectors
import re

In [None]:
# Root folder of the input data and the folder holding one result CSV per detector.
data_path = '../../data'
result_path = '../../data/results'
# Detectors whose `probabilities` column is parsed in the loading loop as a
# bracketed, whitespace-separated list of floats.
detectors_with_probs_numpy = [
    'gpt2-finetuned-en3-all',
    'electra-small-discriminator-finetuned-en3-all',
    'electra-large-discriminator-finetuned-en3-all',
    'bert-base-multilingual-cased-finetuned-en3-all',
    'roberta-large-openai-detector-finetuned-en3-all',
    'xlm-roberta-large-finetuned-en3-all',
    'mdeberta-v3-base-finetuned-en3-all',
    'gpt2-medium-finetuned-en3-all',
    'mGPT-finetuned-en3-all',
    'opt-iml-max-1.3b-finetuned-en3-all',
    'simpleai-detector',
    'electra-large-discriminator-finetuned-en3-gpt-3.5-turbo',
    'electra-large-discriminator-finetuned-en3-opt-iml-max-1.3b',
    'electra-large-discriminator-finetuned-en3-text-davinci-003',
    'electra-large-discriminator-finetuned-en3-vicuna-13b',
    'electra-large-discriminator-finetuned-en3-gpt-4'
]

# Detectors whose `probabilities` column is parsed with ast.literal_eval.
detectors_with_probs = [
    'roberta-large-openai-detector',
    'grover',
    'llmdet',
    'zerogpt',
    'gptzero'
]

# Detectors for which only the hard label is used (no probability is read).
detectors_without_probs = [
    'gltr',
    'longformer',
]

# Order matters: threshold lists handed to get_results / get_results_model are
# indexed by position in this list, so the probability-based detectors come
# first, in the same order that is used when the thresholds are computed.
detectors = detectors_with_probs_numpy + detectors_with_probs + detectors_without_probs

# File name (inside data_path) of the reference dataset.
original = 'mgt_detection.csv'

In [None]:
# Reference dataset; later cells read its 'Generation' and 'label' columns.
original_df = pd.read_csv(f'{data_path}/{original}')

In [None]:
def change_label(row):
    """Map a textual label to a binary class: 0 for 'human', 1 for anything else."""
    return 0 if row == 'human' else 1
    

def get_index(row, df):
    """Return the index label of the first row of ``df`` whose 'Generation' matches ``row``.

    A value that is not equal to itself (NaN) is treated as missing; in that
    case the first row with a null 'Generation' is returned instead. An
    IndexError is raised when no row matches.
    """
    generations = df['Generation']
    is_missing = row != row  # only NaN-like values differ from themselves
    mask = generations.isnull() if is_missing else generations == row
    return df.index[mask][0]


def roc_graph(fpr, tpr, roc_auc, detector):
    """Draw and show a ROC curve in a new matplotlib figure.

    Args:
        fpr: x-coordinates of the curve (plotted as the false positive rate).
        tpr: y-coordinates of the curve (plotted as the true positive rate).
        roc_auc: number printed in the legend with two decimals.
        detector: name appended to the figure title.
    """
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (AUC = %0.2f)' % roc_auc)
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    # Upper limit slightly above 1 so a curve touching the top stays visible.
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'Receiver Operating Characteristic (ROC) Curve: {detector}')
    plt.legend(loc="lower right")
    plt.show()


def cm_graph(cm, detector):
    """Plot and show a confusion matrix with a blue colour map.

    Args:
        cm: confusion matrix passed to ConfusionMatrixDisplay.
        detector: name shown in the axes title.
    """
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap=plt.cm.Blues)
    disp.ax_.set_title(f'Detector: {detector}')
    plt.show()

In [None]:
# Load every detector's result file, align it with original_df and parse its columns.

results = {}

for detector in detectors:
    print(detector)
    df = pd.read_csv(f'{result_path}/{detector}.csv')
    # Index label of each generation inside original_df; sorting by it puts the
    # detector rows in the same order as the reference dataset.
    df['index'] = df['Generation'].apply(lambda text: get_index(text, original_df))
    df = df.drop(columns=['Unnamed: 0']).sort_values(by=['index'])
    df['label'] = df['pred'].apply(change_label)

    if detector in detectors_with_probs_numpy:
        # Bracketed, whitespace-separated numbers: strip the brackets, then split.
        df['probabilities'] = df['probabilities'].apply(
            lambda raw: [float(token) for token in raw[1:-1].split()]
        )

    if detector in detectors_with_probs:
        # Python-literal strings; for gptzero only the first element of the
        # parsed value is kept.
        df['probabilities'] = df['probabilities'].apply(
            lambda raw: ast.literal_eval(raw)[0] if detector == 'gptzero' else ast.literal_eval(raw)
        )

    results[detector] = df

In [None]:
# Binary ground truth: 0 for rows labelled 'human', 1 for everything else.
original_df['true_label'] = original_df['label'].apply(change_label)

In [None]:
def get_metrics(true_labels, predicted, predicted_probs, detector_name, threshold=0.5, visualize=True):
    """Compute macro precision/recall/F1 and, when scores are available, ROC AUC.

    Args:
        true_labels: ground-truth binary labels.
        predicted: hard predictions, or None to derive them from
            ``predicted_probs`` using ``threshold``.
        predicted_probs: scores for the positive class, or None when the
            detector only provides hard labels. Lists, NumPy arrays and pandas
            Series are all accepted.
        detector_name: name used in the plot titles.
        threshold: a score ``>= threshold`` is predicted as class 1.
        visualize: when True, show the ROC curve (if scores are available) and
            the confusion matrix.

    Returns:
        dict with the keys 'Macro Precision', 'Macro Recall', 'Macro F1-score'
        and 'AUC', each holding a one-element list. 'AUC' is NaN when no scores
        were supplied.

    Raises:
        TypeError: if both ``predicted`` and ``predicted_probs`` are None.
    """
    # Explicit None/length test: the truth value of a multi-element array or
    # Series is ambiguous and would raise.
    has_probs = predicted_probs is not None and len(predicted_probs) > 0

    if predicted is None:
        if predicted_probs is None:
            raise TypeError('get_metrics needs either predicted or predicted_probs.')
        predicted = [1 if x >= threshold else 0 for x in predicted_probs]

    macro_precision = precision_score(true_labels, predicted, average='macro')
    macro_recall = recall_score(true_labels, predicted, average='macro')
    macro_f1 = f1_score(true_labels, predicted, average='macro')

    roc_auc = np.nan
    if has_probs:
        fpr, tpr, _ = roc_curve(true_labels, predicted_probs)
        roc_auc = auc(fpr, tpr)
        if visualize:
            roc_graph(fpr, tpr, roc_auc, detector_name)

    data = {
        'Macro Precision': [macro_precision],
        'Macro Recall': [macro_recall],
        'Macro F1-score': [macro_f1],
        'AUC': [roc_auc],
    }

    if visualize:
        # Fixed label order so the matrix keeps a 2x2 layout.
        cm_graph(confusion_matrix(true_labels, predicted, labels=[0, 1]), detector_name)

    return data

In [None]:
def get_best_threshold(df, detectors):
    """Find, per detector, the threshold that is best on the ROC and on the PR curve.

    For every detector column the ROC-based choice maximises Youden's J
    (``tpr - fpr``) and the PR-based choice maximises the binary F1 score over
    the thresholds returned by ``precision_recall_curve``.

    Args:
        df: frame with a 'true_label' column and one score column per detector.
        detectors: names of the score columns to evaluate.

    Returns:
        Tuple ``(mean_roc_threshold, mean_pr_threshold, roc_thresholds,
        pr_thresholds)``; the last two are lists in the order of ``detectors``.
    """
    # Convert once; arrays make the ``scores >= threshold`` comparison explicit
    # instead of relying on NumPy's reflected comparison against a Python list.
    true_labels = np.asarray(df['true_label'])
    roc_best_thresholds = []
    pr_best_thresholds = []

    for detector in detectors:
        scores = np.asarray(df[detector])
        fpr, tpr, thresholds = roc_curve(true_labels, scores)
        _, _, thresholds_pr = precision_recall_curve(true_labels, scores)
        f1_scores = [f1_score(true_labels, scores >= threshold) for threshold in thresholds_pr]

        youden_j = tpr - fpr
        roc_best_thresholds.append(thresholds[np.argmax(youden_j)])
        pr_best_thresholds.append(thresholds_pr[np.argmax(f1_scores)])

    average_best_threshold_roc = np.mean(roc_best_thresholds)
    average_best_threshold_pr = np.mean(pr_best_thresholds)
    return average_best_threshold_roc, average_best_threshold_pr, roc_best_thresholds, pr_best_thresholds

In [None]:
# Add one column per detector to original_df: the hard label for label-only
# detectors, otherwise the second entry of each parsed probability list.
for detector in detectors:
    detector_results = results[detector]
    if detector in detectors_without_probs:
        column_values = list(detector_results['label'])
    else:
        column_values = list(detector_results['probabilities'].apply(lambda x: x[1]))
    original_df[detector] = column_values

In [None]:
def get_results(df, thresholds=0.5, visualize=True):
    """Evaluate every detector in ``detectors`` and collect the metrics in one frame.

    Args:
        df: frame with a 'true_label' column and one column per detector.
        thresholds: either a single number applied to every probability-based
            detector, or a list/tuple/array indexed by the detector's position
            in ``detectors``.
        visualize: forwarded to ``get_metrics``.

    Returns:
        DataFrame indexed by detector name with a 'threshold' column plus the
        metric columns produced by ``get_metrics``. Label-only detectors are
        reported with threshold 0.5 and are scored from their hard labels.
    """
    per_detector = isinstance(thresholds, (list, tuple, np.ndarray))
    true_labels = list(df['true_label'])
    frames = []

    for detector_idx, detector in enumerate(detectors):
        if detector in detectors_without_probs:
            threshold = 0.5
            metrics = get_metrics(true_labels, list(df[detector]), None, detector, visualize=visualize)
        else:
            threshold = thresholds[detector_idx] if per_detector else thresholds
            metrics = get_metrics(true_labels, None, list(df[detector]), detector, threshold, visualize=visualize)

        frames.append(
            pd.DataFrame(
                {
                    'detector': [detector],
                    'threshold': [threshold],
                    **metrics,
                }
            ).set_index('detector')
        )

    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Tune thresholds on the probability-based detectors only; the returned lists
# follow the order of `detectors_with_probs_numpy + detectors_with_probs`.
best_threshold, best_threshold_pr, best_thresholds, best_thresholds_pr = get_best_threshold(original_df, detectors_with_probs_numpy + detectors_with_probs)

## Results for the threshold 0.5

In [None]:
# Metrics for every detector at the default threshold of 0.5, without plots.
get_results(original_df, visualize=False)

## Results for the global best threshold based on ROC curve

In [None]:
# Metrics with each detector's own ROC-derived threshold (the list from get_best_threshold).
roc_df = get_results(original_df, best_thresholds, visualize=False)

In [None]:
# Rounded view of the ROC-threshold metrics.
# NOTE(review): this expression is directly followed by a function definition;
# if both sit in the same cell its value is not rendered - confirm the cell split.
roc_df[['threshold', 'Macro Precision', 'Macro Recall', 'Macro F1-score', 'AUC']].round(2)
def bootstrap_metrics(y_pred, y_true, threshold=0.5, calc_auc=True, n_iterations=1000, seed=None):
    """Bootstrap estimate of AUC and macro F1/precision/recall.

    The data are resampled with replacement ``n_iterations`` times. Each metric
    is reported as ``'<mean> +- <1.96 * std>'`` of its bootstrap distribution,
    i.e. a normal-approximation 95% interval.

    Args:
        y_pred: scores (or 0/1 labels) per sample; list, array or Series.
        y_true: ground-truth binary labels, same length as ``y_pred``.
        threshold: a score ``>= threshold`` is predicted as the positive class.
        calc_auc: when False, ROC AUC is skipped and reported as 'N/A'.
        n_iterations: number of bootstrap resamples.
        seed: optional seed for reproducible resampling. With None the global
            ``np.random`` state is used.

    Returns:
        dict with the keys 'AUC', 'Macro F1-score', 'Macro Precision' and
        'Macro Recall', each a formatted string.

    Raises:
        ValueError: on empty input, mismatched lengths or ``n_iterations < 1``.
    """
    # Plain arrays make the resampling purely positional; indexing a pandas
    # Series with an integer array is label-based and misbehaves when the
    # Series does not carry a default 0..n-1 index.
    scores = np.asarray(y_pred)
    labels = np.asarray(y_true)
    n_samples = len(scores)
    if n_samples == 0:
        raise ValueError('bootstrap_metrics needs at least one sample.')
    if len(labels) != n_samples:
        raise ValueError('y_pred and y_true must have the same length.')
    if n_iterations < 1:
        raise ValueError('n_iterations must be at least 1.')

    rng = np.random if seed is None else np.random.RandomState(seed)
    z_95 = 1.96  # two-sided 95% quantile of the standard normal distribution

    def mean_with_ci(values):
        return f'{np.mean(values):.3f} +- {z_95 * np.std(values):.3f}'

    auc_scores = []
    macro_f1_scores = []
    macro_precision_scores = []
    macro_recall_scores = []

    for _ in range(n_iterations):
        idx = rng.choice(n_samples, n_samples, replace=True)
        scores_sample = scores[idx]
        labels_sample = labels[idx]

        # ROC AUC is undefined for a resample containing a single class; such a
        # resample is left out of the AUC estimate instead of turning it into NaN.
        if calc_auc and np.unique(labels_sample).size > 1:
            fpr, tpr, _ = roc_curve(labels_sample, scores_sample)
            auc_scores.append(auc(fpr, tpr))

        predicted_sample = scores_sample >= threshold
        macro_f1_scores.append(f1_score(labels_sample, predicted_sample, average='macro'))
        macro_precision_scores.append(precision_score(labels_sample, predicted_sample, average='macro'))
        macro_recall_scores.append(recall_score(labels_sample, predicted_sample, average='macro'))

    auc_ci = mean_with_ci(auc_scores) if calc_auc and auc_scores else 'N/A'

    return {
        'AUC': auc_ci,
        'Macro F1-score': mean_with_ci(macro_f1_scores),
        'Macro Precision': mean_with_ci(macro_precision_scores),
        'Macro Recall': mean_with_ci(macro_recall_scores),
    }


In [None]:
# Build a table with the bootstrap confidence interval of each metric for
# every detector.
# Thresholds are looked up by detector name, so the result does not depend on
# where a detector sits inside `detectors`. The key order matches the call
# that produced `best_thresholds`.
threshold_by_detector = dict(zip(detectors_with_probs_numpy + detectors_with_probs, best_thresholds))

bootstrap_rows = []
for detector in detectors:
    if detector in detectors_without_probs:
        # Label-only detectors: 0/1 labels with the default threshold, no AUC.
        metrics = bootstrap_metrics(original_df[detector], original_df['true_label'], calc_auc=False)
        auc_ci = np.nan
    else:
        metrics = bootstrap_metrics(original_df[detector], original_df['true_label'], threshold_by_detector[detector])
        auc_ci = str(metrics['AUC'])

    # NOTE(review): 'Macro Recall' is computed by bootstrap_metrics but is not
    # part of this table - confirm that the omission is intended.
    bootstrap_rows.append({
        'detector': detector,
        'AUC': auc_ci,
        'Macro F1-score': str(metrics['Macro F1-score']),
        'Macro Precision': str(metrics['Macro Precision']),
    })

# Create the frame once instead of concatenating inside the loop.
bootstrap_df = pd.DataFrame(
    bootstrap_rows,
    columns=['detector', 'AUC', 'Macro F1-score', 'Macro Precision'],
).set_index('detector')

In [None]:
# Emit the bootstrap table as LaTeX after passing it through rename_detectors (imported from format).
print(rename_detectors(bootstrap_df).to_latex())

In [None]:
# LaTeX table of the ROC-threshold metrics; format_table (imported from format)
# receives the transposed frame with axis=1 and rounding=2, and its result is
# transposed back before export.
print(format_table(rename_detectors(roc_df[['threshold', 'Macro Precision', 'Macro Recall', 'Macro F1-score', 'AUC']]).T, axis=1, rounding=2).T.to_latex())

## Results for the global best threshold based on Precision-Recall curve

In [None]:
# Same evaluation with the per-detector thresholds that maximise F1 on the precision-recall curve.
get_results(original_df, best_thresholds_pr, visualize=False)

## Results based on the model

In [None]:
# Generator models evaluated one at a time in the per-model tables; the names
# are compared against the 'model' column of original_df.
models = [
    'gpt-3.5-turbo',
    'text-davinci-003',
    'text-curie-001',
    'text-babbage-001',
    'falcon-40b-instruct',
    'opt-iml-max-30b',
    'vicuna-33b-v1.3',
    'Llama-2-70b-chat-hf',
    'Mistral-7B-Instruct-v0.1',
    'gpt-4'
]

In [None]:
# Generation metadata (model, brief, narrative_idx per generated text). The
# path is built from the shared `data_path` setting instead of repeating it.
generation_df = pd.read_csv(f'{data_path}/data.csv')

In [None]:
# Inspect the loaded generation metadata.
generation_df

In [None]:
# Attach the generating model, brief and narrative to every row of original_df.
# Human rows get model 'human'; rows without a match keep the '' placeholders.
original_df['model'] = ''
original_df['brief'] = ''
original_df['narrative'] = ''

# One lookup row per distinct generated text (first occurrence wins), so each
# row is resolved by an index lookup instead of a scan over generation_df.
# Rows with a missing text are dropped because they can never equal a value.
generation_lookup = (
    generation_df
    .dropna(subset=['generated_text'])
    .drop_duplicates(subset='generated_text', keep='first')
    .set_index('generated_text')[['model', 'brief', 'narrative_idx']]
)

for index, row in original_df.iterrows():
    if row['label'] == 'human':
        original_df.at[index, 'model'] = 'human'
        continue
    text = row['Generation']
    if text in generation_lookup.index:
        original_df.at[index, 'model'] = generation_lookup.at[text, 'model']
        original_df.at[index, 'brief'] = generation_lookup.at[text, 'brief']
        original_df.at[index, 'narrative'] = generation_lookup.at[text, 'narrative_idx']
    

In [None]:
def get_results_model(df, thresholds=0.5):
    """Evaluate every detector in ``detectors`` on a subset of the data.

    Args:
        df: frame with a 'true_label' column and one column per detector.
        thresholds: a single number used for all detectors, or a
            list/tuple/array indexed by the detector's position in
            ``detectors``. Detectors beyond the end of the sequence fall back
            to 0.5.

    Returns:
        DataFrame indexed by detector name with a 'threshold' column plus the
        metric columns produced by ``get_metrics``.
    """
    per_detector = isinstance(thresholds, (list, tuple, np.ndarray))
    true_labels = list(df['true_label'])
    frames = []

    for detector_idx, detector in enumerate(detectors):
        if not per_detector:
            threshold = thresholds
        elif detector_idx < len(thresholds):
            threshold = thresholds[detector_idx]
        else:
            threshold = 0.5

        # NOTE(review): every detector column is passed as scores, including
        # the label-only detectors whose columns hold 0/1 labels. Their AUC is
        # therefore computed from hard labels, unlike get_results, which
        # reports NaN for them - confirm this is intended.
        metrics = get_metrics(true_labels, None, list(df[detector]), detector, visualize=False, threshold=threshold)
        frames.append(
            pd.DataFrame(
                {
                    'detector': [detector],
                    'threshold': [threshold],
                    **metrics,
                }
            ).set_index('detector')
        )

    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# One row per detector, one column per generator model; the cells are filled with "F1 / AUC" strings.
models_df = pd.DataFrame(index=detectors, columns=models)

In [None]:
# Per-model evaluation at the default threshold.
for model in models:
    # Human-written texts plus the generations of the current model.
    subset = original_df[original_df['model'].isin([model, 'human'])]
    model_df = format_table(get_results_model(subset).T, rounding=2).T
    for detector in detectors:
        f1 = model_df.loc[detector, 'Macro F1-score']
        roc_auc = model_df.loc[detector, 'AUC']
        models_df.loc[detector, model] = f'{f1} / {roc_auc}'

In [None]:
# Show the filled per-model table.
models_df

In [None]:
def rename_columns(df):
    """Return a copy of ``df`` whose model columns carry bold LaTeX display names.

    Columns that are not listed in the mapping are kept as they are. The input
    frame is not modified.
    """
    display_names = {
        'gpt-3.5-turbo': '\\textbf{ChatGPT}',
        'text-davinci-003': '\\textbf{GPT-3 Davinci}',
        'text-curie-001': '\\textbf{GPT-3 Curie}',
        'text-babbage-001': '\\textbf{GPT-3 Babbage}',
        'falcon-40b-instruct': '\\textbf{Falcon}',
        'opt-iml-max-30b': '\\textbf{OPT-IML-Max}',
        'vicuna-33b-v1.3': '\\textbf{Vicuna}',
        'Llama-2-70b-chat-hf': '\\textbf{Llama2}',
        'Mistral-7B-Instruct-v0.1': '\\textbf{Mistral}',
        'gpt-4': '\\textbf{GPT-4}',
    }
    # Non-inplace rename: the caller's frame keeps its original column names.
    return df.rename(columns=display_names)

In [None]:
# LaTeX export of the per-model table (default threshold) with renamed rows and columns.
print(rename_columns(rename_detectors(models_df)).to_latex())

In [None]:
# Fresh, empty table for the second pass, which uses the ROC-derived per-detector thresholds.
models_df = pd.DataFrame(index=detectors, columns=models)

In [None]:
# Per-model evaluation with the ROC-derived per-detector thresholds.
for model in models:
    # Human-written texts plus the generations of the current model.
    subset = original_df[original_df['model'].isin([model, 'human'])]
    model_df = format_table(get_results_model(subset, thresholds=best_thresholds).T, rounding=2).T

    for detector in detectors:
        f1 = model_df.loc[detector, 'Macro F1-score']
        roc_auc = model_df.loc[detector, 'AUC']
        models_df.loc[detector, model] = f'{f1} / {roc_auc}'

In [None]:
# Show the filled per-model table.
models_df

In [None]:
# LaTeX export of the per-model table; runs of spaces are collapsed to a single space.
print(re.sub(' +', ' ', rename_columns(rename_detectors(models_df)).to_latex()))