In [1]:
import json
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import jsonlines
import altair as alt
from vega_datasets import data



# Directories (relative to the project root) that hold importable project code.
SRC_PATH = ["src"]

# Move up to the project root so the relative paths used in this notebook
# ("src", "notebooks/plots/...") resolve. Presumably the notebook is started
# from a directory one level below the root -- TODO confirm.
# The guard keeps the cell idempotent: an unconditional chdir would climb one
# more level every time the cell is re-run in the same kernel.
if not all(os.path.isdir(module_path) for module_path in SRC_PATH):
    os.chdir("../")

# Make the project modules importable, without duplicating sys.path entries.
for module_path in SRC_PATH:
    if module_path not in sys.path:
        sys.path.append(module_path)

from utils import *

# Experiment 2

## Full finetuning results

In [2]:
# Datasets every detector is evaluated on.
dataset_names = ["phi", "gemma", "mistral", "gemma_chat", "zephyr", "llama3", "round_robin"]
training_method = "full_finetuning"

# base detector -> {run identifier -> dataset the detector was trained on}.
# The run identifiers look like timestamps of the training runs -- TODO confirm.
trained_on_models = {
    "distil_roberta-base": {
        "10_06_1040": "phi",
        "10_06_1047": "gemma",
        "10_06_1054": "mistral",
        "10_06_1100": "round_robin",
    },
    "roberta_large": {
        "10_06_1156": "phi",
        "10_06_1221": "gemma",
        "10_06_1246": "mistral",
        "10_06_1312": "round_robin",
    },
    "electra_large": {
        "10_06_1146": "phi",
        "10_06_1215": "gemma",
        "10_06_1242": "mistral",
        "10_06_1308": "round_robin",
    },
}

# NOTE(review): the frame is named "freeze_base_df" although it holds the
# full-finetuning results; the name is kept because later cells rely on it.
freeze_base_df = create_df_from_test_logs(
    training_method,
    trained_on_models,
    dataset_names,
    use_test_at_threshold=True,
)

In [3]:
# Order rows by training dataset, then regroup them by test dataset in a fixed
# order so that charts drawn with sort=None follow `dataset_order`.
dataset_order = ["phi", "gemma", "mistral", "round_robin", "gemma_chat", "zephyr", "llama3"]
freeze_base_df = (
    freeze_base_df
    .sort_values(by="trained_on_dataset")
    .set_index("dataset")
    .loc[dataset_order]
    .reset_index()
)

# Short labels for the base detectors; an unknown base detector raises KeyError.
detector_name_to_short_name = {"distil_roberta-base": "distil", "roberta_large": "roberta", "electra_large": "electra"}
freeze_base_df["detector_short_name"] = [
    detector_name_to_short_name[base_name] for base_name in freeze_base_df["base_detector"]
]

# detector_name is "<detector_short_name>_<trained_on_dataset>"
short_names = freeze_base_df["detector_short_name"]
training_datasets = freeze_base_df["trained_on_dataset"]
freeze_base_df["detector_name"] = short_names + "_" + training_datasets
freeze_base_df.head()

Unnamed: 0,dataset,accuracy,precision,recall,f1_score,fp_rate,tp_rate,std_accuracy,std_precision,std_recall,...,std_tp_rate_at_given_threshold,TP_at_given_threshold,TN_at_given_threshold,FP_at_given_threshold,FN_at_given_threshold,base_detector,trained_on_dataset,detector,detector_short_name,detector_name
0,phi,0.94657,0.916307,0.982998,0.948458,0.089908,0.982998,0.004918,0.00836,0.004118,...,0.006015,957.251,946.022,50.853,37.874,roberta_large,gemma,roberta_large_gemma,roberta,roberta_gemma
1,phi,0.964892,0.963227,0.966731,0.964959,0.036942,0.966731,0.004012,0.005942,0.005582,...,0.004729,973.021,947.908,48.967,22.104,electra_large,gemma,electra_large_gemma,electra,electra_gemma
2,phi,0.941804,0.925778,0.960711,0.942895,0.077127,0.960711,0.005395,0.008341,0.00619,...,0.007422,942.343,941.884,54.991,52.782,distil_roberta-base,gemma,distil_roberta-base_gemma,distil,distil_gemma
3,phi,0.966051,0.956237,0.976857,0.96642,0.044763,0.976857,0.003977,0.006481,0.004665,...,0.004707,972.198,952.766,44.109,22.927,electra_large,mistral,electra_large_mistral,electra,electra_mistral
4,phi,0.951109,0.917786,0.991064,0.952998,0.088907,0.991064,0.004701,0.008208,0.002856,...,0.004597,973.233,947.736,49.139,21.892,roberta_large,mistral,roberta_large_mistral,roberta,roberta_mistral


In [4]:
# Display the first 20 rows of the reordered results frame.
freeze_base_df.head(20)

Unnamed: 0,dataset,accuracy,precision,recall,f1_score,fp_rate,tp_rate,std_accuracy,std_precision,std_recall,...,std_tp_rate_at_given_threshold,TP_at_given_threshold,TN_at_given_threshold,FP_at_given_threshold,FN_at_given_threshold,base_detector,trained_on_dataset,detector,detector_short_name,detector_name
0,phi,0.94657,0.916307,0.982998,0.948458,0.089908,0.982998,0.004918,0.00836,0.004118,...,0.006015,957.251,946.022,50.853,37.874,roberta_large,gemma,roberta_large_gemma,roberta,roberta_gemma
1,phi,0.964892,0.963227,0.966731,0.964959,0.036942,0.966731,0.004012,0.005942,0.005582,...,0.004729,973.021,947.908,48.967,22.104,electra_large,gemma,electra_large_gemma,electra,electra_gemma
2,phi,0.941804,0.925778,0.960711,0.942895,0.077127,0.960711,0.005395,0.008341,0.00619,...,0.007422,942.343,941.884,54.991,52.782,distil_roberta-base,gemma,distil_roberta-base_gemma,distil,distil_gemma
3,phi,0.966051,0.956237,0.976857,0.96642,0.044763,0.976857,0.003977,0.006481,0.004665,...,0.004707,972.198,952.766,44.109,22.927,electra_large,mistral,electra_large_mistral,electra,electra_mistral
4,phi,0.951109,0.917786,0.991064,0.952998,0.088907,0.991064,0.004701,0.008208,0.002856,...,0.004597,973.233,947.736,49.139,21.892,roberta_large,mistral,roberta_large_mistral,roberta,roberta_mistral
5,phi,0.951515,0.926453,0.980971,0.952911,0.077973,0.980971,0.00483,0.008062,0.004413,...,0.00487,970.144,942.708,54.167,24.981,distil_roberta-base,mistral,distil_roberta-base_mistral,distil,distil_mistral
6,phi,0.95905,0.940534,0.98013,0.959906,0.062069,0.98013,0.004398,0.007162,0.004412,...,0.005231,967.185,947.394,49.481,27.94,roberta_large,phi,roberta_large_phi,roberta,roberta_phi
7,phi,0.985048,0.980324,0.989984,0.985123,0.019895,0.989984,0.002638,0.004324,0.003091,...,0.001954,991.116,954.779,42.096,4.009,electra_large,phi,electra_large_phi,electra,electra_phi
8,phi,0.960386,0.937189,0.986981,0.961423,0.066249,0.986981,0.004487,0.007587,0.003589,...,0.003911,978.011,944.042,52.833,17.114,distil_roberta-base,phi,distil_roberta-base_phi,distil,distil_phi
9,phi,0.948344,0.929801,0.969998,0.949451,0.073354,0.969998,0.004854,0.007748,0.005478,...,0.006749,946.937,952.093,44.782,48.188,roberta_large,round_robin,roberta_large_round_robin,roberta,roberta_round_robin


In [5]:
# Give the thresholded TP-rate column the short name "TPR", which the chart
# encodings below refer to.
freeze_base_df = freeze_base_df.rename({"tp_rate_at_given_threshold": "TPR"}, axis="columns")

In [6]:
# Display the renamed column (formerly "tp_rate_at_given_threshold").
freeze_base_df["TPR"]

0     0.961937
1     0.977788
2     0.946948
3     0.976954
4     0.977997
        ...   
79    0.952824
80    0.978060
81    0.968573
82    0.902104
83    0.993079
Name: TPR, Length: 84, dtype: float64

### No cross model

In [7]:
# Keep only the rows where a detector is tested on the dataset it was trained on.
same_dataset_rows = freeze_base_df["trained_on_dataset"] == freeze_base_df["dataset"]
no_cross_model_df = freeze_base_df[same_dataset_rows]

# Both layers share the data, the y axis and the panel size.
no_cross_base = alt.Chart(no_cross_model_df).encode(
    alt.Y('dataset:N', sort=None),
).properties(width=100, height=200)

# Coloured cells: TPR on a red-yellow-green scale restricted to [0.90, 1].
heatmap = no_cross_base.mark_rect().encode(
    alt.Color('TPR:Q').scale(scheme='redyellowgreen', domain=(0.90, 1)),
)

# Numeric labels drawn on top of the cells; white text for TPR <= 0.5.
heatmap_text = no_cross_base.mark_text(baseline='middle').encode(
    text='TPR:Q',
    color=alt.condition(alt.datum.TPR > 0.5, alt.value('black'), alt.value('white')),
)

# One panel per detector; labels are formatted with two decimals.
chart = (
    alt.layer(heatmap, heatmap_text)
    .facet(column=alt.Column("detector_short_name:N", title="Detector"))
    .configure(numberFormat='0.2f')
    .configure_axis(labelFontSize=12, titleFontSize=12)
)
chart.save("notebooks/plots/heatmap_no_cross_llm.png")
chart

In [8]:
# Bar-chart view of the same in-distribution results: one panel per dataset,
# one bar per detector. The y axis is clamped to [0.85, 1].
bar_chart = (
    alt.Chart(no_cross_model_df)
    .mark_bar()
    .encode(
        x=alt.X('detector_short_name:N', sort=None, title=None),
        y=alt.Y('TPR:Q', scale=alt.Scale(domain=(0.85, 1), clamp=True)),
        color=alt.Color("detector_short_name:N", title="Detector"),
        column=alt.Column("dataset:N", title="Train and Test dataset"),
    )
    .properties(width=100, height=200)
    .configure_axis(labelFontSize=18, titleFontSize=18)
    .configure_legend(labelFontSize=18, titleFontSize=18, titleLimit=0)
    .configure_header(titleFontSize=18, labelFontSize=18)
)
bar_chart.save("notebooks/plots/heatmap_no_cross_llm_bar.png")
bar_chart

### Cross model

In [9]:
# Cross-model heatmap for the electra detector: rows are the dataset the
# detector was trained on, columns the dataset it is tested on, cells the TPR.
# Rows tested on the chat-model datasets are excluded.
chat_models = ["gemma_chat", "zephyr", "llama3"]
no_chat_df = freeze_base_df[~freeze_base_df["dataset"].isin(chat_models)]
# select electra only
no_chat_df = no_chat_df[no_chat_df["detector_short_name"] == "electra"]

# Both layers share the data, the axes and the panel size.
cross_base = alt.Chart(no_chat_df).encode(
    alt.X('dataset:N', sort="x", title="Tested on"),
    alt.Y('trained_on_dataset:N', sort="y", title="Trained on"),
).properties(width=300, height=300)

# Coloured cells: TPR on a red-yellow-green scale restricted to [0.85, 1].
heatmap = cross_base.mark_rect().encode(
    alt.Color('TPR:Q').scale(scheme='redyellowgreen', domain=[0.85, 1]),
)

# Numeric labels. The text colour follows the displayed field (TPR); the
# previous condition tested `accuracy`, which is not what the cell shows.
heatmap_text = cross_base.mark_text(baseline='middle').encode(
    text='TPR:Q',
    color=alt.condition(alt.datum.TPR > 0.5, alt.value('black'), alt.value('white')),
)

heatmap_cross = (
    alt.layer(heatmap, heatmap_text)
    .facet(column=alt.Column("detector_short_name:N", title="Detector"))
    .configure(numberFormat='0.2f')
    .configure_axis(labelFontSize=18, titleFontSize=18)
    .configure_legend(labelFontSize=18, titleFontSize=18, titleLimit=0)
    .configure_header(titleFontSize=18, labelFontSize=18)
)

heatmap_cross.save("notebooks/plots/heatmap_cross_llm.png")
heatmap_cross

### Chat Models only

In [10]:
# Heatmap of the electra detector on the chat-model datasets only: rows are
# the dataset the detector was trained on, columns the chat dataset it is
# tested on, cells the TPR.
chat_models = ["gemma_chat", "zephyr", "llama3"]
chat_only_df = freeze_base_df[freeze_base_df["dataset"].isin(chat_models)]
chat_only_electra_df = chat_only_df[chat_only_df["detector_short_name"] == "electra"]

# Both layers share the data, the axes and the panel size.
chat_only_base = alt.Chart(chat_only_electra_df).encode(
    alt.X('dataset:N', sort="x", title="Tested on"),
    alt.Y('trained_on_dataset:N', sort=None, title="Trained on"),
).properties(width=300, height=300)

# Coloured cells: TPR on a red-yellow-green scale restricted to [0.85, 1].
heatmap = chat_only_base.mark_rect().encode(
    alt.Color('TPR:Q').scale(scheme='redyellowgreen', domain=[0.85, 1]),
)

# Numeric labels. The text colour follows the displayed field (TPR); the
# previous condition tested `accuracy`, which is not what the cell shows.
heatmap_text = chat_only_base.mark_text(baseline='middle').encode(
    text='TPR:Q',
    color=alt.condition(alt.datum.TPR > 0.5, alt.value('black'), alt.value('white')),
)

chart = (
    alt.layer(heatmap, heatmap_text)
    .facet(column=alt.Column("detector_short_name:N", title="Detector"))
    .configure(numberFormat='0.2f')
    .configure_axis(labelFontSize=18, titleFontSize=18)
    .configure_legend(labelFontSize=18, titleFontSize=18, titleLimit=0)
    .configure_header(titleFontSize=18, labelFontSize=18)
)

chart.save("notebooks/plots/heatmap_chat_only.png")
chart

### Zero shot results

In [11]:
# Test logs of the two zero-shot detectors:
# detector -> {run identifier -> dataset it was tested on}.
# The run identifiers look like timestamps -- TODO confirm.
fast_detect_gpt_results = {
    "fast_detect_gpt": {
        "07_05_0942": "phi",
        "07_05_0949": "gemma",
        "07_05_0956": "mistral",
        "07_05_1003": "round_robin",
        "07_05_1007": "gemma_chat",
        "07_05_1014": "zephyr",
        "07_05_1020": "llama3",
    }
}
roberta_open_ai_results = {
    "roberta_base_open_ai": {
        "06_05_1716": "phi",
        "06_05_1718": "gemma",
        "06_05_1719": "mistral",
        "06_05_1721": "round_robin",
        "06_05_1723": "gemma_chat",
        "06_05_1724": "zephyr",
        "06_05_1726": "llama3",
    }
}

# Rebuild the fine-tuned results from the logs (this replaces the reordered
# frame used by the charts above), then add the zero-shot rows to it.
freeze_base_df = create_df_from_test_logs("full_finetuning", trained_on_models, dataset_names, use_test_at_threshold=True)
for zero_shot_results in (fast_detect_gpt_results, roberta_open_ai_results):
    freeze_base_df = add_test_logs_to_results_df(
        freeze_base_df,
        zero_shot_results,
        use_timestamp=False,
        use_test_at_threshold=True,
    )

# Short labels, now covering the zero-shot detectors too; an unknown base
# detector raises KeyError.
detector_name_to_short_name = {
    "distil_roberta-base": "distil",
    "roberta_large": "roberta",
    "electra_large": "electra",
    "fast_detect_gpt": "fast_detect_gpt",
    "roberta_base_open_ai": "roberta_open_ai",
}
freeze_base_df["detector_short_name"] = [
    detector_name_to_short_name[base_name] for base_name in freeze_base_df["base_detector"]
]

In [12]:
# Display the combined results frame (fine-tuned detectors plus the two
# zero-shot detectors added in the previous cell).
freeze_base_df

Unnamed: 0,accuracy,precision,recall,f1_score,fp_rate,tp_rate,std_accuracy,std_precision,std_recall,std_f1_score,...,TN_at_given_threshold,FP_at_given_threshold,FN_at_given_threshold,base_detector,trained_on_dataset,detector,dataset,fpr,tpr,detector_short_name
8,0.952516,0.926617,0.982879,0.953898,0.077860,0.982879,0.004766,0.008003,0.004102,0.004754,...,934.139,55.211,34.199,distil_roberta-base,gemma,distil_roberta-base_gemma,gemma,,,distil
10,0.908952,0.920550,0.895295,0.907702,0.077376,0.895295,0.006518,0.008925,0.010024,0.007053,...,941.679,55.196,130.506,distil_roberta-base,gemma,distil_roberta-base_gemma,gemma_chat,,,distil
12,0.930729,0.923954,0.938833,0.931303,0.077376,0.938833,0.005710,0.008554,0.007360,0.005870,...,941.679,55.196,88.014,distil_roberta-base,gemma,distil_roberta-base_gemma,llama3,,,distil
9,0.930325,0.923356,0.938329,0.930750,0.077663,0.938329,0.005580,0.008211,0.007635,0.005770,...,930.832,55.006,80.168,distil_roberta-base,gemma,distil_roberta-base_gemma,mistral,,,distil
7,0.941804,0.925778,0.960711,0.942895,0.077127,0.960711,0.005395,0.008341,0.006190,0.005481,...,941.884,54.991,52.782,distil_roberta-base,gemma,distil_roberta-base_gemma,phi,,,distil
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,0.778583,0.500000,0.412911,0.452202,0.412911,0.412911,0.009646,0.000000,0.012285,0.007389,...,51.109,935.053,382.938,roberta_base_open_ai,z,roberta_base_open_ai,mistral,,,roberta_open_ai
3,0.840213,0.500000,0.381859,0.430265,0.381859,0.381859,0.041346,0.000000,0.061073,0.039891,...,3.029,33.051,35.909,roberta_base_open_ai,z,roberta_base_open_ai,round_robin,,,roberta_open_ai
4,0.795837,0.500000,0.427525,0.460836,0.427525,0.427525,0.009095,0.000000,0.012283,0.007160,...,52.945,942.180,420.883,roberta_base_open_ai,z,roberta_base_open_ai,gemma_chat,,,roberta_open_ai
5,0.662844,0.500000,0.310558,0.382962,0.310558,0.310558,0.011061,0.000000,0.013812,0.010522,...,43.050,840.765,139.928,roberta_base_open_ai,z,roberta_base_open_ai,zephyr,,,roberta_open_ai


In [13]:
# The frame was rebuilt from the logs, so the thresholded TP-rate column needs
# its short name "TPR" again for the chart encodings below.
freeze_base_df = freeze_base_df.rename({"tp_rate_at_given_threshold": "TPR"}, axis="columns")

In [14]:
# Heatmap of the zero-shot detectors: one row per detector, one column per
# test dataset, cells show the TPR.
zero_shot_detectors = ["fast_detect_gpt", "roberta_base_open_ai"]
zero_shot_only_only_df = freeze_base_df[freeze_base_df["detector"].isin(zero_shot_detectors)]

# Both layers share the data, the axes and the panel size.
zero_shot_base = alt.Chart(zero_shot_only_only_df).encode(
    alt.X('dataset:N', sort="x", title="Testing on"),
    alt.Y('detector_short_name:N', sort=None, title="Detector"),
).properties(width=200, height=200)

# Coloured cells; the colour domain is left to Altair (no fixed range here).
heatmap = zero_shot_base.mark_rect().encode(
    alt.Color('TPR:Q').scale(scheme='redyellowgreen'),
)

# Numeric labels. The text colour follows the displayed field (TPR); the
# previous condition tested `accuracy`, so labels of cells with TPR <= 0.5
# never switched to white.
heatmap_text = zero_shot_base.mark_text(baseline='middle').encode(
    text='TPR:Q',
    color=alt.condition(alt.datum.TPR > 0.5, alt.value('black'), alt.value('white')),
)

chart = (
    alt.layer(heatmap, heatmap_text)
    .configure(numberFormat='0.2f')
    .configure_axis(labelFontSize=12, titleFontSize=12)
)

# NOTE(review): a later cell saves its faceted variant to this same path and
# overwrites this file -- confirm which version should be kept.
chart.save("notebooks/plots/heatmap_zero_shot.png")
chart

In [15]:
# Zero-shot heatmap again, this time with one panel per detector instead of a
# detector axis.
zero_shot_detectors = ["fast_detect_gpt", "roberta_base_open_ai"]
is_zero_shot = freeze_base_df["detector"].isin(zero_shot_detectors)
zero_shot_only_only_df = freeze_base_df[is_zero_shot]

# Both layers share the data, the x axis and the panel size.
zero_shot_facet_base = alt.Chart(zero_shot_only_only_df).encode(
    alt.X('dataset:N', sort="x", title="Testing on"),
).properties(width=200, height=100)

# Coloured cells; the colour domain is left to Altair (no fixed range here).
heatmap = zero_shot_facet_base.mark_rect().encode(
    alt.Color('TPR:Q').scale(scheme='redyellowgreen'),
)

# Numeric labels; white text for TPR <= 0.5.
heatmap_text = zero_shot_facet_base.mark_text(baseline='middle').encode(
    text='TPR:Q',
    color=alt.condition(alt.datum.TPR > 0.5, alt.value('black'), alt.value('white')),
)

chart = (
    alt.layer(heatmap, heatmap_text)
    .facet(column=alt.Column("detector_short_name:N", title="Detector"))
    .configure(numberFormat='0.2f')
    .configure_axis(labelFontSize=12, titleFontSize=12)
)

chart.save("notebooks/plots/heatmap_zero_shot.png")
chart

In [19]:
# Compare the zero-shot detectors with one fine-tuned detector
# (electra_large_phi) on the chat-model datasets.
zero_shot_detectors_and_best_trained = ["fast_detect_gpt", "roberta_base_open_ai", "electra_large_phi"]
is_selected_detector = freeze_base_df["detector"].isin(zero_shot_detectors_and_best_trained)
zero_shot_only_only_df = freeze_base_df[is_selected_detector]

# only keep chat datasets
chat_models = ["gemma_chat", "zephyr", "llama3"]
is_chat_dataset = zero_shot_only_only_df["dataset"].isin(chat_models)
zero_shot_only_only_df = zero_shot_only_only_df[is_chat_dataset]

# One panel per test dataset, one bar per detector; the y axis uses the
# default scale.
bar_chart = (
    alt.Chart(zero_shot_only_only_df)
    .mark_bar()
    .encode(
        x=alt.X('detector_short_name:N', sort=None, title=None),
        y=alt.Y('TPR:Q'),
        color=alt.Color("detector_short_name:N", title="Detector"),
        column=alt.Column("dataset:N", title="Test dataset"),
    )
    .properties(width=100, height=200)
    .configure_axis(labelFontSize=18, titleFontSize=18)
    .configure_legend(labelFontSize=18, titleFontSize=18, titleLimit=0)
    .configure_header(titleFontSize=18, labelFontSize=18)
)
bar_chart.save("notebooks/plots/heatmap_zero_shot_bar.png")
bar_chart

In [13]:
# Accuracy of the zero-shot detectors and of one fine-tuned detector
# (electra_large_phi) on every test dataset, laid out as a grid of small bar
# charts.
zero_shot_detectors_and_best_trained = ["fast_detect_gpt", "roberta_base_open_ai", "electra_large_phi"]
zero_shot_only_only_df = freeze_base_df[freeze_base_df["detector"].isin(zero_shot_detectors_and_best_trained)]

# Template chart; it is filtered down to a single dataset for each grid cell.
bar_chart = alt.Chart(zero_shot_only_only_df).mark_bar().encode(
    alt.X('detector_short_name:N', sort=None, title=None),
    alt.Y('accuracy:Q').scale(alt.Scale(domain=(0.6, 1), clamp=True)),
    alt.Color("detector_short_name:N", title="Detector"),
    column=alt.Column("dataset:N", title=None)
).properties(
    width=100,
    height=100,
)

# spread the chart into two rows
grid_chart = alt.vconcat()
nb_rows = 2
nb_dataset_per_row = 4

datasets = zero_shot_only_only_df["dataset"].unique()
for row_start in range(0, nb_rows * nb_dataset_per_row, nb_dataset_per_row):
    row = alt.hconcat()
    # Slicing stops at the end of `datasets`, so the last row may be shorter.
    for dataset_name in datasets[row_start:row_start + nb_dataset_per_row]:
        row |= bar_chart.transform_filter(alt.datum.dataset == dataset_name)
    grid_chart &= row

# configure_* returns a new chart instead of modifying the chart in place, so
# the result has to be assigned back; otherwise the saved and displayed chart
# silently lose these font settings.
grid_chart = grid_chart.configure_axis(
    labelFontSize=12,
    titleFontSize=12,
).configure_legend(
    labelFontSize=12,
    titleFontSize=12,
    titleLimit=0
).configure_header(
    titleFontSize=12,
    labelFontSize=12
)
grid_chart.save("notebooks/plots/heatmap_zero_shot_bar.png")

grid_chart

In [14]:
# Accuracy of the zero-shot detectors and of one fine-tuned detector
# (electra_large_phi) on the three chat-model datasets, as a single row of
# small bar charts.
zero_shot_detectors_and_best_trained = ["fast_detect_gpt", "roberta_base_open_ai", "electra_large_phi"]
zero_shot_only_only_df = freeze_base_df[freeze_base_df["detector"].isin(zero_shot_detectors_and_best_trained)]

# Template chart; it is filtered down to a single dataset for each grid cell.
bar_chart = alt.Chart(zero_shot_only_only_df).mark_bar().encode(
    alt.X('detector_short_name:N', sort=None, title=None),
    alt.Y('accuracy:Q').scale(alt.Scale(domain=(0.6, 1), clamp=True)),
    alt.Color("detector_short_name:N", title="Detector"),
    column=alt.Column("dataset:N", title=None)
).properties(
    width=100,
    height=100,
)

# lay the per-dataset charts out on a single row
grid_chart = alt.vconcat()
nb_rows = 1
nb_dataset_per_row = 3

datasets = ["gemma_chat", "zephyr", "llama3"]
for row_start in range(0, nb_rows * nb_dataset_per_row, nb_dataset_per_row):
    row = alt.hconcat()
    # Slicing stops at the end of `datasets`, so a row may be shorter.
    for dataset_name in datasets[row_start:row_start + nb_dataset_per_row]:
        row |= bar_chart.transform_filter(alt.datum.dataset == dataset_name)
    grid_chart &= row

# configure_* returns a new chart instead of modifying the chart in place, so
# the result has to be assigned back; otherwise the saved and displayed chart
# silently lose these font settings.
grid_chart = grid_chart.configure_axis(
    labelFontSize=18,
    titleFontSize=18,
).configure_legend(
    labelFontSize=18,
    titleFontSize=18,
    titleLimit=0
).configure_header(
    titleFontSize=18,
    labelFontSize=18
)
grid_chart.save("notebooks/plots/heatmap_zero_shot_bar.png")

grid_chart

In [15]:
# Display the dataset names used to build the grid in the previous cell.
datasets

['gemma_chat', 'zephyr', 'llama3']

In [16]:
# best detector

# Cast the headline metrics to float before averaging them.
for metric_column in ("accuracy", "precision", "recall", "f1_score"):
    freeze_base_df[metric_column] = freeze_base_df[metric_column].astype(float)

freeze_base_df_metrics = freeze_base_df[["accuracy", "precision", "recall", "f1_score", "detector", "roc_auc"]]

# Average every metric per detector and rank detectors from most to least
# accurate.
(
    freeze_base_df_metrics
    .groupby(["detector"])
    .mean()
    .reset_index()
    .sort_values(by="accuracy", ascending=False)
)

Unnamed: 0,detector,accuracy,precision,recall,f1_score,roc_auc
4,electra_large_gemma,0.9606,0.96879,0.95175,0.959835,0.993213
5,electra_large_mistral,0.957739,0.951298,0.964487,0.957525,0.99184
7,electra_large_round_robin,0.955865,0.938181,0.976675,0.956758,0.98964
6,electra_large_phi,0.95141,0.982429,0.918786,0.948726,0.994325
13,roberta_large_round_robin,0.946048,0.932066,0.961636,0.946414,0.988616
8,fast_detect_gpt,0.94583,0.926387,0.968524,0.946821,0.988164
10,roberta_large_gemma,0.942699,0.913296,0.977457,0.944106,0.990002
1,distil_roberta-base_mistral,0.942193,0.935711,0.950167,0.942364,0.985778
11,roberta_large_mistral,0.941938,0.909133,0.981527,0.943619,0.991876
2,distil_roberta-base_phi,0.941822,0.944455,0.939163,0.941241,0.985402


In [17]:
# base detector

freeze_base_df_metrics = freeze_base_df[["accuracy", "precision", "recall", "f1_score", "base_detector", "roc_auc"]]

# Average every metric per base detector (all its training datasets pooled)
# and rank base detectors from most to least accurate.
(
    freeze_base_df_metrics
    .groupby(["base_detector"])
    .mean()
    .reset_index()
    .sort_values(by="accuracy", ascending=False)
)

Unnamed: 0,base_detector,accuracy,precision,recall,f1_score,roc_auc
1,electra_large,0.956404,0.960175,0.952925,0.955711,0.992255
2,fast_detect_gpt,0.94583,0.926387,0.968524,0.946821,0.988164
4,roberta_large,0.942901,0.922568,0.966586,0.94361,0.989158
0,distil_roberta-base,0.937953,0.934076,0.942802,0.937942,0.984465
3,roberta_base_open_ai,0.793004,0.5,0.410399,0.448645,0.882568


In [18]:
# best dataset to train on

training_metric_columns = ["accuracy", "precision", "recall", "f1_score", "trained_on_dataset", "roc_auc"]
freeze_base_df_metrics = freeze_base_df[training_metric_columns].copy()

# Average every metric per training dataset and rank the training datasets
# from most to least accurate.
(
    freeze_base_df_metrics
    .groupby(["trained_on_dataset"])
    .mean()
    .reset_index()
    .sort_values(by="accuracy", ascending=False)
)

Unnamed: 0,trained_on_dataset,accuracy,precision,recall,f1_score,roc_auc
1,mistral,0.94729,0.932047,0.965394,0.947836,0.989831
0,gemma,0.946219,0.93766,0.956469,0.946385,0.989442
3,round_robin,0.944785,0.93183,0.959996,0.945374,0.986608
2,phi,0.944718,0.95422,0.934558,0.943423,0.988622
4,z,0.869417,0.713194,0.689462,0.697733,0.935366


In [19]:
# best detectors on chat models
chat_models = ["gemma_chat", "zephyr", "llama3"]

# Keep only the rows tested on a chat-model dataset, together with the
# metrics and the detector label needed for the ranking.
freeze_base_df_metrics_chat = freeze_base_df.loc[
    freeze_base_df["dataset"].isin(chat_models),
    ["accuracy", "precision", "recall", "f1_score", "detector", "roc_auc"],
]

# Average every metric per detector and rank detectors from most to least
# accurate on the chat datasets.
(
    freeze_base_df_metrics_chat
    .groupby(["detector"])
    .mean()
    .reset_index()
    .sort_values(by="accuracy", ascending=False)
)

Unnamed: 0,detector,accuracy,precision,recall,f1_score,roc_auc
7,electra_large_round_robin,0.954693,0.929906,0.983619,0.955919,0.991322
12,roberta_large_phi,0.954636,0.941256,0.969856,0.955198,0.989632
5,electra_large_mistral,0.953197,0.957367,0.948718,0.952826,0.990712
13,roberta_large_round_robin,0.950736,0.9317,0.972853,0.951782,0.990186
4,electra_large_gemma,0.950379,0.964247,0.93555,0.949372,0.992516
11,roberta_large_mistral,0.944716,0.919368,0.975024,0.946295,0.990473
10,roberta_large_gemma,0.943025,0.917237,0.973976,0.944693,0.990395
6,electra_large_phi,0.940296,0.979819,0.899208,0.937007,0.992423
2,distil_roberta-base_phi,0.93791,0.936328,0.939817,0.937797,0.985766
8,fast_detect_gpt,0.936224,0.918087,0.95775,0.937303,0.984069


In [20]:
# dataset where detectors struggle the most

dataset_metric_columns = ["accuracy", "precision", "recall", "f1_score", "dataset", "roc_auc"]
freeze_base_df_metrics = freeze_base_df[dataset_metric_columns].copy()

# Average every metric per test dataset and list the datasets from lowest to
# highest mean accuracy.
(
    freeze_base_df_metrics
    .groupby(["dataset"])
    .mean()
    .reset_index()
    .sort_values(by="accuracy", ascending=True)
)

Unnamed: 0,dataset,accuracy,precision,recall,f1_score,roc_auc
6,zephyr,0.910406,0.906172,0.877649,0.88952,0.96537
3,mistral,0.927329,0.901403,0.908661,0.904326,0.978228
1,gemma_chat,0.929724,0.902218,0.911353,0.906193,0.979023
2,llama3,0.939771,0.90372,0.932984,0.917462,0.98244
0,gemma,0.941139,0.902806,0.930679,0.916078,0.985264
5,round_robin,0.947617,0.926527,0.910758,0.916314,0.988528
4,phi,0.947947,0.903984,0.942003,0.922367,0.988267
