# 0. Setup

## 0.0 Imports

In [1]:
import json
import logging
import os
import random
import sys
from types import SimpleNamespace

import numpy as np
import pandas as pd
import torch
import transformers
from sklearn.model_selection import train_test_split
from tqdm import tqdm

BASE_DIR = "../../"
sys.path.append(BASE_DIR)

from src.general_functions_and_patterns_for_detection import (
    load_dataframe_from_json,
    TrainRobertaHelper,
    RESULT_DIR, CLEANED_FILES_DIR, DETECTOR_RESULTS, TASK_DIR,
    LLMs, json_path_abstract,
    seed_everything
)

# Global seed for this notebook; also passed to train_test_split further down.
SEED = 2023
seed_everything(SEED)

from DetectRL.Detectors.metrics import get_roc_metric_result

# Short alias for the helper that prepares a dataframe for RoBERTa training.
prepare_df_for_roberta_training = TrainRobertaHelper.prepare_df_for_roberta_training
import DetectRL.Detectors.train_roberta as train_roberta

# Run switches.
DEBUG = True  # NOTE(review): not read anywhere in the visible cells - confirm it is still needed
DRY_RUN = False  # when True, the training loop appends "_dry_run" to the LLM names it uses in paths
ALL_DATA = True  # when True, model directories get the "_all_data" suffix
# Results of this notebook go into a "T00" sub-folder of the project-wide RESULT_DIR.
# Note: after os.path.join the value has NO trailing path separator.
RESULT_DIR = os.path.join(RESULT_DIR, "T00")
os.makedirs(RESULT_DIR, exist_ok=True)

2025-09-12 08:13:58.391801: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-12 08:13:58.408583: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757664838.428214 2639735 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757664838.433557 2639735 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1757664838.448492 2639735 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# TODO: adjust CUDA setup depending on your setup
os.environ.update(
    {
        # Disable NCCL features incompatible with RTX 40xx
        "NCCL_P2P_DISABLE": "1",
        "NCCL_IB_DISABLE": "1",
        # Restrict to only GPU 1 (CUDA:1)
        "CUDA_VISIBLE_DEVICES": "1",
    }
)

# 1. Test Training

## 1.0 General setup

In [3]:
# Folder holding the Task1 train/test JSON files (keeps the trailing slash).
task1_path = "{}/Task1/".format(TASK_DIR)
# File-name suffix identifying which cleaned parquet version to load.
cleaned_file_version = "_cleaned_final_using_dipper_v2"

## 1.1 Using the default data for comparison

Re-executing the multi-LLM generalisation test from DetectRL.

### Training

In [4]:
# Fine-tune one roberta-base detector per LLM; every other LLM's test file is passed
# along as a transfer test set.
# Loop-invariant suffix, bound before the loop so it exists even if LLMs is empty.
appendix = "_all_data" if ALL_DATA else ""

for _llm in tqdm(LLMs):
    # sorted() makes the transfer-file order reproducible; plain set iteration order
    # for strings can differ from one kernel start to the next.
    other_LLMs = sorted(set(LLMs) - {_llm})
    if DRY_RUN:
        _llm += "_dry_run"
        other_LLMs = [item + "_dry_run" for item in other_LLMs]

    # Comma-separated list of ALL remaining test files, built without hard-coded
    # indices so it works for any number of LLMs.
    transfer_test_data_path = ",".join(
        f"{task1_path}/multi_llms_{other}_test.json" for other in other_LLMs
    )

    # --- Set parameters in a dict ---
    args_dict = {
        "model_name": "roberta-base",
        "save_model_path": f"{DETECTOR_RESULTS}/roberta_base_classifier_{_llm}{appendix}",
        "train_data_path": f"{task1_path}/multi_llms_{_llm}_train.json",
        "test_data_path": f"{task1_path}/multi_llms_{_llm}_test.json",
        "transfer_test_data_path": transfer_test_data_path,
        "train_df": None,
        "test_df": None,
        "transfer_df": None,
        "epochs": 3,
        "learning_rate": 1e-6,
        "batch_size": 8,
        "seed": SEED,  # single source of truth for the seed (set in the setup cell)
        "mode": "train",
        "DEVICE": "cuda"
    }

    # Convert dict to namespace-like object
    args = SimpleNamespace(**args_dict)

    # Call the run function
    train_roberta.run(args)

  return forward_call(*args, **kwargs)


{'loss': 0.2523, 'grad_norm': 20.64860725402832, 'learning_rate': 9.473462066054658e-07, 'epoch': 0.15827793605571383}
{'loss': 0.0764, 'grad_norm': 0.3984912037849426, 'learning_rate': 8.945868945868945e-07, 'epoch': 0.31655587211142766}
{'loss': 0.0264, 'grad_norm': 0.01701672002673149, 'learning_rate': 8.418275825683234e-07, 'epoch': 0.4748338081671415}
{'loss': 0.0273, 'grad_norm': 0.014583592303097248, 'learning_rate': 7.89068270549752e-07, 'epoch': 0.6331117442228553}
{'loss': 0.0133, 'grad_norm': 0.012019606307148933, 'learning_rate': 7.363089585311806e-07, 'epoch': 0.7913896802785692}
{'loss': 0.0141, 'grad_norm': 0.11043582856655121, 'learning_rate': 6.835496465126095e-07, 'epoch': 0.949667616334283}
{'eval_loss': 1.088729977607727, 'eval_accuracy': 0.8388888888888889, 'eval_f1': 0.8388888888888889, 'eval_precision': 0.8388888888888889, 'eval_recall': 0.8388888888888889, 'eval_runtime': 17.084, 'eval_samples_per_second': 42.145, 'eval_steps_per_second': 5.268, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 0.0112, 'grad_norm': 0.005832625553011894, 'learning_rate': 6.307903344940382e-07, 'epoch': 1.1079455523899968}
{'loss': 0.0129, 'grad_norm': 0.0030535783153027296, 'learning_rate': 5.780310224754669e-07, 'epoch': 1.2662234884457106}
{'loss': 0.0132, 'grad_norm': 0.004167329519987106, 'learning_rate': 5.252717104568957e-07, 'epoch': 1.4245014245014245}
{'loss': 0.0064, 'grad_norm': 0.009279988706111908, 'learning_rate': 4.7251239843832436e-07, 'epoch': 1.5827793605571383}
{'loss': 0.0124, 'grad_norm': 0.0025347890332341194, 'learning_rate': 4.1975308641975306e-07, 'epoch': 1.7410572966128521}
{'loss': 0.0145, 'grad_norm': 0.0016348951030522585, 'learning_rate': 3.669937744011818e-07, 'epoch': 1.899335232668566}
{'eval_loss': 0.5793552398681641, 'eval_accuracy': 0.9222222222222223, 'eval_f1': 0.9222222222222223, 'eval_precision': 0.9222222222222223, 'eval_recall': 0.9222222222222223, 'eval_runtime': 17.0436, 'eval_samples_per_second': 42.244, 'eval_steps_per_second': 5.281, 'ep

  return forward_call(*args, **kwargs)


{'loss': 0.004, 'grad_norm': 0.0015457450645044446, 'learning_rate': 3.142344623826105e-07, 'epoch': 2.05761316872428}
{'loss': 0.0042, 'grad_norm': 0.009953424334526062, 'learning_rate': 2.614751503640392e-07, 'epoch': 2.2158911047799936}
{'loss': 0.0063, 'grad_norm': 0.0010754704708233476, 'learning_rate': 2.0871583834546797e-07, 'epoch': 2.3741690408357075}
{'loss': 0.0054, 'grad_norm': 0.0015438495902344584, 'learning_rate': 1.5595652632689667e-07, 'epoch': 2.5324469768914213}
{'loss': 0.0027, 'grad_norm': 0.0012529799714684486, 'learning_rate': 1.0319721430832541e-07, 'epoch': 2.690724912947135}
{'loss': 0.0018, 'grad_norm': 0.0017474978230893612, 'learning_rate': 5.043790228975414e-08, 'epoch': 2.849002849002849}
{'eval_loss': 0.8853952884674072, 'eval_accuracy': 0.8916666666666667, 'eval_f1': 0.8916666666666667, 'eval_precision': 0.8916666666666667, 'eval_recall': 0.8916666666666667, 'eval_runtime': 17.108, 'eval_samples_per_second': 42.086, 'eval_steps_per_second': 5.261, 'epoc

  return forward_call(*args, **kwargs)


{'eval_loss': 0.8853952884674072, 'eval_accuracy': 0.8916666666666667, 'eval_f1': 0.8916666666666667, 'eval_precision': 0.8916666666666667, 'eval_recall': 0.8916666666666667, 'eval_runtime': 16.8549, 'eval_samples_per_second': 42.718, 'eval_steps_per_second': 5.34, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


{'loss': 0.3108, 'grad_norm': 9.832615852355957, 'learning_rate': 9.473462066054658e-07, 'epoch': 0.15827793605571383}
{'loss': 0.1563, 'grad_norm': 0.11783894896507263, 'learning_rate': 8.945868945868945e-07, 'epoch': 0.31655587211142766}
{'loss': 0.0658, 'grad_norm': 11.575504302978516, 'learning_rate': 8.418275825683234e-07, 'epoch': 0.4748338081671415}
{'loss': 0.0395, 'grad_norm': 13.838899612426758, 'learning_rate': 7.89068270549752e-07, 'epoch': 0.6331117442228553}
{'loss': 0.0281, 'grad_norm': 0.04161103069782257, 'learning_rate': 7.363089585311806e-07, 'epoch': 0.7913896802785692}
{'loss': 0.0229, 'grad_norm': 0.039605092257261276, 'learning_rate': 6.835496465126095e-07, 'epoch': 0.949667616334283}
{'eval_loss': 0.21092259883880615, 'eval_accuracy': 0.9597222222222223, 'eval_f1': 0.9597222222222223, 'eval_precision': 0.9597222222222223, 'eval_recall': 0.9597222222222223, 'eval_runtime': 17.0047, 'eval_samples_per_second': 42.341, 'eval_steps_per_second': 5.293, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 0.0158, 'grad_norm': 0.008376765996217728, 'learning_rate': 6.307903344940382e-07, 'epoch': 1.1079455523899968}
{'loss': 0.0122, 'grad_norm': 0.01293924544006586, 'learning_rate': 5.780310224754669e-07, 'epoch': 1.2662234884457106}
{'loss': 0.0142, 'grad_norm': 0.009533166885375977, 'learning_rate': 5.252717104568957e-07, 'epoch': 1.4245014245014245}
{'loss': 0.0262, 'grad_norm': 0.018696915358304977, 'learning_rate': 4.7251239843832436e-07, 'epoch': 1.5827793605571383}
{'loss': 0.01, 'grad_norm': 0.004438058473169804, 'learning_rate': 4.1975308641975306e-07, 'epoch': 1.7410572966128521}
{'loss': 0.0067, 'grad_norm': 0.007136004976928234, 'learning_rate': 3.669937744011818e-07, 'epoch': 1.899335232668566}
{'eval_loss': 0.10678107291460037, 'eval_accuracy': 0.9819444444444444, 'eval_f1': 0.9819444444444444, 'eval_precision': 0.9819444444444444, 'eval_recall': 0.9819444444444444, 'eval_runtime': 17.0069, 'eval_samples_per_second': 42.336, 'eval_steps_per_second': 5.292, 'epoch':

  return forward_call(*args, **kwargs)


{'loss': 0.0069, 'grad_norm': 0.02365836128592491, 'learning_rate': 3.142344623826105e-07, 'epoch': 2.05761316872428}
{'loss': 0.0127, 'grad_norm': 0.7525432705879211, 'learning_rate': 2.614751503640392e-07, 'epoch': 2.2158911047799936}
{'loss': 0.0063, 'grad_norm': 0.0023267697542905807, 'learning_rate': 2.0871583834546797e-07, 'epoch': 2.3741690408357075}
{'loss': 0.0098, 'grad_norm': 0.004604933317750692, 'learning_rate': 1.5595652632689667e-07, 'epoch': 2.5324469768914213}
{'loss': 0.0065, 'grad_norm': 0.002717529656365514, 'learning_rate': 1.0319721430832541e-07, 'epoch': 2.690724912947135}
{'loss': 0.005, 'grad_norm': 0.005048414226621389, 'learning_rate': 5.043790228975414e-08, 'epoch': 2.849002849002849}
{'eval_loss': 0.1696123480796814, 'eval_accuracy': 0.9694444444444444, 'eval_f1': 0.9694444444444444, 'eval_precision': 0.9694444444444444, 'eval_recall': 0.9694444444444444, 'eval_runtime': 17.0486, 'eval_samples_per_second': 42.232, 'eval_steps_per_second': 5.279, 'epoch': 3.

  return forward_call(*args, **kwargs)


{'eval_loss': 0.1696123480796814, 'eval_accuracy': 0.9694444444444444, 'eval_f1': 0.9694444444444444, 'eval_precision': 0.9694444444444444, 'eval_recall': 0.9694444444444444, 'eval_runtime': 16.826, 'eval_samples_per_second': 42.791, 'eval_steps_per_second': 5.349, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


{'loss': 0.294, 'grad_norm': 8.698787689208984, 'learning_rate': 9.473462066054658e-07, 'epoch': 0.15827793605571383}
{'loss': 0.1849, 'grad_norm': 5.566482067108154, 'learning_rate': 8.945868945868945e-07, 'epoch': 0.31655587211142766}
{'loss': 0.0753, 'grad_norm': 0.9402514696121216, 'learning_rate': 8.418275825683234e-07, 'epoch': 0.4748338081671415}
{'loss': 0.0328, 'grad_norm': 0.022916847839951515, 'learning_rate': 7.89068270549752e-07, 'epoch': 0.6331117442228553}
{'loss': 0.0252, 'grad_norm': 0.03137892857193947, 'learning_rate': 7.363089585311806e-07, 'epoch': 0.7913896802785692}
{'loss': 0.0286, 'grad_norm': 0.010812385939061642, 'learning_rate': 6.835496465126095e-07, 'epoch': 0.949667616334283}
{'eval_loss': 1.5617460012435913, 'eval_accuracy': 0.7527777777777778, 'eval_f1': 0.7527777777777778, 'eval_precision': 0.7527777777777778, 'eval_recall': 0.7527777777777778, 'eval_runtime': 17.0089, 'eval_samples_per_second': 42.331, 'eval_steps_per_second': 5.291, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 0.0211, 'grad_norm': 0.015292403288185596, 'learning_rate': 6.307903344940382e-07, 'epoch': 1.1079455523899968}
{'loss': 0.0138, 'grad_norm': 0.01023515872657299, 'learning_rate': 5.780310224754669e-07, 'epoch': 1.2662234884457106}
{'loss': 0.0127, 'grad_norm': 0.004542579874396324, 'learning_rate': 5.252717104568957e-07, 'epoch': 1.4245014245014245}
{'loss': 0.0179, 'grad_norm': 0.019629422575235367, 'learning_rate': 4.7251239843832436e-07, 'epoch': 1.5827793605571383}
{'loss': 0.0128, 'grad_norm': 0.002932896139100194, 'learning_rate': 4.1975308641975306e-07, 'epoch': 1.7410572966128521}
{'loss': 0.0101, 'grad_norm': 0.003935432527214289, 'learning_rate': 3.669937744011818e-07, 'epoch': 1.899335232668566}
{'eval_loss': 1.8899675607681274, 'eval_accuracy': 0.7527777777777778, 'eval_f1': 0.7527777777777778, 'eval_precision': 0.7527777777777778, 'eval_recall': 0.7527777777777778, 'eval_runtime': 16.996, 'eval_samples_per_second': 42.363, 'eval_steps_per_second': 5.295, 'epoch':

  return forward_call(*args, **kwargs)


{'loss': 0.0179, 'grad_norm': 0.0021579728927463293, 'learning_rate': 3.142344623826105e-07, 'epoch': 2.05761316872428}
{'loss': 0.0072, 'grad_norm': 0.002422079909592867, 'learning_rate': 2.614751503640392e-07, 'epoch': 2.2158911047799936}
{'loss': 0.0089, 'grad_norm': 0.002113007241860032, 'learning_rate': 2.0871583834546797e-07, 'epoch': 2.3741690408357075}
{'loss': 0.0173, 'grad_norm': 0.002230482641607523, 'learning_rate': 1.5595652632689667e-07, 'epoch': 2.5324469768914213}
{'loss': 0.0046, 'grad_norm': 0.0014976415550336242, 'learning_rate': 1.0319721430832541e-07, 'epoch': 2.690724912947135}
{'loss': 0.002, 'grad_norm': 0.0021020725835114717, 'learning_rate': 5.043790228975414e-08, 'epoch': 2.849002849002849}
{'eval_loss': 1.537498116493225, 'eval_accuracy': 0.8111111111111111, 'eval_f1': 0.8111111111111111, 'eval_precision': 0.8111111111111111, 'eval_recall': 0.8111111111111111, 'eval_runtime': 16.9673, 'eval_samples_per_second': 42.435, 'eval_steps_per_second': 5.304, 'epoch'

  return forward_call(*args, **kwargs)


{'eval_loss': 1.537498116493225, 'eval_accuracy': 0.8111111111111111, 'eval_f1': 0.8111111111111111, 'eval_precision': 0.8111111111111111, 'eval_recall': 0.8111111111111111, 'eval_runtime': 16.7377, 'eval_samples_per_second': 43.017, 'eval_steps_per_second': 5.377, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


{'loss': 0.2996, 'grad_norm': 5.264007568359375, 'learning_rate': 9.473462066054658e-07, 'epoch': 0.15827793605571383}
{'loss': 0.2293, 'grad_norm': 6.263920783996582, 'learning_rate': 8.945868945868945e-07, 'epoch': 0.31655587211142766}
{'loss': 0.1656, 'grad_norm': 0.7451289296150208, 'learning_rate': 8.418275825683234e-07, 'epoch': 0.4748338081671415}
{'loss': 0.0708, 'grad_norm': 0.12793347239494324, 'learning_rate': 7.89068270549752e-07, 'epoch': 0.6331117442228553}
{'loss': 0.0461, 'grad_norm': 0.022311659529805183, 'learning_rate': 7.363089585311806e-07, 'epoch': 0.7913896802785692}
{'loss': 0.0442, 'grad_norm': 0.013789534568786621, 'learning_rate': 6.835496465126095e-07, 'epoch': 0.949667616334283}
{'eval_loss': 2.531515121459961, 'eval_accuracy': 0.6236111111111111, 'eval_f1': 0.6236111111111111, 'eval_precision': 0.6236111111111111, 'eval_recall': 0.6236111111111111, 'eval_runtime': 16.9965, 'eval_samples_per_second': 42.362, 'eval_steps_per_second': 5.295, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 0.0343, 'grad_norm': 0.016003813594579697, 'learning_rate': 6.307903344940382e-07, 'epoch': 1.1079455523899968}
{'loss': 0.037, 'grad_norm': 0.007550846319645643, 'learning_rate': 5.780310224754669e-07, 'epoch': 1.2662234884457106}
{'loss': 0.0353, 'grad_norm': 0.007808472495526075, 'learning_rate': 5.252717104568957e-07, 'epoch': 1.4245014245014245}
{'loss': 0.0338, 'grad_norm': 0.04921482503414154, 'learning_rate': 4.7251239843832436e-07, 'epoch': 1.5827793605571383}
{'loss': 0.0227, 'grad_norm': 0.00774453766644001, 'learning_rate': 4.1975308641975306e-07, 'epoch': 1.7410572966128521}
{'loss': 0.0229, 'grad_norm': 0.003965183161199093, 'learning_rate': 3.669937744011818e-07, 'epoch': 1.899335232668566}
{'eval_loss': 2.121584415435791, 'eval_accuracy': 0.7222222222222222, 'eval_f1': 0.7222222222222222, 'eval_precision': 0.7222222222222222, 'eval_recall': 0.7222222222222222, 'eval_runtime': 16.9932, 'eval_samples_per_second': 42.37, 'eval_steps_per_second': 5.296, 'epoch': 2.

  return forward_call(*args, **kwargs)


{'loss': 0.0196, 'grad_norm': 0.004914296790957451, 'learning_rate': 3.142344623826105e-07, 'epoch': 2.05761316872428}
{'loss': 0.0164, 'grad_norm': 0.004122857004404068, 'learning_rate': 2.614751503640392e-07, 'epoch': 2.2158911047799936}
{'loss': 0.0173, 'grad_norm': 0.004774910863488913, 'learning_rate': 2.0871583834546797e-07, 'epoch': 2.3741690408357075}
{'loss': 0.0313, 'grad_norm': 0.005968223791569471, 'learning_rate': 1.5595652632689667e-07, 'epoch': 2.5324469768914213}
{'loss': 0.0146, 'grad_norm': 0.004050179850310087, 'learning_rate': 1.0319721430832541e-07, 'epoch': 2.690724912947135}
{'loss': 0.0261, 'grad_norm': 0.005742188543081284, 'learning_rate': 5.043790228975414e-08, 'epoch': 2.849002849002849}
{'eval_loss': 2.2390310764312744, 'eval_accuracy': 0.7111111111111111, 'eval_f1': 0.7111111111111111, 'eval_precision': 0.7111111111111111, 'eval_recall': 0.7111111111111111, 'eval_runtime': 16.9825, 'eval_samples_per_second': 42.397, 'eval_steps_per_second': 5.3, 'epoch': 3

  return forward_call(*args, **kwargs)


{'eval_loss': 2.2390310764312744, 'eval_accuracy': 0.7111111111111111, 'eval_f1': 0.7111111111111111, 'eval_precision': 0.7111111111111111, 'eval_recall': 0.7111111111111111, 'eval_runtime': 16.7484, 'eval_samples_per_second': 42.989, 'eval_steps_per_second': 5.374, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
100%|██████████| 4/4 [7:11:05<00:00, 6466.46s/it]  


### Combine training results

In [5]:
# Collect the transfer-test metrics written by the training runs and average them
# per training LLM.
# Recomputed here so the cell does not depend on a variable left behind by the
# (very long-running) training cell.
appendix = "_all_data" if ALL_DATA else ""
classifier_name = "roberta-base"

dfs = []
for llm in LLMs:
    other_LLMs = sorted(set(LLMs) - {llm})
    directory_results = f"{DETECTOR_RESULTS}roberta_base_classifier_{llm}{appendix}"
    # One result file per transfer test set (all LLMs except the training one)
    results = []
    for other in other_LLMs:
        result_file = os.path.join(directory_results, f"multi_llms_{other}_test.json.{classifier_name}_result.json")
        with open(result_file, "r") as fp:
            results.append(json.load(fp))
    # Rows are kept un-averaged here; the groupby below does the averaging.
    averaged_results_df = pd.DataFrame(results)
    averaged_results_df["llm_type_used_for_training"] = llm
    dfs.append(averaged_results_df)

df_combined = pd.concat(dfs)
df_combined.groupby(by=["llm_type_used_for_training"]).mean(numeric_only=True)

Unnamed: 0_level_0,roc_auc,optimal_threshold,precision,recall,f1,accuracy,tpr_at_fpr_0_01
llm_type_used_for_training,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ChatGPT,0.980756,-0.637154,0.934586,0.933862,0.934074,0.933931,0.544643
Claude-instant,0.99298,-0.668812,0.950746,0.977844,0.964018,0.963313,0.679233
Google-PaLM,0.993473,-8.7e-05,0.972141,0.957341,0.964675,0.964807,0.66832
Llama-2-70b,0.992162,-5.2e-05,0.974823,0.948413,0.961273,0.961985,0.574074


In [15]:
# Read each detector's metrics on its OWN test set and keep the reported
# "optimal_threshold" per training LLM.
# Recomputed here so the cell does not depend on a variable left behind by the
# training cell.
appendix = "_all_data" if ALL_DATA else ""
classifier_name = "roberta-base"

dfs = []
for llm in LLMs:
    directory_results = f"{DETECTOR_RESULTS}roberta_base_classifier_{llm}{appendix}"
    result_file = os.path.join(directory_results, f"multi_llms_{llm}_test.json.{classifier_name}_result.json")
    with open(result_file, "r") as fp:
        results = [json.load(fp)]
    averaged_results_df = pd.DataFrame(results)
    averaged_results_df["llm_type_used_for_training"] = llm
    dfs.append(averaged_results_df)

df_combined = pd.concat(dfs)
# One row per LLM, so the mean only serves to index the frame by training LLM.
df_combined = df_combined.groupby(by=["llm_type_used_for_training"]).mean(numeric_only=True)
# Mapping: training LLM -> threshold, used by the evaluation cell below.
optimal_thresholds = df_combined["optimal_threshold"].to_dict()
optimal_thresholds

{'ChatGPT': -2.456542097206693e-05,
 'Claude-instant': -7.507131522288546e-05,
 'Google-PaLM': -7.56011504563503e-05,
 'Llama-2-70b': -3.26035515172407e-05}

### Further evaluation tests

In [16]:
# Re-score every transfer test file with each trained detector and compute metrics,
# passing the detector's entry from ``optimal_thresholds`` to get_roc_metric_result.
combined_results = []

for _llm in LLMs:

    directory_results = f"{DETECTOR_RESULTS}roberta_base_classifier_{_llm}_all_data"

    detector = transformers.AutoModelForSequenceClassification.from_pretrained(directory_results).to("cuda")
    tokenizer = transformers.AutoTokenizer.from_pretrained(directory_results)
    other_LLMs = sorted(set(LLMs) - {_llm})
    args_experiment_dict = {
        "model_name": directory_results,
        # All remaining LLMs' test files, comma-separated (no hard-coded indices)
        "test_data_path": ",".join(
            f"{task1_path}/multi_llms_{other}_test.json" for other in other_LLMs
        ),
        "seed": SEED,
        "DEVICE": "cuda"
    }

    # Convert dict to namespace-like object
    args = SimpleNamespace(**args_experiment_dict)

    filenames = args.test_data_path.split(",")
    results = []

    for filename in filenames:
        logging.info(f"Test in {filename}")
        # Context manager so the file handle is closed deterministically
        with open(filename, "r") as fp:
            test_data = json.load(fp)

        random.seed(args.seed)
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

        predictions = {'human': [], 'llm': []}
        with torch.no_grad():
            for item in tqdm(test_data):
                text = item["text"]
                label = item["label"]

                if label not in predictions:
                    raise ValueError(f"Unknown label {label}")

                tokenized = tokenizer([text], padding=True, truncation=True, max_length=512,
                                      return_tensors="pt").to(args.DEVICE)
                # Single forward pass per item; the score is the softmax probability of
                # class index 0 (presumably the "human" class - TODO confirm).
                score = detector(**tokenized).logits.softmax(-1)[:, 0].tolist()[0]
                predictions[label].append(score)
                item["prediction"] = score

        # Drop non-finite scores and negate the rest before computing metrics
        predictions['human'] = [-i for i in predictions['human'] if np.isfinite(i)]
        predictions['llm'] = [-i for i in predictions['llm'] if np.isfinite(i)]

        result = get_roc_metric_result(predictions['human'], predictions['llm'], optimal_thresholds[_llm])
        result["llm_used_for_training"] = _llm
        result["evaluation_file_path"] = filename

        if "xlm-roberta-base" in args.model_name:
            result["model_type"] = "xlm-roberta-base"
        if "xlm-roberta-large" in args.model_name:
            result["model_type"] = "xlm-roberta-large"

        results.append(result)
        logging.info(f"{result}")

        # Computed outside the f-strings: reusing the same quote type inside an f-string
        # is a SyntaxError before Python 3.12.
        file_stem = os.path.basename(filename).split(".json")[0]
        # os.path.join puts the outputs INSIDE the model directory; plain string
        # concatenation glued the file name onto the directory name instead.
        with open(os.path.join(directory_results, f"{file_stem}_evaluation_data.json"), "w") as f:
            json.dump(test_data, f, indent=4)

        with open(os.path.join(directory_results, f"{file_stem}_evaluation_results.json"), "w") as f:
            json.dump(result, f, indent=4)

    combined_results.extend(results)

  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:35<00:00, 21.07it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:39<00:00, 20.15it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:44<00:00, 19.19it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:49<00:00, 18.37it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:42<00:00, 19.68it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:45<00:00, 19.06it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:49<00:00, 18.41it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:42<00:00, 19.61it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:41<00:00, 19.77it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:49<00:00, 18.37it/s]
  return forward_call(*args, **kwargs)
100%|██████████| 2008/2008 [01:42<00:00, 19.61it/s]

In [17]:
# One row per (training LLM, evaluated test file) pair.
results_df = pd.DataFrame(combined_results)
# File name without directory and without everything after the first dot.
results_df["eval_model"] = [
    file_path.rsplit("/", 1)[-1].split(".", 1)[0]
    for file_path in results_df["evaluation_file_path"]
]
# Display only: the full path column is hidden, results_df itself keeps it.
results_df.drop(columns=["evaluation_file_path"])

Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,llm_used_for_training,eval_model
0,0.972107,-2.5e-05,"[[999, 1], [574, 434]]",0.997701,0.430556,0.601525,0.713645,0.287698,ChatGPT,multi_llms_Claude-instant_test
1,0.972681,-2.5e-05,"[[1000, 0], [484, 524]]",1.0,0.519841,0.684073,0.758964,0.534722,ChatGPT,multi_llms_Google-PaLM_test
2,0.997481,-2.5e-05,"[[999, 1], [180, 828]]",0.998794,0.821429,0.90147,0.909861,0.811508,ChatGPT,multi_llms_Llama-2-70b_test
3,0.998903,-7.5e-05,"[[996, 4], [65, 943]]",0.995776,0.935516,0.964706,0.965637,0.885913,Claude-instant,multi_llms_ChatGPT_test
4,0.988103,-7.5e-05,"[[998, 2], [261, 747]]",0.99733,0.741071,0.850313,0.869024,0.501984,Claude-instant,multi_llms_Google-PaLM_test
5,0.991935,-7.5e-05,"[[992, 8], [211, 797]]",0.990062,0.790675,0.879206,0.890936,0.649802,Claude-instant,multi_llms_Llama-2-70b_test
6,0.998813,-3.3e-05,"[[993, 7], [8, 1000]]",0.993049,0.992063,0.992556,0.99253,0.527778,Llama-2-70b,multi_llms_ChatGPT_test
7,0.984159,-3.3e-05,"[[998, 2], [422, 586]]",0.996599,0.581349,0.734336,0.788845,0.486111,Llama-2-70b,multi_llms_Claude-instant_test
8,0.993513,-3.3e-05,"[[999, 1], [242, 766]]",0.998696,0.759921,0.863099,0.878984,0.708333,Llama-2-70b,multi_llms_Google-PaLM_test
9,0.998326,-7.6e-05,"[[976, 24], [8, 1000]]",0.976562,0.992063,0.984252,0.984064,0.866071,Google-PaLM,multi_llms_ChatGPT_test


# 2. Only train for Claude

## 2.1 Cleaned

In [4]:
# Load the cleaned arXiv parquet file and prepare the "direct_prompt" rows for training.
training_data_path = "{}arxiv_2800{}.parquet".format(CLEANED_FILES_DIR, cleaned_file_version)
training_df = prepare_df_for_roberta_training(
    pd.read_parquet(training_data_path).reset_index(),
    "direct_prompt",
)

# Training subset: Claude-instant rows.
# NOTE(review): unlike the three transfer frames below, no dropna is applied here - confirm this is intended.
is_claude = training_df["llm_type"] == "Claude-instant"
df_claude = training_df[is_claude]

# Transfer subsets: rows of the other LLMs, without missing label/text values.
required_columns = ["label", "text"]
df_llama = training_df[training_df["llm_type"] == "Llama-2-70b"].dropna(subset=required_columns)
df_palm = training_df[training_df["llm_type"] == "Google-PaLM"].dropna(subset=required_columns)
df_chatgpt = training_df[training_df["llm_type"] == "ChatGPT"].dropna(subset=required_columns)

df_claude.head()

Unnamed: 0,id,context,llm_type,text,domain,label,llm_prompting_strategy
1400,1401,Real Time Turbulent Video Perfecting by Image ...,Claude-instant,Image and video quality in Long Range Observat...,arxiv,human,direct_prompt
1401,1402,Finite Euler products and the Riemann Hypothesis,Claude-instant,We show that if the Riemann Hypothesis is true...,arxiv,human,direct_prompt
1402,1403,An Adaptive Strategy for the Classification of...,Claude-instant,One of the major problems in computational bio...,arxiv,human,direct_prompt
1403,1404,Detailed Models of super-Earths: How well can ...,Claude-instant,The field of extrasolar planets has rapidly ex...,arxiv,human,direct_prompt
1404,1405,The Distribution of AGN in Clusters of Galaxies,Claude-instant,We present a study of the distribution of AGN ...,arxiv,human,direct_prompt


In [5]:
# 80/20 shuffled split of the Claude rows, seeded for reproducibility.
train_df, test_df = train_test_split(
    df_claude,
    test_size=0.2,
    shuffle=True,
    random_state=SEED,
)
# Quick size overview.
# NOTE(review): df_palm is not part of this tuple while df_claude is - confirm that is intended.
(
    train_df.shape,
    test_df.shape,
    len(df_llama),
    len(df_claude),
    len(df_chatgpt),
    train_df.columns,
)

((1120, 7),
 (280, 7),
 1400,
 1400,
 1400,
 Index(['id', 'context', 'llm_type', 'text', 'domain', 'label',
        'llm_prompting_strategy'],
       dtype='object'))

In [6]:
# Train on the cleaned Claude rows; the other three LLM frames serve as transfer test sets.
args = train_roberta.generate_args_for_training_roberta(
    train_df=train_df, test_df=test_df, transfer_df=[df_llama, df_palm, df_chatgpt],
    # RESULT_DIR has no trailing separator (it was built with os.path.join), so the path
    # must be joined, otherwise the model ends up beside the T00 folder, not inside it.
    save_model_path=os.path.join(RESULT_DIR, "claude_direct_prompt_test"), device="cpu"
)
train_roberta.run(args)

  return forward_call(*args, **kwargs)


{'eval_loss': 0.6678691506385803, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_runtime': 4.6934, 'eval_samples_per_second': 46.874, 'eval_steps_per_second': 5.966, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.5951381921768188, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_runtime': 4.9561, 'eval_samples_per_second': 44.389, 'eval_steps_per_second': 5.65, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.5272248983383179, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_runtime': 5.2171, 'eval_samples_per_second': 42.169, 'eval_steps_per_second': 5.367, 'epoch': 3.0}
{'train_runtime': 230.0896, 'train_samples_per_second': 11.735, 'train_steps_per_second': 1.473, 'train_loss': 0.6224659607473728, 'epoch': 3.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.5272248983383179, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_runtime': 5.0973, 'eval_samples_per_second': 43.16, 'eval_steps_per_second': 5.493, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


{'b12eac4262bc2189cebaab088a335a4a571334611556d372f58fcaf8350d47c6_train': {'roc_auc': 1.0,
  'optimal_threshold': -0.501172661781311,
  'conf_matrix': [[700, 0], [0, 700]],
  'precision': 1.0,
  'recall': 1.0,
  'f1': 1.0,
  'accuracy': 1.0,
  'tpr_at_fpr_0_01': 1.0},
 'eed269866e6fceab0a382db506e57af78c264eb5c02f528bb0ef376bdc071940_train': {'roc_auc': 0.9994599051741908,
  'optimal_threshold': -0.5214246511459351,
  'conf_matrix': [[699, 1], [2, 691]],
  'precision': 0.9985549132947977,
  'recall': 0.9971139971139971,
  'f1': 0.9978339350180505,
  'accuracy': 0.9978463747307968,
  'tpr_at_fpr_0_01': 0.9956709956709957},
 '95d2a2c3cbfa06bb0cc8b9ba6f93ddc3bb62fd2d4bacb045025746b7135524aa_train': {'roc_auc': 1.0,
  'optimal_threshold': -0.4787825345993042,
  'conf_matrix': [[700, 0], [0, 700]],
  'precision': 1.0,
  'recall': 1.0,
  'f1': 1.0,
  'accuracy': 1.0,
  'tpr_at_fpr_0_01': 1.0}}

## 2.2 Not Cleaned

In [7]:
# Same experiment as section 2.1, but on the original (not cleaned) JSON data.
training_df = load_dataframe_from_json(json_path_abstract)
training_df = prepare_df_for_roberta_training(training_df, "direct_prompt")
df_claude = training_df[training_df["llm_type"] == "Claude-instant"]
df_llama, df_palm, df_chatgpt = [training_df[training_df["llm_type"] == _llm].dropna(subset=["label", "text"]) for _llm
                                 in ["Llama-2-70b", "Google-PaLM", "ChatGPT"]]
print(df_claude.head())

train_df, test_df = train_test_split(df_claude, test_size=0.2, random_state=SEED, shuffle=True)
print(train_df.shape, test_df.shape, len(df_llama), len(df_claude), len(df_chatgpt),
      train_df.columns)

args = train_roberta.generate_args_for_training_roberta(
    train_df=train_df, test_df=test_df, transfer_df=[df_llama, df_palm, df_chatgpt],
    # Joined path: RESULT_DIR has no trailing separator, so plain concatenation would
    # save beside the T00 folder. The distinct directory name keeps this run from
    # overwriting the model saved by section 2.1.
    save_model_path=os.path.join(RESULT_DIR, "claude_direct_prompt_test_not_cleaned"), device="cpu"
)

train_roberta.run(args)

        id                                            context        llm_type  \
1400  1401  Real Time Turbulent Video Perfecting by Image ...  Claude-instant   
1401  1402   Finite Euler products and the Riemann Hypothesis  Claude-instant   
1402  1403  An Adaptive Strategy for the Classification of...  Claude-instant   
1403  1404  Detailed Models of super-Earths: How well can ...  Claude-instant   
1404  1405    The Distribution of AGN in Clusters of Galaxies  Claude-instant   

                                                   text domain  label  \
1400  Image and video quality in Long Range Observat...  arxiv  human   
1401  We show that if the Riemann Hypothesis is true...  arxiv  human   
1402  One of the major problems in computational bio...  arxiv  human   
1403  The field of extrasolar planets has rapidly ex...  arxiv  human   
1404  We present a study of the distribution of AGN ...  arxiv  human   

     llm_prompting_strategy  
1400          direct_prompt  
1401          

  return forward_call(*args, **kwargs)


{'eval_loss': 0.6690271496772766, 'eval_accuracy': 0.990909090909091, 'eval_f1': 0.990909090909091, 'eval_precision': 0.990909090909091, 'eval_recall': 0.990909090909091, 'eval_runtime': 4.833, 'eval_samples_per_second': 45.52, 'eval_steps_per_second': 5.794, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.5879504084587097, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_runtime': 5.3097, 'eval_samples_per_second': 41.433, 'eval_steps_per_second': 5.273, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.48871544003486633, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_runtime': 5.5018, 'eval_samples_per_second': 39.987, 'eval_steps_per_second': 5.089, 'epoch': 3.0}
{'train_runtime': 238.3346, 'train_samples_per_second': 11.329, 'train_steps_per_second': 1.422, 'train_loss': 0.6202352968289085, 'epoch': 3.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.48871544003486633, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_runtime': 5.3737, 'eval_samples_per_second': 40.94, 'eval_steps_per_second': 5.211, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


{'67b190fbdcf57f867838602f2d91943f81f3ce69f9d6aa273a48ed1401f8944d_train': {'roc_auc': 1.0,
  'optimal_threshold': -0.501387894153595,
  'conf_matrix': [[700, 0], [0, 700]],
  'precision': 1.0,
  'recall': 1.0,
  'f1': 1.0,
  'accuracy': 1.0,
  'tpr_at_fpr_0_01': 1.0},
 '64dea5d6694c7311e474857861bd1ede726781f69b394924b9e5850bd3e16868_train': {'roc_auc': 0.9987673469387754,
  'optimal_threshold': -0.5224516987800598,
  'conf_matrix': [[697, 3], [2, 698]],
  'precision': 0.9957203994293866,
  'recall': 0.9971428571428571,
  'f1': 0.9964311206281228,
  'accuracy': 0.9964285714285714,
  'tpr_at_fpr_0_01': 0.9914285714285714},
 '262f2f7c22304d368ea5ae3b6bbe5ef8f1ec041e72221ac21ca3eba27951b9f8_train': {'roc_auc': 1.0,
  'optimal_threshold': -0.48368963599205017,
  'conf_matrix': [[700, 0], [0, 700]],
  'precision': 1.0,
  'recall': 1.0,
  'f1': 1.0,
  'accuracy': 1.0,
  'tpr_at_fpr_0_01': 1.0}}