# 0. Setup

## 0.0 imports

In [10]:
import os
import sys
import numpy as np
import pandas as pd
from typing import Literal

# Add the directory two levels above the notebook to the import search path.
# The project imports below (`src...`, `DetectRL...`) are presumably resolved
# through this entry, so it has to be appended before they run.
BASE_DIR = "../../"
sys.path.append(BASE_DIR)

from src.general_functions_and_patterns_for_detection import (
    TrainRobertaHelper, TrainingDataHandler,
    RESULT_DIR, REGEX_CLEANED_FILES, ORIGINAL_DATA_DIR,
    seed_everything
)

# Fixed seed for the whole notebook. `seed_everything` is a project helper whose
# body is not visible here; presumably it seeds every RNG in use -- TODO confirm.
SEED = 2023
seed_everything(SEED)

# Short alias for the project helper; not referenced in the cells visible here.
prepare_df_for_roberta_training = TrainRobertaHelper.prepare_df_for_roberta_training
import DetectRL.Detectors.train_roberta as train_roberta

# Run flags; none of them is referenced in the cells visible in this section.
DEBUG = True
DRY_RUN = False
ALL_DATA = True
# Rebind RESULT_DIR to the experiment-specific subfolder "T01" and create it.
# This shadows the imported constant; re-running the cell does not nest "T01"
# twice because the import above re-binds the original value first.
RESULT_DIR = os.path.join(RESULT_DIR, "T01")
os.makedirs(RESULT_DIR, exist_ok=True)

In [2]:
# TODO: adjust this GPU / NCCL configuration to match your machine
# NOTE(review): this cell runs after the imports cell -- confirm that nothing has
# initialised CUDA before CUDA_VISIBLE_DEVICES is set here.
os.environ.update(
    {
        # Disable NCCL features incompatible with RTX 40xx
        "NCCL_P2P_DISABLE": "1",
        "NCCL_IB_DISABLE": "1",
        # Restrict to only GPU 0 (CUDA:0)
        "CUDA_VISIBLE_DEVICES": "0",
    }
)

# 1. Train multi LLM across all domains

- Check which data is worth re-cleaning → look for patterns that are typical of each individual LLM and that appear across multiple domains
- The selection is based on the data that is most likely AI-generated according to a model trained on uncleaned data

In [3]:
# Prompting strategies covered by this run. Alternative set, kept for reference:
# PROMPTS=["direct_prompt", "prompt_few_shot", "prompt_SICO"]
PROMPTS = ["paraphrase_polish_human", "paraphrase_polish_llm"]
# Suffix appended to the result / model file names written by later cells.
_prompt_str = "_" + "-".join(PROMPTS)

# One frame per LLM, loaded from REGEX_CLEANED_FILES (the cleaned variant).
(
    df_claude_cleaned,
    df_llama_cleaned,
    df_palm_cleaned,
    df_chatgpt_cleaned,
) = TrainingDataHandler.load_dataframes_all_llms_all_domains(
    REGEX_CLEANED_FILES,
    _suffix_path="_cleaned_all_v3.parquet",
    prompts=PROMPTS,
    paraphrase_polish_human_as_ai=False,
)

cleaned_df: dict = dict(
    claude=df_claude_cleaned,
    llama=df_llama_cleaned,
    palm=df_palm_cleaned,
    chatgpt=df_chatgpt_cleaned,
)

# One frame per LLM, loaded from ORIGINAL_DATA_DIR ("nc" = not cleaned).
(
    df_claude_nc,
    df_llama_nc,
    df_palm_nc,
    df_chatgpt_nc,
) = TrainingDataHandler.load_dataframes_all_llms_all_domains(
    ORIGINAL_DATA_DIR,
    _suffix_path=".json",
    prompts=PROMPTS,
    paraphrase_polish_human_as_ai=False,
)

uncleaned_df: dict = dict(
    claude=df_claude_nc,
    llama=df_llama_nc,
    palm=df_palm_nc,
    chatgpt=df_chatgpt_nc,
)

In [4]:
# Split the cleaned Claude frame for training / testing. The project helper also
# receives the original (non-cleaned) Claude frame and returns an "adjusted"
# version of it plus the sample ids -- presumably restricted to the test
# samples; verify against the helper.
(
    train_df_claude,
    test_df_claude,
    adjusted_df_claude,
    sample_ids_claude,
) = TrainingDataHandler.split_training_data_frame_and_adjust_transfer_test_df(
    df_claude_cleaned,
    df_claude_nc,
)

In [14]:
def _is_cleaned_dataset_name(_name) -> bool:
    """Return False when a dataset label carries an explicit "uncleaned" marker.

    A label counts as uncleaned when one of its ``_``/``-`` separated tokens is
    ``nc`` or ``uncleaned`` (e.g. ``claude_nc``, ``test_df_nc_llama``,
    ``palm_uncleaned``). Whole tokens are compared so that labels which merely
    contain the letters "nc" (e.g. ``france_cleaned``) are not misclassified.
    """
    _tokens = str(_name).lower().replace("-", "_").split("_")
    return not any(_token in ("nc", "uncleaned") for _token in _tokens)


def modify_and_store_df(_df_results: pd.DataFrame, _llm_training: str, _transfer_df_dict: dict = None, _dfs_used_for_testing: list = None,
                        samples: Literal["cleaned", "original"] = "original"):
    """Annotate the evaluation results of one training run and store them as CSV.

    The results are transposed so that each evaluated dataframe becomes one row.
    Three columns are added: ``df_used_for_testing`` (label of the evaluated
    dataframe), ``domain_test`` (the global ``PROMPTS`` list, repeated per row)
    and ``cleaned`` (False when the label marks the data as uncleaned).

    Args:
        _df_results: Results as accepted by ``pd.DataFrame``; one column per
            evaluated dataframe before transposition.
        _llm_training: Name of the LLM the detector was trained on; part of the
            CSV file name.
        _transfer_df_dict: Mapping whose keys are used as labels when
            ``_dfs_used_for_testing`` is not given.
        _dfs_used_for_testing: Labels of the evaluated dataframes, in row order.
            Takes precedence over ``_transfer_df_dict``.
        samples: Kind of samples used for training; part of the CSV file name.

    Returns:
        The annotated results dataframe (also written to ``RESULT_DIR``).

    Raises:
        ValueError: If neither label source is provided, or if the number of
            labels differs from the number of result rows.
    """
    if _dfs_used_for_testing is not None:
        _test_names = list(_dfs_used_for_testing)
    elif _transfer_df_dict is not None:
        _test_names = list(_transfer_df_dict.keys())
    else:
        raise ValueError("Either dict with transfer dataframes or list with dataframe names used for testing has to be provided")

    _df_results = pd.DataFrame(_df_results).T
    # Fail early with a readable message instead of pandas' generic length error.
    if len(_test_names) != len(_df_results):
        raise ValueError(
            f"Got {len(_test_names)} dataframe names for {len(_df_results)} result rows"
        )

    _df_results["df_used_for_testing"] = _test_names
    # The column name is historical: it stores the prompting strategies.
    _df_results["domain_test"] = [PROMPTS for _ in range(len(_df_results))]
    _df_results["cleaned"] = _df_results["df_used_for_testing"].apply(_is_cleaned_dataset_name)
    _df_results.to_csv(f"{RESULT_DIR}/{_llm_training}_trained_{samples}_samples_results{_prompt_str}.csv")
    return _df_results

## 1.1 Using cleaned data

In [6]:
# Frame sizes: cleaned frames, original frames, then the Claude transfer / test split.
print(*[_frame.shape for _frame in (df_llama_cleaned, df_palm_cleaned, df_chatgpt_cleaned)])
print(*[_frame.shape for _frame in (df_llama_nc, df_palm_nc, df_chatgpt_nc)])
print(*[_frame.shape for _frame in (adjusted_df_claude, test_df_claude)])
adjusted_df_claude.tail()

(11186, 7) (10647, 7) (11197, 7)
(11200, 7) (11200, 7) (11200, 7)
(2240, 7) (2211, 7)


Unnamed: 0,id,context,llm_type,text,domain,label,llm_prompting_strategy
11177,2078,Barnes & Noble in Southpark is a great place t...,Claude-instant,Here is a polished 17 sentence continuation of...,yelp_review,llm,paraphrase_polish_llm
11183,2084,"So help me God, if I ever get married or have ...",Claude-instant,Here is a polished version of the review with ...,yelp_review,llm,paraphrase_polish_llm
11184,2085,I drive out here for these reasons: nnI like m...,Claude-instant,Here is my attempt at polishing the review whi...,yelp_review,llm,paraphrase_polish_llm
11186,2087,While in Charlotte for business we ventured to...,Claude-instant,Here is a polished 14 sentence review:During o...,yelp_review,llm,paraphrase_polish_llm
11193,2094,Oh upstream how I love thee.,Claude-instant,Here is my attempt at polishing the review whi...,yelp_review,llm,paraphrase_polish_llm


In [12]:
# Evaluation frames keyed by the label stored in the results table. Keeping the
# label next to its frame ensures the two cannot get out of sync.
transfer_dfs = {
    "claude_cleaned": test_df_claude,
    "llama_cleaned": df_llama_cleaned,
    "palm_cleaned": df_palm_cleaned,
    "chatgpt_cleaned": df_chatgpt_cleaned,
    "claude_nc": adjusted_df_claude,
    "llama_nc": df_llama_nc,
    "palm_nc": df_palm_nc,
    "chatgpt_nc": df_chatgpt_nc,
}

args = train_roberta.generate_args_for_training_roberta(
    train_df = train_df_claude, test_df = test_df_claude, transfer_df=list(transfer_dfs.values()),
    # os.path.join places the model inside RESULT_DIR. Plain concatenation
    # yields ".../T01claude_..." (a sibling of the T01 folder), so models saved
    # by earlier runs are located there.
    save_model_path=os.path.join(RESULT_DIR, f"claude_direct_prompt_all_domains_multi_llm_cleaned{_prompt_str}")
)
results_cleaned_train = train_roberta.run(args)
df_cleaned_train = modify_and_store_df(results_cleaned_train, "claude",
                                       _dfs_used_for_testing=list(transfer_dfs.keys()),
                                       samples="cleaned")
df_cleaned_train

Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,df_used_for_testing,domain_test,cleaned
fd7e0fb49d10b6e5054a4b0f7a4d40ec633e3d4ef73f50ba7b748eacd56133c2_train,0.956643,-0.754963,"[[1452, 207], [29, 523]]",0.716438,0.947464,0.815913,0.893261,0.001812,claude_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True
15268a30d1c466d21c6e16ba7b5a4b76d01ad9c5eb5b536f7e5cedc7762c67b8_train,0.94845,-0.974335,"[[7260, 1136], [288, 2502]]",0.687741,0.896774,0.778469,0.872698,0.000717,llama_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True
2e8af8cbf8341eaae174ec2875ff8ec8147b74084f1a1da772338248b91727fa_train,0.945368,-0.994146,"[[6601, 1377], [167, 2502]]",0.645012,0.93743,0.764203,0.854983,0.001124,palm_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True
9d000c4d3a86c02e1ec8e6362801ee731c46b2e4e92716642bed23d09fb55bad_train,0.975582,-0.176101,"[[7829, 568], [64, 2736]]",0.828087,0.977143,0.896461,0.943556,0.003214,chatgpt_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True
7b0f4bc09e69e9439960e32e5c4443fe85a9da3fbc074172ca5a88577c0d5db7_train,0.961399,-0.966674,"[[1465, 215], [32, 528]]",0.710633,0.942857,0.810437,0.889732,0.025,claude_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False
357d2f75d787f00787f2bfb31c367978d56fe2843e11ae420e2a3ccc538ad93e_train,0.944598,-0.984199,"[[7108, 1292], [271, 2529]]",0.661869,0.903214,0.763933,0.860446,0.000714,llama_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False
277d294ec1bf7b980608af53a29c176de2de7c1ffd062c77f36eabc843565065_train,0.930352,-0.99487,"[[6655, 1745], [183, 2617]]",0.599954,0.934643,0.730801,0.827857,0.000357,palm_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False
a617f5d6441618fcb86810cddd0538c3b18dd27901b0be621495d69be376d7c4_train,0.975549,-0.176101,"[[7834, 566], [66, 2734]]",0.828485,0.976429,0.896393,0.943571,0.003214,chatgpt_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False


## 1.2 Using original data

### Claude

In [16]:
# Train on the original (uncleaned) Claude samples, then evaluate on every
# cleaned and original frame.
train_df_claude_nc, test_df_claude_nc, adjusted_df_claude_cleaned, sample_ids_claude_nc = \
    TrainingDataHandler.split_training_data_frame_and_adjust_transfer_test_df(df_claude_nc, df_claude_cleaned)

# Evaluation frames keyed by the label stored in the results table.
transfer_dfs = {
    "claude_cleaned": adjusted_df_claude_cleaned,
    "llama_cleaned": df_llama_cleaned,
    "palm_cleaned": df_palm_cleaned,
    "chatgpt_cleaned": df_chatgpt_cleaned,
    "claude_nc": test_df_claude_nc,
    "llama_nc": df_llama_nc,
    "palm_nc": df_palm_nc,
    "chatgpt_nc": df_chatgpt_nc,
}

args = train_roberta.generate_args_for_training_roberta(
    train_df = train_df_claude_nc, test_df = test_df_claude_nc, transfer_df=list(transfer_dfs.values()),
    # os.path.join places the model inside RESULT_DIR. Plain concatenation
    # yields ".../T01claude_..." (a sibling of the T01 folder), so models saved
    # by earlier runs are located there.
    save_model_path=os.path.join(RESULT_DIR, f"claude_direct_prompt_all_domains_multi_llm_not_cleaned{_prompt_str}")
)

results_nc_train = train_roberta.run(args)
# modify_and_store_df already writes
# f"{RESULT_DIR}/claude_trained_original_samples_results{_prompt_str}.csv"
# (samples defaults to "original"), so no extra to_csv call is needed here.
df_not_cleaned_train = modify_and_store_df(results_nc_train, "claude",
                                           _dfs_used_for_testing=list(transfer_dfs.keys()))
df_not_cleaned_train

Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,df_used_for_testing,domain_test,cleaned
fd7e0fb49d10b6e5054a4b0f7a4d40ec633e3d4ef73f50ba7b748eacd56133c2_train,0.960867,-0.940477,"[[1406, 253], [20, 532]]",0.677707,0.963768,0.795812,0.876526,0.021739,claude_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True
15268a30d1c466d21c6e16ba7b5a4b76d01ad9c5eb5b536f7e5cedc7762c67b8_train,0.946975,-0.967297,"[[7281, 1115], [344, 2446]]",0.686886,0.876703,0.770272,0.869569,0.001434,llama_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True
2e8af8cbf8341eaae174ec2875ff8ec8147b74084f1a1da772338248b91727fa_train,0.941164,-0.994278,"[[6597, 1381], [220, 2449]]",0.639426,0.917572,0.753654,0.849629,0.030723,palm_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True
9d000c4d3a86c02e1ec8e6362801ee731c46b2e4e92716642bed23d09fb55bad_train,0.983951,-0.136667,"[[7794, 603], [84, 2716]]",0.818319,0.97,0.887727,0.938644,0.0075,chatgpt_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True
7b0f4bc09e69e9439960e32e5c4443fe85a9da3fbc074172ca5a88577c0d5db7_train,0.961763,-0.928909,"[[1421, 259], [22, 538]]",0.675031,0.960714,0.792926,0.874554,0.025,claude_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False
357d2f75d787f00787f2bfb31c367978d56fe2843e11ae420e2a3ccc538ad93e_train,0.946071,-0.96733,"[[7289, 1111], [351, 2449]]",0.687921,0.874643,0.770126,0.869464,0.001429,llama_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False
277d294ec1bf7b980608af53a29c176de2de7c1ffd062c77f36eabc843565065_train,0.931743,-0.995067,"[[6563, 1837], [206, 2594]]",0.585421,0.926429,0.717466,0.817589,0.030357,palm_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False
a617f5d6441618fcb86810cddd0538c3b18dd27901b0be621495d69be376d7c4_train,0.983967,-0.136667,"[[7798, 602], [83, 2717]]",0.81862,0.970357,0.888054,0.938839,0.007857,chatgpt_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False


### Llama

In [17]:
_llm_temp = "llama"
# Train on the original (uncleaned) samples of this LLM; the cleaned frame of the
# same LLM is passed along so the helper can return its adjusted counterpart.
train_df, test_df, adjusted_df_cleaned, _ = \
    TrainingDataHandler.split_training_data_frame_and_adjust_transfer_test_df(uncleaned_df[_llm_temp], cleaned_df[_llm_temp])

# Evaluation frames keyed by label: the training LLM's own cleaned / original
# test frames plus the full cleaned / original frames of every other LLM.
# The insertion order defines the row order of the results table.
transfer_dfs = {f"adjusted_df_cleaned_{_llm_temp}": adjusted_df_cleaned}
transfer_dfs.update({k+"_cleaned": df for k, df in cleaned_df.items() if k != _llm_temp})
transfer_dfs.update({f"test_df_nc_{_llm_temp}": test_df})
transfer_dfs.update({k+"_uncleaned": df for k, df in uncleaned_df.items() if k != _llm_temp})

args = train_roberta.generate_args_for_training_roberta(
    train_df = train_df, test_df = test_df, transfer_df=list(transfer_dfs.values()),
    # os.path.join places the model inside RESULT_DIR. Plain concatenation
    # yields ".../T01llama_..." (a sibling of the T01 folder), so models saved
    # by earlier runs are located there.
    save_model_path=os.path.join(RESULT_DIR, f"{_llm_temp}_direct_prompt_all_domains_multi_llm_not_cleaned{_prompt_str}")
)
results_nc_train = train_roberta.run(args)

df_not_cleaned_train = modify_and_store_df(results_nc_train, _llm_temp, _transfer_df_dict=transfer_dfs)
df_not_cleaned_train

  return forward_call(*args, **kwargs)


{'loss': 0.4828, 'grad_norm': 5.453637599945068, 'learning_rate': 8.349867724867724e-07, 'epoch': 0.49603174603174605}
{'loss': 0.2526, 'grad_norm': 3.4856503009796143, 'learning_rate': 6.69642857142857e-07, 'epoch': 0.9920634920634921}
{'eval_loss': 0.2544172704219818, 'eval_accuracy': 0.9944196428571429, 'eval_f1': 0.9944196428571429, 'eval_precision': 0.9944196428571429, 'eval_recall': 0.9944196428571429, 'eval_runtime': 8.5938, 'eval_samples_per_second': 104.261, 'eval_steps_per_second': 13.033, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 0.1495, 'grad_norm': 1.2772891521453857, 'learning_rate': 5.042989417989418e-07, 'epoch': 1.4880952380952381}
{'loss': 0.1469, 'grad_norm': 0.2134893536567688, 'learning_rate': 3.3895502645502644e-07, 'epoch': 1.9841269841269842}
{'eval_loss': 0.051856767386198044, 'eval_accuracy': 0.9866071428571429, 'eval_f1': 0.9866071428571429, 'eval_precision': 0.9866071428571429, 'eval_recall': 0.9866071428571429, 'eval_runtime': 8.5694, 'eval_samples_per_second': 104.558, 'eval_steps_per_second': 13.07, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 0.1368, 'grad_norm': 0.27638953924179077, 'learning_rate': 1.736111111111111e-07, 'epoch': 2.4801587301587302}
{'loss': 0.1219, 'grad_norm': 38.08544921875, 'learning_rate': 8.267195767195766e-09, 'epoch': 2.9761904761904763}
{'eval_loss': 0.043106745928525925, 'eval_accuracy': 0.9899553571428571, 'eval_f1': 0.9899553571428571, 'eval_precision': 0.9899553571428571, 'eval_recall': 0.9899553571428571, 'eval_runtime': 8.5856, 'eval_samples_per_second': 104.361, 'eval_steps_per_second': 13.045, 'epoch': 3.0}
{'train_runtime': 844.8612, 'train_samples_per_second': 28.634, 'train_steps_per_second': 3.579, 'train_loss': 0.21424322390051745, 'epoch': 3.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.043106745928525925, 'eval_accuracy': 0.9899553571428571, 'eval_f1': 0.9899553571428571, 'eval_precision': 0.9899553571428571, 'eval_recall': 0.9899553571428571, 'eval_runtime': 8.5756, 'eval_samples_per_second': 104.482, 'eval_steps_per_second': 13.06, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,df_used_for_testing,domain_test,cleaned
42ac23415e356a37eeac963961b928a57026c98d5a6e449ffec9fb7707cd82e5_train,0.972394,-0.019174,"[[1578, 102], [9, 549]]",0.843318,0.983871,0.908189,0.950402,0.003584,adjusted_df_cleaned_llama,"[paraphrase_polish_human, paraphrase_polish_llm]",True
4387ef05e11901e66121939b9e0f6fbe0eb04fbd1f22589ab8f5ff446db6e27e_train,0.927691,-0.998424,"[[6751, 1556], [283, 2491]]",0.615518,0.897981,0.730391,0.83404,0.002163,claude_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
2e8af8cbf8341eaae174ec2875ff8ec8147b74084f1a1da772338248b91727fa_train,0.969946,-0.618815,"[[7480, 498], [203, 2466]]",0.831984,0.923942,0.875555,0.93416,0.0,palm_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
9d000c4d3a86c02e1ec8e6362801ee731c46b2e4e92716642bed23d09fb55bad_train,0.988257,-0.021371,"[[8119, 278], [34, 2766]]",0.908673,0.987857,0.946612,0.972135,0.002857,chatgpt_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
8c970afc0a2447163e3ace1b5f92ebeb818110163b88cf616ef13e11ba4dbd78_train,0.972781,-0.019174,"[[1578, 102], [10, 550]]",0.843558,0.982143,0.907591,0.95,0.003571,test_df_nc_llama,"[paraphrase_polish_human, paraphrase_polish_llm]",False
d8648f9b583ae727f79759db0101edcdaaee2436e882146a1708a9b0a2dd7841_train,0.92077,-0.998367,"[[6915, 1485], [366, 2434]]",0.621077,0.869286,0.724513,0.834732,0.003571,claude_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False
277d294ec1bf7b980608af53a29c176de2de7c1ffd062c77f36eabc843565065_train,0.966671,-0.047691,"[[8041, 359], [362, 2438]]",0.871648,0.870714,0.871181,0.935625,0.0,palm_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False
a617f5d6441618fcb86810cddd0538c3b18dd27901b0be621495d69be376d7c4_train,0.988309,-0.017557,"[[8144, 256], [41, 2759]]",0.915091,0.985357,0.948925,0.973482,0.002857,chatgpt_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False


### Palm

In [18]:
_llm_temp = "palm"
# Train on the original (uncleaned) samples of this LLM; the cleaned frame of the
# same LLM is passed along so the helper can return its adjusted counterpart.
train_df, test_df, adjusted_df_cleaned, _ = \
    TrainingDataHandler.split_training_data_frame_and_adjust_transfer_test_df(uncleaned_df[_llm_temp], cleaned_df[_llm_temp])

# Evaluation frames keyed by label: the training LLM's own cleaned / original
# test frames plus the full cleaned / original frames of every other LLM.
# The insertion order defines the row order of the results table.
transfer_dfs = {f"adjusted_df_cleaned_{_llm_temp}": adjusted_df_cleaned}
transfer_dfs.update({k+"_cleaned": df for k, df in cleaned_df.items() if k != _llm_temp})
transfer_dfs.update({f"test_df_nc_{_llm_temp}": test_df})
transfer_dfs.update({k+"_uncleaned": df for k, df in uncleaned_df.items() if k != _llm_temp})

args = train_roberta.generate_args_for_training_roberta(
    train_df = train_df, test_df = test_df, transfer_df=list(transfer_dfs.values()),
    # os.path.join places the model inside RESULT_DIR. Plain concatenation
    # yields ".../T01palm_..." (a sibling of the T01 folder), so models saved
    # by earlier runs are located there.
    save_model_path=os.path.join(RESULT_DIR, f"{_llm_temp}_direct_prompt_all_domains_multi_llm_not_cleaned{_prompt_str}")
)
results_nc_train = train_roberta.run(args)

df_not_cleaned_train = modify_and_store_df(results_nc_train, _llm_temp, _transfer_df_dict=transfer_dfs)
df_not_cleaned_train

  return forward_call(*args, **kwargs)


{'loss': 0.4867, 'grad_norm': 7.516754627227783, 'learning_rate': 8.349867724867724e-07, 'epoch': 0.49603174603174605}
{'loss': 0.2812, 'grad_norm': 27.3880558013916, 'learning_rate': 6.69642857142857e-07, 'epoch': 0.9920634920634921}
{'eval_loss': 0.6746987104415894, 'eval_accuracy': 0.8002232142857143, 'eval_f1': 0.8002232142857143, 'eval_precision': 0.8002232142857143, 'eval_recall': 0.8002232142857143, 'eval_runtime': 8.462, 'eval_samples_per_second': 105.885, 'eval_steps_per_second': 13.236, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 0.207, 'grad_norm': 37.83607864379883, 'learning_rate': 5.042989417989418e-07, 'epoch': 1.4880952380952381}
{'loss': 0.1726, 'grad_norm': 0.4003298580646515, 'learning_rate': 3.3895502645502644e-07, 'epoch': 1.9841269841269842}
{'eval_loss': 0.295428991317749, 'eval_accuracy': 0.9185267857142857, 'eval_f1': 0.9185267857142857, 'eval_precision': 0.9185267857142857, 'eval_recall': 0.9185267857142857, 'eval_runtime': 8.4316, 'eval_samples_per_second': 106.267, 'eval_steps_per_second': 13.283, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 0.1616, 'grad_norm': 0.4697631597518921, 'learning_rate': 1.736111111111111e-07, 'epoch': 2.4801587301587302}
{'loss': 0.1562, 'grad_norm': 0.16491959989070892, 'learning_rate': 8.267195767195766e-09, 'epoch': 2.9761904761904763}
{'eval_loss': 0.33069083094596863, 'eval_accuracy': 0.9174107142857143, 'eval_f1': 0.9174107142857143, 'eval_precision': 0.9174107142857143, 'eval_recall': 0.9174107142857143, 'eval_runtime': 8.4243, 'eval_samples_per_second': 106.36, 'eval_steps_per_second': 13.295, 'epoch': 3.0}
{'train_runtime': 842.9332, 'train_samples_per_second': 28.7, 'train_steps_per_second': 3.587, 'train_loss': 0.24400247057909688, 'epoch': 3.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.33069083094596863, 'eval_accuracy': 0.9174107142857143, 'eval_f1': 0.9174107142857143, 'eval_precision': 0.9174107142857143, 'eval_recall': 0.9174107142857143, 'eval_runtime': 8.4203, 'eval_samples_per_second': 106.41, 'eval_steps_per_second': 13.301, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,df_used_for_testing,domain_test,cleaned
f70ba38644f688500f57994260d5b5f312fec7bf5bb66ffed666b2846faf2132_train,0.978056,-0.186382,"[[1482, 103], [36, 497]]",0.828333,0.932458,0.877317,0.934372,0.028143,adjusted_df_cleaned_palm,"[paraphrase_polish_human, paraphrase_polish_llm]",True
4387ef05e11901e66121939b9e0f6fbe0eb04fbd1f22589ab8f5ff446db6e27e_train,0.929727,-0.993838,"[[6774, 1533], [312, 2462]]",0.61627,0.887527,0.727434,0.833499,0.011175,claude_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
15268a30d1c466d21c6e16ba7b5a4b76d01ad9c5eb5b536f7e5cedc7762c67b8_train,0.967572,-0.812387,"[[7345, 1051], [112, 2678]]",0.718155,0.959857,0.821598,0.896031,0.00681,llama_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
9d000c4d3a86c02e1ec8e6362801ee731c46b2e4e92716642bed23d09fb55bad_train,0.99231,-0.011005,"[[8061, 336], [56, 2744]]",0.890909,0.98,0.933333,0.964991,0.003571,chatgpt_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
b099a90d99cf3f966c67518684a866bce99ac213e026a4d31fa5bee5ff6d1235_train,0.975207,-0.184362,"[[1576, 104], [46, 514]]",0.831715,0.917857,0.872666,0.933036,0.026786,test_df_nc_palm,"[paraphrase_polish_human, paraphrase_polish_llm]",False
d8648f9b583ae727f79759db0101edcdaaee2436e882146a1708a9b0a2dd7841_train,0.928356,-0.993868,"[[6840, 1560], [307, 2493]]",0.6151,0.890357,0.727565,0.833304,0.006786,claude_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False
357d2f75d787f00787f2bfb31c367978d56fe2843e11ae420e2a3ccc538ad93e_train,0.96767,-0.755112,"[[7385, 1015], [125, 2675]]",0.724932,0.955357,0.824345,0.898214,0.006429,llama_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False
a617f5d6441618fcb86810cddd0538c3b18dd27901b0be621495d69be376d7c4_train,0.992314,-0.011005,"[[8063, 337], [57, 2743]]",0.890584,0.979643,0.932993,0.964821,0.002857,chatgpt_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False


### ChatGPT

In [19]:
_llm_temp = "chatgpt"
# Train on the original (uncleaned) samples of this LLM; the cleaned frame of the
# same LLM is passed along so the helper can return its adjusted counterpart.
train_df, test_df, adjusted_df_cleaned, _ = \
    TrainingDataHandler.split_training_data_frame_and_adjust_transfer_test_df(uncleaned_df[_llm_temp], cleaned_df[_llm_temp])

# Evaluation frames keyed by label: the training LLM's own cleaned / original
# test frames plus the full cleaned / original frames of every other LLM.
# The insertion order defines the row order of the results table.
transfer_dfs = {f"adjusted_df_cleaned_{_llm_temp}": adjusted_df_cleaned}
transfer_dfs.update({k+"_cleaned": df for k, df in cleaned_df.items() if k != _llm_temp})
transfer_dfs.update({f"test_df_nc_{_llm_temp}": test_df})
transfer_dfs.update({k+"_uncleaned": df for k, df in uncleaned_df.items() if k != _llm_temp})

args = train_roberta.generate_args_for_training_roberta(
    train_df = train_df, test_df = test_df, transfer_df=list(transfer_dfs.values()),
    # os.path.join places the model inside RESULT_DIR. Plain concatenation
    # yields ".../T01chatgpt_..." (a sibling of the T01 folder), so models saved
    # by earlier runs are located there.
    save_model_path=os.path.join(RESULT_DIR, f"{_llm_temp}_direct_prompt_all_domains_multi_llm_not_cleaned{_prompt_str}")
)
results_nc_train = train_roberta.run(args)

df_not_cleaned_train = modify_and_store_df(results_nc_train, _llm_temp, _transfer_df_dict=transfer_dfs)
df_not_cleaned_train

  return forward_call(*args, **kwargs)


{'loss': 0.4192, 'grad_norm': 2.6958460807800293, 'learning_rate': 8.349867724867724e-07, 'epoch': 0.49603174603174605}
{'loss': 0.0784, 'grad_norm': 7.502600193023682, 'learning_rate': 6.69642857142857e-07, 'epoch': 0.9920634920634921}
{'eval_loss': 0.12219888716936111, 'eval_accuracy': 0.9654017857142857, 'eval_f1': 0.9654017857142857, 'eval_precision': 0.9654017857142857, 'eval_recall': 0.9654017857142857, 'eval_runtime': 8.6474, 'eval_samples_per_second': 103.615, 'eval_steps_per_second': 12.952, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 0.0679, 'grad_norm': 0.44616591930389404, 'learning_rate': 5.042989417989418e-07, 'epoch': 1.4880952380952381}
{'loss': 0.0475, 'grad_norm': 0.03494182229042053, 'learning_rate': 3.3895502645502644e-07, 'epoch': 1.9841269841269842}
{'eval_loss': 0.04229801893234253, 'eval_accuracy': 0.9910714285714286, 'eval_f1': 0.9910714285714286, 'eval_precision': 0.9910714285714286, 'eval_recall': 0.9910714285714286, 'eval_runtime': 8.6133, 'eval_samples_per_second': 104.025, 'eval_steps_per_second': 13.003, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 0.0464, 'grad_norm': 0.020010611042380333, 'learning_rate': 1.736111111111111e-07, 'epoch': 2.4801587301587302}
{'loss': 0.0482, 'grad_norm': 0.12514695525169373, 'learning_rate': 8.267195767195766e-09, 'epoch': 2.9761904761904763}
{'eval_loss': 0.03483090177178383, 'eval_accuracy': 0.9921875, 'eval_f1': 0.9921875, 'eval_precision': 0.9921875, 'eval_recall': 0.9921875, 'eval_runtime': 8.6156, 'eval_samples_per_second': 103.997, 'eval_steps_per_second': 13.0, 'epoch': 3.0}
{'train_runtime': 847.9006, 'train_samples_per_second': 28.532, 'train_steps_per_second': 3.566, 'train_loss': 0.11750469246396313, 'epoch': 3.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.03483090177178383, 'eval_accuracy': 0.9921875, 'eval_f1': 0.9921875, 'eval_precision': 0.9921875, 'eval_recall': 0.9921875, 'eval_runtime': 8.6068, 'eval_samples_per_second': 104.104, 'eval_steps_per_second': 13.013, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,df_used_for_testing,domain_test,cleaned
cc23eb4ef7575015f3f8ad6380b2e1249f4b18fba614492a5cc21edcf01b7689_train,0.995107,-0.002935,"[[1652, 26], [9, 551]]",0.954939,0.983929,0.969217,0.984361,0.025,adjusted_df_cleaned_chatgpt,"[paraphrase_polish_human, paraphrase_polish_llm]",True
4387ef05e11901e66121939b9e0f6fbe0eb04fbd1f22589ab8f5ff446db6e27e_train,0.932084,-0.999554,"[[7008, 1299], [321, 2453]]",0.653785,0.884283,0.751762,0.853804,0.001442,claude_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
15268a30d1c466d21c6e16ba7b5a4b76d01ad9c5eb5b536f7e5cedc7762c67b8_train,0.965621,-0.99947,"[[7557, 839], [195, 2595]]",0.755679,0.930108,0.833869,0.907563,0.000717,llama_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
2e8af8cbf8341eaae174ec2875ff8ec8147b74084f1a1da772338248b91727fa_train,0.953195,-0.999534,"[[7208, 770], [296, 2373]]",0.755011,0.889097,0.816586,0.899878,0.002623,palm_cleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",True
c01b13da64ea707c8f2c88ceed8230175060190724fc3094021851ea6e4fa45a_train,0.99512,-0.006331,"[[1651, 29], [8, 552]]",0.950086,0.985714,0.967572,0.983482,0.025,test_df_nc_chatgpt,"[paraphrase_polish_human, paraphrase_polish_llm]",False
d8648f9b583ae727f79759db0101edcdaaee2436e882146a1708a9b0a2dd7841_train,0.911325,-0.99957,"[[7048, 1352], [445, 2355]]",0.635285,0.841071,0.723836,0.839554,0.000357,claude_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False
357d2f75d787f00787f2bfb31c367978d56fe2843e11ae420e2a3ccc538ad93e_train,0.963928,-0.99947,"[[7570, 830], [218, 2582]]",0.756741,0.922143,0.831294,0.906429,0.000357,llama_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False
277d294ec1bf7b980608af53a29c176de2de7c1ffd062c77f36eabc843565065_train,0.937213,-0.999541,"[[7285, 1115], [331, 2469]]",0.688895,0.881786,0.773496,0.870893,0.002857,palm_uncleaned,"[paraphrase_polish_human, paraphrase_polish_llm]",False


# 2. Result Evaluation 

In [22]:
# Display the path of the Claude "original samples" results CSV; the same
# expression is passed to pd.read_csv in the following cell.
f"{RESULT_DIR}/claude_trained_original_samples_results{_prompt_str}.csv"

'/mnt/hdd-baracuda/pdingfelder/mt_philipp_dingfelder_generated_text_detection/src/../results/T01/claude_trained_original_samples_results_paraphrase_polish_human-paraphrase_polish_llm.csv'

In [23]:
# Display name of each evaluated LLM, keyed by the token used in `df_used_for_testing`.
LLM_DISPLAY_NAMES = {
    "claude": "Claude-instant",
    "llama": "Llama-2-70b",
    "palm": "Google-PaLM",
    "chatgpt": "ChatGPT",
}


def _llm_display_name(_dataset_name) -> str:
    """Map a dataset label such as ``llama_nc`` to the display name of its LLM.

    Raises:
        ValueError: If no ``_``-separated token of the label is a known LLM key.
    """
    for _token in str(_dataset_name).split("_"):
        if _token in LLM_DISPLAY_NAMES:
            return LLM_DISPLAY_NAMES[_token]
    raise ValueError(f"Cannot derive the tested LLM from dataset name {_dataset_name!r}")


def _load_claude_results(_samples: str, _llm_train: str) -> pd.DataFrame:
    """Read the stored results of one Claude-trained detector and label them.

    ``llm_test`` is derived from ``df_used_for_testing`` rather than assigned by
    position, so the labels stay correct if the row order of the CSV changes.
    """
    _df = pd.read_csv(f"{RESULT_DIR}/claude_trained_{_samples}_samples_results{_prompt_str}.csv", index_col=0)
    _df["llm_train"] = _llm_train
    _df["llm_test"] = _df["df_used_for_testing"].apply(_llm_display_name)
    return _df


df_cleaned_train_claude = _load_claude_results("cleaned", "Claude-instant_cleaned")
df_not_cleaned_claude = _load_claude_results("original", "Claude-instant_uncleaned")

df_claude_multi_domain_eval_multi_llm = pd.concat([df_cleaned_train_claude, df_not_cleaned_claude])
df_claude_multi_domain_eval_multi_llm.sort_values(by=["llm_test", "llm_train", "cleaned"])

Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,df_used_for_testing,domain_test,cleaned,llm_train,llm_test
a617f5d6441618fcb86810cddd0538c3b18dd27901b0be621495d69be376d7c4_train,0.975549,-0.176101,"[[7834, 566], [66, 2734]]",0.828485,0.976429,0.896393,0.943571,0.003214,chatgpt_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False,Claude-instant_cleaned,ChatGPT
9d000c4d3a86c02e1ec8e6362801ee731c46b2e4e92716642bed23d09fb55bad_train,0.975582,-0.176101,"[[7829, 568], [64, 2736]]",0.828087,0.977143,0.896461,0.943556,0.003214,chatgpt_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True,Claude-instant_cleaned,ChatGPT
a617f5d6441618fcb86810cddd0538c3b18dd27901b0be621495d69be376d7c4_train,0.983967,-0.136667,"[[7798, 602], [83, 2717]]",0.81862,0.970357,0.888054,0.938839,0.007857,chatgpt_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False,Claude-instant_uncleaned,ChatGPT
9d000c4d3a86c02e1ec8e6362801ee731c46b2e4e92716642bed23d09fb55bad_train,0.983951,-0.136667,"[[7794, 603], [84, 2716]]",0.818319,0.97,0.887727,0.938644,0.0075,chatgpt_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True,Claude-instant_uncleaned,ChatGPT
7b0f4bc09e69e9439960e32e5c4443fe85a9da3fbc074172ca5a88577c0d5db7_train,0.961399,-0.966674,"[[1465, 215], [32, 528]]",0.710633,0.942857,0.810437,0.889732,0.025,claude_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False,Claude-instant_cleaned,Claude-instant
fd7e0fb49d10b6e5054a4b0f7a4d40ec633e3d4ef73f50ba7b748eacd56133c2_train,0.956643,-0.754963,"[[1452, 207], [29, 523]]",0.716438,0.947464,0.815913,0.893261,0.001812,claude_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True,Claude-instant_cleaned,Claude-instant
7b0f4bc09e69e9439960e32e5c4443fe85a9da3fbc074172ca5a88577c0d5db7_train,0.961763,-0.928909,"[[1421, 259], [22, 538]]",0.675031,0.960714,0.792926,0.874554,0.025,claude_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False,Claude-instant_uncleaned,Claude-instant
fd7e0fb49d10b6e5054a4b0f7a4d40ec633e3d4ef73f50ba7b748eacd56133c2_train,0.960867,-0.940477,"[[1406, 253], [20, 532]]",0.677707,0.963768,0.795812,0.876526,0.021739,claude_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True,Claude-instant_uncleaned,Claude-instant
277d294ec1bf7b980608af53a29c176de2de7c1ffd062c77f36eabc843565065_train,0.930352,-0.99487,"[[6655, 1745], [183, 2617]]",0.599954,0.934643,0.730801,0.827857,0.000357,palm_nc,"['paraphrase_polish_human', 'paraphrase_polish...",False,Claude-instant_cleaned,Google-PaLM
2e8af8cbf8341eaae174ec2875ff8ec8147b74084f1a1da772338248b91727fa_train,0.945368,-0.994146,"[[6601, 1377], [167, 2502]]",0.645012,0.93743,0.764203,0.854983,0.001124,palm_cleaned,"['paraphrase_polish_human', 'paraphrase_polish...",True,Claude-instant_cleaned,Google-PaLM


In [24]:
# Metric columns are cast to float32 so they can be averaged.
_metric_cols = ["roc_auc", "f1", "accuracy", "tpr_at_fpr_0_01"]
df_claude_multi_domain_eval_multi_llm[_metric_cols] = (
    df_claude_multi_domain_eval_multi_llm[_metric_cols].astype(np.float32)
)

# Keep only the transfer rows, i.e. drop evaluations on Claude itself.
_is_transfer_row = df_claude_multi_domain_eval_multi_llm["llm_test"] != "Claude-instant"
df_claude_multi_domain_eval_multi_llm_wo_claude = df_claude_multi_domain_eval_multi_llm[_is_transfer_row]

# Mean metrics per training variant and per cleaned / uncleaned test data.
(
    df_claude_multi_domain_eval_multi_llm_wo_claude[_metric_cols + ["llm_train", "cleaned"]]
    .groupby(by=["llm_train", "cleaned"])
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,roc_auc,f1,accuracy,tpr_at_fpr_0_01
llm_train,cleaned,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Claude-instant_cleaned,False,0.950166,0.797043,0.877292,0.001429
Claude-instant_cleaned,True,0.956467,0.813044,0.890412,0.001685
Claude-instant_uncleaned,False,0.953927,0.791882,0.875298,0.013214
Claude-instant_uncleaned,True,0.957364,0.803885,0.885947,0.013219
