 # 0. Setup

## 0.0 imports

In [1]:
import os
import sys
import logging
import random
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
import json
from sklearn.model_selection import train_test_split
import transformers

# BASE_DIR_DETECT_RL = "/mnt/hdd-baracuda/pdingfelder/Masterarbeit/DetectRL"
# Root of the DetectRL checkout, relative to the notebook's working directory.
# The value already ends in "/", so the f-strings below produce paths with a double slash.
BASE_DIR_DETECT_RL = "../"
# Extend the import path; presumably this is where train_roberta and metrics live — confirm.
sys.path.append(f"{BASE_DIR_DETECT_RL}/Detectors/")
sys.path.append(f"{BASE_DIR_DETECT_RL}/../datasets/")
# Base directory under which the per-run result JSON files are looked up later on.
RESULT_DIR = f"{BASE_DIR_DETECT_RL}/Detector_Results/"

import train_roberta
from metrics import get_roc_metric_result
from types import SimpleNamespace

# NOTE(review): DRY_RUN and ALL_DATA are not read in any cell visible here — confirm they are still needed.
DRY_RUN = False
ALL_DATA = True
# Passed as random_state to train_test_split in the aggregation cell.
SEED = 2023
logging.basicConfig(level=logging.WARNING, format="%(asctime)s %(levelname)s %(message)s")

ModuleNotFoundError: No module named 'numpy'

In [2]:
# Directories holding the Task1 / Task2 benchmark splits.
task_path = f"{BASE_DIR_DETECT_RL}/Benchmark/Tasks/"
task1_path = f"{task_path}/Task1/"
task2_path = f"{task_path}/Task2/"

# Disable NCCL features incompatible with RTX 40xx
os.environ["NCCL_P2P_DISABLE"] = "1"
os.environ["NCCL_IB_DISABLE"] = "1"

# Restrict to only GPU 0 (CUDA:0)
# NOTE(review): this is set after `import torch`; presumably it only takes effect if CUDA
# has not been initialised yet — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# LLM names used to build the multi_llms_<name>_{train,test}.json file names.
LLMs = ["ChatGPT", "Claude-instant", "Llama-2-70b", "Google-PaLM"]

def filter_for_one_llm(_df, column_llm="llm_type", llm_to_filter="Claude-instant"):
    """Return the rows of ``_df`` whose ``column_llm`` value equals ``llm_to_filter``.

    The original index labels of the selected rows are kept.
    """
    wanted_rows = _df[column_llm].eq(llm_to_filter)
    return _df.loc[wanted_rows]

def load_dataframe_from_json(_json_path, filter_llm: bool = False, column_llm: str = "llm_type", llm_to_filter: str= "Claude-instant"):
    """Read a UTF-8 JSON file into a DataFrame, optionally keeping a single LLM's rows.

    Args:
        _json_path: Path of the JSON file to read.
        filter_llm: When true, the frame is passed through ``filter_for_one_llm``.
        column_llm: Column compared against ``llm_to_filter`` when filtering.
        llm_to_filter: Value that ``column_llm`` must equal for a row to be kept.

    Returns:
        The DataFrame built from the parsed JSON, filtered if requested.
    """
    with open(_json_path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)

    frame = pd.DataFrame(parsed)
    if not filter_llm:
        return frame
    return filter_for_one_llm(frame, column_llm, llm_to_filter)

## 0.1 Take a look at the different paths

In [3]:
import hashlib

# Aliases for the two task directories compared in this cell.
task1_dir = task1_path
task2_dir = task2_path

# Accumulators filled by the comparison loop further down in this cell:
# the differing rows themselves plus running counts of equal / unequal rows.
unequal_rows_df = pd.DataFrame()
equal_count = 0
unequal_count = 0

def file_hash(filepath, chunk_size=1 << 20):
    """Return the SHA256 hex digest of a file.

    The file is read in blocks of ``chunk_size`` bytes so that large files
    are never held in memory in full.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes read per block (default 1 MiB).

    Returns:
        The hexadecimal SHA256 digest of the file's bytes.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as f:
        # iter() with a sentinel stops as soon as read() returns b"" (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Compare every Task2 JSON file with the same-named Task1 file and tally how
# many rows are identical. Differing rows are collected per file and appended
# to unequal_rows_df once, after the loop, instead of concatenating per file.
unequal_row_chunks = []

for filename in os.listdir(task2_dir):
    if not filename.endswith(".json"):
        continue

    path2 = os.path.join(task2_dir, filename)
    path1 = os.path.join(task1_dir, filename)

    if not os.path.exists(path1):
        print(f"File {filename} not found in Task1. Skipping.")
        continue

    if file_hash(path1) == file_hash(path2):
        # Byte-identical files: every row matches, only the row count is needed.
        equal_count += len(pd.read_json(path2, encoding="utf-8"))
        continue

    print(path1, "and", path2, "are not the same by hash, checking for the rows")
    df1 = pd.read_json(path1, encoding="utf-8")
    df2 = pd.read_json(path2, encoding="utf-8")

    # Align columns to prevent misalignment
    common_columns = df1.columns.intersection(df2.columns)
    df1 = df1[common_columns]
    df2 = df2[common_columns]

    if df1.equals(df2):
        equal_count += len(df1)
        continue

    # Compare row-wise against the Task2 rows. Rows that exist only in Task2
    # become all-NaN on the Task1 side and therefore count as unequal.
    df1_aligned = df1.reindex(df2.index)
    # DataFrame.eq reports NaN != NaN, whereas DataFrame.equals above treats
    # NaNs in the same position as equal; accept matching NaNs here as well so
    # both checks agree.
    both_missing = df1_aligned.isna() & df2.isna()
    row_equality = (df1_aligned.eq(df2) | both_missing).all(axis=1)

    unequal_rows = df2[~row_equality]
    equal_count += int(row_equality.sum())
    unequal_count += len(unequal_rows)
    unequal_row_chunks.append(unequal_rows)

if unequal_row_chunks:
    unequal_rows_df = pd.concat([unequal_rows_df, *unequal_row_chunks], ignore_index=True)

print(f"Equal rows: {equal_count}")
print(f"Unequal rows: {unequal_count}")

Equal rows: 349165
Unequal rows: 0


In [4]:
# Peek at the Task1 data: load the train/test split of one LLM and show its columns.
for llm in LLMs:
    # NOTE(review): other_LLMs is computed but not used anywhere in this cell.
    other_LLMs = list(set(LLMs) - {llm})

    # Train and test for the current LLM
    train_df = load_dataframe_from_json(os.path.join(task1_path, f"multi_llms_{llm}_train.json"))
    test_df = load_dataframe_from_json(os.path.join(task1_path, f"multi_llms_{llm}_test.json"))
    
    # Stop once the ChatGPT split is loaded so train_df/test_df keep holding it.
    if llm == "ChatGPT":
        break
        
print(train_df.columns, test_df.columns)

Index(['text', 'label', 'data_type', 'llm_type'], dtype='object') Index(['text', 'label', 'data_type', 'llm_type'], dtype='object')


In [5]:
# Number of rows per label in the training split loaded in the previous cell.
train_df[["label"]].value_counts()

label
llm      24187
human     1800
Name: count, dtype: int64

In [6]:
# Keep only the human-labelled training rows and count them per data_type.
human_df_1 = train_df[train_df["label"]=="human"]
human_df_1["data_type"].value_counts()

data_type
abstract    450
document    450
story       450
content     450
Name: count, dtype: int64

In [7]:
# Row counts per (data_type, label) combination in the training split.
train_df[["data_type", "label"]].value_counts()

data_type                        label
adversarial_character_llm        llm      2688
adversarial_word_llm             llm      2688
adversarial_character_word_llm   llm      2688
paraphrase_back_translation_llm  llm      2688
direct_prompt                    llm      2688
prompt_few_shot                  llm      2688
paraphrase_polish_llm            llm      2688
prompt_SICO                      llm      2688
paraphrase_dipper_llm            llm      2683
content                          human     450
abstract                         human     450
document                         human     450
story                            human     450
Name: count, dtype: int64

In [8]:
# Row counts per (data_type, label, llm_type) combination in the test split.
test_df[["data_type", "label", "llm_type"]].value_counts()


data_type                        label  llm_type
abstract                         human  ChatGPT     250
content                          human  ChatGPT     250
document                         human  ChatGPT     250
story                            human  ChatGPT     250
adversarial_character_llm        llm    ChatGPT     112
adversarial_word_llm             llm    ChatGPT     112
adversarial_character_word_llm   llm    ChatGPT     112
paraphrase_back_translation_llm  llm    ChatGPT     112
direct_prompt                    llm    ChatGPT     112
paraphrase_dipper_llm            llm    ChatGPT     112
paraphrase_polish_llm            llm    ChatGPT     112
prompt_SICO                      llm    ChatGPT     112
prompt_few_shot                  llm    ChatGPT     112
Name: count, dtype: int64

In [9]:
# Same peek as for Task1, but reading the split files from the Task2 directory.
for llm in LLMs:
    # NOTE(review): other_LLMs is computed but not used anywhere in this cell.
    other_LLMs = list(set(LLMs) - {llm})

    # Train and test for the current LLM
    train_df = load_dataframe_from_json(os.path.join(task2_path, f"multi_llms_{llm}_train.json"))
    test_df = load_dataframe_from_json(os.path.join(task2_path, f"multi_llms_{llm}_test.json"))
    
    # Stop once the ChatGPT split is loaded so train_df/test_df keep holding it.
    if llm == "ChatGPT":
        break

# Row counts per (data_type, label, llm_type) combination in the Task2 training split.
train_df[["data_type", "label", "llm_type"]].value_counts()

data_type                        label  llm_type
adversarial_character_llm        llm    ChatGPT     2688
adversarial_word_llm             llm    ChatGPT     2688
adversarial_character_word_llm   llm    ChatGPT     2688
paraphrase_back_translation_llm  llm    ChatGPT     2688
direct_prompt                    llm    ChatGPT     2688
prompt_few_shot                  llm    ChatGPT     2688
paraphrase_polish_llm            llm    ChatGPT     2688
prompt_SICO                      llm    ChatGPT     2688
paraphrase_dipper_llm            llm    ChatGPT     2683
content                          human  ChatGPT      450
abstract                         human  ChatGPT      450
document                         human  ChatGPT      450
story                            human  ChatGPT      450
Name: count, dtype: int64

In [10]:
# Label balance of the Task2 test split, followed by its (data_type, label, llm_type) breakdown.
print(test_df["label"].value_counts())
test_df[["data_type", "label", "llm_type"]].value_counts()


label
llm      1008
human    1000
Name: count, dtype: int64


data_type                        label  llm_type
abstract                         human  ChatGPT     250
content                          human  ChatGPT     250
document                         human  ChatGPT     250
story                            human  ChatGPT     250
adversarial_character_llm        llm    ChatGPT     112
adversarial_word_llm             llm    ChatGPT     112
adversarial_character_word_llm   llm    ChatGPT     112
paraphrase_back_translation_llm  llm    ChatGPT     112
direct_prompt                    llm    ChatGPT     112
paraphrase_dipper_llm            llm    ChatGPT     112
paraphrase_polish_llm            llm    ChatGPT     112
prompt_SICO                      llm    ChatGPT     112
prompt_few_shot                  llm    ChatGPT     112
Name: count, dtype: int64

# 1. Training

## 1.0 General functions

In [11]:
def get_info_based_on_input_path(input_path):
    """Map a dataset path (or bare domain name) to its column keys.

    The known domain names are searched as substrings of ``input_path`` in a
    fixed order; the first one found decides the result.

    Args:
        input_path: String that contains one of the known domain names.

    Returns:
        A ``(domain, prompt_key, human_key)`` tuple.

    Raises:
        ValueError: If no known domain name occurs in ``input_path``.
    """
    # (domain, prompt_key, human_key) — order matters, the first match wins.
    known_domains = (
        ("arxiv", "title", "abstract"),
        ("xsum", "summary", "document"),
        ("writing_prompt", "story_prompt", "story"),
        ("yelp_review", "start", "content"),
    )
    for entry in known_domains:
        if entry[0] in input_path:
            return entry
    raise ValueError(f"Cannot find mapping for the input path: {input_path}")

In [12]:
def prepare_df_for_roberta_training(df, column_to_be_used_for_text: str = "direct_prompt", column_to_be_used_for_human: str = "abstract",
                                    column_title: str = "title"):
    """Reshape a wide dataset frame into one long frame of labelled texts.

    Each input row contributes up to two output rows: one carrying the value
    of ``column_to_be_used_for_human`` with label ``"human"`` and one carrying
    the value of ``column_to_be_used_for_text`` with label ``"llm"``. Rows
    whose text is missing are dropped. The input frame is not modified.

    Args:
        df: Frame containing ``"id"``, ``"llm_type"``, ``column_title`` and the
            two text columns named below.
        column_to_be_used_for_text: Column holding the LLM-side text.
        column_to_be_used_for_human: Column holding the human-side text.
        column_title: Column carried along next to ``"id"`` and ``"llm_type"``.

    Returns:
        A DataFrame with columns ``id``, ``column_title``, ``llm_type``,
        ``text`` and ``label``; all human rows first, then all LLM rows.
        ``text`` values are always strings.
    """
    shared_columns = ["id", column_title, "llm_type"]

    # Subset for human annotations
    df_human = (
        df.loc[:, shared_columns + [column_to_be_used_for_human]]
        .rename(columns={column_to_be_used_for_human: "text"})
        .assign(label="human")
    )

    # Subset for LLM annotations
    df_llm = (
        df.loc[:, shared_columns + [column_to_be_used_for_text]]
        .rename(columns={column_to_be_used_for_text: "text"})
        .assign(label="llm")
    )

    # Combine both and drop rows without a usable text
    df_combined = (
        pd.concat([df_human, df_llm], ignore_index=True)
        .dropna(subset=["label", "text"], how="any")
    )
    # The cast result has to be stored: astype returns a new Series, so calling
    # it without assignment (as before) left non-string values in place.
    return df_combined.assign(text=df_combined["text"].astype(str))

In [19]:
# Dataset domains; each one maps to a <domain>_2800 file in the directories below.
DOMAINS = ["arxiv", "writing_prompt", "xsum", "yelp_review"]
# NOTE(review): same names as the earlier `LLMs` list but in a different order — confirm both are needed.
LLMS = ["Claude-instant", "Llama-2-70b", "Google-PaLM", "ChatGPT"]
# Columns that are used, one at a time, as the LLM-side text when preparing training frames.
LLM_PROMPTS = ['direct_prompt', 'paraphrase_polish_human', 'paraphrase_polish_llm', 'prompt_few_shot',
       'prompt_SICO']
# TRAINING_DIR_CLEANED = f"{BASE_DIR_DETECT_RL}/../results/cleaned_detectRL_files/"
# Directory with the cleaned parquet files; with BASE_DIR_DETECT_RL = "../" this is "../../results/".
TRAINING_DIR_CLEANED = f"{BASE_DIR_DETECT_RL}../results/"
# Directory with the original <domain>_2800.json files.
TRAINING_DIR_ORIGINAL = f"{BASE_DIR_DETECT_RL}/Benchmark/Original_Dataset/"

## 1.1 Aggregate Training Results

In [20]:
# Show where the relative cleaned-data directory points, starting from the current working directory.
os.path.join(os.getcwd(), TRAINING_DIR_CLEANED)

'/mnt/hdd-baracuda/pdingfelder/Masterarbeit/DetectRL/Detectors/../../results/'

In [22]:
# List the entries of the cleaned-data directory.
os.listdir(os.path.join(os.getcwd(), TRAINING_DIR_CLEANED))

['data_generation',
 'modified_Binoculars_results',
 'statistical_dataset_modified_classifier_results',
 'cleaned_detectRL_files',
 'writing_prompt_2800_cleaned_all_v2.parquet',
 'xsum_2800_cleaned_all_v2.parquet',
 'data_cleaning_gpt_4-1',
 'cleaned_vs_uncleaned_data_evaluation',
 'arxiv_2800_cleaned_all_v2.parquet',
 'yelp_review_2800_cleaned_all_v2.parquet']

In [25]:
# Notebook state (re)initialised by this cell; none of it is read by the loop below.
data_paths = [TRAINING_DIR_CLEANED, TRAINING_DIR_ORIGINAL]
# DOMAINS = ["writing_prompt"]
result_list = []
count_runs_done = 0
count_runs_missing = 0

# Print every (LLM, domain, prompt) combination for which the prepared subset of
# the cleaned data hashes to the same value as the subset of the original data.
for _domain in DOMAINS:
    # The file contents and column keys depend only on the domain, so they are
    # read once per domain instead of once per (LLM, prompt) pair.
    raw_original = load_dataframe_from_json(f"{TRAINING_DIR_ORIGINAL}{_domain}_2800.json")
    raw_cleaned = pd.read_parquet(f"{TRAINING_DIR_CLEANED}{_domain}_2800_cleaned_all_v2.parquet")
    _, prompt_key, human_key = get_info_based_on_input_path(_domain)

    # The prepared frames depend only on the prompt column, not on the LLM.
    prepared_by_prompt = {}
    for prompt in LLM_PROMPTS:
        prepared_by_prompt[prompt] = (
            prepare_df_for_roberta_training(raw_original, column_to_be_used_for_text=prompt,
                                            column_to_be_used_for_human=human_key, column_title=prompt_key),
            prepare_df_for_roberta_training(raw_cleaned, column_to_be_used_for_text=prompt,
                                            column_to_be_used_for_human=human_key, column_title=prompt_key),
        )

    # Same iteration order as before (LLM outer, prompt inner) so the printed lines keep their order.
    for _llm in LLMS:
        for prompt in LLM_PROMPTS:
            prepared_original, prepared_cleaned = prepared_by_prompt[prompt]
            hash_original = train_roberta.hash_dataframe_as_parquet(
                prepared_original[prepared_original["llm_type"] == _llm])
            hash_cleaned = train_roberta.hash_dataframe_as_parquet(
                prepared_cleaned[prepared_cleaned["llm_type"] == _llm])

            if hash_cleaned == hash_original:
                print(_llm, _domain, prompt)

ChatGPT xsum paraphrase_polish_human


In [None]:
# Collect the stored RoBERTa evaluation results for every
# (domain, training LLM, prompt, cleaned/original data) combination.
data_paths = [TRAINING_DIR_CLEANED, TRAINING_DIR_ORIGINAL]
# NOTE(review): this overrides the notebook-wide DOMAINS list for all cells run afterwards.
DOMAINS = ["writing_prompt"]
result_list = []
count_runs_done = 0
count_runs_missing = 0

for _domain in tqdm(DOMAINS):
    _, prompt_key, human_key = get_info_based_on_input_path(_domain)

    # Read each source file once per domain rather than once per (LLM, prompt) pair.
    raw_frames = {}
    for _train_path in data_paths:
        if _train_path.startswith(TRAINING_DIR_ORIGINAL):
            raw_frames[_train_path] = load_dataframe_from_json(f"{TRAINING_DIR_ORIGINAL}{_domain}_2800.json")
        else:
            raw_frames[_train_path] = pd.read_parquet(f"{TRAINING_DIR_CLEANED}{_domain}_2800_cleaned_all_v2.parquet")

    for _llm in LLMS:
        # Every LLM except the one trained on, in LLMS order.
        other_llms = [name for name in LLMS if name != _llm]

        for prompt in LLM_PROMPTS:
            # NOTE(review): the result directory name contains neither the domain nor the
            # cleaned/original flag; a result file is identified only by the data hash.
            save_model_path = f"{RESULT_DIR}{_llm}_{prompt}_test"

            for _train_path in data_paths:
                training_df = prepare_df_for_roberta_training(raw_frames[_train_path], column_to_be_used_for_text=prompt,
                                                              column_to_be_used_for_human=human_key, column_title=prompt_key)

                # Held-out 20% of the training LLM's rows; the 80% part is not needed here.
                own_llm_df = training_df[training_df["llm_type"] == _llm]
                _train_part, test_df = train_test_split(own_llm_df, test_size=0.2, random_state=SEED, shuffle=True)

                # Evaluation sets: the held-out split first, then the full data of each other LLM.
                eval_frames = {_llm: test_df}
                for other_llm in other_llms:
                    eval_frames[other_llm] = training_df[training_df["llm_type"] == other_llm].dropna(subset=["label", "text"])

                for key, df in eval_frames.items():
                    df_hash = train_roberta.hash_dataframe_as_parquet(df)
                    result_path = f"{save_model_path}/{df_hash}.roberta-base_result.json"
                    if not os.path.exists(result_path):
                        count_runs_missing += 1
                        continue

                    # The with-block closes the file; no explicit close() is needed.
                    with open(result_path) as fp:
                        result = json.load(fp)
                    result["training_llm"] = _llm
                    result["test_llm"] = key
                    result["hash_df"] = df_hash
                    result["domain"] = _domain
                    result["cleaned"] = _train_path == TRAINING_DIR_CLEANED
                    result["llm_prompt"] = prompt
                    result_list.append(result)
                    count_runs_done += 1

In [32]:
# Turn the collected result dicts into a frame and show the writing_prompt / prompt_SICO rows,
# ordered by training and test LLM.
df_results = pd.DataFrame(result_list)
df_results[(df_results["domain"]=="writing_prompt")&(df_results["llm_prompt"]=="prompt_SICO")].sort_values(["training_llm", "test_llm"])

Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,training_llm,test_llm,hash_df,domain,cleaned,llm_prompt
152,0.997443,-0.470435,"[[131, 2], [5, 142]]",0.986111,0.965986,0.975945,0.975,0.911565,ChatGPT,ChatGPT,ecfb24e2e94d4fe4a2960660e63d48d2c0e04282854c33...,writing_prompt,True,prompt_SICO
156,0.997443,-0.470435,"[[131, 2], [5, 142]]",0.986111,0.965986,0.975945,0.975,0.911565,ChatGPT,ChatGPT,ecfb24e2e94d4fe4a2960660e63d48d2c0e04282854c33...,writing_prompt,False,prompt_SICO
153,0.863724,-0.518434,"[[608, 92], [226, 474]]",0.837456,0.677143,0.748815,0.772857,0.268571,ChatGPT,Claude-instant,ccaade05ecc323d21a6f2959c2adb31c505447bcbf9d73...,writing_prompt,True,prompt_SICO
157,0.863724,-0.518434,"[[608, 92], [226, 474]]",0.837456,0.677143,0.748815,0.772857,0.268571,ChatGPT,Claude-instant,ccaade05ecc323d21a6f2959c2adb31c505447bcbf9d73...,writing_prompt,False,prompt_SICO
155,0.995547,-0.457632,"[[687, 13], [38, 662]]",0.980741,0.945714,0.962909,0.963571,0.882857,ChatGPT,Google-PaLM,eda0585faf602f9eae298a5bebf977cc3174724bb3d553...,writing_prompt,True,prompt_SICO
159,0.995547,-0.457632,"[[687, 13], [38, 662]]",0.980741,0.945714,0.962909,0.963571,0.882857,ChatGPT,Google-PaLM,eda0585faf602f9eae298a5bebf977cc3174724bb3d553...,writing_prompt,False,prompt_SICO
154,0.991367,-0.477456,"[[666, 34], [22, 678]]",0.952247,0.968571,0.96034,0.96,0.661429,ChatGPT,Llama-2-70b,2cb2ee720bd167fc27ea4a7dc47f462b2b7245bbaa7360...,writing_prompt,True,prompt_SICO
158,0.991367,-0.477456,"[[666, 34], [22, 678]]",0.952247,0.968571,0.96034,0.96,0.661429,ChatGPT,Llama-2-70b,2cb2ee720bd167fc27ea4a7dc47f462b2b7245bbaa7360...,writing_prompt,False,prompt_SICO
35,0.978006,-0.496136,"[[640, 60], [53, 647]]",0.915134,0.924286,0.919687,0.919286,0.397143,Claude-instant,ChatGPT,2b99800de53e431a8ee0a96b98f1fc091979f49c54e7e8...,writing_prompt,True,prompt_SICO
39,0.978006,-0.496136,"[[640, 60], [53, 647]]",0.915134,0.924286,0.919687,0.919286,0.397143,Claude-instant,ChatGPT,2b99800de53e431a8ee0a96b98f1fc091979f49c54e7e8...,writing_prompt,False,prompt_SICO


In [38]:
# Share of looked-up result files that were actually found on disk.
total_runs = count_runs_done + count_runs_missing
# Guard the ratio: if no run was counted at all the division would raise ZeroDivisionError.
done_ratio = count_runs_done / total_runs if total_runs else float("nan")
print(f"Runs executed: {count_runs_done}, missing: {count_runs_missing}, {done_ratio}")
df_results = pd.DataFrame(result_list)
df_results.head(20)

Runs executed: 342, missing: 298, 0.534375


Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,training_llm,test_llm,hash_df,domain,cleaned,llm_prompt
0,1.0,-0.492397,"[[133, 0], [0, 147]]",1.0,1.0,1.0,1.0,1.0,Claude-instant,Claude-instant,2e20b8e1716c086366ea1da5303ecf95d4c8c9f7a02e78...,arxiv,True,direct_prompt
1,1.0,-0.505136,"[[700, 0], [0, 700]]",1.0,1.0,1.0,1.0,1.0,Claude-instant,Llama-2-70b,85ab1b630691f37a4f385717a62b378df41ca5dc28f522...,arxiv,True,direct_prompt
2,0.99936,-0.515875,"[[700, 0], [4, 692]]",1.0,0.994253,0.997118,0.997135,0.994253,Claude-instant,Google-PaLM,eb6a9e1b095764a39d9d4aec157fb6525efe17a616d96a...,arxiv,True,direct_prompt
3,1.0,-0.488934,"[[700, 0], [0, 700]]",1.0,1.0,1.0,1.0,1.0,Claude-instant,ChatGPT,3faa71ffb2522bb968592762bbc61e227d95177f37a732...,arxiv,True,direct_prompt
4,1.0,-0.482888,"[[133, 0], [0, 147]]",1.0,1.0,1.0,1.0,1.0,Claude-instant,Claude-instant,68557067c9da65055fccb079cd7e40c05f84e92540790c...,arxiv,True,paraphrase_polish_human
5,0.99999,-0.510201,"[[700, 0], [1, 698]]",1.0,0.998569,0.999284,0.999285,0.998569,Claude-instant,Llama-2-70b,0302f68e4ebea6d14dff0b1cbc006e2294ff3a7005c754...,arxiv,True,paraphrase_polish_human
6,0.999919,-0.516627,"[[695, 5], [0, 672]]",0.992614,1.0,0.996294,0.996356,0.977679,Claude-instant,Google-PaLM,1236e9638ce4126041ab0ccaed3ac7322c04b15f244eeb...,arxiv,True,paraphrase_polish_human
7,0.999998,-0.513484,"[[700, 0], [1, 699]]",1.0,0.998571,0.999285,0.999286,0.998571,Claude-instant,ChatGPT,0eecedb266a173d5cfa7a81632e96922cb383827dae617...,arxiv,True,paraphrase_polish_human
8,1.0,-0.483697,"[[133, 0], [0, 147]]",1.0,1.0,1.0,1.0,1.0,Claude-instant,Claude-instant,0558e3b656ab3a6932e772d5861c73ca984c456ec6695c...,arxiv,True,paraphrase_polish_llm
9,0.999992,-0.512974,"[[700, 0], [1, 698]]",1.0,0.998569,0.999284,0.999285,0.998569,Claude-instant,Llama-2-70b,28d0937220d817c4c4c586bd0beb2a3d3a7979676a8f20...,arxiv,True,paraphrase_polish_llm


In [42]:
# Domains for which at least one result was collected.
df_results["domain"].unique()

array(['arxiv', 'writing_prompt', 'xsum'], dtype=object)

In [43]:
# Results for the xsum domain with the prompt_few_shot column, ordered by training and test LLM.
df_results[(df_results["domain"]=="xsum")&(df_results["llm_prompt"]=="prompt_few_shot")].sort_values(["training_llm", "test_llm"])

Unnamed: 0,roc_auc,optimal_threshold,conf_matrix,precision,recall,f1,accuracy,tpr_at_fpr_0_01,training_llm,test_llm,hash_df,domain,cleaned,llm_prompt
335,0.998916,-0.663829,"[[695, 5], [6, 694]]",0.992847,0.991429,0.992137,0.992143,0.614286,Claude-instant,ChatGPT,96cdf9ce21e5202d5020711865821a860976c0e175ee6a...,xsum,True,prompt_few_shot
332,1.0,-0.537093,"[[135, 0], [0, 140]]",1.0,1.0,1.0,1.0,1.0,Claude-instant,Claude-instant,b6c55736280df67cbf9e948ca514b508d79cafdf62f0db...,xsum,True,prompt_few_shot
334,0.926449,-0.723472,"[[670, 30], [86, 462]]",0.939024,0.843066,0.888462,0.907051,0.653285,Claude-instant,Google-PaLM,0b042bfd8735b78967638630404a5415dfd2ab9d1422df...,xsum,True,prompt_few_shot
333,0.996637,-0.678135,"[[693, 7], [16, 684]]",0.98987,0.977143,0.983465,0.983571,0.931429,Claude-instant,Llama-2-70b,1c3e99e696c1b014d3cc624638e95fe3789d2a57755499...,xsum,True,prompt_few_shot


In [24]:
# Average the numeric metrics over all test LLMs for each
# (training LLM, domain, cleaned flag, prompt) combination.
grouped = (
    df_results
    .groupby(["training_llm", "domain", "cleaned", "llm_prompt"], as_index=False)
    .mean(numeric_only=True)
)

# Ascending sort: the combinations with the lowest TPR at 1% FPR come first.
grouped.sort_values("tpr_at_fpr_0_01")

Unnamed: 0,training_llm,domain,cleaned,llm_prompt,roc_auc,optimal_threshold,precision,recall,f1,accuracy,tpr_at_fpr_0_01
55,Google-PaLM,writing_prompt,False,paraphrase_polish_human,0.970353,-0.516351,0.923675,0.928061,0.925093,0.923929,0.250085
60,Google-PaLM,writing_prompt,True,paraphrase_polish_human,0.970353,-0.516351,0.923675,0.928061,0.925093,0.923929,0.250085
31,Claude-instant,writing_prompt,False,paraphrase_polish_human,0.973160,-0.505484,0.914952,0.938639,0.926604,0.925000,0.290782
36,Claude-instant,writing_prompt,True,paraphrase_polish_human,0.973160,-0.505484,0.914952,0.938639,0.926604,0.925000,0.290782
16,ChatGPT,writing_prompt,True,paraphrase_polish_human,0.980641,-0.496234,0.936980,0.960425,0.948522,0.947321,0.330850
...,...,...,...,...,...,...,...,...,...,...,...
50,Google-PaLM,arxiv,True,paraphrase_polish_human,0.999998,-0.497331,1.000000,0.999642,0.999821,0.999821,0.999642
46,Google-PaLM,arxiv,False,paraphrase_polish_llm,0.999999,-0.493948,1.000000,0.999643,0.999821,0.999821,0.999643
49,Google-PaLM,arxiv,True,direct_prompt,0.999999,-0.499061,1.000000,0.999643,0.999821,0.999821,0.999643
52,Google-PaLM,arxiv,True,prompt_SICO,0.999999,-0.482345,1.000000,0.999643,0.999821,0.999821,0.999643


In [45]:
# Look at the Claude-instant rows of the cleaned writing_prompt data.
# NOTE(review): this reads "..._cleaned_all.parquet", while the other cells read
# "..._cleaned_all_v2.parquet" — confirm which file is intended.
df_temp = pd.read_parquet(f"{TRAINING_DIR_CLEANED}/writing_prompt_2800_cleaned_all.parquet")
df_temp[df_temp["llm_type"]=="Claude-instant"]

Unnamed: 0,id,story,story_prompt,direct_prompt,llm_type,domain,paraphrase_polish_human,paraphrase_polish_llm,prompt_few_shot,prompt_SICO,...,adversarial_character_llm,adversarial_word_human,adversarial_word_llm,adversarial_character_word_human,adversarial_character_word_llm,paraphrase_back_translation_human,paraphrase_back_translation_llm,paraphrase_dipper_human,paraphrase_dipper_llm,icl_prompt
1400,1401,... For it is so that One is Dark and One is L...,Death and Life find out that they' re expecting.,Death sighed as he looked over his paperwork. ...,Claude-instant,writing_prompt,Here is my attempt at polishing the writing st...,Here is a polished version of the story:Death ...,Here is a 22 sentence story based on the promp...,Here is a 22 sentence story in a more human wr...,...,eHre is a 22 sentence story based on the promp...,... For it is equally that One is Dark and One...,Here is a 22 sentence story based on the promp...,... During it is so that One is Dark and One i...,Here is a 22 sentence story based on the promp...,"... Because it is dark, one is light. A person...",This is a 22 -sentence story based on prompts:...,"One reigns in endless uncertainty, mystery, an...",He had been busy these past few decades and wa...,
1401,1402,"Sara had a very special toy. Of course, of all...",Write a short story meant for children riddled...,,Claude-instant,writing_prompt,Here is my polished version of the story:Sara ...,"Friends, while I strive to be helpful, as an A...","As an AI language model, I am unable to engage...","As an AI language model, I am unable to engage...",...,"As an IA language model, I am unable to engage...","Sara brought a very special toy. Of course, of...","As an AI language model, I am unable to engage...","Sara had a very spcial toy. Des course, of all...","As an AI language model, I am unable to engage...","Sara has a very special toy. Of course, among ...","As a AI language model, I cannot participate i...",It was a beautiful stuffed cat with soft fur t...,"To learn more, visit https://poe.com/usage_gui...",
1402,1403,The world has gone to shit. I used to be able ...,"After the event of some anomaly, 25 % of the p...",John woke up feeling normal as always. When re...,Claude-instant,writing_prompt,Here is my attempt at polishing the story:The ...,Here is a polished version of the 25 sentence ...,Here is a 25 sentence story based on the promp...,Here is a 25 sentence story in a more human co...,...,Here is a 25 sentence story based on the promp...,The world has gone to inferno. I used to be ab...,Here is a 25 condemned story based on the prom...,The worldwide has gone to shit. I used to be a...,Here is a 25 sentence story based on the promp...,The world is worse. I was able to make a decen...,This is a 25 -based storytelling story: John w...,Now? I used to be able to make a decent living...,"Across town, people were discovering they coul...",
1403,1404,I miss you. It' s a feeling I know I could des...,I miss you.,The empty space where you used to be feels vas...,Claude-instant,writing_prompt,Here is my attempt at polishing the writing st...,Here is a polished version of the story:I miss...,Here is a 21 sentence story based on the promp...,Here is a 21 sentence story in human style wri...,...,Here is a 21 sentence story based on the propt...,I signorina you. It' s a feeling I realising I...,Here is a 21 sentence story based on the promp...,l miss you. It' s a feeling I know I could des...,Here is a 21 sentencing story based on the pro...,I miss you. I know I can only describe a feeli...,This is a 21 -based storytelling story: I miss...,These three words are everywhere. They are wri...,I keep hoping to get a text or a call from you...,
1404,1405,"I have a secret to share with you all, did you...","When a child is born, the eldest member of the...","When little Emma was born, her grandfather see...",Claude-instant,writing_prompt,"As an AI language model, I am unable to engage...",Here is a polished version of the 12 sentence ...,Here is a 12 sentence story based on the promp...,Here is a 12 sentence story in a more human wr...,...,Here is a 12 sentence story baded on the profp...,"I have a secret to share with you all, did you...",Here is a 12 sentence story based on the promp...,"I have a secert to share with you all, did you...",Here is a 12 sentence story based on the prоmp...,I have a secret that I share with you. Do you ...,This is a 12 -sentence story based on prompts:...,"I had a friend who ruined my life, and I had n...","No one could understand where he had gone, it ...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2095,2096,Transformers: Age of Extinction. By Galaxy Bet...,Your helicopter dad only lets you go to the mo...,I asked my dad if I could see the new Marvel f...,Claude-instant,writing_prompt,Here is a polished version of the story summar...,Here is a polished 14 sentence story based on ...,Here is a 14 sentence story responding to the ...,Here is a 14 sentence story in a more human st...,...,Here is a 14 sentence story Qased on the promp...,Transformers: Antiquity of Extinction. By Gala...,Here is a 14 sentence story founded on the pro...,Transformers: Age of Vanishing. By Galaxy Bele...,Here is a 14 condemnation story based on the r...,Transformers: Extinction. Galaxy Galaxy Bethle...,This is a 14 -sentence story based on prompts:...,"This movie is about Cade Yeager, a friendly Am...",“Only if you can write a review connecting it ...,
2096,2097,""" Hello, Little One. Your mother and I have be...",A father and a daughter say goodbye to each ot...,"Stella helped her father, Mark, carry his suit...",Claude-instant,writing_prompt,"Here is my attempt at polishing the story:""Hel...",Here is a polished version of the 18 sentence ...,Here is an 18 sentence story based on the prom...,Here is an 18 sentence story in response to th...,...,Here is an 18 sentence story based on the prom...,""" Hello, Teeny One. Your mother and I could be...",Here is an 18 sentence story foundations on th...,""" Howdy, Little One. Your mother and l have be...",Here is an 18 sentence story based on the prom...,"""Hello, little guy. You and your mother have b...",This is an 18 -sentence story based on prompts...,You have my eyes. You have your mother's nose....,"Mark had to go on a business trip, but he didn...",
2097,2098,The air in the park became suffocatingly damp....,"The main character meets the devil, and is sur...",John was walking alone in the woods late at ni...,Claude-instant,writing_prompt,Here is my attempt at polishing the story:The ...,Here is a polished version of the story:It was...,Here is a 21 sentence story based on the promp...,"Here is a 21 sentence story in a proposed ""hum...",...,Here is a 21 sentence story based on the promp...,The airline in the park became suffocatingly d...,Here is a 21 sentence storytelling based on th...,The air in the park beϲame suffocatingly damp....,Here is a 21 sentence story based on the promp...,The air in the park became suffocating. Wet pe...,This is a 21 -based storytelling story: John w...,"It was late at night, and there was little mov...",He stumbled and fell to the ground. When he lo...,
2098,2099,I looked up at the calendar with the red heart...,The first letter of each sentence spells a mes...,"T he young boy walked through the forest, chas...",Claude-instant,writing_prompt,Here is my polished version of the story:I loo...,Here is my attempt at polishing the story whil...,"I apologize, upon reflection I do not feel com...","Here is a 23 sentence story in a creative, des...",...,Here is a 23 sentence story with a hiddn contr...,I looked up at the calendar with the tinto hea...,Here is a 23 sentence historic with a hidden c...,I lоoked up at the calendar with the red heart...,Here is a 23 sentence story with a hidden cont...,"I looked up at the calendar, and the red heart...",This is a 23 -sentence story. The first letter...,"I thought, how lucky I was to have met someone...",He tripped over a tree root and tumbled down a...,


In [46]:
# Same view for the original writing_prompt data; filter_llm=True keeps the default LLM, Claude-instant.
load_dataframe_from_json(f"{TRAINING_DIR_ORIGINAL}/writing_prompt_2800.json", filter_llm=True)

Unnamed: 0,id,story,story_prompt,direct_prompt,llm_type,domain,paraphrase_polish_human,paraphrase_polish_llm,prompt_few_shot,prompt_SICO,...,adversarial_character_llm,adversarial_word_human,adversarial_word_llm,adversarial_character_word_human,adversarial_character_word_llm,paraphrase_back_translation_human,paraphrase_back_translation_llm,paraphrase_dipper_human,paraphrase_dipper_llm,icl_prompt
1400,1401,... For it is so that One is Dark and One is L...,Death and Life find out that they' re expecting.,Here is a 22 sentence story based on the promp...,Claude-instant,writing_prompt,Here is my attempt at polishing the writing st...,Here is a polished version of the story:Death ...,Here is a 22 sentence story based on the promp...,Here is a 22 sentence story in a more human wr...,...,eHre is a 22 sentence story based on the promp...,... For it is equally that One is Dark and One...,Here is a 22 sentence story based on the promp...,... During it is so that One is Dark and One i...,Here is a 22 sentence story based on the promp...,"... Because it is dark, one is light. A person...",This is a 22 -sentence story based on prompts:...,"One reigns in endless uncertainty, mystery, an...",He had been busy these past few decades and wa...,
1401,1402,"Sara had a very special toy. Of course, of all...",Write a short story meant for children riddled...,"As an AI language model, I am unable to engage...",Claude-instant,writing_prompt,Here is my polished version of the story:Sara ...,"Friends, while I strive to be helpful, as an A...","As an AI language model, I am unable to engage...","As an AI language model, I am unable to engage...",...,"As an IA language model, I am unable to engage...","Sara brought a very special toy. Of course, of...","As an AI language model, I am unable to engage...","Sara had a very spcial toy. Des course, of all...","As an AI language model, I am unable to engage...","Sara has a very special toy. Of course, among ...","As a AI language model, I cannot participate i...",It was a beautiful stuffed cat with soft fur t...,"To learn more, visit https://poe.com/usage_gui...",
1402,1403,The world has gone to shit. I used to be able ...,"After the event of some anomaly, 25 % of the p...",Here is a 25 sentence story based on the promp...,Claude-instant,writing_prompt,Here is my attempt at polishing the story:The ...,Here is a polished version of the 25 sentence ...,Here is a 25 sentence story based on the promp...,Here is a 25 sentence story in a more human co...,...,Here is a 25 sentence story based on the promp...,The world has gone to inferno. I used to be ab...,Here is a 25 condemned story based on the prom...,The worldwide has gone to shit. I used to be a...,Here is a 25 sentence story based on the promp...,The world is worse. I was able to make a decen...,This is a 25 -based storytelling story: John w...,Now? I used to be able to make a decent living...,"Across town, people were discovering they coul...",
1403,1404,I miss you. It' s a feeling I know I could des...,I miss you.,Here is a 21 sentence story based on the promp...,Claude-instant,writing_prompt,Here is my attempt at polishing the writing st...,Here is a polished version of the story:I miss...,Here is a 21 sentence story based on the promp...,Here is a 21 sentence story in human style wri...,...,Here is a 21 sentence story based on the propt...,I signorina you. It' s a feeling I realising I...,Here is a 21 sentence story based on the promp...,l miss you. It' s a feeling I know I could des...,Here is a 21 sentencing story based on the pro...,I miss you. I know I can only describe a feeli...,This is a 21 -based storytelling story: I miss...,These three words are everywhere. They are wri...,I keep hoping to get a text or a call from you...,
1404,1405,"I have a secret to share with you all, did you...","When a child is born, the eldest member of the...",Here is a 12 sentence story based on the promp...,Claude-instant,writing_prompt,"As an AI language model, I am unable to engage...",Here is a polished version of the 12 sentence ...,Here is a 12 sentence story based on the promp...,Here is a 12 sentence story in a more human wr...,...,Here is a 12 sentence story baded on the profp...,"I have a secret to share with you all, did you...",Here is a 12 sentence story based on the promp...,"I have a secert to share with you all, did you...",Here is a 12 sentence story based on the prоmp...,I have a secret that I share with you. Do you ...,This is a 12 -sentence story based on prompts:...,"I had a friend who ruined my life, and I had n...","No one could understand where he had gone, it ...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2095,2096,Transformers: Age of Extinction. By Galaxy Bet...,Your helicopter dad only lets you go to the mo...,Here is a 14 sentence story based on the promp...,Claude-instant,writing_prompt,Here is a polished version of the story summar...,Here is a polished 14 sentence story based on ...,Here is a 14 sentence story responding to the ...,Here is a 14 sentence story in a more human st...,...,Here is a 14 sentence story Qased on the promp...,Transformers: Antiquity of Extinction. By Gala...,Here is a 14 sentence story founded on the pro...,Transformers: Age of Vanishing. By Galaxy Bele...,Here is a 14 condemnation story based on the r...,Transformers: Extinction. Galaxy Galaxy Bethle...,This is a 14 -sentence story based on prompts:...,"This movie is about Cade Yeager, a friendly Am...",“Only if you can write a review connecting it ...,
2096,2097,""" Hello, Little One. Your mother and I have be...",A father and a daughter say goodbye to each ot...,Here is an 18 sentence story based on the prom...,Claude-instant,writing_prompt,"Here is my attempt at polishing the story:""Hel...",Here is a polished version of the 18 sentence ...,Here is an 18 sentence story based on the prom...,Here is an 18 sentence story in response to th...,...,Here is an 18 sentence story based on the prom...,""" Hello, Teeny One. Your mother and I could be...",Here is an 18 sentence story foundations on th...,""" Howdy, Little One. Your mother and l have be...",Here is an 18 sentence story based on the prom...,"""Hello, little guy. You and your mother have b...",This is an 18 -sentence story based on prompts...,You have my eyes. You have your mother's nose....,"Mark had to go on a business trip, but he didn...",
2097,2098,The air in the park became suffocatingly damp....,"The main character meets the devil, and is sur...",Here is a 21 sentence story based on the promp...,Claude-instant,writing_prompt,Here is my attempt at polishing the story:The ...,Here is a polished version of the story:It was...,Here is a 21 sentence story based on the promp...,"Here is a 21 sentence story in a proposed ""hum...",...,Here is a 21 sentence story based on the promp...,The airline in the park became suffocatingly d...,Here is a 21 sentence storytelling based on th...,The air in the park beϲame suffocatingly damp....,Here is a 21 sentence story based on the promp...,The air in the park became suffocating. Wet pe...,This is a 21 -based storytelling story: John w...,"It was late at night, and there was little mov...",He stumbled and fell to the ground. When he lo...,
2098,2099,I looked up at the calendar with the red heart...,The first letter of each sentence spells a mes...,Here is a 23 sentence story with a hidden cont...,Claude-instant,writing_prompt,Here is my polished version of the story:I loo...,Here is my attempt at polishing the story whil...,"I apologize, upon reflection I do not feel com...","Here is a 23 sentence story in a creative, des...",...,Here is a 23 sentence story with a hiddn contr...,I looked up at the calendar with the tinto hea...,Here is a 23 sentence historic with a hidden c...,I lоoked up at the calendar with the red heart...,Here is a 23 sentence story with a hidden cont...,"I looked up at the calendar, and the red heart...",This is a 23 -sentence story. The first letter...,"I thought, how lucky I was to have met someone...",He tripped over a tree root and tumbled down a...,
