In [6]:
# Install necessary libraries
!pip install datasets
!pip install torch torchvision torchaudio --quiet
!pip install transformers --quiet
!pip install scikit-learn pandas numpy --quiet



In [7]:
# Import libraries
import torch
import torch.nn as nn
from transformers import DebertaTokenizer, DebertaForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import pandas as pd
import numpy as np


In [8]:
# Load and preprocess the data
from datasets import load_dataset

dataset = load_dataset("Hello-SimpleAI/HC3", name='all')
dataset = dataset['train'].to_pandas()

# Limit to the first 500 prompts and preprocess columns
reduced_df = dataset.head(500).drop(columns=['source'])

human_df = reduced_df[['human_answers']].rename(columns={'human_answers': 'text'})
human_df['label'] = 0

llm_df = reduced_df[['chatgpt_answers']].rename(columns={'chatgpt_answers': 'text'})
llm_df['label'] = 1

# Combine human and GPT answers into a single dataframe
full_df = pd.concat([human_df, llm_df], axis=0, ignore_index=True)
full_df['text'] = full_df['text'].apply(lambda x: ''.join(x))  # Flatten text

# Shuffle the data
df = full_df.sample(frac=1).reset_index(drop=True)

print(f"Processed dataset size: {len(df)}")
print(df.head())


Processed dataset size: 1000
                                                text  label
0  EDIT , Nov 21 : By popular demand , now includ...      0
1  Not really ELI5 - able because it 's a complic...      0
2  The recommended daily allowance (RDA) of Vitam...      1
3  The quality of the glass is different , but mo...      0
4  Microtransactions are a way for companies to m...      1


In [9]:
# Split the data into training and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'].values,
    df['label'].values,
    test_size=0.2, random_state=42
)

print(f"Number of training samples: {len(train_texts)}")
print(f"Number of validation samples: {len(val_texts)}")


Number of training samples: 800
Number of validation samples: 200


# Define Custom Dataset Class

In [10]:
# Define custom dataset class
class EssayDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=256):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(label, dtype=torch.long)
        }


In [11]:
# Tokenizer setup
tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')

train_dataset = EssayDataset(train_texts, train_labels, tokenizer)
val_dataset = EssayDataset(val_texts, val_labels, tokenizer)


In [12]:
# Load pre-trained model
model = DebertaForSequenceClassification.from_pretrained('microsoft/deberta-base', num_labels=2)


pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
)




In [14]:
# Define a custom compute_metrics function for Hugging Face Trainer
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    accuracy = np.sum(predictions == labels) / len(labels)
    return {"accuracy": accuracy}

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


  trainer = Trainer(


In [15]:
# Train the model
trainer.train()
# Evaluate the model on validation set
metrics = trainer.evaluate()
print("\nEvaluation Results:")
print(metrics)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0003,0.000158,1.0
2,0.0001,7.5e-05,1.0


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0003,0.000158,1.0
2,0.0001,7.5e-05,1.0
3,0.0001,6.2e-05,1.0



Evaluation Results:
{'eval_loss': 6.163702346384525e-05, 'eval_accuracy': 1.0, 'eval_runtime': 240.868, 'eval_samples_per_second': 0.83, 'eval_steps_per_second': 0.104, 'epoch': 3.0}
