In [None]:
import os
from datasets import load_dataset

!pip install -U datasets
!pip install -U sentence-transformers

ds = load_dataset("Hello-SimpleAI/HC3", "all")

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd

from transformers import XLMRobertaModel, XLMRobertaTokenizer, BertModel, BertTokenizer, RobertaModel, RobertaTokenizer
from transformers import DebertaV2Model, DebertaV2Tokenizer

import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transforme

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/1.49k [00:00<?, ?B/s]

HC3.py:   0%|          | 0.00/9.47k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/39.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/24322 [00:00<?, ? examples/s]

In [None]:
# Notebook-wide compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from sentence_transformers import SentenceTransformer
import tqdm
class pretrained_model(nn.Module):
    """Text encoder that turns a batch of raw strings into one embedding per string.

    Wraps either a SentenceTransformer (``use_sentence_model=True``) or a
    Hugging Face tokenizer/encoder pair selected by ``model_name``.

    Args:
        model_name: ``'xlm-roberta-base'``, ``'roberta-base'`` or
            ``'microsoft/deberta-v3-base'``. Ignored when ``use_sentence_model``
            is True.
        use_sentence_model: When True, load the fixed SentenceTransformer
            checkpoint ``mics-nlp/xlm-roberta-small-all-nli-triplet`` instead.
        output_hidden_states: Forwarded to the encoder's ``from_pretrained``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names. Without
            this check the object would be built with no tokenizer/encoder and
            only fail at the first forward pass.
    """

    def __init__(self, model_name='xlm-roberta-base', use_sentence_model = False, output_hidden_states=False):
        super(pretrained_model, self).__init__()
        self.use_sentence_model = use_sentence_model
        if self.use_sentence_model:
            self.sentence_model = SentenceTransformer("mics-nlp/xlm-roberta-small-all-nli-triplet")
        elif model_name == 'xlm-roberta-base':
            self.tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
            self.model = XLMRobertaModel.from_pretrained(model_name, output_hidden_states=output_hidden_states)
        elif model_name == 'roberta-base':
            self.tokenizer = RobertaTokenizer.from_pretrained(model_name)
            self.model = RobertaModel.from_pretrained(model_name, output_hidden_states=output_hidden_states)
        elif model_name == 'microsoft/deberta-v3-base':
            self.tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
            self.model = DebertaV2Model.from_pretrained(model_name, output_hidden_states=output_hidden_states)
        else:
            raise ValueError(
                f"Unsupported model_name {model_name!r}; expected 'xlm-roberta-base', "
                "'roberta-base' or 'microsoft/deberta-v3-base'."
            )

    def forward(self, input_texts, return_attention_mask=True):
        """Encode ``input_texts`` (a list of strings) into a batch of embeddings.

        Args:
            input_texts: Strings to encode.
            return_attention_mask: Unused; kept so existing callers keep working.

        Returns:
            The SentenceTransformer embeddings, or the mean of the encoder's
            ``last_hidden_state`` over the sequence axis.
        """
        if self.use_sentence_model:
            return self.sentence_model.encode(input_texts, convert_to_tensor=True)

        # Inputs are truncated to 256 tokens and padded to the longest item in the batch.
        encoded_input = self.tokenizer(
            input_texts, padding=True, truncation=True, return_tensors='pt', max_length=256
        )
        # Follow the encoder's own device rather than a notebook-level global, so
        # the module works wherever it has been moved with `.to(...)`.
        encoded_input = encoded_input.to(next(self.model.parameters()).device)
        model_output = self.model(**encoded_input)
        # NOTE(review): this is a plain mean over dim 1, so padded positions are
        # averaged in as well. Left unchanged on purpose: the MLP weights in
        # existing checkpoints were trained on exactly this pooling.
        return model_output.last_hidden_state.mean(dim=1)

class MLP(nn.Module):
  """Feed-forward head: 768 -> 256 -> 32 -> 1, with ReLU after the first two layers."""

  def __init__(self):
    super().__init__()
    # Attribute names are part of the state_dict keys; keep them stable.
    self.fc1 = nn.Linear(in_features=768, out_features=256)
    self.fc2 = nn.Linear(in_features=256, out_features=32)
    self.fc3 = nn.Linear(in_features=32, out_features=1)

  def forward(self, x):
    """Map a batch of 768-wide vectors to one raw (un-activated) score each."""
    hidden = x
    for layer in (self.fc1, self.fc2):
      hidden = F.relu(layer(hidden))
    # No activation on the last layer: the output is returned as-is.
    return self.fc3(hidden)

class custom_model(nn.Module):
  """`microsoft/deberta-v3-base` text encoder followed by the MLP head."""

  def __init__(self):
    super().__init__()
    self.pretrained_version = pretrained_model(model_name='microsoft/deberta-v3-base')
    self.mlp = MLP()

  def forward(self, x):
    """Encode the input texts, then score the embeddings with the MLP."""
    # The encoder call is not wrapped in torch.no_grad(), so gradients can
    # reach whichever encoder parameters have requires_grad=True.
    embeddings = self.pretrained_version(x)
    return self.mlp(embeddings)

In [None]:
# `torch.cuda.amp.autocast()` / `torch.cuda.amp.GradScaler()` are deprecated;
# `torch.amp.autocast('cuda')` / `torch.amp.GradScaler('cuda')` are the
# equivalent replacements.
from torch.amp import autocast, GradScaler

# Module-level gradient scaler used by training_loop for mixed-precision steps.
scaler = GradScaler('cuda')

def training_loop(model, learning_rate, num_epochs, device, tr_dataloader, test_dataloader, freeze_unfreeze=False, pos_weight=None):
    """Fine-tune ``model`` with AdamW and binary cross-entropy on logits.

    Args:
        model: Module exposing ``pretrained_version.model`` (the encoder, with
            ``encoder.layer``) and returning logits of shape ``(batch, 1)`` for
            a list of texts.
        learning_rate: Initial AdamW learning rate. It is multiplied by 0.1
            after every epoch whose zero-based index is even (0, 2, 4, ...).
        num_epochs: Number of passes over ``tr_dataloader``.
        device: Device the model and the label tensors are moved to.
        tr_dataloader: Yields ``(texts, labels)`` training batches.
        test_dataloader: Yields ``(texts, labels)`` validation batches.
        freeze_unfreeze: If True, the whole encoder is frozen for the first
            epoch and its last four layers are unfrozen afterwards. If False,
            the last four encoder layers are trainable from the start.
        pos_weight: Optional positive-class weight for ``BCEWithLogitsLoss``.

    Prints the average training and validation loss after every epoch.
    """
    # Local import: `gc` is only imported in a later notebook cell, so relying
    # on the global would make this function depend on cell execution order.
    import gc

    if pos_weight is None:
        criterion = F.binary_cross_entropy_with_logits
    else:
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    model.to(device)
    model.train()

    # Freeze the whole encoder first; the last four layers are re-enabled either
    # right away (default) or after the first epoch (freeze_unfreeze=True).
    encoder = model.pretrained_version.model
    last_layers = encoder.encoder.layer[-4:]
    for param in encoder.parameters():
        param.requires_grad = False
    if not freeze_unfreeze:
        for param in last_layers.parameters():
            param.requires_grad = True

    for epoch in range(num_epochs):
        total_loss = 0
        for batch_inputs, batch_labels in tqdm.tqdm(tr_dataloader):
            batch_labels = batch_labels.to(device)
            optimizer.zero_grad()
            with autocast('cuda'):
                outputs = model(list(batch_inputs))
                outputs = outputs.squeeze(1)
                loss = criterion(outputs, batch_labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            total_loss += loss.item()
            # Aggressively release memory after every step (kept from the
            # original loop; it trades speed for a lower peak memory footprint).
            del outputs, loss
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
            gc.collect()
        # Average over the loader that was actually iterated (the parameter),
        # not over a notebook-level global.
        avg_loss = total_loss / max(len(tr_dataloader), 1)

        # Step the learning rate down by a factor of 10 after epochs 0, 2, 4, ...
        if epoch % 2 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1

        with torch.no_grad():
            model.eval()
            total_val_loss = 0
            for val_batch_inputs, val_batch_labels in test_dataloader:
                val_batch_labels = val_batch_labels.to(device)
                val_outputs = model(list(val_batch_inputs))
                val_outputs = val_outputs.squeeze(1)
                val_loss = criterion(val_outputs, val_batch_labels)
                total_val_loss += val_loss.item()
            avg_val_loss = total_val_loss / max(len(test_dataloader), 1)
            model.train()

        if freeze_unfreeze:
            for param in last_layers.parameters():
                param.requires_grad = True
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    print("Training complete.")

  scaler = GradScaler()


In [None]:
class HC3Dataset(torch.utils.data.Dataset):
  """Flat (text, label) view of HC3 rows: label 0.0 for human answers, 1.0 for ChatGPT answers.

  Args:
      ds: Object for which ``ds[:]`` returns a mapping with the columns
          ``'human_answers'`` and ``'chatgpt_answers'``, each a sequence of
          per-row answer lists.
      device: Device on which the float32 label tensor is created.
  """

  def __init__(self, ds, device='cpu'):
    from itertools import chain

    # Materialize the rows once instead of once per column.
    rows = ds[:]
    # chain.from_iterable flattens in linear time; sum(lists, []) is quadratic.
    human_texts = list(chain.from_iterable(rows['human_answers']))
    ai_texts = list(chain.from_iterable(rows['chatgpt_answers']))

    # All human texts come first, followed by all AI texts.
    self.texts = human_texts + ai_texts
    self.labels = torch.tensor(
        [0.0] * len(human_texts) + [1.0] * len(ai_texts),
        dtype=torch.float32,
        device=device,
    )

  def __len__(self):
    return len(self.labels)

  def __getitem__(self, idx):
    return self.texts[idx], self.labels[idx]

In [None]:
from torch.utils.data import Dataset, random_split, DataLoader

# 95% of the HC3 rows are used for training, the remaining 5% for validation.
TRAIN_FRACTION = 0.95
# Fixed seed so the split is identical across kernel restarts. This matters
# because the model is reloaded from a checkpoint: with an unseeded split,
# rows used for training in one session could land in validation in the next.
SPLIT_SEED = 42

train_size = int(TRAIN_FRACTION * len(ds["train"]))
test_size = len(ds['train']) - train_size
train_split, test_split = random_split(
    ds['train'],
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_dataset = HC3Dataset(train_split, device)
test_dataset = HC3Dataset(test_split, device)

train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
# Validation order does not need to be randomized.
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

# Class balance of the training set, read directly from the dataset's label
# tensor instead of indexing the dataset one item at a time.
labels = train_dataset.labels
neg = (labels == 0).sum().item()
pos = (labels == 1).sum().item()
if pos == 0:
    raise ValueError("Training split contains no positive (AI) examples; cannot compute pos_weight.")
# pos_weight = (#negatives / #positives), passed to BCEWithLogitsLoss.
pos_weight = torch.tensor([neg / pos]).to(device)
#model = custom_model()

In [None]:
# Build the model and restore previously trained weights from 'model_deberta_610_10pm.pth'.
model = custom_model()
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(
    torch.load('model_deberta_610_10pm.pth', map_location=device, weights_only=True)
)

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

In [None]:
import gc
import torch

# Release cached GPU memory and collect garbage before starting a training run.
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
gc.collect()

# Three epochs starting at lr=1e-4, with the last four encoder layers trainable
# from the first epoch (freeze_unfreeze=False) and class-weighted BCE loss.
training_loop(
    model=model,
    learning_rate=0.0001,
    num_epochs=3,
    device=device,
    tr_dataloader=train_data_loader,
    test_dataloader=test_data_loader,
    freeze_unfreeze=False,
    pos_weight=pos_weight,
)


  with autocast():

  0%|          | 1/635 [00:05<53:57,  5.11s/it][A
  0%|          | 2/635 [00:08<42:18,  4.01s/it][A
  0%|          | 3/635 [00:10<32:59,  3.13s/it][A
  1%|          | 4/635 [00:12<28:56,  2.75s/it][A
  1%|          | 5/635 [00:14<26:09,  2.49s/it][A
  1%|          | 6/635 [00:16<23:30,  2.24s/it][A
  1%|          | 7/635 [00:18<22:06,  2.11s/it][A
  1%|▏         | 8/635 [00:20<21:32,  2.06s/it][A
  1%|▏         | 9/635 [00:21<20:37,  1.98s/it][A
  2%|▏         | 10/635 [00:23<19:58,  1.92s/it][A
  2%|▏         | 11/635 [00:25<19:31,  1.88s/it][A
  2%|▏         | 12/635 [00:27<19:10,  1.85s/it][A
  2%|▏         | 13/635 [00:29<18:57,  1.83s/it][A
  2%|▏         | 14/635 [00:30<19:00,  1.84s/it][A
  2%|▏         | 15/635 [00:32<19:01,  1.84s/it][A
  3%|▎         | 16/635 [00:34<18:54,  1.83s/it][A
  3%|▎         | 17/635 [00:36<18:46,  1.82s/it][A
  3%|▎         | 18/635 [00:38<18:40,  1.82s/it][A
  3%|▎         | 19/635 [00:40<18:36,  1.81s/it][A


Epoch 1/3, Loss: 0.0450, Val Loss: 0.0790


100%|██████████| 635/635 [20:13<00:00,  1.91s/it]


Epoch 2/3, Loss: 0.0050, Val Loss: 0.0151


 73%|███████▎  | 464/635 [14:48<05:27,  1.91s/it]


KeyboardInterrupt: 

In [None]:
def evaluation_loop(model, device, dataloader):
  """Run ``model`` over ``dataloader`` and collect binary predictions and labels.

  Args:
      model: Module that maps a list of texts to logits of shape ``(batch, 1)``.
      device: Device the model and the collected tensors are moved to.
      dataloader: Iterable of ``(texts, labels)`` batches.

  Returns:
      ``(allPreds, allAnswers)``: two 1-D float32 tensors of equal length.
      Predictions are ``round(sigmoid(logit))``. Both are empty when the
      dataloader yields no batches.
  """
  model.to(device)
  model.eval()
  pred_chunks, answer_chunks = [], []
  with torch.no_grad():
      for batch_inputs, batch_labels in dataloader:
          outputs = model(list(batch_inputs))
          predicted_classes = torch.round(torch.sigmoid(outputs))
          # reshape(-1) keeps every chunk 1-D. The previous trailing .squeeze()
          # collapsed a single-example result to 0-dim and broke len().
          pred_chunks.append(predicted_classes.reshape(-1).to(device))
          answer_chunks.append(batch_labels.reshape(-1).to(device))

  if not pred_chunks:
      return torch.Tensor([]).to(device), torch.Tensor([]).to(device)

  # Concatenate once at the end instead of growing a tensor inside the loop.
  allPreds = torch.cat(pred_chunks).to(torch.float32)
  allAnswers = torch.cat(answer_chunks).to(torch.float32)
  return allPreds, allAnswers

In [None]:
import json
from pathlib import Path
from torch.utils.data import Dataset
class HumanAIDataset(Dataset):
    """(text, label) dataset built from JSON / JSON Lines files; label 0 = human, 1 = machine.

    Two file formats are understood:

    * Files named ``hewlett.json`` or ``toefl.json`` hold one JSON array of
      objects; every ``"document"`` value is added with label 0.
    * Any other file is read as JSON Lines. Per record, a non-empty
      ``"human_text"`` is added with label 0, a non-empty ``"machine_text"``
      with label 1, and a non-empty ``"document"`` with label 0.

    JSON-array files are loaded first (hewlett, then toefl), followed by the
    remaining files in the order given.

    Args:
        files: A single path or a list of paths (``str`` or ``Path``). The
            caller's list is not modified.
        transform: Optional callable applied to each text in ``__getitem__``.
        device: Device for the label tensor. Defaults to CUDA when available,
            otherwise CPU.
    """

    # File names stored as a single JSON array rather than JSON Lines.
    _JSON_ARRAY_FILES = ("hewlett.json", "toefl.json")

    def __init__(self, files, transform=None, device=None):
        if isinstance(files, (str, Path)):
            files = [files]
        # Work on a private list of Paths so the caller's list is never mutated.
        paths = [Path(fp) for fp in files]

        if device is None:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.texts, self.labels = [], []

        for array_name in self._JSON_ARRAY_FILES:
            for fp in paths:
                if fp.name != array_name:
                    continue
                # Open the path that was actually passed in, not a hard-coded
                # relative file name.
                with fp.open(encoding="utf-8") as f:
                    data = json.load(f)
                documents = [entry["document"] for entry in data if "document" in entry]
                self.texts.extend(documents)
                self.labels.extend([0] * len(documents))

        for fp in paths:
            if fp.name in self._JSON_ARRAY_FILES:
                continue
            with fp.open(encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue  # skip blank lines
                    record = json.loads(line)
                    if "human_text" in record and record["human_text"]:
                        self.texts.append(record["human_text"])
                        self.labels.append(0)
                    if "machine_text" in record and record["machine_text"]:
                        self.texts.append(record["machine_text"])
                        self.labels.append(1)
                    if "document" in record and record["document"]:
                        self.texts.append(record["document"])
                        self.labels.append(0)

        # Explicit dtype so an empty dataset still yields an integer label tensor.
        self.labels = torch.tensor(self.labels, dtype=torch.long).to(device)
        self.transform = transform

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text, label = self.texts[idx], self.labels[idx]
        if self.transform:
            text = self.transform(text)
        return text, label

# Evaluation corpora (JSON Lines) used to score the model on data outside HC3.
files = ['arxiv_chatGPT.jsonl', 'arxiv_cohere.jsonl' , 'reddit_chatGPT.jsonl', 'reddit_cohere.jsonl']
dataset = HumanAIDataset( files )
# NOTE: despite its name, `dev_dataset` is a DataLoader. Shuffling is disabled
# because the order of examples is irrelevant for evaluation metrics.
dev_dataset = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=False)
# Only drops the notebook-level name; the DataLoader keeps its own reference
# to the dataset, so the data itself stays alive.
del dataset
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
gc.collect()

# Predictions and labels for the whole evaluation set; the metrics are
# computed and printed from these two tensors below.
allPreds, allAnswers = evaluation_loop(model, device, dev_dataset)
def calculate_precision_recall(predictions, labels):
  """Compute precision and recall for binary (0/1) predictions.

  Args:
      predictions: Tensor of predicted classes (0 or 1).
      labels: Tensor of true classes (0 or 1), same shape as ``predictions``.

  Returns:
      ``(precision, recall)`` as 0-dim float tensors. A metric whose
      denominator is zero is returned as a zero tensor, so callers can always
      call ``.item()`` on the result.
  """
  true_positives = ((predictions == 1) & (labels == 1)).sum().float()
  false_positives = ((predictions == 1) & (labels == 0)).sum().float()
  false_negatives = ((predictions == 0) & (labels == 1)).sum().float()

  predicted_positive = true_positives + false_positives
  actual_positive = true_positives + false_negatives
  # Tensor-valued fallback keeps the return type consistent with the normal path.
  zero = torch.zeros_like(true_positives)

  precision = true_positives / predicted_positive if predicted_positive > 0 else zero
  recall = true_positives / actual_positive if actual_positive > 0 else zero

  return precision, recall

print(f"Accuracy: {((allPreds == allAnswers).sum() / len(allPreds)).item():.4f}")

precision, recall = calculate_precision_recall(allPreds, allAnswers)
# float() works whether a metric is a 0-dim tensor or a plain Python number
# (the zero-denominator fallback); .item() exists only on tensors.
print(f"Precision: {float(precision):.4f}")
print(f"Recall: {float(recall):.4f}")

# F1 is the harmonic mean of precision and recall; defined as 0 when both are 0.
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
print(f"F1-Score: {float(f1_score):.4f}")

Accuracy: 0.9428
Precision: 0.9713
Recall: 0.9023
F1-Score: 0.9355


In [None]:
# Persist the current weights (state_dict only, not the module object).
# NOTE: this is the same path the model was loaded from earlier in the
# notebook, so the previous checkpoint is overwritten.
checkpoint_state = model.state_dict()
torch.save(checkpoint_state, 'model_deberta_610_10pm.pth')