<a href="https://colab.research.google.com/github/mjdileep/Model-Explorer/blob/main/LLM_Detect_Hierarchical_Attention_%5BTrainable_Embeddings%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive (Colab only) so the '/content/drive/...' paths used by
# the later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import os
import gc
import math
import random
import glob, os
from sklearn import set_config
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm

# Make scikit-learn transformers return pandas objects.
set_config(transform_output = 'pandas')
# Turn off pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None
from transformers import GPT2Tokenizer


# GPT-2 tokenizer used to turn essay text into token ids.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Tokens per training window: minimum padded length in `pad` and crop length
# in `CustomDataset.__getitem__`.
seq_length = 128
# Length of each chunk that `Model.forward` feeds to the LSTM; the window is
# cut into seq_length / short_seq_length chunks.
short_seq_length = 16

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [None]:
# Load the main essay corpus, keep essays of 700..200000 characters, expose the
# label column as `generated`, drop repeated texts and rows without a label.
train_essays = (
    pd.read_csv('/content/drive/MyDrive/AIML/llm_detect/train_v2_drcat_02.csv')
    .loc[lambda frame: frame.text.str.len().between(700, 200000)]
    .rename(columns={"label": "generated"})
    .drop_duplicates(subset=['text'])
    .loc[lambda frame: frame['generated'].notnull()]
)
train_essays.shape

(44690, 5)

In [None]:
# Load the external essay set, expose the label column as `generated`, drop
# repeated texts and rows without a label.
# NOTE(review): this is a Kaggle input path while every other path in the
# notebook points at Google Drive - confirm it exists in the runtime used.
ext_train_essays = (
    pd.read_csv('/kaggle/input/daigt-external-train-dataset/train_external_drcat_02.csv')
    .rename(columns={"label": "generated"})
    .drop_duplicates(subset=['text'])
    .loc[lambda frame: frame['generated'].notnull()]
)
ext_train_essays.shape

In [None]:
# Encode every essay with the GPT-2 tokenizer and store the ids as a 1-D
# tensor in a new "tokens" column of each frame.
for essays in (train_essays, ext_train_essays):
    essays["tokens"] = essays.text.apply(
        lambda text: torch.tensor(tokenizer(text)["input_ids"])
    )

Token indices sequence length is longer than the specified maximum sequence length for this model (1441 > 1024). Running this sequence through the model will result in indexing errors


In [None]:
# Cached, already-tokenized frames. The commented-out `to_pickle` lines write
# the cache; the active `read_pickle` lines replace the in-memory frames with
# the cached copies.
# NOTE(review): `read_pickle` unpickles arbitrary objects - only load files you
# produced yourself. The "tiktoken" file names do not match the GPT2Tokenizer
# used above; presumably just a naming leftover - confirm.
#train_essays.to_pickle("/content/drive/MyDrive/AIML/llm_detect/train_essays_tiktoken.pt")
train_essays = pd.read_pickle("/content/drive/MyDrive/AIML/llm_detect/train_essays_tiktoken.pt")
#ext_train_essays.to_pickle("/content/drive/MyDrive/AIML/llm_detect/ext_train_essays_tiktoken.pt")
ext_train_essays = pd.read_pickle("/content/drive/MyDrive/AIML/llm_detect/ext_train_essays_tiktoken.pt")

In [None]:
def pad(tokens, length=None, pad_id=50256):
    """Right-pad a 1-D token tensor so it holds at least `length` entries.

    Args:
        tokens: 1-D tensor of token ids.
        length: minimum length of the result. Defaults to the notebook-level
            `seq_length` when omitted, which is the original behaviour.
        pad_id: id used as filler. Defaults to 50256, the value the original
            hard-coded (presumably GPT-2's end-of-text id - confirm).

    Returns:
        `tokens` unchanged when it is already long enough, otherwise a new
        tensor with `pad_id` appended up to `length` entries.
    """
    if length is None:
        length = seq_length
    missing = length - tokens.shape[0]
    if missing <= 0:
        return tokens
    # new_full keeps the filler on the same dtype/device as the input.
    return torch.cat([tokens, tokens.new_full((missing,), pad_id)])

# Pad every token tensor in both frames so each one can supply a full window.
train_essays["tokens"] = train_essays.tokens.apply(pad)
ext_train_essays["tokens"] = ext_train_essays.tokens.apply(pad)

In [None]:
# List the distinct prompt names; the next cell holds some of them out of training.
train_essays.prompt_name.unique()

array(['Phones and driving', 'Car-free cities', 'Summer projects',
       '"A Cowboy Who Rode the Waves"',
       'Mandatory extracurricular activities', 'Exploring Venus',
       'Facial action coding system', 'The Face on Mars',
       'Community service', 'Grades for extracurricular activities',
       'Driverless cars', 'Does the electoral college work?',
       'Cell phones at school', 'Distance learning',
       'Seeking multiple opinions'], dtype=object)

In [None]:
# Prompt names held out of training and used for the test set.
# The Cowboy prompt is stored in the data with literal double quotes (see the
# `unique()` output above), so the quoted spelling is listed as well; the
# unquoted spelling alone never matched any row.
columns = ['Phones and driving', 'Car-free cities', 'Summer projects',
       'A Cowboy Who Rode the Waves', '"A Cowboy Who Rode the Waves"',
       'Mandatory extracurricular activities', 'Exploring Venus',
       'Facial action coding system', 'The Face on Mars', 'Community service']
held_out_mask = train_essays.prompt_name.isin(columns)
train_set = train_essays[~held_out_mask]
held_out = train_essays[held_out_mask]

# Balance the held-out prompts: keep the first N rows of each class.
held_out_neg = held_out[held_out["generated"]==0]
held_out_pos = held_out[held_out["generated"]==1]
c1_shape = held_out_neg.shape[0]
c2_shape = held_out_pos.shape[0]
N = min(c1_shape, c2_shape)
test_set = pd.concat([held_out_neg.iloc[:N,:], held_out_pos.iloc[:N,:]])

# Move the surplus of the larger class back into the training set. The slices
# are taken from the un-truncated held-out frames; slicing the already
# balanced `test_set` (as before) always produced an empty frame. At most one
# of the two slices is non-empty.
train_set = pd.concat([train_set, held_out_neg.iloc[N:,:], held_out_pos.iloc[N:,:]])

# The external essays are used for evaluation only.
test_set = pd.concat([test_set, ext_train_essays])


In [None]:
# Report the (rows, columns) shape of each class within each split.
for caption, split, label in (
    ("train_set 0:", train_set, 0),
    ("train_set 1:", train_set, 1),
    ("test_set1 0:", test_set, 0),
    ("test_set1 1:", test_set, 1),
):
    print(caption, split[split["generated"]==label].shape)

train_set 0: (12963, 6)
train_set 1: (10582, 6)
test_set1 0: (9553, 9)
test_set1 1: (11348, 9)


In [None]:
# Width of the embeddings, the LSTM projection and the transformer model.
min_dim = 128
# LSTM hidden size and transformer feed-forward width.
n_dim = 512
# Use the GPU when one is available; fall back to CPU so the notebook still
# runs on a CPU-only runtime instead of failing on the first `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Number of stacked LSTM layers and of transformer encoder layers.
n_layers = 8

class ResModule(nn.Module):
    """Residual bottleneck block: `x + l2(dropout(act(l1(x))))`.

    `l1` halves the feature width and `l2` restores it, so the branch output
    has the same shape as `x` and can be added to it. The previous `forward`
    skipped `l2` and therefore added an `n_dim/2`-wide tensor to an
    `n_dim`-wide one, which raises a shape error.

    Args:
        n_dim: feature width of the input and output.
        dropout: dropout probability applied inside the branch.
        f: stored on the instance as `self.f`; it is not applied in `forward`
            (unchanged from the original).
    """

    def __init__(self, n_dim, dropout=0.2, f=1.0):
        super().__init__()
        self.f=f
        self.l1 = nn.Linear(n_dim, int(n_dim/2))
        self.act = nn.LeakyReLU(1/5.5)
        self.d = nn.Dropout(dropout)
        self.l2 = nn.Linear(int(n_dim/2), n_dim)

    def forward(self, x):
        """Return `x` plus the bottleneck branch; output shape equals input shape."""
        branch = self.d(self.act(self.l1(x)))
        return x + self.l2(branch)


class ResStack(nn.Module):
    """Chain of `stack_size` ResModule blocks applied one after another.

    Each block is built with the same `n_dim` and `dropout` and receives
    `1.0/stack_size` as its `f` argument.
    """

    def __init__(self, stack_size, n_dim, dropout=0.5):
        super().__init__()
        blocks = [ResModule(n_dim, dropout, 1.0/stack_size) for _ in range(stack_size)]
        self.stack = nn.ModuleList(blocks)

    def forward(self, x):
        """Feed `x` through every block in order and return the final output."""
        for block in self.stack:
            x = block(x)
        return x


class FCBlock(nn.Module):
    """Linear layer followed by LeakyReLU (slope 1/5.5) and dropout.

    Args:
        dim_in: input feature width.
        dim_out: output feature width.
        dropout: dropout probability applied after the activation.
        act: accepted but not used; the activation is always LeakyReLU.
    """

    def __init__(self, dim_in, dim_out, dropout, act="RELU"):
        super().__init__()
        self.fc = nn.Linear(dim_in, dim_out)
        self.act = nn.LeakyReLU(1/5.5)
        self.d = nn.Dropout(dropout)

    def forward(self, x):
        """Apply linear -> activation -> dropout to `x`."""
        projected = self.fc(x)
        activated = self.act(projected)
        return self.d(activated)



class Model(nn.Module):
    """Hierarchical classifier: per-chunk LSTM summaries fed to a causal transformer.

    A window of token ids is embedded and cut into chunks of
    `short_seq_length` tokens. The LSTM runs on every chunk and its last
    output step is kept as that chunk's summary. The summaries, plus a
    sinusoidal positional encoding, go through a causally masked transformer
    encoder; the last position is batch-normalised, passed through two
    FCBlocks and a linear head, and squashed with a sigmoid.

    Reads the notebook-level settings `min_dim`, `n_dim`, `n_layers`,
    `seq_length` and `short_seq_length`.

    The positional encoding and the causal mask are registered as
    non-persistent buffers: they follow the module through `.to(...)` (so the
    class no longer depends on the global `device`) and stay out of
    `state_dict`, which keeps existing checkpoints loadable.
    """

    def __init__(self):
        super().__init__()
        n_chunks = int(seq_length/short_seq_length)
        self.embeddings = nn.Embedding(50257, min_dim)
        self.transformer_encoder = nn.TransformerEncoder(
              nn.TransformerEncoderLayer(
                  activation=nn.LeakyReLU(1/5.5),
                  d_model=min_dim,
                  dim_feedforward=n_dim,
                  nhead=4,
                  dropout=0.0,
                  batch_first=True),
              num_layers=n_layers
            )
        self.register_buffer(
            "positional_encoding",
            self.create_positional_encoding(min_dim, n_chunks),
            persistent=False)
        self.register_buffer(
            "mask",
            nn.Transformer.generate_square_subsequent_mask(n_chunks),
            persistent=False)
        self.lstm_encoder = nn.LSTM(
            min_dim,
            n_dim,
            num_layers=n_layers,
            bias=True,
            batch_first=True,
            dropout=0.0,
            proj_size=min_dim)
        self.bn = nn.BatchNorm1d(min_dim)
        self.fc1 = FCBlock(min_dim, min_dim, 0.1)
        self.fc2 = FCBlock(min_dim, min_dim, 0.1)
        self.out = nn.Linear(min_dim, 1)


    def create_positional_encoding(self, hidden_size, max_sequence_length):
        """Build a sinusoidal positional encoding of shape (1, max_sequence_length, hidden_size).

        Even feature indices hold sines and odd indices hold cosines of the
        position scaled by a geometric progression based on 10000.
        """
        positional_encoding = torch.zeros(max_sequence_length, hidden_size)
        position = torch.arange(0, max_sequence_length).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, hidden_size, 2).float() * -(torch.log(torch.tensor(10000.0)) / hidden_size))
        positional_encoding[:, 0::2] = torch.sin(position * div_term)
        positional_encoding[:, 1::2] = torch.cos(position * div_term)
        positional_encoding = positional_encoding.unsqueeze(0)
        return positional_encoding

    def forward(self, x):
        """Score a batch of token windows.

        Args:
            x: 2-D integer tensor of token ids, one window per row; each row
                is expected to hold `seq_length` tokens.

        Returns:
            1-D tensor with one sigmoid output in [0, 1] per row of `x`.
        """
        x = self.embeddings(x)
        chunk_summaries = []
        for end in range(short_seq_length, seq_length+1, short_seq_length):
            lstm_out, _ = self.lstm_encoder(x[:,end-short_seq_length:end,:])
            # Keep only the last time step as the summary of this chunk.
            chunk_summaries.append(lstm_out[:,-1:,:])
        lstm_out = torch.cat(chunk_summaries, 1)
        attn_out = self.transformer_encoder(lstm_out+self.positional_encoding, mask=self.mask, is_causal=True)
        # Under the causal mask only the last position has seen every chunk.
        out = attn_out[:,-1,:]
        out = self.fc2(self.fc1(self.bn(out)))
        out = self.out(out)

        return torch.sigmoid(out).view(-1)


# Build the network and move its parameters to the configured device.
model = Model().to(device)

In [None]:
class CustomDataset(Dataset):
    """Dataset that yields a random `seq_length`-token window and its label.

    Args:
        data: mapping/frame with a "tokens" column (one token sequence per
            row) and a "generated" column (the label).
    """

    def __init__(self, data):
        self.data = data["tokens"]
        self.targets = data["generated"]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        """Return `(window, label)` for row `index`.

        The window start is drawn uniformly with `np.random.randint`, so
        repeated calls for the same index can return different crops.
        """
        n_tokens = self.data.iloc[index].shape[0]
        start = np.random.randint(0, n_tokens-seq_length+1)
        window = self.data.iloc[index][start:start+seq_length]
        label = self.targets.iloc[index]
        return window, label

class CustomDataLoader(DataLoader):
    """Thin DataLoader subclass that forwards construction and iteration unchanged."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __iter__(self):
        """Return the parent DataLoader's iterator."""
        return super().__iter__()
# Wrap the training and test frames so DataLoaders can draw random token windows from them.
custom_dataset = CustomDataset(train_set)
custom_dataset_test = CustomDataset(test_set)

In [None]:
from warnings import simplefilter
import time
from copy import copy
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.nn.utils.rnn import PackedSequence
import numpy as np
import _pickle
import gc
from sklearn.metrics import roc_auc_score
import argparse
import os
from tempfile import gettempdir


# Silence FutureWarning and UserWarning for the rest of the session.
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=UserWarning)

# Placeholders; neither name is referenced by the visible cells.
task=None
logger=None


def epoch_update_gamma(y_true, y_pred, epoch=-1, delta=2):
    """Derive the roc-star margin `gamma` from one epoch's labels and predictions.

    Positives and negatives are randomly subsampled (about 2000 of each are
    kept), every negative-minus-positive difference is formed, and `gamma` is
    picked from the sorted margins of the correctly ordered pairs at index
    `delta * (number of wrongly ordered pairs - 1)`, clamped to the valid
    index range.

    Args:
        y_true: tensor of labels, 0.0 or 1.0.
        y_pred: tensor of predictions, same length as `y_true`.
        epoch: epoch number; for values not greater than -1 the default
            gamma is returned.
        delta: multiplier applied to the wrongly-ordered-pair count.

    Returns:
        0-dim tensor holding gamma. The default, 0.2 (float32, on the device
        of `y_pred`), is returned when `epoch` is not greater than -1, when
        either class is absent, or when no pair is correctly ordered.
    """
    default_gamma = torch.tensor(0.2, dtype=torch.float, device=y_pred.device)
    if not (epoch > -1):
        return default_gamma

    sub_sample_size = 2000.0
    pos = y_pred[y_true==1]
    neg = y_pred[y_true==0]
    cap_pos = pos.shape[0]
    cap_neg = neg.shape[0]
    # Without both classes there are no pairs to rank (and the keep
    # probabilities below would divide by zero).
    if cap_pos == 0 or cap_neg == 0:
        return default_gamma

    # Subsample the training set for performance.
    pos = pos[torch.rand_like(pos) < sub_sample_size/cap_pos]
    neg = neg[torch.rand_like(neg) < sub_sample_size/cap_neg]

    # All pairwise (negative - positive) differences; > 0 means the pair is
    # ranked the wrong way round.
    diff = (neg.view(1, -1) - pos.view(-1, 1)).flatten()
    ln_Lp = int((diff > 0).sum()) - 1
    correct_margins = (-1.0 * diff[diff < 0]).sort()[0]
    if correct_margins.shape[0] == 0:
        return default_gamma

    left_wing = int(ln_Lp*delta)
    left_wing = max(0, left_wing)
    left_wing = min(correct_margins.shape[0]-1, left_wing)
    return correct_margins[left_wing]


def roc_star_loss(_y_true, y_pred, gamma, _epoch_true, epoch_pred):
    """
    Nearly direct loss function for AUC.
    See article,
    C. Reiss, "Roc-star : An objective function for ROC-AUC that actually works."
    https://github.com/iridiumblue/articles/blob/master/roc_star.md

    Batch positives are compared against a random subsample of last epoch's
    negatives, and batch negatives against a subsample of last epoch's
    positives. Every pair whose (negative - positive + gamma) is positive
    contributes its squared value.

    Args:
        _y_true: `Tensor`. Targets (labels). Float either 0.0 or 1.0.
        y_pred: `Tensor`. Predictions for the batch.
        gamma: `Float` or 0-dim tensor. Gamma, as derived from last epoch.
        _epoch_true: `Tensor`. Targets (labels) from last epoch.
        epoch_pred: `Tensor`. Predictions from last epoch.

    Returns:
        0-dim tensor: sum of squared violations divided by 1000 for each of
        the two comparisons. When the batch holds a single class, the tiny
        stub value `sum(y_pred) * 1e-8` is returned instead.
    """
    # Convert labels to boolean.
    y_true = (_y_true>=0.50)
    epoch_true = (_epoch_true>=0.50)

    # If the batch is either all true or all false return a small stub value.
    if torch.sum(y_true)==0 or torch.sum(y_true) == y_true.shape[0]:
        return torch.sum(y_pred)*1e-8

    pos = y_pred[y_true]
    neg = y_pred[~y_true]

    epoch_pos = epoch_pred[epoch_true]
    epoch_neg = epoch_pred[~epoch_true]

    # Take random subsamples of last epoch's predictions, both classes.
    max_pos = 1000 # Max number of positive training samples
    max_neg = 1000 # Max number of negative training samples
    cap_pos = epoch_pos.shape[0]
    cap_neg = epoch_neg.shape[0]
    # Each class uses its own count for the keep probability (the negative
    # rate used to divide by the positive count); empty classes are skipped
    # so the division cannot hit zero.
    if cap_pos > 0:
        epoch_pos = epoch_pos[torch.rand_like(epoch_pos) < max_pos/cap_pos]
    if cap_neg > 0:
        epoch_neg = epoch_neg[torch.rand_like(epoch_neg) < max_neg/cap_neg]

    # Batch positives against (subsampled) epoch negatives. After the
    # single-class guard above the batch always holds both classes.
    diff2 = (epoch_neg.view(1, -1) - pos.view(-1, 1) + gamma).flatten()
    l2 = diff2[diff2>0]
    m2 = l2 * l2

    # Similarly, batch negatives against (subsampled) epoch positives.
    diff3 = (neg.view(1, -1) - epoch_pos.view(-1, 1) + gamma).flatten()
    l3 = diff3[diff3>0]
    m3 = l3 * l3

    res2 = torch.sum(m2)/max_pos+torch.sum(m3)/max_neg
    res2 = torch.where(torch.isnan(res2), torch.zeros_like(res2), res2)

    return res2



In [None]:
# Resume from the last saved weights: read the checkpoint onto the configured
# device and copy its 'model_state_dict' entry into the model.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
checkpoint = torch.load("/content/drive/MyDrive/AIML/llm_detect/models/model_last_h.pt", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [None]:
num_epochs = 1000
learning_rate = 1e-5
batch_size = 64


optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Define the number of folds for cross-validation
num_folds = 50
# Folds with an index below this value are skipped.
fold_limit= 0


def safe_auc(y_true, y_score):
    """Return ROC-AUC of `y_score` against labels `y_true >= 0.5`.

    Returns -1 when scikit-learn cannot compute the score and raises
    ValueError (for example when only one class is present).
    """
    try:
        return roc_auc_score(y_true>=0.5, y_score)
    except ValueError:
        return -1


# Create StratifiedKFold object for stratified k-fold cross-validation
kf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
test_loader = DataLoader(dataset=custom_dataset_test, batch_size=256)

# roc-star needs the labels/predictions and gamma of a completed epoch. Until
# one exists, a down-scaled BCE warm-up loss is used. Tracking this explicitly
# (instead of testing `epoch>0 or fold>0`) also covers `fold_limit > 0`, where
# the first executed fold used to hit an undefined `epoch_gamma`.
epoch_gamma = None
last_whole_y_t = None
last_whole_y_pred = None

# Training loop for cross-validation
for fold, (train_indices, val_indices) in enumerate(kf.split(train_set, train_set["generated"])):
    if fold <  fold_limit:
        continue

    print(f"Fold {fold + 1}/{num_folds}")

    # Create data loaders for training and validation using your custom DataLoader
    train_loader = DataLoader(dataset=custom_dataset, batch_size=batch_size, sampler=torch.utils.data.SubsetRandomSampler(train_indices))
    # NOTE(review): valid_loader is built but never iterated; the "Valid AUC"
    # below is measured on test_loader.
    valid_loader = DataLoader(dataset=custom_dataset, batch_size=batch_size, sampler=torch.utils.data.SubsetRandomSampler(val_indices))

    # Train your model
    for epoch in range(num_epochs):
        model.train()
        train_loader_iter = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}', leave=True)
        whole_y_pred=np.array([])
        whole_y_t=np.array([])
        for x_batch, y_batch in train_loader_iter:
            optimizer.zero_grad()
            y_pred = model(x_batch.to(device))

            if epoch_gamma is not None:
                loss = roc_star_loss(y_batch.to(device), y_pred, epoch_gamma, last_whole_y_t, last_whole_y_pred)
            else:
                loss = 0.0000001*nn.functional.binary_cross_entropy(y_pred, 1.0*y_batch.to(device))

            loss.backward()
            optimizer.step()

            whole_y_pred = np.append(whole_y_pred, y_pred.detach().cpu().numpy())
            whole_y_t    = np.append(whole_y_t, y_batch.detach().cpu().numpy())

        # Keep this epoch's training labels/predictions for the next epoch's
        # roc-star loss and derive the next gamma from them.
        model.eval()
        last_whole_y_t = torch.tensor(whole_y_t).to(device)
        last_whole_y_pred = torch.tensor(whole_y_pred).to(device)
        epoch_gamma = epoch_update_gamma(last_whole_y_t, last_whole_y_pred, epoch)

        # Evaluation on the held-out test set
        all_test_preds = np.array([])
        all_test_t = np.array([])
        with torch.no_grad():
            valid_loader_iter = tqdm(test_loader, desc='', leave=True)
            for  x_batch, y_batch in valid_loader_iter:
                y_pred = model(x_batch.to(device)).detach().cpu().numpy()
                y_t = y_batch.detach().cpu().numpy()
                all_test_preds=np.concatenate([all_test_preds,y_pred],axis=0)
                all_test_t = np.concatenate([all_test_t,y_t],axis=0)
        valid_auc = safe_auc(all_test_t, all_test_preds)

        train_roc_val = safe_auc(whole_y_t, whole_y_pred)

        # AUC over test and training predictions together. Labels go first and
        # scores second; the previous call passed thresholded predictions as
        # the labels and the labels as the scores.
        combined_y_pred=np.concatenate([all_test_preds, whole_y_pred],axis=0)
        combined_y_t=np.concatenate([all_test_t,whole_y_t],axis=0)
        combined_roc_val = safe_auc(combined_y_t, combined_y_pred)

        print("Gamma = ", epoch_gamma)
        print("Valid AUC = ", valid_auc)
        print("Combined AUC = ", combined_roc_val)
        print("\r Training AUC = ", train_roc_val)

        # Save model

    del train_loader
    gc.collect()
    torch.cuda.empty_cache()
    model.train()


Fold 1/50


Epoch 1/1000: 100%|██████████| 361/361 [00:50<00:00,  7.10it/s]
100%|██████████| 82/82 [00:12<00:00,  6.48it/s]


Gamma =  tensor(0.0039, device='cuda:0', dtype=torch.float64)
Valid AUC =  0.9653511247806931
Combined AUC =  0.7817313731169133
 Training AUC =  0.9981332378260361


Epoch 2/1000: 100%|██████████| 361/361 [00:50<00:00,  7.11it/s]
100%|██████████| 82/82 [00:12<00:00,  6.35it/s]


Gamma =  tensor(0.0036, device='cuda:0', dtype=torch.float64)
Valid AUC =  0.9558707287665594
Combined AUC =  0.7664773191977261
 Training AUC =  0.9984485795061928


Epoch 3/1000: 100%|██████████| 361/361 [00:50<00:00,  7.11it/s]
100%|██████████| 82/82 [00:12<00:00,  6.40it/s]


Gamma =  tensor(0.0030, device='cuda:0', dtype=torch.float64)
Valid AUC =  0.9621917245830461
Combined AUC =  0.7611404183525824
 Training AUC =  0.9986161863721553


Epoch 4/1000: 100%|██████████| 361/361 [00:50<00:00,  7.08it/s]
100%|██████████| 82/82 [00:12<00:00,  6.38it/s]


Gamma =  tensor(0.0030, device='cuda:0', dtype=torch.float64)
Valid AUC =  0.9612112291845937
Combined AUC =  0.7604259302597707
 Training AUC =  0.9985405124148973


Epoch 5/1000:  56%|█████▌    | 203/361 [00:28<00:23,  6.80it/s]