In [None]:
%%capture
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install datasets

In [None]:
import numpy as np 
import pandas as pd 

import re
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,

    )
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

from datasets import Dataset

from huggingface_hub import notebook_login

from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer

In [None]:
# Ask PyTorch's CUDA caching allocator to release memory it holds but is not
# currently using, so the model load further down starts from as much free GPU
# memory as possible.
torch.cuda.empty_cache()

In [None]:
from huggingface_hub import login

# Authenticate against the Hugging Face Hub without storing a secret in the
# notebook: the access token is read from the HF_TOKEN environment variable.
# When the variable is unset, `token=None` is passed and `login()` asks for the
# token interactively instead.
# NOTE(review): an access token used to be hardcoded in this cell; it should be
# treated as leaked and revoked in the Hugging Face account settings.
login(token=os.environ.get("HF_TOKEN"))

In [6]:
# Tokenizer of the base checkpoint that is fine-tuned in this notebook.
model_id = "mistralai/Mistral-Nemo-Instruct-2407"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Reuse the EOS token for padding when the tokenizer ships without a pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Cap the tokenizer's maximum sequence length at 1024 tokens; smaller limits
# are left as they are.
tokenizer.model_max_length = min(tokenizer.model_max_length, 1024)

In [None]:
# Task description placed in the "### Instruction:" slot of every training prompt.
instruction = " ".join([
    "Determine whether the following text was generated by a human or an AI.",
    "If the text is human-generated, generate '1'; if AI-generated, generate '0'.",
    "Do not provide any explanation, just generate '1' or '0'.",
])

# Training prompt layout. The three positional slots are filled, in order, with
# the instruction, the text to classify and the gold label.
classification_prompt = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
{}

### Text:
{}

### Label:
{}"""


def format_prompt_template(example, tokenizer):
    """Rewrite one dataset row into the supervised training prompt.

    Args:
        example: Mapping with a "text" entry (the passage to classify) and a
            "label" entry (the gold label inserted after "### Label:").
        tokenizer: Accepted so the function can be called with the
            ``fn_kwargs`` used by the ``map`` calls; it is not used here.

    Returns:
        The same ``example`` object, with its "text" entry replaced in place by
        the fully formatted prompt. All other entries are left untouched.
    """
    example["text"] = classification_prompt.format(
        instruction, example["text"], example["label"]
    )
    return example

# Load the MAGE dataset (adjust the dataset path/source as needed).
dataset = load_dataset("yaful/MAGE")

# The training split is converted to pandas because the class-balanced
# sampling below is done with DataFrame operations.
df = dataset['train'].to_pandas()

# Target size: 10% of the training split, shared equally between the labels.
sample_fraction = 0.1
sample_size = int(len(df) * sample_fraction)
label_counts = df['label'].value_counts()
# Never ask for more rows than the rarest class contains: `DataFrame.sample`
# raises a ValueError when `n` exceeds the group size.
min_class_size = min(sample_size // len(label_counts), int(label_counts.min()))

# Draw the same number of rows from every label. Each group is sampled
# explicitly and the pieces are concatenated, which keeps the `label` column in
# the result. `groupby(...).apply(...)` on the grouping column is deprecated in
# recent pandas and is announced to stop passing that column to the function.
# Every group is sampled with its own seed of 42, in sorted label order.
sampled_df = pd.concat(
    [group.sample(n=min_class_size, random_state=42) for _, group in df.groupby('label')]
).reset_index(drop=True)

# Back to a Hugging Face Dataset for the trainer.
sampled_train_dataset = Dataset.from_pandas(sampled_df)

# Hold out 2% of the sampled rows for validation (seeded for reproducibility).
train_test_split = sampled_train_dataset.train_test_split(test_size=0.02, seed=42)
train_dataset = train_test_split['train']
validation_dataset = train_test_split['test']

# Only the text and the label are needed downstream; every other column is
# dropped while the prompts are built.
column_names = list(train_dataset.features)
relevant_columns = [col for col in column_names if col in ["text", "label"]]
columns_to_drop = [col for col in column_names if col not in relevant_columns]

# Turn every row into a full training prompt.
# NOTE(review): the inference cell further down redefines
# `format_prompt_template` with a different signature, so this cell fails when
# it is re-run after that one without restarting the kernel.
train_dataset = train_dataset.map(
    format_prompt_template,
    fn_kwargs={"tokenizer": tokenizer},
    remove_columns=columns_to_drop,
    num_proc=4,
)

validation_dataset = validation_dataset.map(
    format_prompt_template,
    fn_kwargs={"tokenizer": tokenizer},
    remove_columns=columns_to_drop,
    num_proc=4,
)

In [None]:
# Compute dtype used by the 4-bit layers during the forward/backward pass.
torch_dtype = torch.float16
# Attention backend passed to `from_pretrained` below. Keeping it in one
# variable ensures the name always reflects the backend that is really loaded.
attn_implementation = "sdpa"

# QLoRA config: 4-bit NF4 weights with double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load the quantized base model and let `device_map="auto"` decide where its
# layers are placed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation,
)

In [None]:
# LoRA config: rank-16 adapters on the attention projections and the MLP gate.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

# A quantized base model has to be prepared for k-bit training before the
# adapters are attached; this is the step PEFT documents for QLoRA setups.
# NOTE(review): with its default arguments this also turns on gradient
# checkpointing, which trades some speed for memory. Confirm that is acceptable.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

# With a per-device batch of 1 and 2 accumulation steps, each optimizer step
# covers 2 sequences per device. `max_steps=5000` also determines the name of
# the last checkpoint (./results/checkpoint-5000) that the evaluation cell loads.
training_arguments = TrainingArguments(
    output_dir="./results",
    do_eval=True,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    #num_train_epochs=1,
    max_steps=5000,
    evaluation_strategy="steps",
    lr_scheduler_type="cosine",
    eval_steps=500,
    logging_steps=500,
    logging_strategy="steps",
    warmup_steps=5,
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True)

# The model is already a PEFT model, so no `peft_config` is handed to the
# trainer. Passing both is redundant and could be read as a second wrapping.
# NOTE(review): the keyword names used here (`max_seq_length`,
# `dataset_text_field`, `tokenizer`, and `evaluation_strategy` above) depend on
# the installed trl/transformers versions. The install cell does not pin them,
# so confirm the versions this notebook is meant to run against.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments)

In [None]:
# Run the fine-tuning loop with the trainer configured above and keep the
# object it returns for later inspection.
train_result = trainer.train()

In [11]:
# Persist the trainer's state.
# NOTE(review): this call does not appear to write model weights; the
# evaluation cell loads ./results/checkpoint-5000, which is presumably written
# by the trainer's periodic checkpointing. Confirm.
trainer.save_state()

In [None]:
# Checkpoint directory that the fine-tuned model and tokenizer are loaded from.
output_dir = "./results/checkpoint-5000"

# 4-bit NF4 quantization with double quantization and float16 compute.
torch_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
)

# Tokenizer stored with the checkpoint: pad with EOS and cap the maximum
# sequence length at 1024 tokens.
tokenizer = AutoTokenizer.from_pretrained(output_dir)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = min(tokenizer.model_max_length, 1024)

# Load the checkpoint in 4-bit and let `device_map="auto"` place its layers.
model = AutoModelForCausalLM.from_pretrained(
    output_dir,
    device_map="auto",
    quantization_config=bnb_config,
)

In [None]:
# Load the MAGE test split as a DataFrame. `to_pandas()` converts the split in
# one go, matching how the training split is converted earlier in the notebook.
dataset = load_dataset("yaful/MAGE")
df_test = dataset['test'].to_pandas()

# Split the test set by label so each label can be sampled at the same rate.
df_test_0 = df_test[df_test['label'] == 0]
df_test_1 = df_test[df_test['label'] == 1]

# Fraction of each label's rows to keep: 0.1 means 10% of the test data.
proportion = 0.1

# Sample the same fraction from both labels (seeded for reproducibility).
df_test_0_sample = df_test_0.sample(frac=proportion, random_state=42)
df_test_1_sample = df_test_1.sample(frac=proportion, random_state=42)

# Combine the two samples.
df_test_combined = pd.concat([df_test_0_sample, df_test_1_sample]).reset_index(drop=True)

# Shuffle so the labels are interleaved rather than stored as two blocks.
df_test_combined = df_test_combined.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Load the MAGE dataset; its 'test' split is filtered further down in this cell
# to build an evaluation set of short texts.
dataset = load_dataset("yaful/MAGE")

# Predicate used to keep only short examples.
def filter_by_length(example):
    """Return True when the example's text has between 10 and 50 characters.

    Both bounds are inclusive; the length is measured in characters of
    ``example['text']``.
    """
    return len(example['text']) in range(10, 51)

# Keep only the short examples of the 'test' split, then draw 24 rows per label
# (seeded so the selection is reproducible).
shorter_test_dataset = dataset['test'].filter(filter_by_length)
df_short_test = (
    pd.DataFrame(shorter_test_dataset)
    .groupby('label')
    .sample(24, random_state=42)
)

In [9]:
# Additional evaluation data read from CSV (presumably an out-of-distribution
# set, judging by the name; confirm). The location can be overridden through
# the OOD_DATASET_PATH environment variable so the notebook is not tied to one
# machine's directory layout; the default is the original path.
df_ood = pd.read_csv(
    os.environ.get("OOD_DATASET_PATH", "/home/ec2-user/data-analysis/ood_dataset.csv")
)

In [None]:
# Inference-time task description. It is stricter than the training one: it
# asks for the bare label and for a best guess in case of doubt.
instruction = " ".join([
    "Determine whether the following text was generated by a human or an AI.",
    "If the text is human-generated, generate solely and only label '1'; if AI-generated, generate solely and only label '0'.",
    "Do not provide any explanation, just generate '1' or '0'. If you were in doubt please provide most likely label.",
])

# Inference prompt layout. The slots are filled with the instruction, the text
# and a free-form suffix. There is no "### Label:" header here, so the model
# has to produce that header itself.
classification_prompt = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
{}

### Text:
{}

{}"""

# Captures the single digit that follows "Label:" in the model's output.
label_pattern = r'Label:\s*(\d)'


# NOTE(review): this shadows the training-time `format_prompt_template(example,
# tokenizer)` defined earlier in the notebook; the two signatures differ.
def format_prompt_template(text, instruction):
    """Build the inference prompt for one text.

    Args:
        text: Passage to classify.
        instruction: Task description inserted after "### Instruction:".

    Returns:
        The formatted prompt string, with an empty suffix after the text block.
    """
    empty_suffix = ""
    return classification_prompt.format(instruction, text, empty_suffix)

def _predict_label(text):
    """Classify one text with the fine-tuned model.

    Returns the digit the model produced after "Label:" as a string, or "N/A"
    when the generated continuation contains no such label.
    """
    formatted_prompt = format_prompt_template(text, instruction)

    # Tokenize and move the tensors to the device the model lives on, so that
    # `generate` does not receive CPU inputs for a GPU model.
    # NOTE(review): `truncation=True` cuts over-long prompts at the tokenizer's
    # maximum length; depending on the truncation side this may drop the end of
    # the prompt. Confirm this is acceptable for long texts.
    inputs = tokenizer(
        formatted_prompt, return_tensors='pt', padding=True, truncation=True
    ).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=5, pad_token_id=tokenizer.eos_token_id)

    # `generate` returns prompt + continuation. Only the continuation is parsed:
    # otherwise a "Label: <digit>" that happens to occur inside the input text
    # would be picked up as the prediction.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    match = re.search(label_pattern, completion)
    return match.group(1) if match else "N/A"


# NOTE(review): `df` is the full MAGE *training* frame created in the
# data-preparation cell. The evaluation frames built above
# (`df_test_combined`, `df_short_test`, `df_ood`) are never used. Confirm which
# frame is meant to be scored before trusting the results.
for position, (index, row) in enumerate(df.iterrows()):
    label = _predict_label(row['text'])

    # Stored row by row so partial results survive an interrupted run.
    df.loc[index, "training_results"] = label

    # Report progress occasionally instead of printing every prompt.
    if position % 100 == 0:
        print(f"row {position} (index {index}): predicted label {label}")