In [None]:
import numpy as np
import pandas as pd
import torch
import seaborn as sns
import matplotlib.pyplot as plt

from transformers import AdamW, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

from transformers import BertForSequenceClassification, BertTokenizer
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
from transformers import XLNetTokenizer, XLNetForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import XLMTokenizer, XLMForSequenceClassification
from transformers import CTRLTokenizer, CTRLForSequenceClassification

In [None]:
# Load the labelled reviews; later cells read the 'review' and 'label' columns of this frame.
data = pd.read_csv('reviews.csv')
# Bare last expression so the notebook renders a preview of the first rows.
data.head()

In [None]:
# Build two parallel lists from the frame: the raw review strings and a binary
# target where the label 'ai' becomes 1 and every other label value becomes 0.
labels = [1 if tag == 'ai' else 0 for tag in data['label']]
texts = data['review'].tolist()

In [None]:
# Split data into training and testing sets (20% held out, fixed seed so the split is reproducible).
# NOTE(review): the saved checkpoints evaluated below were presumably fine-tuned on the training
# side of this exact split; changing test_size / random_state (or adding stratify) would move
# training rows into the test set -- confirm against the training notebook before touching it.
train_texts, test_texts, train_labels, test_labels = train_test_split(texts, labels, test_size=0.2, random_state=42)

## Baseline Models

## Advanced Models

In [None]:
# Define a custom dataset class
class TextDataset(Dataset):
    """Map-style dataset pairing pre-computed tokenizer encodings with labels.

    Args:
        encodings: mapping from field name (e.g. ``input_ids``) to an indexable
            container holding one entry per example. Values may be tensors
            (tokenizer called with ``return_tensors="pt"``) or plain lists.
        labels: indexable container with one label per example.

    Each item is a dict with one tensor per encoding field plus a ``labels`` tensor.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _as_tensor(value):
        """Return an independent tensor copy of ``value``.

        ``torch.tensor(existing_tensor)`` emits a copy-construct UserWarning on
        every call, so tensors are copied with ``clone().detach()`` instead;
        plain Python / numpy values still go through ``torch.tensor``.
        """
        if isinstance(value, torch.Tensor):
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        item = {key: self._as_tensor(values[idx]) for key, values in self.encodings.items()}
        item['labels'] = self._as_tensor(self.labels[idx])
        return item

    def __len__(self):
        # The label container defines the dataset size.
        return len(self.labels)

### BERT

In [None]:
# Evaluate the saved BERT checkpoint on the held-out test split.
loaded_model = BertForSequenceClassification.from_pretrained("./bert_model")
loaded_tokenizer = BertTokenizer.from_pretrained("./bert_model")

test_encodings = loaded_tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
test_dataset = TextDataset(test_encodings, test_labels)


def compute_metrics(eval_pred):
    """Convert the Trainer's predictions / label_ids into accuracy, precision, recall and F1."""
    logits, gold = eval_pred.predictions, eval_pred.label_ids
    if isinstance(logits, tuple):  # some heads return extra outputs; the logits come first
        logits = logits[0]
    predicted = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(gold, predicted, average='binary')
    return {'accuracy': accuracy_score(gold, predicted), 'precision': precision, 'recall': recall, 'f1': f1}


# Evaluation-only settings: trainer.evaluate() never uses the epoch / warmup / weight-decay /
# eval-schedule / checkpoint options, so the copy-pasted training hyperparameters are omitted.
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    per_device_eval_batch_size=16,   # Batch size for evaluation
    logging_dir='./logs',            # Directory for storing logs
    log_level='info',                # Log level on the main process
    log_level_replica='info',        # Log level on replica processes
)

trainer = Trainer(
    model=loaded_model,
    args=training_args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,  # without this, evaluate() reports only loss and timing
)
test_results = trainer.evaluate()
print(test_results)

### RoBERTa

In [None]:
# Evaluate the saved RoBERTa checkpoint on the held-out test split.
loaded_model = RobertaForSequenceClassification.from_pretrained("./roberta_model")
loaded_tokenizer = RobertaTokenizer.from_pretrained("./roberta_model")

test_encodings = loaded_tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
test_dataset = TextDataset(test_encodings, test_labels)


def compute_metrics(eval_pred):
    """Convert the Trainer's predictions / label_ids into accuracy, precision, recall and F1."""
    logits, gold = eval_pred.predictions, eval_pred.label_ids
    if isinstance(logits, tuple):  # some heads return extra outputs; the logits come first
        logits = logits[0]
    predicted = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(gold, predicted, average='binary')
    return {'accuracy': accuracy_score(gold, predicted), 'precision': precision, 'recall': recall, 'f1': f1}


# Evaluation-only settings: trainer.evaluate() never uses the epoch / warmup / weight-decay /
# eval-schedule / checkpoint options, so the copy-pasted training hyperparameters are omitted.
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    per_device_eval_batch_size=16,   # Batch size for evaluation
    logging_dir='./logs',            # Directory for storing logs
    log_level='info',                # Log level on the main process
    log_level_replica='info',        # Log level on replica processes
)

trainer = Trainer(
    model=loaded_model,
    args=training_args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,  # without this, evaluate() reports only loss and timing
)
test_results = trainer.evaluate()
print(test_results)

### GPT-2

In [None]:
# Evaluate the saved GPT-2 checkpoint on the held-out test split.
loaded_model = GPT2ForSequenceClassification.from_pretrained("./gpt2_model")
loaded_tokenizer = GPT2Tokenizer.from_pretrained("./gpt2_model")

# NOTE(review): padding=True needs a pad token; this assumes one was saved with the
# tokenizer/model in ./gpt2_model -- confirm.
test_encodings = loaded_tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
test_dataset = TextDataset(test_encodings, test_labels)


def compute_metrics(eval_pred):
    """Convert the Trainer's predictions / label_ids into accuracy, precision, recall and F1."""
    logits, gold = eval_pred.predictions, eval_pred.label_ids
    if isinstance(logits, tuple):  # some heads return extra outputs; the logits come first
        logits = logits[0]
    predicted = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(gold, predicted, average='binary')
    return {'accuracy': accuracy_score(gold, predicted), 'precision': precision, 'recall': recall, 'f1': f1}


# Evaluation-only settings: trainer.evaluate() never uses the epoch / warmup / weight-decay /
# eval-schedule / checkpoint options, so the copy-pasted training hyperparameters are omitted.
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    per_device_eval_batch_size=16,   # Batch size for evaluation
    logging_dir='./logs',            # Directory for storing logs
    log_level='info',                # Log level on the main process
    log_level_replica='info',        # Log level on replica processes
)

trainer = Trainer(
    model=loaded_model,
    args=training_args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,  # without this, evaluate() reports only loss and timing
)
test_results = trainer.evaluate()
print(test_results)

### XLNet

In [None]:
# Evaluate the saved XLNet checkpoint on the held-out test split.
loaded_model = XLNetForSequenceClassification.from_pretrained("./xlnet_model")
loaded_tokenizer = XLNetTokenizer.from_pretrained("./xlnet_model")

test_encodings = loaded_tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
test_dataset = TextDataset(test_encodings, test_labels)


def compute_metrics(eval_pred):
    """Convert the Trainer's predictions / label_ids into accuracy, precision, recall and F1."""
    logits, gold = eval_pred.predictions, eval_pred.label_ids
    if isinstance(logits, tuple):  # some heads return extra outputs; the logits come first
        logits = logits[0]
    predicted = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(gold, predicted, average='binary')
    return {'accuracy': accuracy_score(gold, predicted), 'precision': precision, 'recall': recall, 'f1': f1}


# Evaluation-only settings: trainer.evaluate() never uses the epoch / warmup / weight-decay /
# eval-schedule / checkpoint options, so the copy-pasted training hyperparameters are omitted.
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    per_device_eval_batch_size=16,   # Batch size for evaluation
    logging_dir='./logs',            # Directory for storing logs
    log_level='info',                # Log level on the main process
    log_level_replica='info',        # Log level on replica processes
)

trainer = Trainer(
    model=loaded_model,
    args=training_args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,  # without this, evaluate() reports only loss and timing
)
test_results = trainer.evaluate()
print(test_results)

### Grover

In [None]:
# Evaluate the saved Grover checkpoint on the held-out test split.
loaded_model = AutoModelForSequenceClassification.from_pretrained("./grover_model")
loaded_tokenizer = AutoTokenizer.from_pretrained("./grover_model")

test_encodings = loaded_tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
test_dataset = TextDataset(test_encodings, test_labels)


def compute_metrics(eval_pred):
    """Convert the Trainer's predictions / label_ids into accuracy, precision, recall and F1."""
    logits, gold = eval_pred.predictions, eval_pred.label_ids
    if isinstance(logits, tuple):  # some heads return extra outputs; the logits come first
        logits = logits[0]
    predicted = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(gold, predicted, average='binary')
    return {'accuracy': accuracy_score(gold, predicted), 'precision': precision, 'recall': recall, 'f1': f1}


# Evaluation-only settings: trainer.evaluate() never uses the epoch / warmup / weight-decay /
# eval-schedule / checkpoint options, so the copy-pasted training hyperparameters are omitted.
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    per_device_eval_batch_size=16,   # Batch size for evaluation
    logging_dir='./logs',            # Directory for storing logs
    log_level='info',                # Log level on the main process
    log_level_replica='info',        # Log level on replica processes
)

trainer = Trainer(
    model=loaded_model,
    args=training_args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,  # without this, evaluate() reports only loss and timing
)
test_results = trainer.evaluate()
print(test_results)

### XLM

In [None]:
# Evaluate the saved XLM checkpoint on the held-out test split.
loaded_model = XLMForSequenceClassification.from_pretrained("./xlm_model")
loaded_tokenizer = XLMTokenizer.from_pretrained("./xlm_model")

test_encodings = loaded_tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
test_dataset = TextDataset(test_encodings, test_labels)


def compute_metrics(eval_pred):
    """Convert the Trainer's predictions / label_ids into accuracy, precision, recall and F1."""
    logits, gold = eval_pred.predictions, eval_pred.label_ids
    if isinstance(logits, tuple):  # some heads return extra outputs; the logits come first
        logits = logits[0]
    predicted = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(gold, predicted, average='binary')
    return {'accuracy': accuracy_score(gold, predicted), 'precision': precision, 'recall': recall, 'f1': f1}


# Evaluation-only settings: trainer.evaluate() never uses the epoch / warmup / weight-decay /
# eval-schedule / checkpoint options, so the copy-pasted training hyperparameters are omitted.
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    per_device_eval_batch_size=16,   # Batch size for evaluation
    logging_dir='./logs',            # Directory for storing logs
    log_level='info',                # Log level on the main process
    log_level_replica='info',        # Log level on replica processes
)

trainer = Trainer(
    model=loaded_model,
    args=training_args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,  # without this, evaluate() reports only loss and timing
)
test_results = trainer.evaluate()
print(test_results)

### CTRL

In [None]:
# Evaluate the saved CTRL checkpoint on the held-out test split.
loaded_model = CTRLForSequenceClassification.from_pretrained("./ctrl_model")
loaded_tokenizer = CTRLTokenizer.from_pretrained("./ctrl_model")

# NOTE(review): padding=True needs a pad token; this assumes one was saved with the
# tokenizer/model in ./ctrl_model -- confirm.
test_encodings = loaded_tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
test_dataset = TextDataset(test_encodings, test_labels)


def compute_metrics(eval_pred):
    """Convert the Trainer's predictions / label_ids into accuracy, precision, recall and F1."""
    logits, gold = eval_pred.predictions, eval_pred.label_ids
    if isinstance(logits, tuple):  # some heads return extra outputs; the logits come first
        logits = logits[0]
    predicted = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(gold, predicted, average='binary')
    return {'accuracy': accuracy_score(gold, predicted), 'precision': precision, 'recall': recall, 'f1': f1}


# Evaluation-only settings: trainer.evaluate() never uses the epoch / warmup / weight-decay /
# eval-schedule / checkpoint options, so the copy-pasted training hyperparameters are omitted.
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    per_device_eval_batch_size=16,   # Batch size for evaluation
    logging_dir='./logs',            # Directory for storing logs
    log_level='info',                # Log level on the main process
    log_level_replica='info',        # Log level on replica processes
)

trainer = Trainer(
    model=loaded_model,
    args=training_args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,  # without this, evaluate() reports only loss and timing
)
test_results = trainer.evaluate()
print(test_results)

## Advanced Models with Adversarial Training

In [None]:
# Load Data
class TextDataset(Dataset):
    """CSV-backed dataset that tokenizes one review per access.

    The file at ``filepath`` is read with pandas and must provide a ``review``
    column (text) and a ``label`` column; the label ``'human'`` is encoded as 0
    and every other value as 1. Indexing returns ``(input_ids, label)``.

    NOTE: this definition replaces any earlier ``TextDataset`` in the kernel.
    """

    def __init__(self, tokenizer, filepath):
        frame = pd.read_csv(filepath)
        self.data = frame
        self.tokenizer = tokenizer
        self.texts = frame['review'].tolist()
        # Integer-encode the labels once, up front.
        encoded_labels = [0 if tag == 'human' else 1 for tag in frame['label']]
        self.labels = torch.tensor(encoded_labels)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoded = self.tokenizer(
            self.texts[idx],
            return_tensors='pt',
            max_length=512,
            truncation=True,
            padding='max_length',
        )
        # The tokenizer returns a batch of one; drop that leading dimension.
        return encoded['input_ids'].squeeze(0), self.labels[idx]

In [None]:
def evaluate_model(model, dataloader):
    """Run ``model`` over ``dataloader`` and compute binary classification metrics.

    Args:
        model: torch module called as ``model(input_ids=...)`` whose output
            exposes a ``.logits`` attribute.
        dataloader: iterable yielding ``(input_ids, labels)`` tensor pairs.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, conf_matrix)``. Precision,
        recall and F1 use ``average='binary'``. ``conf_matrix`` is always 2x2
        with rows/columns ordered as label 0 then label 1.
    """
    model.eval()
    # Run on whichever device the model already lives on, so a model moved to GPU works too.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    true_labels, predictions = [], []

    # NOTE(review): only input_ids are forwarded (no attention mask), so padded positions are
    # not masked -- confirm this matches how the checkpoints were trained before changing it.
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            logits = model(input_ids=batch_inputs.to(device)).logits
            predicted_labels = torch.argmax(logits, dim=1)
            # Move results back to the CPU before handing them to scikit-learn.
            predictions.extend(predicted_labels.cpu().tolist())
            true_labels.extend(batch_labels.cpu().tolist())

    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='binary')
    # Pin the label order so the matrix stays 2x2 even if one class never appears.
    conf_matrix = confusion_matrix(true_labels, predictions, labels=[0, 1])

    return accuracy, precision, recall, f1, conf_matrix

### BERT

In [None]:
# Load the saved adversarially-trained BERT model and tokenizer
output_dir = './bert_adv_model'
tokenizer = BertTokenizer.from_pretrained(output_dir)
model = BertForSequenceClassification.from_pretrained(output_dir)

# NOTE(review): this scores every row of reviews.csv, not only the held-out split; if the
# checkpoint was fine-tuned on any of these rows the metrics are optimistic -- confirm.
test_filepath = 'reviews.csv'
test_dataset = TextDataset(tokenizer, test_filepath)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Evaluate the model
accuracy, precision, recall, f1, conf_matrix = evaluate_model(model, test_loader)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print('Confusion Matrix:')
# confusion_matrix orders classes by label value and TextDataset encodes human=0 / ai=1,
# so index 0 is Human and index 1 is AI (the previous ['AI', 'Human'] ticks were swapped).
class_names = ['Human', 'AI']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Test Set')
plt.show()

### RoBERTa

In [None]:
# Load the saved adversarially-trained RoBERTa model and tokenizer
output_dir = './roberta_adv_model'
tokenizer = RobertaTokenizer.from_pretrained(output_dir)
model = RobertaForSequenceClassification.from_pretrained(output_dir)

# NOTE(review): this scores every row of reviews.csv, not only the held-out split; if the
# checkpoint was fine-tuned on any of these rows the metrics are optimistic -- confirm.
test_filepath = 'reviews.csv'
test_dataset = TextDataset(tokenizer, test_filepath)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Evaluate the model
accuracy, precision, recall, f1, conf_matrix = evaluate_model(model, test_loader)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print('Confusion Matrix:')
# confusion_matrix orders classes by label value and TextDataset encodes human=0 / ai=1,
# so index 0 is Human and index 1 is AI (the previous ['AI', 'Human'] ticks were swapped).
class_names = ['Human', 'AI']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Test Set')
plt.show()

### GPT-2

In [None]:
# Load the saved adversarially-trained GPT-2 model and tokenizer
output_dir = './gpt2_adv_model'
tokenizer = GPT2Tokenizer.from_pretrained(output_dir)
model = GPT2ForSequenceClassification.from_pretrained(output_dir)

# NOTE(review): this scores every row of reviews.csv, not only the held-out split; if the
# checkpoint was fine-tuned on any of these rows the metrics are optimistic -- confirm.
test_filepath = 'reviews.csv'
test_dataset = TextDataset(tokenizer, test_filepath)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Evaluate the model
accuracy, precision, recall, f1, conf_matrix = evaluate_model(model, test_loader)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print('Confusion Matrix:')
# confusion_matrix orders classes by label value and TextDataset encodes human=0 / ai=1,
# so index 0 is Human and index 1 is AI (the previous ['AI', 'Human'] ticks were swapped).
class_names = ['Human', 'AI']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Test Set')
plt.show()

### XLNet

In [None]:
# Load the saved adversarially-trained XLNet model and tokenizer
output_dir = './xlnet_adv_model'
tokenizer = XLNetTokenizer.from_pretrained(output_dir)
model = XLNetForSequenceClassification.from_pretrained(output_dir)

# NOTE(review): this scores every row of reviews.csv, not only the held-out split; if the
# checkpoint was fine-tuned on any of these rows the metrics are optimistic -- confirm.
test_filepath = 'reviews.csv'
test_dataset = TextDataset(tokenizer, test_filepath)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Evaluate the model
accuracy, precision, recall, f1, conf_matrix = evaluate_model(model, test_loader)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print('Confusion Matrix:')
# confusion_matrix orders classes by label value and TextDataset encodes human=0 / ai=1,
# so index 0 is Human and index 1 is AI (the previous ['AI', 'Human'] ticks were swapped).
class_names = ['Human', 'AI']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Test Set')
plt.show()

### Grover

In [None]:
# Load the saved adversarially-trained Grover model and tokenizer
output_dir = './grover_adv_model'
tokenizer = AutoTokenizer.from_pretrained(output_dir)
model = AutoModelForSequenceClassification.from_pretrained(output_dir)

# NOTE(review): this scores every row of reviews.csv, not only the held-out split; if the
# checkpoint was fine-tuned on any of these rows the metrics are optimistic -- confirm.
test_filepath = 'reviews.csv'
test_dataset = TextDataset(tokenizer, test_filepath)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Evaluate the model
accuracy, precision, recall, f1, conf_matrix = evaluate_model(model, test_loader)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print('Confusion Matrix:')
# confusion_matrix orders classes by label value and TextDataset encodes human=0 / ai=1,
# so index 0 is Human and index 1 is AI (the previous ['AI', 'Human'] ticks were swapped).
class_names = ['Human', 'AI']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Test Set')
plt.show()

### XLM

In [None]:
# Load the saved adversarially-trained XLM model and tokenizer
output_dir = './xlm_adv_model'
tokenizer = XLMTokenizer.from_pretrained(output_dir)
model = XLMForSequenceClassification.from_pretrained(output_dir)

# NOTE(review): this scores every row of reviews.csv, not only the held-out split; if the
# checkpoint was fine-tuned on any of these rows the metrics are optimistic -- confirm.
test_filepath = 'reviews.csv'
test_dataset = TextDataset(tokenizer, test_filepath)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Evaluate the model
accuracy, precision, recall, f1, conf_matrix = evaluate_model(model, test_loader)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print('Confusion Matrix:')
# confusion_matrix orders classes by label value and TextDataset encodes human=0 / ai=1,
# so index 0 is Human and index 1 is AI (the previous ['AI', 'Human'] ticks were swapped).
class_names = ['Human', 'AI']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Test Set')
plt.show()

### CTRL

In [None]:
# Load the saved adversarially-trained CTRL model and tokenizer
output_dir = './ctrl_adv_model'
tokenizer = CTRLTokenizer.from_pretrained(output_dir)
model = CTRLForSequenceClassification.from_pretrained(output_dir)

# NOTE(review): this scores every row of reviews.csv, not only the held-out split; if the
# checkpoint was fine-tuned on any of these rows the metrics are optimistic -- confirm.
test_filepath = 'reviews.csv'
test_dataset = TextDataset(tokenizer, test_filepath)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Evaluate the model
accuracy, precision, recall, f1, conf_matrix = evaluate_model(model, test_loader)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print('Confusion Matrix:')
# confusion_matrix orders classes by label value and TextDataset encodes human=0 / ai=1,
# so index 0 is Human and index 1 is AI (the previous ['AI', 'Human'] ticks were swapped).
class_names = ['Human', 'AI']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Test Set')
plt.show()