# **Unmasking the creator: Direct Classification Method**

### **Loading datasets**

In [None]:
from datasets import Dataset, load_dataset, concatenate_datasets
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import os

In [None]:
# Load the "train" split of the GPT-wiki-intro dataset.
wiki_intro_dataset = load_dataset(
    "aadityaubhat/GPT-wiki-intro",
    split="train",
)
wiki_intro_dataset

In [None]:
# Read the cleaned HC3 CSV from the Kaggle input directory.
hc3_csv_path = "/kaggle/input/hc3-dataset/hc3_cleaned_2.csv"
hc3_df = pd.read_csv(hc3_csv_path)
hc3_df.head()

In [None]:
def stripping(text):
    """Return ``text`` with leading and trailing whitespace removed."""
    return text.strip()

# Trim surrounding whitespace from every generated text.
hc3_df['generated_texts'] = hc3_df['generated_texts'].apply(stripping)

### **Removing unnecessary columns and creating a Dataset object**

In [None]:
# Drop every column except the two text fields and the prompt.
columns_to_keep = ("wiki_intro", "generated_intro", "prompt")
cols_to_remove = list(wiki_intro_dataset.column_names)
for kept in columns_to_keep:
    cols_to_remove.remove(kept)  # raises ValueError if an expected column is absent
wiki_intro_dataset = wiki_intro_dataset.remove_columns(cols_to_remove)
wiki_intro_dataset

In [None]:
# Rename the text columns to the names used for the HC3 frame.
for old_name, new_name in (
    ("wiki_intro", "human_texts"),
    ("generated_intro", "generated_texts"),
):
    wiki_intro_dataset = wiki_intro_dataset.rename_column(old_name, new_name)
wiki_intro_dataset

In [None]:
# Convert to pandas and tag every row with the corpus it came from.
wiki_intro_df = wiki_intro_dataset.to_pandas()
wiki_intro_df = wiki_intro_df.assign(source="wiki-intro")
wiki_intro_df.head()

In [None]:
# Rename the human-answer column so it matches the wiki-intro frame.
hc3_df = hc3_df.rename(columns={'human_answer': 'human_texts'})
hc3_df.head()

In [None]:
# Stack the HC3 and wiki-intro frames row-wise, then shuffle.
# The shuffle is seeded so that re-running this cell yields the same row order.
dataset_df = pd.concat([hc3_df, wiki_intro_df], axis=0)
dataset_df = dataset_df.sample(frac=1, random_state=42).reset_index(drop=True)
dataset_df.head()

In [None]:
# Split each paired row into two labelled samples that share one schema
# (prompt, text, source, label): label 0 = human-written, label 1 = generated.
dataset_human = (
    dataset_df[["prompt", "human_texts", "source"]]
    .rename(columns={'human_texts': 'text'})
    .assign(label=0)
)
dataset_ai = (
    dataset_df[["prompt", "generated_texts", "source"]]
    .rename(columns={'generated_texts': 'text'})
    .assign(label=1)
)

df = pd.concat([dataset_human, dataset_ai], ignore_index=True)
# Shuffle the rows; seeded so the order is repeatable across runs.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
# df.to_csv("wiki_and_hc3.csv")

In [None]:
# Hold out 5% of the rows for testing, keeping the hc3 / wiki-intro proportions
# equal in both splits (stratified on `source`).
# random_state is fixed so the split that gets pushed to the Hub can be regenerated.
df_train, df_test = train_test_split(
    df,
    test_size=0.05,
    stratify=df['source'],
    random_state=42,
)
df_train = df_train.sample(frac=1, random_state=42).reset_index(drop=True)
df_test = df_test.sample(frac=1, random_state=42).reset_index(drop=True)
print(df_train.shape)
print(df_test.shape)

In [None]:
# Class balance (0 = human, 1 = generated) within each split.
df_train["label"].value_counts(), df_test["label"].value_counts()

In [None]:
# Wrap both pandas splits as Dataset objects.
dataset_train, dataset_test = (
    Dataset.from_pandas(frame) for frame in (df_train, df_test)
)

In [None]:
# !pip install huggingface_hub --quiet
# !pip install -U accelerate --quiet

# !pip install -U huggingface-hub --quiet
# !pip install datasets==2.13 --quiet

In [None]:
# Authenticate with the Hugging Face Hub without writing a token into the notebook.
# If HF_TOKEN is set in the environment it is used directly; otherwise the
# interactive login widget is shown.
from huggingface_hub import login, notebook_login

hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    notebook_login()

In [None]:
# Upload each split to the same Hub repository under its own split name.
for split_name, split_dataset in (("train", dataset_train), ("test", dataset_test)):
    split_dataset.push_to_hub("hc3-wiki-intro-dataset", split=split_name)

# **Training Models**

## **Logistic Regression**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

In [None]:
# Features are the raw texts and targets are the 0/1 labels.
# The split frames are named df_train / df_test in this notebook; the previous
# train_df / test_df names were never defined and raised NameError on a fresh kernel.
X_train = df_train['text']
X_test = df_test['text']

y_train = df_train['label']
y_test = df_test['label']

In [None]:
# Fit the TF-IDF vectorizer on the training texts only, then apply the fitted
# vectorizer to the test texts.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
# C = [0.7, 0.5, 0.3]
# max_iter = [100, 200, 300]

# for c in C:
#     for iters in max_iter:
#         log_reg = LogisticRegression(C=c, max_iter=iters)
#         log_reg.fit(X_train_tfidf, y_train)
#         y_pred_train = log_reg.predict(X_train_tfidf)
#         y_pred_test = log_reg.predict(X_test_tfidf)
#         print(f"for C={c} and max_iter={iters}")
#         print("ACCURACY")
#         print(f"Train accuracy: {accuracy_score(y_train, y_pred_train)}")
#         print(f"Test accuracy: {accuracy_score(y_test, y_pred_test)}")
#         print("\nF1 SCORE")
#         print(f"Train F1: {f1_score(y_train, y_pred_train)}")
#         print(f"Test F1: {f1_score(y_test, y_pred_test)}")
#         print("\n\n")

## **SGD Classifier**

In [None]:
from sklearn.linear_model import SGDClassifier

In [None]:
# Linear classifier trained with SGD on the log loss, using the TF-IDF features.
clf = SGDClassifier(
    loss='log_loss',
    alpha=0.0001,
    max_iter=500,
    tol=None,
).fit(X_train_tfidf, y_train)

y_pred_train = clf.predict(X_train_tfidf)
y_pred_test = clf.predict(X_test_tfidf)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)
print(f"Train accuracy: {train_accuracy}")
print(f"Test accuracy: {test_accuracy}")

## **Fine Tuning RoBERTa**

In [None]:
!pip install transformers --quiet
!pip install huggingface_hub --quiet
!pip install -U accelerate --quiet

!pip install -U huggingface-hub --quiet
!pip install datasets==2.13 --quiet

In [None]:
from transformers import AutoTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from IPython.display import FileLink, FileLinks
from datasets import Dataset, load_dataset, concatenate_datasets
import sklearn
from sklearn.metrics import accuracy_score
import os

In [None]:
# Authenticate with the Hugging Face Hub.
# SECURITY: an access token used to be hard-coded in this cell. It has been
# removed from the source; that token should be revoked in the Hub settings
# because it is still present in the notebook's history.
# The token is now read from the HF_TOKEN environment variable, falling back to
# the interactive login widget when the variable is not set.
from huggingface_hub import login, notebook_login

hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    notebook_login()

In [None]:
# Load the pre-tokenized train/test splits and expose only the model inputs
# and the label as torch tensors.
tokenized_repo = "rajendrabaskota/hc3-wiki-intro-tokenized-max-len-512"
torch_columns = ["input_ids", "attention_mask", "label"]

train_dataset = load_dataset(tokenized_repo, split="train")
test_dataset = load_dataset(tokenized_repo, split="test")

for split_dataset in (train_dataset, test_dataset):
    split_dataset.set_format("torch", columns=torch_columns)

In [None]:
# Tokenizer and sequence-classification model, both from the same base checkpoint.
base_checkpoint = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
model = RobertaForSequenceClassification.from_pretrained(base_checkpoint)

In [None]:
# Report the model size in millions of parameters.
total_parameters = sum(p.numel() for p in model.parameters())
print(f"Total number of parameters: {total_parameters/1e6} M")

In [None]:
# Hyper-parameters for a single fine-tuning epoch.
batch_size = 16
epochs = 1
output_dir = "ai-human-classification-hc3-wiki-recleaned-dataset-max-length-512"

# Number of optimisation steps in one epoch. Not passed to TrainingArguments
# below, which logs every 100 steps instead.
logging_steps = len(train_dataset) // batch_size

# Evaluate and log every 100 steps; no checkpoints are written (save_strategy="no").
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="steps",
    eval_steps=100,
    logging_strategy="steps",
    logging_steps=100,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_strategy="no",
    # Checkpointing options, currently disabled:
    # save_steps=300,
    # load_best_model_at_end=True,
    # save_total_limit=2,
    # push_to_hub=False,
)

In [None]:
# Metric histories; compute_metrics appends to the test_* lists on every evaluation.
test_accuracy_scores, test_f1_scores = [], []
train_accuracy_scores, train_f1_scores = [], []

In [None]:
def compute_metrics(pred):
    """Compute accuracy and binary F1 for one evaluation pass.

    Args:
        pred: object exposing ``label_ids`` (true labels) and ``predictions``
            (per-class scores; the arg-max over the last axis is the predicted class).

    Returns:
        dict with keys ``'accuracy'`` and ``'f1_score'``.

    Side effects:
        Prints the shapes of the label and prediction arrays, and appends the
        two scores to the module-level ``test_accuracy_scores`` and
        ``test_f1_scores`` lists.
    """
    y_true = pred.label_ids
    y_hat = pred.predictions.argmax(-1)
    print(f"labels: {y_true.shape}")
    print(f"preds: {y_hat.shape}")

    metrics = {
        'accuracy': accuracy_score(y_true, y_hat),
        'f1_score': sklearn.metrics.f1_score(y_true, y_hat, average='binary'),
    }
    test_accuracy_scores.append(metrics['accuracy'])
    test_f1_scores.append(metrics['f1_score'])

    return metrics

In [None]:
# Wire the model, data, metrics and tokenizer into a Trainer.
# test_dataset is used as the evaluation set during training.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Launch fine-tuning with the Trainer built in the previous cell.
trainer.train()

In [None]:
# Training runs with save_strategy="no", so no checkpoint-* directory is ever
# written, and the old hard-coded path did not match output_dir anyway.
# Save the fine-tuned model explicitly, then list the files for download.
trainer.save_model(output_dir)
FileLinks(output_dir)