In [None]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk import word_tokenize, FreqDist
import spacy
import string
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
import textstat
from gensim import corpora, models
from nltk.lm import MLE
from nltk.lm.preprocessing import padded_everygram_pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.util import ngrams
from scipy.sparse import hstack

In [None]:
# data preparation

# Load the labelled corpus. Rows with generated == 0 are treated as human-written
# and rows with generated == 1 as AI-written throughout this notebook.
text_data = pd.read_csv("ai-or-human-ml-model/AI_Human_random_10k.csv")

# Draw a balanced 500/500 subset. The fixed random_state makes the draw (and so
# every downstream feature and metric) reproducible across Restart & Run All.
human_data = text_data[text_data["generated"] == 0].sample(500, random_state=42)
ai_data = text_data[text_data["generated"] == 1].sample(500, random_state=42)

text_data = pd.concat([human_data, ai_data])

In [None]:
# download stop words & define punctuation

import nltk

# Fetch the NLTK stop-word corpus, then build the two lookups used by
# preprocess_text: a set of English stop words and the string of ASCII
# punctuation characters.
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))
punctuations = string.punctuation

In [None]:
# text preprocessing

# word_tokenize needs the Punkt tokenizer data. NLTK >= 3.8.2 looks for the
# "punkt_tab" resource instead of the older pickled "punkt", so request both;
# an id unknown to the installed NLTK only makes download() report a failure.
for tokenizer_resource in ("punkt", "punkt_tab"):
    nltk.download(tokenizer_resource)


def preprocess_text(text):
    """Normalise one document before feature extraction.

    The text is lower-cased, every character that is not an ASCII letter,
    digit or whitespace is deleted, and the remainder is tokenised with
    ``word_tokenize``. Tokens found in the module-level ``stop_words`` or
    ``punctuations`` are dropped.

    Args:
        text: The document as a string.

    Returns:
        The surviving tokens joined by single spaces.
    """
    lowered = text.lower()
    alnum_only = re.sub(r"[^a-zA-Z0-9\s]", "", lowered)

    kept_words = []
    for word in word_tokenize(alnum_only):
        # Skip stop words and anything that matches the punctuation string.
        if word in stop_words or word in punctuations:
            continue
        kept_words.append(word)

    return " ".join(kept_words)


# NOTE(review): this overwrites the raw text in place. preprocess_text deletes
# every non-alphanumeric character, so later features read from this column
# that depend on punctuation or apostrophes (punctuation_count,
# contraction_count) cannot observe any. Consider keeping the raw text in a
# separate column and extracting the linguistic features from it.
text_data["text"] = text_data["text"].apply(preprocess_text)

In [6]:
# Shared spaCy pipeline; both feature-extraction functions below call nlp(text).
nlp = spacy.load("en_core_web_sm")

In [7]:
# feature extraction (linguistic features)


def extract_linguistic_features(text, label):
    """Compute lexical, syntactic, discourse and stylistic counts for one text.

    Args:
        text: Document to analyse; parsed with the module-level spaCy ``nlp``.
        label: Class label, passed through unchanged.

    Returns:
        A ``(features, label)`` tuple where ``features`` maps feature names to
        plain Python numbers. Every sentence-length statistic is 0 when the
        parse contains no sentences (e.g. for empty text).
    """
    doc = nlp(text)
    features = {}

    # lexical features
    words = [token for token in doc if not token.is_punct]
    features["word_count"] = len(words)
    features["unique_word_count"] = len({token.text for token in words})
    features["ttr"] = (
        features["unique_word_count"] / features["word_count"]
        if features["word_count"] > 0
        else 0
    )
    features["noun_count"] = len([token for token in doc if token.pos_ == "NOUN"])
    features["verb_count"] = len([token for token in doc if token.pos_ == "VERB"])
    features["adj_count"] = len([token for token in doc if token.pos_ == "ADJ"])

    # syntactic features
    # Materialise the sentence lengths once instead of re-iterating doc.sents
    # for every statistic.
    sent_lengths = [len(sent) for sent in doc.sents]
    # np.mean([]) would yield NaN plus a RuntimeWarning; fall back to 0 like
    # max/min do. float() keeps a plain number rather than a numpy scalar,
    # whose repr would otherwise leak into the stringified feature dict.
    features["avg_sent_len"] = float(np.mean(sent_lengths)) if sent_lengths else 0.0
    features["max_sent_len"] = max(sent_lengths, default=0)
    features["min_sent_len"] = min(sent_lengths, default=0)
    features["passive_voice_count"] = len(
        [token for token in doc if token.dep_ == "auxpass"]
    )

    # discourse & rhetorical features
    features["entity_count"] = len(doc.ents)
    features["conjunction_count"] = len(
        [token for token in doc if token.pos_ == "CCONJ"]
    )

    # stylistic features
    contraction_suffixes = ("n't", "'s", "'d", "'ll", "'re", "'ve")
    features["contraction_count"] = len(
        [token for token in doc if token.text in contraction_suffixes]
    )
    features["punctuation_count"] = len([token for token in doc if token.is_punct])

    return features, label

In [None]:
# initialize the language model

# Fit a bigram maximum-likelihood language model on the whitespace-tokenised
# documents.
# NOTE(review): the model is fitted on every row of text_data, including rows
# that the later train/test split places in the test set.
corpus = text_data["text"].tolist()
train_data = [list(doc.split()) for doc in corpus]
# Per the NLTK docs the pipeline returns lazy iterators: the training n-grams
# and the padded token stream used to build the vocabulary. TODO confirm that
# "padded_sents" is that vocabulary stream.
train_data, padded_sents = padded_everygram_pipeline(2, train_data)
language_model = MLE(2)
language_model.fit(train_data, padded_sents)

# perplexity calculation


def calculate_perplexity(text):
    """Return the perplexity of ``text`` under the fitted ``language_model``.

    ``LanguageModel.perplexity`` scores a sequence of n-gram tuples. Passing
    bare word strings makes it treat each word as an "n-gram" of characters,
    so the tokens are first turned into padded n-grams of the model's order,
    using the same ``<s>`` / ``</s>`` padding as the training pipeline.

    Args:
        text: Whitespace-separable document string.

    Returns:
        The perplexity as a float, or 0 when ``text`` contains no tokens. The
        value can be ``inf`` when the text contains n-grams the unsmoothed MLE
        model never saw during fitting.
    """
    tokens = text.split()
    if not tokens:
        return 0

    text_ngrams = list(
        ngrams(
            tokens,
            language_model.order,
            pad_left=True,
            pad_right=True,
            left_pad_symbol="<s>",
            right_pad_symbol="</s>",
        )
    )
    return language_model.perplexity(text_ngrams)


# sentiment analysis


def analyze_sentiment(text):
    """Return the VADER polarity scores for ``text``.

    The analyzer is created on first use and cached on the function object, so
    the per-document feature loop does not rebuild it for every call.

    Args:
        text: Document string to score.

    Returns:
        The dict produced by ``SentimentIntensityAnalyzer.polarity_scores``.
    """
    analyzer = getattr(analyze_sentiment, "_analyzer", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        analyze_sentiment._analyzer = analyzer
    return analyzer.polarity_scores(text)


# n-gram frequency distribution


def get_ngram_freq_dist(text, n):
    """Count how often each word ``n``-gram occurs in ``text``.

    Args:
        text: Document string; tokenised with ``word_tokenize``.
        n: Size of the n-grams to count.

    Returns:
        A ``FreqDist`` keyed by n-gram tuples.
    """
    words = word_tokenize(text)
    return FreqDist(ngrams(words, n))

In [10]:
# feature extraction (additional features)


def extract_additional_features(text, label):
    """Compute perplexity, readability, sentiment, entity, topic and n-gram features.

    Args:
        text: Document string to analyse.
        label: Class label, passed through unchanged.

    Returns:
        A ``(features, label)`` tuple. ``features`` holds scalars as well as
        nested objects (sentiment dict, topic list, ``FreqDist`` instances).
    """
    features = {}
    has_content = bool(text.strip())

    # perplexity
    features["perplexity"] = calculate_perplexity(text)

    # readability scores (textstat is only called on non-blank text)
    if has_content:
        features["flesch_reading_ease"] = textstat.flesch_reading_ease(text)
        features["smog_index"] = textstat.smog_index(text)
    else:
        features["flesch_reading_ease"] = 0
        features["smog_index"] = 0

    # sentiment and emotion analysis
    features["sentiment_scores"] = analyze_sentiment(text)

    # named entity recognition
    # NOTE(review): only the "LOC" label is counted as a location; confirm
    # whether spaCy's "GPE" entities (countries, cities) should count too.
    entity_labels = [ent.label_ for ent in nlp(text).ents]
    features["person_count"] = entity_labels.count("PERSON")
    features["org_count"] = entity_labels.count("ORG")
    features["loc_count"] = entity_labels.count("LOC")

    # topic modeling
    # NOTE(review): a 10-topic LDA is fitted on this single document on every
    # call, which is slow and gives a weak signal; a corpus-level model fitted
    # once may be more appropriate.
    if has_content:
        tokens = text.split()
        dictionary = corpora.Dictionary([tokens])
        # Named bow_corpus so it does not shadow the notebook-level `corpus`.
        bow_corpus = [dictionary.doc2bow(tokens)]
        lda_model = models.LdaMulticore(
            corpus=bow_corpus,
            id2word=dictionary,
            num_topics=10,
            # Seed the otherwise random initialisation so the topic features
            # are repeatable between runs and between training and prediction.
            random_state=42,
        )
        features["topic_distribution"] = lda_model.get_document_topics(bow_corpus[0])
    else:
        features["topic_distribution"] = []

    # n-gram distributions
    features["bigram_freq_dist"] = get_ngram_freq_dist(text, 2)
    features["trigram_freq_dist"] = get_ngram_freq_dist(text, 3)

    return features, label

In [None]:
# extract features for text data

# Accumulators: one feature dict and one label per document, in row order.
text_features, text_labels = [], []
additional_text_features, additional_text_labels = [], []

# Preprocessing can leave a document empty; drop those rows first.
text_data = text_data[text_data["text"].str.len() > 0]

for document, target in zip(text_data["text"], text_data["generated"]):
    # Both extractors hand the label back unchanged alongside their features.
    linguistic, target = extract_linguistic_features(document, target)
    extra, target = extract_additional_features(document, target)

    text_features.append(linguistic)
    additional_text_features.append(extra)
    text_labels.append(target)
    additional_text_labels.append(target)

In [12]:
# features vectorization & combining them

# NOTE(review): every feature dict is converted to its str() form and then
# tokenised by TF-IDF, so numeric values become text tokens rather than numeric
# columns. Both vectorizers are also fitted on all rows before the train/test
# split. A DictVectorizer / DataFrame fitted on the training rows only would
# likely serve better, but the prediction helpers rely on this exact encoding.
linguistic_docs = list(map(str, text_features))
text_vectorizer = TfidfVectorizer()
text_tfidf = text_vectorizer.fit_transform(linguistic_docs)

additional_docs = list(map(str, additional_text_features))
additional_vectorizer = TfidfVectorizer()
additional_tfidf = additional_vectorizer.fit_transform(additional_docs)

# Place the two sparse blocks side by side: one row per document.
X_text = hstack((text_tfidf, additional_tfidf))

In [13]:
# data splitting

# Hold out 20% of the documents for evaluation. Stratifying on the labels keeps
# the human/AI ratio of the sampled data identical in both partitions.
X_train_text, X_test_text, y_train_text, y_test_text = train_test_split(
    X_text, text_labels, test_size=0.2, random_state=42, stratify=text_labels
)

In [None]:
# ensemble learning & hyperparameter tuning

# Soft-voting ensemble: the predicted class probabilities of the three base
# models are averaged. The stochastic estimators are seeded so the grid search
# and the reported metrics are reproducible, and LogisticRegression gets a
# larger iteration budget than the default so the solver can converge.
voting_classifier = VotingClassifier(
    estimators=[
        ("rf", RandomForestClassifier(random_state=42)),
        ("lr", LogisticRegression(max_iter=1000)),
        ("svc", SVC(probability=True, random_state=42)),
    ],
    voting="soft",
)

# hyperparameter tuning
# Keys use the "<estimator name>__<parameter>" convention to reach into the
# named members of the ensemble.
param_grid = {
    "rf__n_estimators": [100, 200, 500],
    "rf__max_depth": [None, 10, 20],
    "lr__C": [0.1, 1, 10],
    "svc__C": [0.1, 1, 10],
    "svc__gamma": ["auto", "scale"],
}

# Exhaustive search over the grid with 5-fold cross-validation, ranked by F1.
grid_search = GridSearchCV(
    voting_classifier, param_grid, cv=5, scoring="f1", verbose=1, n_jobs=-1
)
grid_search.fit(X_train_text, y_train_text)

# evaluate best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_text)
accuracy = accuracy_score(y_test_text, y_pred)
precision = precision_score(y_test_text, y_pred)
recall = recall_score(y_test_text, y_pred)
f1 = f1_score(y_test_text, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

In [None]:
# function to predict if text is human or AI-generated


def predict_text_authorship(input_text, model, vectorizers):
    """Classify a single text as human- or AI-written.

    The input goes through the same ``preprocess_text`` step as the training
    documents before any features are extracted, so the model sees features
    computed the same way at prediction time as at training time.

    Args:
        input_text: Raw text to classify.
        model: Fitted classifier exposing ``predict``.
        vectorizers: Two fitted vectorizers, in order: the one for the
            linguistic features, then the one for the additional features.

    Returns:
        ``"Human-generated"`` when the predicted label is 0, otherwise
        ``"AI-generated"``.
    """
    cleaned_text = preprocess_text(input_text)

    linguistic = extract_linguistic_features(cleaned_text, 0)[0]
    additional = extract_additional_features(cleaned_text, 0)[0]

    # The vectorizers were fitted on the str() form of each feature dict, so
    # the same encoding is used here.
    stacked = hstack(
        (
            vectorizers[0].transform([str(linguistic)]),
            vectorizers[1].transform([str(additional)]),
        )
    )

    prediction = model.predict(stacked)[0]
    if prediction == 0:
        return "Human-generated"
    return "AI-generated"


# Smoke test: classify one hand-written sentence with the tuned ensemble and
# the two vectorizers fitted above (linguistic first, additional second).
example_text = "Education is the cornerstone of a prosperous society."

print(
    f"Predicted authorship: {predict_text_authorship(example_text, best_model, [text_vectorizer, additional_vectorizer])}"
)

In [None]:
# 3. LIME (Local Interpretable Model-agnostic Explanations)

from lime import lime_text


def preprocess_and_predict(text):
    """Probability function handed to LIME: text(s) in, class probabilities out.

    The two TF-IDF vectorizers were fitted on the str() form of the extracted
    feature dicts, not on document text. Each input is therefore preprocessed,
    run through both feature extractors and stringified before being
    vectorised, which mirrors how the training matrix was built.

    Note that LIME calls this on many perturbed copies of one document, and
    feature extraction runs for each of them, so explanations are slow.

    Args:
        text: A single string or an iterable of strings.

    Returns:
        The ``predict_proba`` output of ``best_model``, one row per input text.
    """
    texts = [text] if isinstance(text, str) else list(text)

    linguistic_docs = []
    additional_docs = []
    for raw in texts:
        cleaned = preprocess_text(raw)
        linguistic_docs.append(str(extract_linguistic_features(cleaned, 0)[0]))
        additional_docs.append(str(extract_additional_features(cleaned, 0)[0]))

    stacked = hstack(
        (
            text_vectorizer.transform(linguistic_docs),
            additional_vectorizer.transform(additional_docs),
        )
    )

    return best_model.predict_proba(stacked)


# Class names are listed in label order: index 0 = Human, index 1 = AI.
explainer = lime_text.LimeTextExplainer(class_names=["Human", "AI"])


# Explain the first document of the data set.
# NOTE(review): despite its name, raw_text comes from the "text" column, which
# was already replaced by the output of preprocess_text earlier on.
idx = 0
raw_text = text_data["text"].iloc[idx]
exp = explainer.explain_instance(raw_text, preprocess_and_predict, num_features=10)

print(f"Explanation for instance {idx}:\n{exp.as_list()}")

In [None]:
# confusion matrix

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Rows are the true labels and columns the predicted labels, both ordered
# 0 = Human, 1 = AI.
conf_matrix = confusion_matrix(y_test_text, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    # Cells are integer counts; the default ".2g" format would print a count
    # such as 100 as "1e+02".
    fmt="d",
    cmap="Blues",
    xticklabels=["Human", "AI"],
    yticklabels=["Human", "AI"],
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()

In [None]:
# classification report

from sklearn.metrics import classification_report

# Per-class report on the held-out split; target_names follow the label
# encoding used in this notebook (0 = Human, 1 = AI).
print(classification_report(y_test_text, y_pred, target_names=["Human", "AI"]))

In [None]:
# ROC curve and AUC
from sklearn.metrics import roc_curve, auc

# Score each held-out document with the predicted probability of class 1 (AI),
# then trace the ROC curve over all decision thresholds.
positive_class_scores = best_model.predict_proba(X_test_text)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test_text, positive_class_scores)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color="darkorange", lw=2, label=f"ROC curve (AUC = {roc_auc:.2f})")
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("Receiver Operating Characteristic (ROC) Curve")
ax.legend(loc="lower right")
plt.show()

In [None]:
# Precision-Recall curve
from sklearn.metrics import precision_recall_curve


# Precision/recall pairs over all decision thresholds for class 1 (AI).
# The results use pr_-prefixed names so they do not overwrite the scalar
# `precision` / `recall` metrics from the evaluation cell or the `thresholds`
# array from the ROC cell; those earlier cells stay valid when re-run.
pr_precision, pr_recall, pr_thresholds = precision_recall_curve(
    y_test_text, best_model.predict_proba(X_test_text)[:, 1]
)


plt.figure(figsize=(8, 6))
plt.plot(
    pr_recall, pr_precision, lw=2, color="darkgreen", label="Precision-Recall Curve"
)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.legend(loc="lower left")
plt.show()

In [None]:
# cross-validation scores

from sklearn.model_selection import cross_val_score

# 10-fold cross-validated F1 of the tuned ensemble over the full feature matrix.
cv_scores = cross_val_score(
    estimator=best_model,
    X=X_text,
    y=text_labels,
    scoring="f1",
    cv=10,
)

print("Cross-Validation Scores:")
print(cv_scores)
print(f"Mean Cross-Validation Score: {cv_scores.mean():.4f}")

In [None]:
# learning curve
from sklearn.model_selection import learning_curve


# F1 on the training folds and on the validation folds for growing training-set
# sizes (5-fold cross-validation at every size).
train_sizes, train_scores, test_scores = learning_curve(
    best_model, X_text, text_labels, cv=5, scoring="f1", n_jobs=-1
)

# Collapse the per-fold scores (axis 1) into a mean and a spread per size.
train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
test_mean, test_std = test_scores.mean(axis=1), test_scores.std(axis=1)

plt.figure(figsize=(10, 6))
curves = (
    (train_mean, train_std, "blue", "Training Score"),
    (test_mean, test_std, "green", "Cross-Validation Score"),
)
for curve_mean, curve_std, colour, legend_label in curves:
    # Mean line plus a translucent +/- one standard deviation band.
    plt.plot(train_sizes, curve_mean, color=colour, label=legend_label)
    plt.fill_between(
        train_sizes,
        curve_mean - curve_std,
        curve_mean + curve_std,
        alpha=0.1,
        color=colour,
    )
plt.xlabel("Number of Training Examples")
plt.ylabel("F1-Score")
plt.title("Learning Curve")
plt.legend(loc="best")
plt.show()

In [None]:
# calibration curve

from sklearn.calibration import CalibratedClassifierCV, calibration_curve


# Calibrate with internal 5-fold cross-validation: for every fold a clone of
# best_model is fitted on four parts of the training data and the isotonic
# calibrator is learned on the remaining part. With cv="prefit" the calibrator
# would be learned on the very rows best_model was trained on, which biases the
# calibration.
calibrated_model = CalibratedClassifierCV(best_model, cv=5, method="isotonic")
calibrated_model.fit(X_train_text, y_train_text)


# Reliability curve on the held-out split, using the probability of class 1.
fraction_of_positives, mean_predicted_value = calibration_curve(
    y_test_text, calibrated_model.predict_proba(X_test_text)[:, 1], n_bins=10
)


plt.figure(figsize=(10, 6))
plt.plot([0, 1], [0, 1], linestyle="--", label="Perfect Calibration")
plt.plot(
    mean_predicted_value, fraction_of_positives, "s-", label="Calibrated Classifier"
)
plt.xlabel("Mean Predicted Probability")
plt.ylabel("Fraction of Positives")
plt.title("Calibration Curve")
plt.legend(loc="best")
plt.show()