In [24]:
# Load label AI and Watermarked AI
import pandas as pd

df_clean = pd.read_csv("../data/ai_samples.csv")
df_clean["Label"] = "AI_Plain"

df_watermarked = pd.read_csv("../data/ai_samples_watermarked.csv")
df_watermarked["Label"] = "AI_Watermarked"

df_combined = pd.concat([df_clean, df_watermarked]).reset_index(drop=True)


In [26]:
df_combined.tail

<bound method NDFrame.tail of                                              Question  \
0   How do you feel about Elon Musk saying that Tr...   
1   How do you feel about Elon Musk saying that Tr...   
2   How do you feel about Elon Musk saying that Tr...   
3   How do you feel about Elon Musk saying that Tr...   
4   How do you feel about Elon Musk saying that Tr...   
..                                                ...   
95                   The power of vulnerability | TED   
96                   The power of vulnerability | TED   
97  The Unstoppable Power of Letting Go | TEDxWilm...   
98  The Unstoppable Power of Letting Go | TEDxWilm...   
99  The Unstoppable Power of Letting Go | TEDxWilm...   

                                                 Text            Source  \
0   Honestly, it’s kind of surreal how casually so...       r/AskReddit   
1   Part of me thinks Elon just likes chaos. He kn...       r/AskReddit   
2   Elon Musk saying Trump is in the Epstein files...       

In [11]:
import nltk # for tokenizing text into words and sentences
import string # for getting list of punctuations

#nltk.download('punkt_tab') # model by nltk for word and sentence tokenization

# take a block of text and calculate features related to writing style
def extract_features(text):

    # split into sentences and words
    sentences = nltk.sent_tokenize(text)
    words = nltk.word_tokenize(text)

    word_count = len(words)
    sentence_count = len(sentences)

    avg_word_length = sum(len(w) for w in words)/ word_count if word_count> 0 else 0
    punctuation_count = sum(1 for c in text if c in string.punctuation)

    return pd.Series({
        "word_count": word_count,
        "sentence_count": sentence_count,
        "avg_word_length": avg_word_length,
        "punctuation_count": punctuation_count
    })

df_features = df_combined["Text"].apply(extract_features)
df_combined = pd.concat([df_combined,df_features], axis = 1)

In [12]:
# Extract stylometric and TF ID features again

def contains_zwsp(text):
    return "\u200b" in text


def contains_arabic_comma(text):
    return "،" in text

df_combined["has_zwsp"] = df_combined["Text"].apply(contains_zwsp).astype(int)
df_combined["has_arabic_comma"] = df_combined["Text"].apply(contains_arabic_comma).astype(int)



In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from scipy.sparse import hstack, csr_matrix

# 1. TF-IDF
vectorizer = TfidfVectorizer(max_features=300, stop_words="english")
X_tfidf = vectorizer.fit_transform(df_combined["Text"])

# 2. Stylometric features
stylometric = df_combined[["word_count", "sentence_count", "avg_word_length", "punctuation_count"]]
stylometric_scaled = StandardScaler().fit_transform(stylometric)

# 3. Binary watermark flags
watermark_flags = df_combined[["has_zwsp", "has_arabic_comma"]].values

# Combine everything into X_combined
X_combined = hstack([
    X_tfidf,
    csr_matrix(stylometric_scaled),
    csr_matrix(watermark_flags)
])


In [22]:
print(df_combined.groupby('Label')[['has_zwsp', 'has_arabic_comma']].mean())

                has_zwsp  has_arabic_comma
Label                                     
AI_Plain             0.0               0.0
AI_Watermarked       0.0               0.0


In [14]:
# Train a classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# X = TF-IDF + stylometrics + binary Unicode features
# y = AI_Plain vs AI_Watermarked

X_train, X_test, y_train, y_test = train_test_split(X_combined, df_combined["Label"], stratify=df_combined["Label"], test_size=0.2, random_state=42)

clf = RandomForestClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(classification_report(y_test, y_pred))


                precision    recall  f1-score   support

      AI_Plain       0.09      0.10      0.10        10
AI_Watermarked       0.00      0.00      0.00        10

      accuracy                           0.05        20
     macro avg       0.05      0.05      0.05        20
  weighted avg       0.05      0.05      0.05        20



In [15]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(clf, X_combined, df_combined["Label"], cv=5)
print(f"Cross-val accuracy: {scores.mean():.2f}")


Cross-val accuracy: 0.50


In [16]:
y_train_pred = clf.predict(X_train)
print(classification_report(y_train, y_train_pred))


                precision    recall  f1-score   support

      AI_Plain       0.62      0.60      0.61        40
AI_Watermarked       0.61      0.62      0.62        40

      accuracy                           0.61        80
     macro avg       0.61      0.61      0.61        80
  weighted avg       0.61      0.61      0.61        80



In [21]:
from sklearn.linear_model import LogisticRegression
clf2 = LogisticRegression(max_iter=1000)

clf2.fit(X_train, y_train)
y_pred2 = clf2.predict(X_test)
print(classification_report(y_test, y_pred2))

                precision    recall  f1-score   support

      AI_Plain       0.09      0.10      0.10        10
AI_Watermarked       0.00      0.00      0.00        10

      accuracy                           0.05        20
     macro avg       0.05      0.05      0.05        20
  weighted avg       0.05      0.05      0.05        20

