In [6]:
# AI-generated text recognition - test playground

In [17]:
# import libraries
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import time


In [3]:
# first we will experiment with some fully unassisted learning

In [18]:
dataset = pd.read_csv('/Users/samlewis/Desktop/mlDatasets/train_v2_drcat_02.csv')
dataset.head()

Unnamed: 0,text,label,prompt_name,source,RDizzl3_seven
0,Phones\n\nModern humans today are always on th...,0,Phones and driving,persuade_corpus,False
1,This essay will explain if drivers should or s...,0,Phones and driving,persuade_corpus,False
2,Driving while the use of cellular devices\n\nT...,0,Phones and driving,persuade_corpus,False
3,Phones & Driving\n\nDrivers should not be able...,0,Phones and driving,persuade_corpus,False
4,Cell Phone Operation While Driving\n\nThe abil...,0,Phones and driving,persuade_corpus,False


In [19]:
# for reproducability
def seed_everything(seed=100):
    import random
    random.seed(seed)
    np.random.seed(seed)

seed_everything()

In [20]:
# some preprocessing

In [21]:
def remove_punctuations(Df):
    punct_tag=re.compile(r'[^\w\s]')
    data=punct_tag.sub(r'',Df)
    return data
dataset['text']=dataset['text'].apply(lambda z: remove_punctuations(z))

In [22]:
def remove_html(Df):
    html_tag=re.compile(r'<.*?>')
    data=html_tag.sub(r'',Df)
    return data
dataset['text']=dataset['text'].apply(lambda z: remove_html(z))

In [23]:
def remove_url(Df):
    url_clean= re.compile(r"https://\S+|www\.\S+")
    data=url_clean.sub(r'',Df)
    return data
dataset['text']=dataset['text'].apply(lambda z: remove_url(z))

In [24]:
def remove_emoji(Df):
    emoji_clean= re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    data=emoji_clean.sub(r'',Df)
    url_clean= re.compile(r"https://\S+|www\.\S+")
    data=url_clean.sub(r'',Df)
    return data
dataset['text']=dataset['text'].apply(lambda z: remove_emoji(z))

In [9]:
# 75/25 train/test split

In [25]:
# X = dataset[["text","punCount"]]
X = dataset["text"]
y = dataset["source"]
y = (y != 'persuade_corpus')

from sklearn.model_selection import train_test_split 
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=100)

In [26]:
# various versions of our TD-IDF vectorizer
from sklearn.feature_extraction.text import TfidfVectorizer


In [30]:
# 2-4 ngrams
vectorizer = TfidfVectorizer(ngram_range=(2, 4),tokenizer=lambda x: re.findall(r'[^\W]+', x),token_pattern=None,strip_accents='unicode',)
vectorizer = vectorizer.fit(X_test)
test_x = vectorizer.fit_transform(X_test)
train_x = vectorizer.transform(X_train)


In [14]:
# 3-5 ngrams
vectorizer = TfidfVectorizer(ngram_range=(3, 5),tokenizer=lambda x: re.findall(r'[^\W]+', x),token_pattern=None,strip_accents='unicode',)
vectorizer = vectorizer.fit(X_test)
test_x = vectorizer.fit_transform(X_test)
train_x = vectorizer.transform(X_train)



In [10]:
# 1-3 ngrams, TF only
vectorizer = TfidfVectorizer(ngram_range=(1, 3),tokenizer=lambda x: re.findall(r'[^\W]+', x),token_pattern=None,strip_accents='unicode',use_idf=False)
vectorizer = vectorizer.fit(X_test)
test_x = vectorizer.fit_transform(X_test)
train_x = vectorizer.transform(X_train)


In [12]:
# 1-3 ngrams - this is the chosen vectorizer 
vectorizer = TfidfVectorizer(ngram_range=(1, 3),tokenizer=lambda x: re.findall(r'[^\W]+', x),token_pattern=None,strip_accents='unicode')
vectorizer = vectorizer.fit(X_test)
test_x = vectorizer.fit_transform(X_test)
train_x = vectorizer.transform(X_train)


In [11]:
# CountVectorizer with Multinomial Native-Bayes classification, used in the following two models before returning
# to the TfidfVectorizer with a SVM classifier.

import warnings
warnings.filterwarnings('ignore')
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer

text_clf = Pipeline([
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', MultinomialNB()),
])

In [12]:
text_clf.fit(X_train,y_train)

In [13]:
y_pred = text_clf.predict(X_test)

In [14]:
from sklearn.metrics import confusion_matrix, classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       False       0.85      0.99      0.92     20716
        True       0.99      0.76      0.86     15179

    accuracy                           0.90     35895
   macro avg       0.92      0.88      0.89     35895
weighted avg       0.91      0.90      0.89     35895



In [26]:
# the above classification report comes from the CountVectorizer/TfidfTransformer/MultinomialNB model using the default unigrams

In [14]:
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

In [22]:
# CountVectorizer/TfidfTransformer/MultinomialNB, 3-5 ngrams
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       False       0.90      0.98      0.94     20716
        True       0.97      0.85      0.90     15179

    accuracy                           0.92     35895
   macro avg       0.93      0.91      0.92     35895
weighted avg       0.93      0.92      0.92     35895



In [19]:
# I then decided to experiement with a Support Vector Machine and the TfidfVectorizer instead 

In [15]:
from sklearn.svm import SVC

In [15]:
svm_classifier = SVC(kernel='linear', class_weight='balanced')
svm_classifier.fit(train_x, y_train)

In [16]:
y_pred_svm = svm_classifier.predict(test_x)

In [27]:
# the preliminary SVM, 2-4 ngrams, linear kernal
print(classification_report(y_test, y_pred_svm, digits=5)) 

              precision    recall  f1-score   support

       False    0.92765   0.98972   0.95768     20716
        True    0.98456   0.89466   0.93746     15179

    accuracy                        0.94952     35895
   macro avg    0.95611   0.94219   0.94757     35895
weighted avg    0.95172   0.94952   0.94913     35895



In [None]:
# here, I unfortunately lost a couple of models due to my computer crashing. I tested various ngrams with linear kernels (due
# to time complexity and processing power constraints) and determined that 1-3 ngrams was right for this project. I was 
# hesitant include unigrams at first as I thought it might just add noise to the data, but I think it helps capture the 
# pattern of AI text generators to overuse very common words such as 'the'. 

In [18]:
# 1-3 ngrams, RBF kernel
svm_classifier2 = SVC(kernel='rbf', class_weight='balanced')
svm_classifier2.fit(train_x, y_train)

In [19]:
y_pred_svm2 = svm_classifier2.predict(test_x)

In [28]:
print(classification_report(y_test, y_pred_svm2, digits=5)) 

              precision    recall  f1-score   support

       False    0.87348   0.99010   0.92814     20716
        True    0.98349   0.80427   0.88489     15179

    accuracy                        0.91152     35895
   macro avg    0.92848   0.89719   0.90652     35895
weighted avg    0.92000   0.91152   0.90985     35895



In [21]:
# 1-3 ngrams, sigmoid kernel, different class weights to comabt unbalanced precision and recall scores.
# however I do understand that my high precision score for detecting AI text is ideal here, as we really want to avoid 
# false positives (which only make up 1.3% of all 'True' classifications).
svm_classifier3 = SVC(kernel='sigmoid', class_weight={0: 2, 1: 1})
svm_classifier3.fit(train_x, y_train)

In [22]:
import time
print("start", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
y_pred_svm3 = svm_classifier3.predict(test_x)
print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

start 20 : 21 : 42
done 20 : 34 : 24


In [23]:
print(classification_report(y_test, y_pred_svm3, digits=5)) 

              precision    recall  f1-score   support

       False    0.92377   0.99150   0.95644     20716
        True    0.98712   0.88833   0.93512     15179

    accuracy                        0.94788     35895
   macro avg    0.95544   0.93992   0.94578     35895
weighted avg    0.95056   0.94788   0.94742     35895



In [24]:
# 1-3 ngrams, sigmoid kernel, different class weights
svm_classifier3 = SVC(kernel='sigmoid', class_weight={0: 5, 1: 1})
svm_classifier3.fit(train_x, y_train)

In [25]:
print("start", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
y_pred_svm3 = svm_classifier3.predict(test_x)
print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

start 20 : 42 : 6
done 20 : 55 : 19


In [26]:
print(classification_report(y_test, y_pred_svm3, digits=5)) 

              precision    recall  f1-score   support

       False    0.92381   0.99150   0.95646     20716
        True    0.98712   0.88840   0.93516     15179

    accuracy                        0.94790     35895
   macro avg    0.95546   0.93995   0.94581     35895
weighted avg    0.95058   0.94790   0.94745     35895



In [15]:
# 1-3 ngrams, polynomial kernel
import time
from sklearn.svm import SVC
print("start", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
svm_classifier4 = SVC(kernel='poly', class_weight='balanced')
svm_classifier4.fit(train_x, y_train)
print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

start 17 : 30 : 14
done 17 : 42 : 8


In [16]:
print("start", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
y_pred_svm4 = svm_classifier4.predict(test_x)
print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

start 17 : 44 : 33
done 18 : 5 : 36


In [20]:
from sklearn.metrics import confusion_matrix, classification_report
print(classification_report(y_test, y_pred_svm4, digits=5)) 

              precision    recall  f1-score   support

       False    0.58530   0.99020   0.73572     20716
        True    0.76061   0.04249   0.08049     15179

    accuracy                        0.58944     35895
   macro avg    0.67296   0.51635   0.40810     35895
weighted avg    0.65943   0.58944   0.45864     35895



In [11]:
# sigmoid classifier, no IDF, 1-3 ngrams
import time
from sklearn.svm import SVC
print("start training", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
svm_classifier_NOIDF = SVC(kernel='rbf')
svm_classifier_NOIDF.fit(train_x, y_train)
print("start pred", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
y_pred_svm_NOIDF = svm_classifier_NOIDF.predict(test_x)
print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

start training 17 : 49 : 18
start pred 18 : 34 : 44
done 18 : 39 : 55


In [12]:
from sklearn.metrics import confusion_matrix, classification_report
print(classification_report(y_test, y_pred_svm_NOIDF, digits=5)) 
print(confusion_matrix(y_test, y_pred_svm_NOIDF))

              precision    recall  f1-score   support

       False    0.94123   0.99614   0.96791      5177
        True    0.99428   0.91520   0.95310      3797

    accuracy                        0.96189      8974
   macro avg    0.96775   0.95567   0.96050      8974
weighted avg    0.96368   0.96189   0.96164      8974

[[5157   20]
 [ 322 3475]]


In [None]:
# 1-3 ngrams, rbf kernel, TF-IDF
print("start training", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
svm_classifier_13IDF = SVC(C=0.5,kernel='rbf', class_weight='balanced')
svm_classifier_13IDF.fit(train_x, y_train)
print("start pred", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
y_pred_svm_13IDF = svm_classifier_IDF.predict(test_x)
print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

start training 14 : 23 : 30


In [None]:
# I then impliment a Grid Search to determine optimal C-values for first a linear kernel, then later a Radial Basis Function kernel

In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
parameters = {'kernel':['linear'], 'C':[1, 5]}
svc = SVC()
clf = GridSearchCV(svc, parameters)
print("start train", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
clf.fit(train_x, y_train)
print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
sorted(clf.cv_results_.keys() # <- oops wrong command


start train 17 : 20 : 38
done 1 : 48 : 48


['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'param_C',
 'param_kernel',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split1_test_score',
 'split2_test_score',
 'split3_test_score',
 'split4_test_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score']

In [14]:
clf.cv_results_

{'mean_fit_time': array([1773.84183464, 3199.13683333]),
 'std_fit_time': array([  30.75722604, 1247.7753873 ]),
 'mean_score_time': array([168.81297297, 307.29162416]),
 'std_score_time': array([  7.59497107, 237.86231429]),
 'param_C': masked_array(data=[1, 5],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'linear'],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'linear'}, {'C': 5, 'kernel': 'linear'}],
 'split0_test_score': array([0.96048135, 0.95097311]),
 'split1_test_score': array([0.95988113, 0.94799406]),
 'split2_test_score': array([0.96329866, 0.95497771]),
 'split3_test_score': array([0.96612184, 0.95631501]),
 'split4_test_score': array([0.96419019, 0.95349183]),
 'mean_test_score': array([0.96279464, 0.95275034]),
 'std_test_score': array([0.00232859, 0.00296643]),
 'rank_test_score': array([1, 2], dtype=int32)}

In [15]:
clf.best_estimator_

In [16]:
clf.best_score_

0.9627946365124836

In [None]:
# all C-values were relavatively similar in accuracy. there was no overarching correlation between C-value and accuracy,

In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
parameters = {'kernel':['rbf'], 'C':[0.5,1,2]}
svc = SVC()
clf2 = GridSearchCV(svc, parameters)
print("start train", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
clf2.fit(train_x, y_train)
print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5]) 
sorted(clf2.cv_results_)


start train 1 : 37 : 24
done 19 : 31 : 24


['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'param_C',
 'param_kernel',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split1_test_score',
 'split2_test_score',
 'split3_test_score',
 'split4_test_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score']

In [14]:
clf2.best_estimator_

In [15]:
clf2.best_score_

0.9636564228008044

In [None]:
# tried one final RBF model with 1-5 ngrams. this turned out to be significantly less accurate than my peak 0.964 accuracy, 
# which I am satisfied with. 

In [27]:
from sklearn.svm import SVC
import time

print("start vectorization", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
vectorizer = TfidfVectorizer(ngram_range=(1, 5),tokenizer=lambda x: re.findall(r'[^\W]+', x),token_pattern=None,strip_accents='unicode')
vectorizer = vectorizer.fit(X_test)
test_x = vectorizer.fit_transform(X_test)
train_x = vectorizer.transform(X_train)

print("start training", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
svm_classifier_final = SVC(C=0.5,kernel='rbf', class_weight='balanced')
svm_classifier_final.fit(train_x, y_train)

print("start pred", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    
y_pred_svm_final = svm_classifier_final.predict(test_x)

print("done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

start vectorization 22 : 16 : 55
start training 22 : 19 : 56
start pred 0 : 20 : 38
done 0 : 34 : 37


In [28]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_svm_final, digits=5)) 

              precision    recall  f1-score   support

       False    0.94508   0.95101   0.94804      6532
        True    0.93109   0.92295   0.92700      4685

    accuracy                        0.93929     11217
   macro avg    0.93809   0.93698   0.93752     11217
weighted avg    0.93924   0.93929   0.93925     11217



In [None]:
# the rest of this document is a few random models 

In [122]:
# count punctuation - unused feature augmentation. i have read about the trend of AI-generated text to use more 
# punctuation than a human would usually use and considered adding that to the model. 
def count_puncts(x):
  # sub. punct. with '' and returns the new string with the no. of replacements.
  new_str, count = re.subn(r'\W', '', x)
  return count

dataset['punCount'] = dataset['text'].apply(count_puncts)

In [123]:
dataset.head()

Unnamed: 0,text,label,prompt_name,source,RDizzl3_seven,punCount
0,Phones\n\nModern humans today are always on th...,0,Phones and driving,persuade_corpus,False,387
1,This essay will explain if drivers should or s...,0,Phones and driving,persuade_corpus,False,439
2,Driving while the use of cellular devices\n\nT...,0,Phones and driving,persuade_corpus,False,182
3,Phones Driving\n\nDrivers should not be able ...,0,Phones and driving,persuade_corpus,False,228
4,Cell Phone Operation While Driving\n\nThe abil...,0,Phones and driving,persuade_corpus,False,341


In [59]:
dataset[dataset["label"] == 1]["punCount"].mean

<bound method NDFrame._add_numeric_operations.<locals>.mean of 25996    258
25997    335
25998    184
25999    288
26000    266
        ... 
44863    391
44864    360
44865    281
44866    260
44867    409
Name: punCount, Length: 17497, dtype: int64>

In [193]:
# 3-5 ngrams, linear kernel, TF-IDF

print("start", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

X = dataset["text"]
y = dataset["source"]
y = (y != 'persuade_corpus')

from sklearn.model_selection import train_test_split 
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=100)

print("split created", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    


vectorizer = TfidfVectorizer(ngram_range=(3, 5),tokenizer=lambda x: re.findall(r'[^\W]+', x),token_pattern=None,strip_accents='unicode',)
vectorizer = vectorizer.fit(X_test)
test_x = vectorizer.fit_transform(X_test)
train_x = vectorizer.transform(X_train)

print("vectorization done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    


svm_classifier = SVC(kernel='linear', class_weight='balanced')
svm_classifier.fit(train_x, y_train)

print("model trained", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    


y_pred_svm = svm_classifier.predict(test_x)

print("classification done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    


print(classification_report(y_test, y_pred_svm)) 



start 18 : 20 : 28
split created 18 : 20 : 28
vectorization done 18 : 22 : 32
model trained 19 : 42 : 46
classification done 19 : 50 : 44
              precision    recall  f1-score   support

       False       0.94      0.96      0.95      6532
        True       0.94      0.92      0.93      4685

    accuracy                           0.94     11217
   macro avg       0.94      0.94      0.94     11217
weighted avg       0.94      0.94      0.94     11217



In [194]:
# 3-5 ngrams, CountVectorizer/TfidfTransformer/MultinomialNB model

print("start", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

text_clf = Pipeline([
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', MultinomialNB()),
])

text_clf.fit(X_train,y_train)

print("model trained", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    


y_pred = text_clf.predict(X_test)

print("classification done", time.localtime()[3], ":", time.localtime()[4], ":", time.localtime()[5])    

print(classification_report(y_test, y_pred))

start 2 : 40 : 58
model trained 2 : 41 : 5
classification done 2 : 41 : 7
              precision    recall  f1-score   support

       False       0.89      0.98      0.93      6532
        True       0.97      0.83      0.90      4685

    accuracy                           0.92     11217
   macro avg       0.93      0.91      0.92     11217
weighted avg       0.92      0.92      0.92     11217

