In [1]:
import sys
import os
project_path = os.path.abspath('/Enter/Your/Project/Path/Here')
sys.path.append(project_path)
from src.API.mongo_utils import get_mongo_client, get_collection
from src.API.constant import AI, HUMAN, REUTER_COLLECTION, GEMINI_COLLECTION, GEMINI_IMPROVED_COLLECTION, OPEN_AI_COLLECTION, OPEN_AI_IMPROVED_COLLECTION
# IMPORTANT: 0 is AI, 1 is Human
# Initialize the client
client = get_mongo_client()
# Fetch collections
human_collection = get_collection(REUTER_COLLECTION)
 
Gemini_collection = get_collection(GEMINI_COLLECTION)
Gemini_improved_collection = get_collection(GEMINI_IMPROVED_COLLECTION)
openAI_collection = get_collection(OPEN_AI_COLLECTION)
openAI_improved_collection = get_collection(OPEN_AI_IMPROVED_COLLECTION)

# Fetch documents from the collections
gemini = Gemini_collection.find()
gemini_improved = Gemini_improved_collection.find()
openAI = openAI_collection.find()
openAI_improved = openAI_improved_collection.find()
print("Making the list of Document")
AI_documents = [doc for cursor in [gemini, gemini_improved, openAI, openAI_improved] for doc in cursor]
human_documents = human_collection.find()

Successfully created MongoDB client.
Making the list of Document


In [2]:
import random
# Shuffle the combined documents
random.shuffle(AI_documents)

# Keep only the first 5000
AI_documents = AI_documents[:5000]

In [3]:
# Constant for the field names
EMBEDDING_CLUSTERS = 'embedding_clustering'
EMBEDDING_CLASSIFICATION = 'embedding_classification'

In [4]:
from src.API.text_classifier import create_text_data_list
# print update
print("Starting to create text data list")

# Create a list of TextData objects
print("Creating text data list for AI")
# Open AI: 99% accuracy
# Gemini : 99.45% accuracy
AI_texts = create_text_data_list(AI_documents, AI)
print("Creating text data list for Human")
human_texts = create_text_data_list(human_documents, HUMAN)
# Shuffle AI_texts
random.shuffle(AI_texts)

Starting to create text data list
Creating text data list for AI
Creating text data list for Human


In [5]:
print(len(AI_texts))
print(len(human_texts))

5000
5000


In [6]:
from sklearn.model_selection import train_test_split

# Split the list into training and testing
AI_train, AI_test = train_test_split(AI_texts, test_size=0.2, random_state=42)
human_train, human_test = train_test_split(human_texts, test_size=0.2, random_state=42)
training_data = AI_train + human_train
test_data = AI_test + human_test

In [7]:
# Print the length of the training and testing data
print("Training data length: ", len(training_data))
print("Testing data length: ", len(test_data))

Training data length:  8000
Testing data length:  2000


# **K-Mean Clustering**

In [8]:
from src.API.machine_learning import k_mean_cluster

n_clusters = 2
print("Training the KMeans model with classification embeddings")
classification_kmeans, classification_feature_array, classification_cluster_majorities = k_mean_cluster(training_data, EMBEDDING_CLASSIFICATION, 'text_type', n_clusters, algorithm="lloyd")

Training the KMeans model with classification embeddings
Feature Array Shape: (8000, 256)
Classification Report:
              precision    recall  f1-score   support

          AI     0.9853    0.5377    0.6958      4000
       HUMAN     0.6821    0.9920    0.8084      4000

    accuracy                         0.7649      8000
   macro avg     0.8337    0.7649    0.7521      8000
weighted avg     0.8337    0.7649    0.7521      8000



In [9]:
# print the vote dictionary for the first text data
print(training_data[0].votes)
print(training_data[0].text_type)

{'KMeans2_embedding_classification': 0}
0


In [10]:
from src.API.machine_learning import plot_latest_result, plot_actual_label, plot_actual_label_3d
%matplotlib notebook
# Print update
print("Visualizing the result")
# Visualize the result
plot_latest_result(training_data, 'KMeans2_embedding_classification', 'embedding_classification')
plot_actual_label(training_data, 'embedding_classification')
plot_actual_label_3d(training_data, 'embedding_classification')

Visualizing the result


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [11]:
print(classification_cluster_majorities)

{np.int32(0): 0, np.int32(1): 1}


In [12]:
from src.API.machine_learning import generate_classification_report


def calculate_accuracy_and_generate_report(data_list):
    """
    Calculate accuracy based on text_data votes and generate a classification report.
    
    :param data_list: List of text_data objects. Each must have a `classify()` method and a `text_type` attribute.
    """
    if data_list is None or len(data_list) == 0:
        print("The data list is empty")
        return

    true_labels = []
    predicted_labels = []
    correct = 0

    for text_data in data_list:
        pred = text_data.classify()
        true = text_data.text_type
        predicted_labels.append(pred)
        true_labels.append(true)
        if pred == true:
            correct += 1

    accuracy = correct / len(data_list)
    print(f"Accuracy: {accuracy}")

    generate_classification_report(true_labels, predicted_labels, target_names=["AI", "HUMAN"])


calculate_accuracy_and_generate_report(training_data)

Accuracy: 0.764875
Classification Report:
              precision    recall  f1-score   support

          AI     0.9853    0.5377    0.6958      4000
       HUMAN     0.6821    0.9920    0.8084      4000

    accuracy                         0.7649      8000
   macro avg     0.8337    0.7649    0.7521      8000
weighted avg     0.8337    0.7649    0.7521      8000



In [13]:
from src.API.machine_learning import k_mean_cluster_test

# Test the model
print("Testing the model")

result_array, cluster_label = k_mean_cluster_test(classification_kmeans, test_data, EMBEDDING_CLASSIFICATION, 'text_type', classification_cluster_majorities)
#result_array, cluster_label = k_mean_cluster_test(small_kmeans, test_data, EMBEDDING_SMALL, 'text_type', small_cluster_majorities)

# Visualize the result
plot_latest_result(test_data,'KMeans2_embedding_classification', EMBEDDING_CLASSIFICATION)
plot_actual_label(test_data, EMBEDDING_CLASSIFICATION)
plot_actual_label_3d(test_data, EMBEDDING_CLASSIFICATION)
# Calculate the accuracy using the vote
calculate_accuracy_and_generate_report(test_data)

Testing the model
Classification Report:
              precision    recall  f1-score   support

          AI     0.9765    0.5400    0.6954      1000
       HUMAN     0.6821    0.9870    0.8067      1000

    accuracy                         0.7635      2000
   macro avg     0.8293    0.7635    0.7511      2000
weighted avg     0.8293    0.7635    0.7511      2000



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Accuracy: 0.7635
Classification Report:
              precision    recall  f1-score   support

          AI     0.9765    0.5400    0.6954      1000
       HUMAN     0.6821    0.9870    0.8067      1000

    accuracy                         0.7635      2000
   macro avg     0.8293    0.7635    0.7511      2000
weighted avg     0.8293    0.7635    0.7511      2000



In [14]:
# Save the model 
from src.API.machine_learning import save_kmeans_model
save_kmeans_model(classification_kmeans, EMBEDDING_CLASSIFICATION, classification_cluster_majorities, "kmean.pkl")

Model saved to kmean.pkl


# **Hierarchical Clustering**

In [15]:
from src.API.machine_learning import hierarchical_cluster, hierarchical_cluster_test

n_clusters = 4
distance_threshold = 0.75
# Train the Hierarchical model
print("Training the Hierarchical model")
hierarchical_model, hierarchical_feature_array, hierarchical_cluster_majorities, hierarchical_centroids = hierarchical_cluster(training_data, EMBEDDING_CLASSIFICATION, 'text_type', n_clusters, distance_threshold=distance_threshold)

# Test the model
print("Testing the Hierarchical model")
result_array, cluster_label = hierarchical_cluster_test(hierarchical_model, test_data, EMBEDDING_CLASSIFICATION, 'text_type', hierarchical_cluster_majorities, hierarchical_centroids)

# Calculate the accuracy using the vote
calculate_accuracy_and_generate_report(test_data)
print(test_data[0].votes)

Training the Hierarchical model
Feature Array Shape: (8000, 256)
Classification Report:
              precision    recall  f1-score   support

          AI     0.6822    0.5765    0.6249      4000
       HUMAN     0.6333    0.7315    0.6789      4000

    accuracy                         0.6540      8000
   macro avg     0.6578    0.6540    0.6519      8000
weighted avg     0.6578    0.6540    0.6519      8000

Testing the Hierarchical model
Classification Report:
              precision    recall  f1-score   support

          AI     0.6846    0.5730    0.6238      1000
       HUMAN     0.6328    0.7360    0.6805      1000

    accuracy                         0.6545      2000
   macro avg     0.6587    0.6545    0.6522      2000
weighted avg     0.6587    0.6545    0.6522      2000

Accuracy: 0.7635
Classification Report:
              precision    recall  f1-score   support

          AI     0.9765    0.5400    0.6954      1000
       HUMAN     0.6821    0.9870    0.8067      1000



In [16]:
# Save the hierarchical model 
from src.API.machine_learning import save_hierarchical_model
save_hierarchical_model(hierarchical_model, EMBEDDING_CLASSIFICATION, hierarchical_cluster_majorities, hierarchical_centroids, "hierarchical.pkl")

Model saved to hierarchical.pkl


# **Auto-Encoder**

In [17]:
# Create the dataloader
from src.API.machine_learning import create_dataloader
field = EMBEDDING_CLASSIFICATION #94% accuracy
# field = EMBEDDING_CLUSTERS 89% accuracy
human_dataloader = create_dataloader(human_train, field, 64)
AI_dataloader = create_dataloader(AI_train, field, 64)

In [18]:
# Initialize the model
# fetch the field size using the field variable
input_dim = len(getattr(human_texts[0], field))  # Size of the embeddings
print(input_dim)
latent_dim = 128  # Dimensionality of the latent space

# Create 2 encoders
from src.API.machine_learning import create_autoencoder
h_encoder, h_criterion, h_optimizer = create_autoencoder(input_dim, latent_dim, 0.001)
AI_encoder, AI_criterion, AI_optimizer = create_autoencoder(input_dim, latent_dim, 0.001)

256


In [19]:
# Train human encoder
from src.API.machine_learning import train_autoencoder
print("Training human encoder")
train_autoencoder(h_encoder, h_criterion, h_optimizer, human_dataloader, 20)

# Train AI encoder
print("Training AI encoder")
train_autoencoder(AI_encoder, AI_criterion, AI_optimizer, AI_dataloader, 20)

Training human encoder
Epoch 1, Loss: 0.015925511805107817
Epoch 2, Loss: 0.009860814490821213
Epoch 3, Loss: 0.007939218099636491
Epoch 4, Loss: 0.006718989978253376
Epoch 5, Loss: 0.005820985941682011
Epoch 6, Loss: 0.005142352463735733
Epoch 7, Loss: 0.004616583311872091
Epoch 8, Loss: 0.004223716343403794
Epoch 9, Loss: 0.003876814243994886
Epoch 10, Loss: 0.0035735026067413855
Epoch 11, Loss: 0.0033268861661781557
Epoch 12, Loss: 0.0031198731776385102
Epoch 13, Loss: 0.002930946371634491
Epoch 14, Loss: 0.002784107422485249
Epoch 15, Loss: 0.002633734002301935
Epoch 16, Loss: 0.0024982943941722624
Epoch 17, Loss: 0.002391658774286043
Epoch 18, Loss: 0.00229583251348231
Epoch 19, Loss: 0.002187485915783327
Epoch 20, Loss: 0.0021131530120328534
Training AI encoder
Epoch 1, Loss: 0.020885820777039044
Epoch 2, Loss: 0.012495819901232608
Epoch 3, Loss: 0.009891793728456832
Epoch 4, Loss: 0.008231364168750588
Epoch 5, Loss: 0.007076433619658928
Epoch 6, Loss: 0.006224478274816647
Epoch 

In [20]:
import numpy as np
# Test the encoder
encoder_test_set = np.concatenate((human_test, AI_test))

# Create a list x that contains the embeddings and a list y that contains the labels
x = []
y = []
for text_data in encoder_test_set:
    x.append(getattr(text_data, field))
    y.append(text_data.text_type)

In [21]:
import torch
# Convert the test data to tensor
X_test_tensor = torch.tensor(x, dtype=torch.float32)

In [22]:
# Evaluate each test embedding
method_name = "Auto-Encoder"+"_"+field
predictions = []
with torch.no_grad():
    for idx, embedding in enumerate(X_test_tensor):
        # Get reconstruction errors
        reconstructed_human = h_encoder(embedding)
        error_human = h_criterion(reconstructed_human, embedding).item()
        
        reconstructed_ai = AI_encoder(embedding)
        error_ai = AI_criterion(reconstructed_ai, embedding).item()
        
        # Classify based on reconstruction error
        if error_human < error_ai:
            classification = HUMAN  # Classified as Human-written
            predictions.append(HUMAN)
        else:
            classification = AI  # Classified as AI-generated
            predictions.append(AI)
        # Add the vote
        encoder_test_set[idx].add_vote(method_name, classification)

In [23]:
from sklearn.metrics import classification_report

# Calculate the accuracy using the vote
print("Vote Accuracy\n")
calculate_accuracy_and_generate_report(encoder_test_set)
# Convert predictions to a NumPy array
predictions = np.array(predictions)

# Print evaluation metrics
print("Auto_encoder Accuracy\n")
print(classification_report(y, predictions, target_names=["Human", "AI"]))

Vote Accuracy

Accuracy: 0.7635
Classification Report:
              precision    recall  f1-score   support

          AI     0.9800    0.5380    0.6946      1000
       HUMAN     0.6816    0.9890    0.8070      1000

    accuracy                         0.7635      2000
   macro avg     0.8308    0.7635    0.7508      2000
weighted avg     0.8308    0.7635    0.7508      2000

Auto_encoder Accuracy

              precision    recall  f1-score   support

       Human       0.71      0.96      0.82      1000
          AI       0.94      0.61      0.74      1000

    accuracy                           0.78      2000
   macro avg       0.82      0.78      0.78      2000
weighted avg       0.82      0.78      0.78      2000



In [24]:
# Save Auto-Encoders
from src.API.machine_learning import save_autoencoders
save_autoencoders(h_encoder, AI_encoder, h_optimizer, AI_optimizer, input_dim, latent_dim, filename="autoencoders.pth")

Autoencoders and metadata saved to autoencoders.pth


In [25]:
# Visualize the result
print("method_name: "+method_name+ " field: "+field)
plot_latest_result(encoder_test_set, method_name, field)
plot_actual_label(encoder_test_set, field)

method_name: Auto-Encoder_embedding_classification field: embedding_classification


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
def is_wrongly_classified(text_data):
    return text_data.classify() != text_data.text_type

In [27]:
inaccurate = 0
for text_data in encoder_test_set:
    if is_wrongly_classified(text_data):
        print('*'*50)
        inaccurate += 1
        print(text_data.id)
        print(text_data.text_type)
        print(text_data.votes)
        print('*'*50)
print(inaccurate)

**************************************************
CS297/dataset/reuter+50+50/C50train/SarahDavison/386644newsML.txt
1
{'KMeans2_embedding_classification': 0, 'Agglomerate231_embedding_classification': 0, 'Auto-Encoder_embedding_classification': 0}
**************************************************
**************************************************
CS297/dataset/reuter+50+50/C50train/HeatherScoffield/298748newsML.txt
1
{'KMeans2_embedding_classification': 0, 'Agglomerate231_embedding_classification': 0, 'Auto-Encoder_embedding_classification': 0}
**************************************************
**************************************************
CS297/dataset/reuter+50+50/C50test/KevinMorrison/31244newsML.txt
1
{'KMeans2_embedding_classification': 0, 'Agglomerate231_embedding_classification': 1, 'Auto-Encoder_embedding_classification': 0}
**************************************************
**************************************************
CS297/dataset/reuter+50+50/C50test/LynnleyBrow

**Simple POS Transition Matrix**

In [28]:
%load_ext autoreload
%autoreload 2

In [29]:
from src.API.machine_learning import compute_transition_matrix, evaluate_sequence

# Create mappings from pos_tags (as tuples) to text_data objects
human_pos_to_text_data = {tuple(text_data.pos_tags): text_data for text_data in human_train + human_test}
AI_pos_to_text_data = {tuple(text_data.pos_tags): text_data for text_data in AI_train + AI_test}


# fetch the attribute pos_tags for all the text_data objects
human_train_pos = [text_data.pos_tags for text_data in human_train]
AI_train_pos = [text_data.pos_tags for text_data in AI_train]
human_test_pos = [text_data.pos_tags for text_data in human_test]
AI_test_pos = [text_data.pos_tags for text_data in AI_test]

# Compute the transition matrix
human_transition_matrix = compute_transition_matrix(human_train_pos)
AI_transition_matrix = compute_transition_matrix(AI_train_pos)

# Evaluate the test sequences using the transition matrices
human_normalized_log_likelihoods = [evaluate_sequence(seq, human_transition_matrix) for seq in human_test_pos]
human_Ai_normalized_log_likelihoods = [evaluate_sequence(seq, AI_transition_matrix) for seq in human_test_pos]
AI_normalized_log_likelihoods = [evaluate_sequence(seq, AI_transition_matrix) for seq in AI_test_pos]
Ai_human_normalized_log_likelihoods = [evaluate_sequence(seq, human_transition_matrix) for seq in AI_test_pos]


In [30]:
# Compare human test sequences
human_comparisons = [
    (human_ll, ai_ll, HUMAN if human_ll > ai_ll else AI)
    for human_ll, ai_ll in zip(human_normalized_log_likelihoods, human_Ai_normalized_log_likelihoods)
]

# Compare AI test sequences
ai_comparisons = [
    (ai_ll, human_ll, AI if ai_ll > human_ll else HUMAN)
    for ai_ll, human_ll in zip(AI_normalized_log_likelihoods, Ai_human_normalized_log_likelihoods)
]

In [31]:
# Assign votes to human test text data
method_name = "Simple_POS_Transition_Matrix"
for pos_tags, (human_ll, ai_ll, vote) in zip(human_test_pos, human_comparisons):
    text_data = human_pos_to_text_data[tuple(pos_tags)]
    text_data.add_vote(method_name, vote)

# Assign votes to AI test text data
for pos_tags, (ai_ll, human_ll, vote) in zip(AI_test_pos, ai_comparisons):
    text_data = AI_pos_to_text_data[tuple(pos_tags)]
    text_data.add_vote(method_name, vote)


In [32]:
inaccurate = 0
for text_data in encoder_test_set:
    if is_wrongly_classified(text_data):
        print('*'*50)
        inaccurate += 1
        print(text_data.id)
        print(text_data.votes)
print("Inaccurate = " + str(inaccurate))
# Calculate the accuracy using the vote
calculate_accuracy_and_generate_report(encoder_test_set)

**************************************************
CS297/dataset/reuter+50+50/C50train/SarahDavison/386644newsML.txt
{'KMeans2_embedding_classification': 0, 'Agglomerate231_embedding_classification': 0, 'Auto-Encoder_embedding_classification': 0, 'Simple_POS_Transition_Matrix': 1}
**************************************************
CS297/dataset/reuter+50+50/C50train/HeatherScoffield/298748newsML.txt
{'KMeans2_embedding_classification': 0, 'Agglomerate231_embedding_classification': 0, 'Auto-Encoder_embedding_classification': 0}
**************************************************
CS297/dataset/reuter+50+50/C50train/SarahDavison/428346newsML.txt
{'KMeans2_embedding_classification': 1, 'Agglomerate231_embedding_classification': 0, 'Auto-Encoder_embedding_classification': 0, 'Simple_POS_Transition_Matrix': 0}
**************************************************
CS297/dataset/reuter+50+50/C50train/FumikoFujisaki/208614newsML.txt
{'KMeans2_embedding_classification': 1, 'Agglomerate231_embedding_

In [33]:
# Calculate the accuracy of the predictions making sure to use float division
human_accuracy = sum(1.0 for _, _, predicted in human_comparisons if predicted == HUMAN) / len(human_comparisons)
ai_accuracy = sum(1.0 for _, _, predicted in ai_comparisons if predicted == AI) / len(ai_comparisons)

# Calculate the number of times human is predicted as AI and vice versa
human_accuracy_wrong = sum(1 for _, _, predicted in human_comparisons if predicted == AI)
ai_accuracy_wrong = sum(1 for _, _, predicted in ai_comparisons if predicted == HUMAN)

In [34]:
# Print the results
print("Human accuracy:", human_accuracy)
print("AI accuracy:", ai_accuracy)
print("Number of time Human is predicted as AI:", human_accuracy_wrong)
print("Number of time AI is predicted as Human:", ai_accuracy_wrong)

Human accuracy: 0.991
AI accuracy: 0.874
Number of time Human is predicted as AI: 9
Number of time AI is predicted as Human: 126


In [35]:
# Compute the confusion matrix components
TP_human = sum(1 for _, _, predicted in human_comparisons if predicted == HUMAN)  # Correctly predicted as Human
FN_human = sum(1 for _, _, predicted in human_comparisons if predicted == AI)     # Incorrectly predicted as AI

TP_ai = sum(1 for _, _, predicted in ai_comparisons if predicted == AI)          # Correctly predicted as AI
FN_ai = sum(1 for _, _, predicted in ai_comparisons if predicted == HUMAN)       # Incorrectly predicted as Human

FP_human = FN_ai  # AI misclassified as Human
FP_ai = FN_human  # Human misclassified as AI

# Calculate Precision
precision_human = TP_human / (TP_human + FP_human) if (TP_human + FP_human) > 0 else 0
precision_ai = TP_ai / (TP_ai + FP_ai) if (TP_ai + FP_ai) > 0 else 0

# Calculate Recall
recall_human = TP_human / (TP_human + FN_human) if (TP_human + FN_human) > 0 else 0
recall_ai = TP_ai / (TP_ai + FN_ai) if (TP_ai + FN_ai) > 0 else 0

# Calculate F1-score
f1_human = 2 * (precision_human * recall_human) / (precision_human + recall_human) if (precision_human + recall_human) > 0 else 0
f1_ai = 2 * (precision_ai * recall_ai) / (precision_ai + recall_ai) if (precision_ai + recall_ai) > 0 else 0

# Calculate Overall Accuracy
total_samples = len(human_comparisons) + len(ai_comparisons)
accuracy = (TP_human + TP_ai) / total_samples if total_samples > 0 else 0

# Display results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (Human): {precision_human:.4f}")
print(f"Recall (Human): {recall_human:.4f}")
print(f"F1-score (Human): {f1_human:.4f}")
print(f"Precision (AI): {precision_ai:.4f}")
print(f"Recall (AI): {recall_ai:.4f}")
print(f"F1-score (AI): {f1_ai:.4f}")

Accuracy: 0.9325
Precision (Human): 0.8872
Recall (Human): 0.9910
F1-score (Human): 0.9362
Precision (AI): 0.9898
Recall (AI): 0.8740
F1-score (AI): 0.9283


In [36]:
# Save POS Matrix
from src.API.machine_learning import save_transition_matrix
save_transition_matrix(human_transition_matrix, "human_matrix.pkl")
save_transition_matrix(AI_transition_matrix, "AI_matrix.pkl")

Transition matrix saved to human_matrix.pkl
Transition matrix saved to AI_matrix.pkl


In [37]:
calculate_accuracy_and_generate_report(test_data)

Accuracy: 0.924
Classification Report:
              precision    recall  f1-score   support

          AI     0.9896    0.8570    0.9185      1000
       HUMAN     0.8739    0.9910    0.9288      1000

    accuracy                         0.9240      2000
   macro avg     0.9318    0.9240    0.9237      2000
weighted avg     0.9318    0.9240    0.9237      2000

