<a href="https://colab.research.google.com/github/swagatpati8/AI-detector-Experiment/blob/main/AI_detector_algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

# Load Data
train_df = pd.read_csv('/content/drive/MyDrive/Training_data.csv')
test_df = pd.read_csv('/content/drive/MyDrive/Test_data.csv')

# P-Hacked Heuristic: Counts occurrences of the word 'the'
def count_the(text):
    return text.lower().split().count('the')

# Apply heuristic to both datasets
train_df['The_Score'] = train_df['Document'].apply(count_the)
test_df['The_Score'] = test_df['Document'].apply(count_the)

# Function to Extract Scores for Human and AI
def get_scores(df, score_column):
    human_scores = df[df['Human OR AI'] == 'Human'][score_column]
    ai_scores = df[df['Human OR AI'] == 'AI'][score_column]
    return human_scores, ai_scores

# Extract scores
train_human, train_ai = get_scores(train_df, 'The_Score')
test_human, test_ai = get_scores(test_df, 'The_Score')

# Statistical Significance Tests (t-test)
train_t_stat, train_p_value = ttest_ind(train_human, train_ai, equal_var=False)
test_t_stat, test_p_value = ttest_ind(test_human, test_ai, equal_var=False)

# Display t-test Results
print("True P-Hacked Algorithm Evaluation (Word 'the')")
print(f"Training Data: t-stat={train_t_stat:.4f}, p-value={train_p_value:.4f}")
print(f"Test Data: t-stat={test_t_stat:.4f}, p-value={test_p_value:.4f}")

# Determine Threshold: Midpoint Between Human and AI Means
threshold_the_train = (train_human.mean() + train_ai.mean()) / 2
print(f"\nThreshold for 'the' (from training): {threshold_the_train:.4f}")

# Confusion Matrix Calculation Function (at 50% prevalence initially)
def build_confusion_matrix(df, score_column, threshold):
    df['Predicted AI'] = df[score_column] > threshold
    tp = np.sum((df['Predicted AI'] == True) & (df['Human OR AI'] == 'AI'))
    tn = np.sum((df['Predicted AI'] == False) & (df['Human OR AI'] == 'Human'))
    fp = np.sum((df['Predicted AI'] == True) & (df['Human OR AI'] == 'Human'))
    fn = np.sum((df['Predicted AI'] == False) & (df['Human OR AI'] == 'AI'))
    return {'TP': tp, 'TN': tn, 'FP': fp, 'FN': fn}

train_conf_matrix = build_confusion_matrix(train_df, 'The_Score', threshold_the_train)
test_conf_matrix = build_confusion_matrix(test_df, 'The_Score', threshold_the_train)

print("\nConfusion Matrix on Training Data (50% prevalence):", train_conf_matrix)
print("Confusion Matrix on Test Data (50% prevalence):", test_conf_matrix)

# Now run the prevalence-based confusion matrix code
def build_confusion_matrix_prevalence(df, score_column, threshold, ai_prevalence):
    num_total = len(df)
    num_ai = int(num_total * ai_prevalence)
    df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
    simulated_labels = ['AI'] * num_ai + ['Human'] * (num_total - num_ai)
    simulated_df = df_shuffled.copy()
    simulated_df['Simulated_Label'] = simulated_labels[:num_total]
    simulated_df['Predicted AI'] = simulated_df[score_column] > threshold

    tp = np.sum((simulated_df['Predicted AI'] == True) & (simulated_df['Simulated_Label'] == 'AI'))
    tn = np.sum((simulated_df['Predicted AI'] == False) & (simulated_df['Simulated_Label'] == 'Human'))
    fp = np.sum((simulated_df['Predicted AI'] == True) & (simulated_df['Simulated_Label'] == 'Human'))
    fn = np.sum((simulated_df['Predicted AI'] == False) & (simulated_df['Simulated_Label'] == 'AI'))

    return {'TP': tp, 'TN': tn, 'FP': fp, 'FN': fn}

prevalences = [0.5, 0.25, 0.06125]
prevalence_names = ["50%", "25%", "6.125%"]

print("\nConfusion Matrices for Training Data ('the' heuristic) at different prevalences:")
for prev, name in zip(prevalences, prevalence_names):
    cm_train = build_confusion_matrix_prevalence(train_df.copy(), 'The_Score', threshold_the_train, prev)
    print(f"Prevalence ({name}): {cm_train}")

print("\nConfusion Matrices for Test Data ('the' heuristic) at different prevalences:")
for prev, name in zip(prevalences, prevalence_names):
    cm_test = build_confusion_matrix_prevalence(test_df.copy(), 'The_Score', threshold_the_train, prev)
    print(f"Prevalence ({name}): {cm_test}")

True P-Hacked Algorithm Evaluation (Word 'the')
Training Data: t-stat=1.4904, p-value=0.1647
Test Data: t-stat=0.5382, p-value=0.6172

Threshold for 'the' (from training): 10.6875

Confusion Matrix on Training Data (50% prevalence): {'TP': np.int64(4), 'TN': np.int64(2), 'FP': np.int64(6), 'FN': np.int64(4)}
Confusion Matrix on Test Data (50% prevalence): {'TP': np.int64(2), 'TN': np.int64(2), 'FP': np.int64(2), 'FN': np.int64(2)}

Confusion Matrices for Training Data ('the' heuristic) at different prevalences:
Prevalence (50%): {'TP': np.int64(5), 'TN': np.int64(3), 'FP': np.int64(5), 'FN': np.int64(3)}
Prevalence (25%): {'TP': np.int64(2), 'TN': np.int64(4), 'FP': np.int64(8), 'FN': np.int64(2)}
Prevalence (6.125%): {'TP': np.int64(0), 'TN': np.int64(6), 'FP': np.int64(10), 'FN': np.int64(0)}

Confusion Matrices for Test Data ('the' heuristic) at different prevalences:
Prevalence (50%): {'TP': np.int64(3), 'TN': np.int64(3), 'FP': np.int64(1), 'FN': np.int64(1)}
Prevalence (25%): {'T

In [20]:
import numpy as np
from scipy.stats import ttest_ind
import pandas as pd

# Create the alternating labels and ZeroGPT scores manually
labels = ['Human', 'AI'] * 8  # 16 items, alternating
zerogpt_scores = [
    0.00, 1.00,  # 1. Human, 2. AI
    0.00, 1.00,  # 3. Human, 4. AI
    0.00, 0.70,  # 5. Human, 6. AI
    0.00, 1.00,  # 7. Human, 8. AI
    0.00, 0.25,  # 9. Human, 10. AI (FN)
    0.45, 1.00,  # 11. Human (FP), 12. AI
    0.00, 0.21,  # 13. Human, 14. AI (FN)
    0.00, 0.50   # 15. Human, 16. AI
]

# Create DataFrame
df_train = pd.DataFrame({'Label': labels, 'Score': zerogpt_scores})

# Split into human and AI groups
human_scores = df_train[df_train['Label'] == 'Human']['Score']
ai_scores = df_train[df_train['Label'] == 'AI']['Score']

# T-test
t_stat, p_val = ttest_ind(human_scores, ai_scores, equal_var=False)

# Determine threshold (midpoint between means)
threshold = (human_scores.mean() + ai_scores.mean()) / 2

# Confusion matrix at 50% prevalence
df_train['Predicted'] = df_train['Score'] > threshold
tp = np.sum((df_train['Predicted'] == True) & (df_train['Label'] == 'AI'))
tn = np.sum((df_train['Predicted'] == False) & (df_train['Label'] == 'Human'))
fp = np.sum((df_train['Predicted'] == True) & (df_train['Label'] == 'Human'))
fn = np.sum((df_train['Predicted'] == False) & (df_train['Label'] == 'AI'))

conf_matrix = {'TP': tp, 'TN': tn, 'FP': fp, 'FN': fn}

print(f"T-statistic (Training): {t_stat:.4f}")
print(f"P-value (Training): {p_val:.4f}")
print(f"Threshold (Training): {threshold:.4f}")
print("Confusion Matrix (Training):", conf_matrix)

T-statistic (Training): -4.8271
P-value (Training): 0.0007
Threshold (Training): 0.3819
Confusion Matrix (Training): {'TP': np.int64(6), 'TN': np.int64(7), 'FP': np.int64(1), 'FN': np.int64(2)}


In [11]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

# Manually input the ZeroGPT scores for the test data
labels_test = ['Human', 'AI', 'Human', 'AI', 'Human', 'AI', 'Human', 'AI']  # Alternating labels
zerogpt_scores_test = [
    0.00, 78.53,  # Human, AI
    0.00, 27.00,  # Human, AI (Weird case)
    0.00, 100.00,  # Human, AI
    0.00, 100.00   # Human, AI
]

# Create a DataFrame for the test data
df_test = pd.DataFrame({'Label': labels_test, 'Score': zerogpt_scores_test})

# Split into human and AI groups
human_scores_test = df_test[df_test['Label'] == 'Human']['Score']
ai_scores_test = df_test[df_test['Label'] == 'AI']['Score']


# T-test to check if there’s a statistically significant difference between Human and AI scores
t_stat_test, p_val_test = ttest_ind(human_scores_test, ai_scores_test, equal_var=False)

print(f"T-statistic (Test): {t_stat_test:.4f}")
print(f"P-value (Test): {p_val_test:.4f}")


# Calculate the threshold based on the mean of both Human and AI scores
threshold_test = (human_scores_test.mean() + ai_scores_test.mean()) / 2

print(f"Threshold (Test): {threshold_test:.4f}")


# Apply threshold to predict whether AI or Human
df_test['Predicted'] = df_test['Score'] > threshold_test

# Confusion matrix
tp_test = np.sum((df_test['Predicted'] == True) & (df_test['Label'] == 'AI'))
tn_test = np.sum((df_test['Predicted'] == False) & (df_test['Label'] == 'Human'))
fp_test = np.sum((df_test['Predicted'] == True) & (df_test['Label'] == 'Human'))
fn_test = np.sum((df_test['Predicted'] == False) & (df_test['Label'] == 'AI'))

conf_matrix_test = {'TP': tp_test, 'TN': tn_test, 'FP': fp_test, 'FN': fn_test}

print("Confusion Matrix (Test):", conf_matrix_test)

T-statistic (Test): -4.4354
P-value (Test): 0.0213
Threshold (Test): 38.1912
Confusion Matrix (Test): {'TP': np.int64(3), 'TN': np.int64(4), 'FP': np.int64(0), 'FN': np.int64(1)}
