<a href="https://colab.research.google.com/github/mysertkaya/AI-Generated-Text-Detection/blob/main/AI_Generated_Text_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Probability Model

In [1]:
%%capture
# Environment setup: install Unsloth straight from its GitHub repository
# (with the "colab-new" extra), then the remaining packages used below.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Pick the xformers requirement from the installed Torch version: Torch older
# than 2.4.0 gets the pinned xformers==0.0.27, otherwise xformers is unpinned.
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
# {xformers} is interpolated by IPython from the Python variable above;
# --no-deps tells pip not to install the dependencies of the listed packages.
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton
# scipy provides scipy.stats.norm, which the scoring cell imports.
!pip install scipy

In [2]:
%%capture
from unsloth import FastLanguageModel
import torch
# Settings forwarded unchanged to FastLanguageModel.from_pretrained below.
max_seq_length = 2048  # presumably the maximum context length in tokens — verify against the Unsloth docs
dtype = None  # no explicit dtype requested; NOTE(review): presumably lets Unsloth choose one — confirm
load_in_4bit = True  # the checkpoint name below also ends in "bnb-4bit"

# Load the scoring language model and its tokenizer. Both are module-level
# globals that scoreText passes to calculate_rank_and_probability.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# Paraphraser

In [3]:
import re
import random
import numpy as np
import itertools
from multiprocessing.pool import ThreadPool
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration

# T5-large is the mask-filling model used by unmasker; right after loading it
# is moved to the CUDA device and converted to half precision.
t5_model = transformers.AutoModelForSeq2SeqLM.from_pretrained("t5-large").to("cuda").half()
# Matching tokenizer, created with model_max_length set to 512.
t5_tokenizer = T5Tokenizer.from_pretrained("t5-large", model_max_length=512)
def apply_extracted_fills(masked_texts, extracted_fills):
    """Substitute generated fills for the ``<extra_id_N>`` sentinels of each text.

    Sentinels are replaced positionally: the i-th sentinel found in a text
    receives ``fills[i]``, whatever number the sentinel itself carries.

    Args:
        masked_texts: Iterable of strings containing ``<extra_id_N>`` sentinels.
        extracted_fills: Iterable of lists of replacement strings, paired with
            ``masked_texts`` by position.

    Returns:
        A list with the filled-in texts. A text whose fill list is shorter
        than its number of sentinels is skipped, so the result may be shorter
        than ``masked_texts``. Surplus fills are ignored.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    sentinel = re.compile(r"<extra_id_\d+>")

    texts = []
    for text, fills in zip(masked_texts, extracted_fills):
        matches = list(sentinel.finditer(text))
        if len(fills) < len(matches):
            # Not enough fills to complete this text; drop it.
            continue

        # Rebuild the text from the untouched stretches between sentinels,
        # interleaved with the fills.
        pieces = []
        cursor = 0
        for match, fill in zip(matches, fills):
            start, end = match.span()
            pieces.append(text[cursor:start])
            pieces.append(fill)
            cursor = end
        pieces.append(text[cursor:])
        texts.append("".join(pieces))

    return texts

def unmasker(text, num_of_masks):
    """Fill the ``<extra_id_N>`` sentinels of a batch of texts with sampled T5 output.

    Args:
        text: List of masked strings (one batch for the T5 tokenizer).
        num_of_masks: Non-empty iterable with the number of masks in each text.

    Returns:
        The list produced by ``apply_extracted_fills``: the input texts with
        their sentinels replaced by generated fills. Texts for which too few
        fills were generated are omitted.
    """
    # Masks are numbered from 0, so the sentinel numbered max(num_of_masks) is
    # the first one no text uses; it is passed to generate() as the EOS token.
    max_masks = max(num_of_masks)
    stop_id = t5_tokenizer.encode(f"<extra_id_{max_masks}>")[0]

    tokens = t5_tokenizer(text, return_tensors="pt", padding=True)
    for key in tokens:
        tokens[key] = tokens[key].to("cuda")

    output_sequences = t5_model.generate(**tokens, max_length=512, do_sample=True, top_p=0.96, num_return_sequences=1, eos_token_id=stop_id)
    results = t5_tokenizer.batch_decode(output_sequences, skip_special_tokens=False)

    # Strip padding / end-of-sequence markers but keep the sentinels, which
    # delimit the individual fills.
    texts = [x.replace("<pad>", "").replace("</s>", "").strip() for x in results]

    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    pattern = re.compile(r"<extra_id_\d+>")
    # Splitting on sentinels yields the fills; the first and last pieces (text
    # before the first sentinel and after the last one) are discarded.
    extracted_fills = [pattern.split(x)[1:-1] for x in texts]
    extracted_fills = [[y.strip() for y in x] for x in extracted_fills]

    perturbed_texts = apply_extracted_fills(text, extracted_fills)

    return perturbed_texts

def replaceMask(text, num_of_masks):
    """Run ``unmasker`` on the masked texts with gradient tracking disabled.

    Returns whatever ``unmasker`` returns for the same arguments.
    """
    with torch.no_grad():
        return unmasker(text, num_of_masks)

def isSame(text1, text2):
    """Return the result of comparing ``text1`` and ``text2`` with ``==``."""
    are_equal = text1 == text2
    return are_equal

def maskRandomWord(text, ratio):
    """Replace random two-token spans of ``text`` with T5 sentinel tokens.

    The text is split on single spaces. Spans of two tokens are drawn at
    random with the global NumPy RNG and replaced by one mask each; a draw is
    rejected when a mask already sits inside the span or directly next to it.
    Masks are finally numbered left to right as ``<extra_id_0>``,
    ``<extra_id_1>``, ...

    Args:
        text: The text to mask.
        ratio: Word budget for masking (the caller passes 30% of the word
            count); ``ratio // 4`` spans are requested.

    Returns:
        ``(masked_text, n_masks)``. ``n_masks`` can be lower than the number
        of requested spans when the text is too short to host them all; a text
        of two tokens or fewer is returned unchanged with ``n_masks == 0``.
    """
    span = 2
    mask_string = '<<<mask>>>'
    tokens = text.split(' ')

    n_spans = ratio // (span + 2)

    # Rejection sampling never finishes once every remaining position touches
    # an existing mask, so the number of draws is bounded.
    max_attempts = 100 * n_spans
    attempts = 0
    n_masks = 0
    while n_masks < n_spans and attempts < max_attempts:
        if len(tokens) <= span:
            # randint(0, len(tokens) - span) would raise on an empty range.
            break
        attempts += 1
        start = np.random.randint(0, len(tokens) - span)
        end = start + span
        # Look one token beyond the span on both sides.
        search_start = max(0, start - 1)
        search_end = min(len(tokens), end + 1)
        if mask_string not in tokens[search_start:search_end]:
            tokens[start:end] = [mask_string]
            n_masks += 1

    # replace each occurrence of mask_string with <extra_id_NUM>, where NUM increments
    num_filled = 0
    for idx, token in enumerate(tokens):
        if token == mask_string:
            tokens[idx] = f'<extra_id_{num_filled}>'
            num_filled += 1
    assert num_filled == n_masks, f"num_filled {num_filled} != n_masks {n_masks}"
    text = ' '.join(tokens)
    return text, n_masks

def multiMaskRandomWord(text, ratio, n):
    """Produce ``n`` independently masked variants of ``text``.

    Calls ``maskRandomWord(text, ratio)`` ``n`` times and returns two parallel
    lists: the masked texts and the number of masks in each of them.
    """
    masked_pairs = [maskRandomWord(text, ratio) for _ in range(n)]
    mask_texts = [masked for masked, _ in masked_pairs]
    list_num_of_masks = [count for _, count in masked_pairs]
    return mask_texts, list_num_of_masks

def getGeneratedTexts(args):
    """Create one batch of perturbed versions of a text (ThreadPool worker).

    Args:
        args: Sequence whose first item is the original text and whose second
            item is ``n``, the number of masked variants to generate. Packed
            into one argument because ``pool.map`` passes a single value.

    Returns:
        The list of perturbed texts returned by ``replaceMask``; it can hold
        fewer than ``n`` entries.
    """
    original_text = args[0]
    n = args[1]

    # Count runs of word characters that are not digits. Raw string: "\d" and
    # "\W" in a plain literal are invalid escape sequences.
    word_count = len(re.findall(r"[^\d\W]+", original_text))
    # Masking budget: 30% of the words, truncated to an integer.
    ratio = int(0.3 * word_count)

    mask_texts, list_num_of_masks = multiMaskRandomWord(original_text, ratio, n)
    list_generated_sentences = replaceMask(mask_texts, list_num_of_masks)
    return list_generated_sentences

def mask(original_text, n=2, remaining=100):
    """Generate perturbed versions of ``original_text`` by mask-and-refill.

    The work is split into ``remaining // n`` batches of ``n`` masked variants
    each; every batch is handled by ``getGeneratedTexts`` on its own thread.

    Args:
        original_text: The text to perturb.
        n: Number of masked variants generated per batch.
        remaining: Total number of perturbed texts requested.

    Returns:
        A flat list of perturbed texts. It is empty when ``remaining`` is not
        positive or smaller than ``n`` (no complete batch), and it can hold
        fewer than ``remaining`` items because incompletely filled texts are
        dropped further down the pipeline.
    """
    if remaining <= 0:
        return []

    n_batches = remaining // n
    if n_batches <= 0:
        # ThreadPool refuses to start with zero workers.
        return []

    # NOTE(review): with more than one batch the worker threads share the
    # global NumPy RNG, so these seeds may not make the output reproducible.
    torch.manual_seed(0)
    np.random.seed(0)

    jobs = [(original_text, n)] * n_batches
    # The context manager shuts the worker threads down once map() returned.
    with ThreadPool(n_batches) as pool:
        batches = pool.map(getGeneratedTexts, jobs)

    return list(itertools.chain.from_iterable(batches))

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.95G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

# Scoring Algorithm

In [4]:
import torch
import torch.nn.functional as F

def calculate_rank_and_probability(model, tokenizer, text, window=512, stride=50):
    """Return the average per-token log-likelihood of ``text`` under ``model``.

    Despite its name the function computes no ranks. The token sequence is
    scored with a sliding window: each window holds at most ``window`` tokens
    and starts ``stride`` tokens after the previous one. Inside a window only
    the tokens not covered by an earlier window keep their labels; the rest are
    set to -100 so they serve as context only.

    Args:
        model: Causal language model called as ``model(input_ids, labels=...)``
            whose output exposes ``.loss``; must provide ``.device``.
        tokenizer: Tokenizer called as ``tokenizer(text, return_tensors="pt")``.
        text: The text to score.
        window: Maximum number of tokens per forward pass.
        stride: Offset between consecutive window starts; must not exceed
            ``window``, otherwise tokens would be skipped.

    Returns:
        A scalar tensor: minus the summed window losses (each weighted by its
        number of newly scored tokens) divided by the total token count.

    Raises:
        ValueError: If ``stride`` is not positive or larger than ``window``,
            or if the tokenizer yields no tokens.
    """
    if stride <= 0 or window < stride:
        raise ValueError("stride must be positive and no larger than window")

    # The encoding is moved to the model's device once; window slices are
    # views of it, so no further (hard-coded) device transfer is needed.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    input_ids = inputs.input_ids
    inputs_len = input_ids.size(1)
    if inputs_len == 0:
        raise ValueError("text produced no tokens to score")

    all_negative_logs = []
    last_end_pos = 0

    for start_pos in range(0, inputs_len, stride):
        end_pos = min(start_pos + window, inputs_len)
        # Number of tokens this window scores for the first time.
        difference = end_pos - last_end_pos
        new_input_ids = input_ids[:, start_pos:end_pos]
        target_ids = new_input_ids.clone()
        # Everything except the trailing `difference` tokens is context only.
        target_ids[:, :-difference] = -100

        with torch.no_grad():
            outputs = model(
                new_input_ids,
                labels=target_ids,
            )
            # The loss is treated as a mean over the newly scored tokens, so
            # scaling by their count gives this window's summed contribution.
            negative_log = outputs.loss * difference

        all_negative_logs.append(negative_log)
        last_end_pos = end_pos
        if end_pos == inputs_len:
            break

    return -1 * torch.stack(all_negative_logs).sum() / inputs_len

def scoreText(text, n_perturbations=50):
    """Score ``text`` by comparing its likelihood with that of perturbed copies.

    The text is whitespace-normalised, perturbed with ``mask`` and every
    version is scored with ``calculate_rank_and_probability`` using the global
    ``model`` and ``tokenizer``. The result is a z-score: the original text's
    log-likelihood minus the mean over the perturbed texts, divided by the
    sample standard deviation of the perturbed texts.

    Args:
        text: The text to score.
        n_perturbations: Number of perturbed texts requested from ``mask``.

    Returns:
        A scalar tensor holding the z-score.

    Raises:
        ValueError: If fewer than two perturbed texts were produced, since the
            sample variance is undefined in that case.
    """
    text = " ".join(text.split())

    perturbated_texts = mask(text, n_perturbations, n_perturbations)
    perturbed_scores = [
        calculate_rank_and_probability(model, tokenizer, perturbated_text)
        for perturbated_text in perturbated_texts
    ]
    original_prob = calculate_rank_and_probability(model, tokenizer, text)

    # mask() can return fewer texts than requested (incompletely filled texts
    # are dropped), so the statistics use the number actually obtained and the
    # original text's score is kept out of them.
    n_scored = len(perturbed_scores)
    if n_scored < 2:
        raise ValueError(
            f"need at least 2 perturbed texts to compute a score, got {n_scored}"
        )

    scores = torch.stack(perturbed_scores)
    average_all_probs = scores.sum() / n_scored
    average_d = original_prob - average_all_probs
    # Unbiased sample variance (n - 1 in the denominator).
    variance = ((scores - average_all_probs) ** 2).sum() / (n_scored - 1)

    return average_d / torch.sqrt(variance)

from scipy.stats import norm
def evaluate_score(score):
    """Turn a score tensor into ``(percentage, is_human)``.

    ``is_human`` is True when the score lies below the fixed threshold of
    1.25. ``percentage`` is the standard normal CDF of the absolute distance
    between the score and the threshold, multiplied by 100.
    """
    threshold = 1.25
    value = score.cpu().item()
    distance = abs(threshold - value)
    percentage = norm.cdf(distance) * 100
    is_human = value < threshold
    return percentage, is_human


# Run

In [5]:
# Sample passage stored as ai_text for manual experiments.
# NOTE(review): no cell visible here passes ai_text to scoreText.
ai_text="""
How Does AI Detection Work?

AI detection relies on advanced computational methods and algorithms to identify patterns or anomalies associated with AI-generated outputs. Common techniques include:

Feature Analysis: AI-generated content often has subtle differences from human-created content. For example, in text, these differences might include repetitive phrases, unnatural sentence structures, or statistical improbabilities.

Watermarking: Some AI systems embed identifiable markers or metadata into their outputs to signal that the content is AI-generated.

Machine Learning Models: Detection systems often use machine learning to train on datasets of human and AI-generated content, enabling them to predict the source of new inputs.

Contextual Evaluation: Examining the broader context, such as timing, frequency, and content alignment, can also help identify AI involvement.

Why is AI Detection Important?

The ability to detect AI usage has significant implications for various fields:

Academic Integrity: Ensuring students submit original work rather than relying on AI-generated essays or problem-solving.

Media Authenticity: Protecting against misinformation, fake news, and deepfake media, which can undermine trust and democracy.

Creative Ownership: Authenticating original work in art, music, or writing to safeguard intellectual property rights.

Cybersecurity: Detecting AI-powered bots that spread spam, conduct phishing attacks, or manipulate social media platforms.

Regulatory Compliance: Helping organizations adhere to transparency and ethical guidelines regarding AI usage.
"""

In [12]:
# Sample passage stored as human_text; it is the input the run cell passes
# to scoreText.
human_text = """
You light the cigarette and hold it to your mouth
The heat from burning the cigarette releases nicotine and makes tar (tobacco residue). As you bring the cigarette to your lips, the tar stains your nails. The smoke dries out and inflames your skin, deepening wrinkles. Inhaling smoke through your nose damages nerve endings. Over time, this reduces your sense of smell.

You inhale cigarette smoke through your mouth
When you take a puff from the cigarette, it goes through a filter. This mostly keeps you from breathing in large particles, but tar, nicotine and other chemicals still get through. The tar stains your teeth and coats your gums and tongue. It damages your tooth enamel, puts you at risk for tooth decay and gum disease, and reduces your ability to taste foods you love.
"""

In [13]:
# Score the sample passage and report the verdict with its confidence.
run_result = scoreText(human_text)
percentage, is_human = evaluate_score(run_result)
if is_human:
    print("This text is human written with a probability of", percentage, "%")
else:
    print("This text is AI-Generated with a probability of", percentage, "%")

This text is human written with a probability of 87.27728771825633 %


In [None]:
# Scratch cell: the stray bare name that used to be here was undefined and
# raised NameError on "Restart & Run All".