In [4]:
from transformers import pipeline

pipe = pipeline("text-classification", model="OU-Advacheck/deberta-v3-base-daigenc-mgt1a")

  from .autonotebook import tqdm as notebook_tqdm
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at OU-Advacheck/deberta-v3-base-daigenc-mgt1a and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("OU-Advacheck/deberta-v3-base-daigenc-mgt1a")
model = AutoModelForSequenceClassification.from_pretrained("OU-Advacheck/deberta-v3-base-daigenc-mgt1a")

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at OU-Advacheck/deberta-v3-base-daigenc-mgt1a and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
texts_to_test = [
    """Data science is the process of analyzing and interpreting large amounts of data to discover useful insights. It involves collecting data from various sources, cleaning and organizing it to remove errors or inconsistencies, and then exploring it to identify patterns and trends. This exploration often includes visualizations and statistical summaries. After understanding the data, data scientists create models using techniques from machine learning and statistics to make predictions or decisions. These models are evaluated to ensure they are accurate and reliable. The final insights are usually communicated to stakeholders through reports or dashboards, helping guide business strategies or automate systems.
"""
]


In [7]:
inputs = tokenizer(texts_to_test, padding=True, truncation=True, return_tensors="pt")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [8]:
import torch
import numpy as np

with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits.numpy()
    # Apply softmax to get normalized probabilities that sum to 1
    probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)

human_prob, ai_prob = probabilities[0]
# Convert to percentages
human_percentage = human_prob * 100
ai_percentage = ai_prob * 100

In [9]:
print("Probabilities (human, machine):", human_prob, ai_prob)

Probabilities (human, machine): 0.42224014 0.57775986


In [10]:
# Simple threshold for AI detection
AI_THRESHOLD = 50.0  # threshold for AI classification

# Calculate percentage difference
percentage_diff = abs(ai_percentage - human_percentage)

# Determine confidence level based on difference
if percentage_diff <= 40:
    confidence = "Low"
elif percentage_diff <= 70:
    confidence = "Medium"
else:
    confidence = "High"

# Classification logic
if ai_percentage >= AI_THRESHOLD:
    label = "AI"
else:
    label = "Human"

if confidence == "Low":
    label = "Uncertain but it is likely to be" + " " + label

In [11]:
print(f"Detection Results")
print(f"{'-'*40}")
print(f"Human Probability: {human_percentage:.1f}%")
print(f"AI Probability: {ai_percentage:.1f}%")
print(f"{'-'*40}")
print(f"Classification: {label}")
print(f"Confidence: {confidence}")

Detection Results
----------------------------------------
Human Probability: 42.2%
AI Probability: 57.8%
----------------------------------------
Classification: Uncertain but it is likely to be AI
Confidence: Low
