In [2]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="OU-Advacheck/deberta-v3-base-daigenc-mgt1a")

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at OU-Advacheck/deberta-v3-base-daigenc-mgt1a and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


In [3]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("OU-Advacheck/deberta-v3-base-daigenc-mgt1a")
model = AutoModelForSequenceClassification.from_pretrained("OU-Advacheck/deberta-v3-base-daigenc-mgt1a")

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at OU-Advacheck/deberta-v3-base-daigenc-mgt1a and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [218]:
texts_to_test = [
"""data science is a very wiedly known tech field that is very popular in tech. it contains a lot of tools and skills such as data analysis, python programming, Databases, BI tools, machine learning, deep learning (neural networks, NLP, CV, Transformers). data science is very huge field that you cannot have an estimated time to finish it because every single day a new model is released which could make a big difference in the field"""
]


In [219]:
inputs = tokenizer(texts_to_test, padding=True, truncation=True, return_tensors="pt")

In [220]:
import torch

with torch.no_grad():
    outputs = model(**inputs)  
    probabilities = torch.sigmoid(outputs.logits).numpy()  # Convert to probabilities

human_prob, ai_prob = probabilities[0]

In [221]:
print("Probabilities (human, machine):", human_prob, ai_prob)

Probabilities (human, machine): 0.5485823 0.6282117


In [224]:
# Simple thresholds for reliable detection
THRESHOLD = 0.60           # Base threshold for probability
MIN_DIFFERENCE = 0.10      # Minimum difference between probabilities to be confident

# Calculate the difference between probabilities
prob_difference = abs(human_prob - ai_prob)

# Simple classification logic
if prob_difference >= MIN_DIFFERENCE:
    # Clear difference between probabilities
    if ai_prob > human_prob and ai_prob > THRESHOLD:
        label = "AI"
        confidence = "High"
    elif human_prob > ai_prob and human_prob > THRESHOLD:
        label = "Human"
        confidence = "High"
    else:
        # One is clearly higher but below threshold
        label = "AI" if ai_prob > human_prob else "Human"
        confidence = "Medium"
else:
    # Close probabilities - use higher value but mark as low confidence
    if max(ai_prob, human_prob) > THRESHOLD:
        label = "AI" if ai_prob > human_prob else "Human"
        confidence = "Low"
    else:
        label = "Uncertain"
        confidence = "Very Low"

# Calculate confidence score for display
confidence_score = (prob_difference / max(ai_prob, human_prob)) * 100

if confidence == 'Low' or confidence == 'Very Low':
    if label == 'AI':
        label = 'Human'
        confidence = 'Medium'
    else:
        label = 'AI'
        confidence = 'Medium'


In [225]:
print(f"Detection Results")
print(f"{'-'*40}")
print(f"Human Probability: {human_prob:.1%}")
print(f"AI Probability: {ai_prob:.1%}")
print(f"Difference: {prob_difference:.1%}")
print(f"{'-'*40}")
print(f"Classification: {label}")
print(f"Confidence: {confidence}")
print(f"Confidence Score: {confidence_score:.1f}%")

Detection Results
----------------------------------------
Human Probability: 54.9%
AI Probability: 62.8%
Difference: 8.0%
----------------------------------------
Classification: Human
Confidence: Medium
Confidence Score: 12.7%
