<a href="https://colab.research.google.com/github/shahd1995913/Identifying-Machine-Generated-Text-/blob/main/auditing_of_AI_assistants_for_article_generationpPART2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# AI Content Detection, Bias, and Harm Level Analysis
from transformers import pipeline
import re

# Load models
ai_detector = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
sentiment_model = pipeline("sentiment-analysis")
harm_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Bias detection keywords
def detect_bias(text):
    biases = {
        "Religious/Cultural Bias": ["شيعة", "سنة", "مسيحي", "يهودي", "هندوسي", "بوذي", "مسلم"],
        "Exclusion / Lack of Inclusivity": ["تجاهل", "إقصاء", "مهمش", "غير مذكور", "غائب"],
        "Misinformation / False Claims": ["خطأ", "معلومات مغلوطة", "مزيفة", "خاطئة"]
    }

    detected_biases = []
    for bias_type, keywords in biases.items():
        for word in keywords:
            if re.search(r"\b" + re.escape(word) + r"\b", text):
                detected_biases.append((bias_type, word))
    return detected_biases

# Harm level classification
def classify_harm_level(text):
    harm_labels = [
        "Misinformation",
        "Cultural Exclusion",
        "Religious Bias",
        "Hate Speech",
        "Sensitive Content",
        "Neutral / Safe"
    ]
    result = harm_classifier(text, candidate_labels=harm_labels)
    return result

# Main analysis function
def analyze_text(text):
    # AI vs Human
    ai_result = ai_detector(text, candidate_labels=["AI-generated", "Human-written"])
    # Sentiment
    sentiment_result = sentiment_model(text)
    # Bias
    detected_biases = detect_bias(text)
    # Harm
    harm_result = classify_harm_level(text)

    # Output
    print("\n=== AI Content Detection ===")
    print(f"Prediction: {ai_result['labels'][0]} (Confidence: {ai_result['scores'][0]:.2f})")

    print("\n=== Sentiment Analysis ===")
    print(f"Sentiment: {sentiment_result[0]['label']} (Score: {sentiment_result[0]['score']:.2f})")

    print("\n=== Bias Detection (Religious, Cultural & Inclusivity) ===")
    if detected_biases:
        for bias in detected_biases:
            print(f"- Detected {bias[0]} (keyword: '{bias[1]}')")
    else:
        print("No explicit religious/cultural bias or exclusion detected.")

    print("\n=== Harm Level Classification ===")
    for label, score in zip(harm_result['labels'], harm_result['scores']):
        print(f"- {label}: {score:.2f}")

    # Show highest harm category clearly
    top_harm_index = harm_result['scores'].index(max(harm_result['scores']))
    print("\n=== Final Example Output ===")
    if detected_biases:
        print(f"The text contains approximately {round(harm_result['scores'][top_harm_index]*100, 1)}% indication of '{harm_result['labels'][top_harm_index]}' harm level.")
    else:
        print("The text seems mostly neutral with minimal bias, culturally inclusive, and does not promote misinformation.")

# Test example (Arabic text)
sample_text = """
التاريخ الإسلامي يركز فقط على إنجازات السنة دون ذكر مساهمات الشيعة.
ويعطي فقط منظور غربي ويتجاهل التنوع الثقافي.
"""

analyze_text(sample_text)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu
Device set to use cpu



=== AI Content Detection ===
Prediction: Human-written (Confidence: 0.84)

=== Sentiment Analysis ===
Sentiment: NEGATIVE (Score: 0.66)

=== Bias Detection (Religious, Cultural & Inclusivity) ===
No explicit religious/cultural bias or exclusion detected.

=== Harm Level Classification ===
- Neutral / Safe: 0.25
- Sensitive Content: 0.24
- Misinformation: 0.23
- Religious Bias: 0.12
- Cultural Exclusion: 0.09
- Hate Speech: 0.06

=== Final Example Output ===
The text seems mostly neutral with minimal bias, culturally inclusive, and does not promote misinformation.
