In [1]:
from transformers import pipeline
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
print("FAKE NEWS DETECTOR - SENSATIONALISM & RELIABILITY ANALYZER")



FAKE NEWS DETECTOR - SENSATIONALISM & RELIABILITY ANALYZER


In [2]:
# Initialize sentiment analysis pipeline
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

# Initialize zero-shot classification for more granular detection
zero_shot_pipeline = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli"
)

print("✓ Sentiment Analysis Model loaded")
print("✓ Zero-Shot Classification Model loaded")

# Define sensationalism markers
SENSATIONALISM_MARKERS = {
    'extreme_adjectives': ['shocking', 'stunning', 'unbelievable', 'incredible',
                          'amazing', 'crazy', 'insane', 'mind-blowing', 'explosive'],
    'urgency_words': ['breaking', 'urgent', 'alert', 'emergency', 'crisis',
                      'disaster', 'catastrophe', 'shocking'],
    'emotional_words': ['hate', 'love', 'outrage', 'furious', 'devastated',
                       'heartbroken', 'ecstatic'],
    'conspiracy_markers': ['cover-up', 'conspiracy', 'exposed', 'hidden truth',
                          'they don\'t want you to know', 'shocking revelation'],
    'ALL_CAPS': 'excessive capitalization',
    'multiple_exclamation': 'excessive punctuation'
}

print(f"✓ Sensationalism Markers loaded ({len(SENSATIONALISM_MARKERS)} categories)")

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


✓ Sentiment Analysis Model loaded
✓ Zero-Shot Classification Model loaded
✓ Sensationalism Markers loaded (6 categories)


In [3]:
headlines = [
    # RELIABLE NEWS (neutral, factual)
    "Federal Reserve announces interest rate increase of 0.25%",
    "Company reports Q3 earnings in line with analyst expectations",
    "Climate scientists publish new research on temperature trends",
    "Government approves new infrastructure budget",
    "Stock market closes down 1.2% amid global economic concerns",

    # SENSATIONALIST/CLICKBAIT (emotional, extreme language)
    "You WON'T BELIEVE What This Celebrity Just Did! The Internet is LOSING IT!",
    "SHOCKING: Doctors HATE This One Simple Trick!",
    "BREAKING: Shocking Secret EXPOSED - Click Now Before They Remove It!",
    "This INSANE Celebrity Meltdown Will DESTROY Their Career Forever!",
    "ALERT: URGENT Health Warning That Big Pharma Doesn't Want You to See!!!",

    # FAKE NEWS INDICATORS (conspiracy, misinformation)
    "Government Hiding Truth About UFOs - Leaked Documents Reveal Everything",
    "5G Towers Revealed to be Mind Control Devices by Secret Agents",
    "This Food is Destroying Your Brain - Big Food Companies Don't Want You to Know",
    "Hidden Camera Footage Shows Celebrity in Shocking Incident",
    "Leaked: Politicians Involved in Cover-Up That Could Change Everything",

    # MIXED/BORDERLINE
    "Stock Market Crashes: What You Need to Know",
    "New Study Suggests Coffee May Have Health Benefits",
    "Tech Giant Announces Major Product Launch Next Week",
    "Experts Warn of Potential Economic Downturn",
    "Scientists Discover Unexpected Finding in Latest Research"
]

print(f"Total headlines: {len(headlines)}\n")
for i, headline in enumerate(headlines, 1):
    print(f"{i:2d}. {headline}")

Total headlines: 20

 1. Federal Reserve announces interest rate increase of 0.25%
 2. Company reports Q3 earnings in line with analyst expectations
 3. Climate scientists publish new research on temperature trends
 4. Government approves new infrastructure budget
 5. Stock market closes down 1.2% amid global economic concerns
 6. You WON'T BELIEVE What This Celebrity Just Did! The Internet is LOSING IT!
 7. SHOCKING: Doctors HATE This One Simple Trick!
 8. BREAKING: Shocking Secret EXPOSED - Click Now Before They Remove It!
 9. This INSANE Celebrity Meltdown Will DESTROY Their Career Forever!
11. Government Hiding Truth About UFOs - Leaked Documents Reveal Everything
12. 5G Towers Revealed to be Mind Control Devices by Secret Agents
13. This Food is Destroying Your Brain - Big Food Companies Don't Want You to Know
14. Hidden Camera Footage Shows Celebrity in Shocking Incident
15. Leaked: Politicians Involved in Cover-Up That Could Change Everything
16. Stock Market Crashes: What You N

In [6]:
def count_sensationalism_features(headline):
    """Count sensationalism markers in headline"""
    features = {
        'extreme_adjectives': 0,
        'urgency_words': 0,
        'emotional_words': 0,  # FIXED: was 'emotional_word'
        'conspiracy_markers': 0,
        'all_caps_words': 0,
        'exclamation_marks': 0
    }

    headline_lower = headline.lower()

    # Count extreme adjectives
    for word in SENSATIONALISM_MARKERS['extreme_adjectives']:
        features['extreme_adjectives'] += headline_lower.count(word)

    # Count urgency words
    for word in SENSATIONALISM_MARKERS['urgency_words']:
        features['urgency_words'] += headline_lower.count(word)

    # Count emotional words (FIXED)
    for word in SENSATIONALISM_MARKERS['emotional_words']:
        features['emotional_words'] += headline_lower.count(word)  # FIXED KEY NAME

    # Count conspiracy markers
    for word in SENSATIONALISM_MARKERS['conspiracy_markers']:
        features['conspiracy_markers'] += headline_lower.count(word)

    # Count ALL CAPS words (3+ letters)
    words = headline.split()
    for word in words:
        if len(word) > 3 and word.isupper():
            features['all_caps_words'] += 1

    # Count exclamation marks
    features['exclamation_marks'] = headline.count('!')

    return features

def calculate_reliability_score(features, sentiment_score):
    """
    Calculate overall reliability score (0-100)
    Higher score = more reliable
    Lower score = more sensationalist/fake
    """
    sensationalism_penalty = (
        features['extreme_adjectives'] * 15 +
        features['urgency_words'] * 12 +
        features['emotional_words'] * 10 +  # FIXED KEY NAME
        features['conspiracy_markers'] * 20 +
        features['all_caps_words'] * 5 +
        features['exclamation_marks'] * 8
    )

    # Negative sentiment also indicates lower reliability
    if sentiment_score < 0.3:
        sensationalism_penalty += 20

    reliability_score = max(0, 100 - sensationalism_penalty)
    return reliability_score

In [7]:
print("\nAnalyzing each headline...\n")

results = []

for i, headline in enumerate(headlines, 1):
    print(f"[{i}/{len(headlines)}] {headline[:70]}...")

    try:
        # Sentiment analysis
        sentiment = sentiment_pipeline(headline)[0]
        sentiment_label = sentiment['label']
        sentiment_score = sentiment['score'] if sentiment_label == 'POSITIVE' else (1 - sentiment['score'])

        # Count sensationalism markers
        features = count_sensationalism_features(headline)

        # Calculate reliability score
        reliability = calculate_reliability_score(features, sentiment_score)

        # Determine classification
        if reliability > 70:
            category = "Reliable"
        elif reliability > 40:
            category = "Borderline"
        else:
            category = "Sensationalist/Fake"

        print(f"  ✓ Reliability: {reliability:.1f}/100 | Category: {category}")

        results.append({
            'Headline': headline,
            'Reliability Score': reliability,
            'Category': category,
            'Sentiment': sentiment_label,
            'Sensationalism Markers': features['extreme_adjectives'] +
                                     features['urgency_words'] +
                                     features['emotional_words'] +  # FIXED
                                     features['conspiracy_markers'],
            'ALL CAPS Words': features['all_caps_words'],
            'Exclamation Marks': features['exclamation_marks']
        })

    except Exception as e:
        print(f"  ✗ Error: {str(e)[:60]}")


Analyzing each headline...

[1/20] Federal Reserve announces interest rate increase of 0.25%...
  ✓ Reliability: 80.0/100 | Category: Reliable
[2/20] Company reports Q3 earnings in line with analyst expectations...
  ✓ Reliability: 100.0/100 | Category: Reliable
[3/20] Climate scientists publish new research on temperature trends...
  ✓ Reliability: 100.0/100 | Category: Reliable
[4/20] Government approves new infrastructure budget...
  ✓ Reliability: 100.0/100 | Category: Reliable
[5/20] Stock market closes down 1.2% amid global economic concerns...
  ✓ Reliability: 80.0/100 | Category: Reliable
[6/20] You WON'T BELIEVE What This Celebrity Just Did! The Internet is LOSING...
  ✓ Reliability: 49.0/100 | Category: Borderline
[7/20] SHOCKING: Doctors HATE This One Simple Trick!...
  ✓ Reliability: 25.0/100 | Category: Sensationalist/Fake
[8/20] BREAKING: Shocking Secret EXPOSED - Click Now Before They Remove It!...
  ✓ Reliability: 3.0/100 | Category: Sensationalist/Fake
[9/20] This INS

In [11]:
df_results = pd.DataFrame(results)

# Display results sorted by reliability score
print("\nHeadlines Ranked by Reliability Score:\n")
df_sorted = df_results.sort_values('Reliability Score', ascending=False)
print(df_sorted[['Headline', 'Reliability Score', 'Category']].to_string(index=False))

# Category breakdown
print("CATEGORY DISTRIBUTION")
print(df_results['Category'].value_counts())

# Average reliability by category
print("\nAVERAGE RELIABILITY SCORE BY CATEGORY")
print(df_results.groupby('Category')['Reliability Score'].agg(['mean', 'min', 'max']).round(2))

# Sensationalism markers analysis
print("\nSENSATIONALISM MARKERS ANALYSIS")
print(f"Average sensationalism markers per headline:")
print(f"  Reliable headlines: {df_results[df_results['Category']=='Reliable']['Sensationalism Markers'].mean():.2f}")
print(f"  Borderline headlines: {df_results[df_results['Category']=='Borderline']['Sensationalism Markers'].mean():.2f}")
print(f"  Sensationalist headlines: {df_results[df_results['Category']=='Sensationalist/Fake']['Sensationalism Markers'].mean():.2f}")


Headlines Ranked by Reliability Score:

                                                                      Headline  Reliability Score            Category
                 Company reports Q3 earnings in line with analyst expectations                100            Reliable
                 Climate scientists publish new research on temperature trends                100            Reliable
                                 Government approves new infrastructure budget                100            Reliable
       Government Hiding Truth About UFOs - Leaked Documents Reveal Everything                100            Reliable
                     Scientists Discover Unexpected Finding in Latest Research                100            Reliable
                            New Study Suggests Coffee May Have Health Benefits                100            Reliable
                           Tech Giant Announces Major Product Launch Next Week                100            Reliable
               

In [9]:
reliable_headlines = df_results[df_results['Category'] == 'Reliable']
sensationalist_headlines = df_results[df_results['Category'] == 'Sensationalist/Fake']

print(f"\n✓ RELIABLE HEADLINES ({len(reliable_headlines)}):")
if len(reliable_headlines) > 0:
    print(f"  Average Reliability: {reliable_headlines['Reliability Score'].mean():.1f}/100")
    print(f"  Avg Sensationalism Markers: {reliable_headlines['Sensationalism Markers'].mean():.1f}")
    print(f"  Examples:")
    for idx, row in reliable_headlines.head(2).iterrows():
        print(f"    - {row['Headline'][:75]}")

print(f"\n✗ SENSATIONALIST/FAKE HEADLINES ({len(sensationalist_headlines)}):")
if len(sensationalist_headlines) > 0:
    print(f"  Average Reliability: {sensationalist_headlines['Reliability Score'].mean():.1f}/100")
    print(f"  Avg Sensationalism Markers: {sensationalist_headlines['Sensationalism Markers'].mean():.1f}")
    print(f"  Examples:")
    for idx, row in sensationalist_headlines.head(2).iterrows():
        print(f"    - {row['Headline'][:75]}")


✓ RELIABLE HEADLINES (14):
  Average Reliability: 89.5/100
  Avg Sensationalism Markers: 0.1
  Examples:
    - Federal Reserve announces interest rate increase of 0.25%
    - Company reports Q3 earnings in line with analyst expectations

✗ SENSATIONALIST/FAKE HEADLINES (3):
  Average Reliability: 16.7/100
  Avg Sensationalism Markers: 3.0
  Examples:
    - SHOCKING: Doctors HATE This One Simple Trick!
    - BREAKING: Shocking Secret EXPOSED - Click Now Before They Remove It!


In [10]:
custom_headlines = [
    "Scientists Discover Cure for Disease",
    "This ONE TRICK Will SHOCK You - Doctors HATE It!!!",
    "Federal Reserve Maintains Interest Rates at Current Level"
]

print("\nTesting custom headlines:\n")

for headline in custom_headlines:
    try:
        features = count_sensationalism_features(headline)
        sentiment = sentiment_pipeline(headline)[0]
        sentiment_score = sentiment['score'] if sentiment['label'] == 'POSITIVE' else (1 - sentiment['score'])
        reliability = calculate_reliability_score(features, sentiment_score)

        if reliability > 70:
            category = "✓ Reliable"
        elif reliability > 40:
            category = "~ Borderline"
        else:
            category = "✗ Sensationalist"

        print(f"Headline: {headline}")
        print(f"  Reliability: {reliability:.1f}/100 | {category}")
        print()

    except Exception as e:
        print(f"Error: {str(e)}")

print("\nANALYSIS COMPLETE")


Testing custom headlines:

Headline: Scientists Discover Cure for Disease
  Reliability: 100.0/100 | ✓ Reliable

Headline: This ONE TRICK Will SHOCK You - Doctors HATE It!!!
  Reliability: 31.0/100 | ✗ Sensationalist

Headline: Federal Reserve Maintains Interest Rates at Current Level
  Reliability: 100.0/100 | ✓ Reliable


ANALYSIS COMPLETE
