# Fake News Detection - Example Notebook

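This notebook demonstrates how to use the fake news detection model end to end: building a small sample dataset, preprocessing the text, training a classifier, making predictions on new text, evaluating the model, and saving it to disk.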

In [None]:
# Import required libraries
import pandas as pd
import sys
sys.path.append('..')

from src.preprocessing import TextPreprocessor
from src.model_trainer import FakeNewsClassifier, train_and_compare_models
from sklearn.model_selection import train_test_split

## 1. Load and Explore Data

In [None]:
# Build a tiny labelled sample dataset as (text, label) pairs.
# Label convention: 0 = Real, 1 = Fake
samples = [
    ('Scientists discover cure for all diseases', 1),
    ('Study shows drinking water is beneficial for health', 0),
    ('Aliens land on Earth and meet world leaders', 1),
    ('Research confirms exercise improves health', 0),
    ('Celebrity arrested for crimes never committed', 1),
    ('Economic report shows unemployment declining', 0),
]

# Transpose the pairs into the column-oriented dict the DataFrame is built from
texts, labels = zip(*samples)
data = {'text': list(texts), 'label': list(labels)}

df = pd.DataFrame(data)

# Summarise the dataset: overall shape, then the row count per label
print(f"Dataset shape: {df.shape}")
print("\nLabel distribution:")
print(df['label'].value_counts())
df.head()

## 2. Preprocess Text Data

In [None]:
# Set up the text preprocessor
preprocessor = TextPreprocessor()

# Run it over the dataframe, passing the names of the text and label columns;
# the result is read back through its 'processed_text' column below
df_processed = preprocessor.preprocess_dataframe(df, 'text', 'label')

# Print the first rows (at most three) before and after preprocessing
for i, original_text in enumerate(df['text'].iloc[:3]):
    print(f"Original: {original_text}")
    print(f"Processed: {df_processed['processed_text'].iloc[i]}")
    print()

## 3. Train Model

In [None]:
# Split data: features are the preprocessed text, targets are the labels
X = df_processed['processed_text']
y = df_processed['label']

# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
# Stratify on the label so class proportions are preserved in both splits:
# with this few samples an unstratified split can put only one class in the
# test set, which makes the per-class evaluation metrics undefined.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")

In [None]:
# Fit one classifier of the 'logistic' model type.
# The held-out split is passed alongside the training split; the returned
# metrics are read via the 'train_accuracy' and 'val_accuracy' keys.
classifier = FakeNewsClassifier(model_type='logistic')
metrics = classifier.train(X_train, y_train, X_test, y_test)

# Report both accuracies to four decimal places
for title, metric_key in (
    ("Training", 'train_accuracy'),
    ("Validation", 'val_accuracy'),
):
    print(f"{title} Accuracy: {metrics[metric_key]:.4f}")

## 4. Make Predictions

In [None]:
# New, unseen example headlines to classify
test_texts = [
    "Breaking news: Scientists make groundbreaking discovery",
    "Miracle cure will solve all your problems instantly",
]

# Apply the same preprocessing that was used for the training data
test_processed = list(map(preprocessor.preprocess, test_texts))

# Get the predicted label and the per-class probabilities for each text
predictions = classifier.predict(test_processed)
probabilities = classifier.predict_proba(test_processed)

# Report each text with its predicted label and probabilities
for i, text in enumerate(test_texts):
    predicted = predictions[i]
    class_probs = probabilities[i]

    # Label 1 is shown as FAKE, anything else as REAL
    if predicted == 1:
        label = "FAKE"
    else:
        label = "REAL"
    # The predicted label doubles as the index into the probability row
    confidence = class_probs[predicted] * 100

    print(f"Text: {text}")
    print(f"Prediction: {label} (Confidence: {confidence:.2f}%)")
    print(f"Probabilities - Real: {class_probs[0]:.2f}, Fake: {class_probs[1]:.2f}")
    print()

## 5. Model Evaluation

In [None]:
# Score the trained classifier on the held-out test split
eval_metrics = classifier.evaluate(X_test, y_test)

# Headline accuracy, to four decimal places
test_accuracy = eval_metrics['accuracy']
print(f"Test Accuracy: {test_accuracy:.4f}")

# Then the classification report stored in the same metrics mapping
print("\nClassification Report:")
report = eval_metrics['classification_report']
print(report)

## 6. Save Model

In [None]:
# Save the trained model
from pathlib import Path

# Create the output directory first so saving does not depend on it already
# existing (e.g. on a fresh checkout); this is a no-op when it is present.
Path('../models').mkdir(parents=True, exist_ok=True)

# Two target paths are passed: one for the model, one for the vectorizer
classifier.save('../models/model.pkl', '../models/vectorizer.pkl')
print("Model saved successfully!")