# Email Spam Detection - Inference

This notebook demonstrates how to use the trained spam detection model for inference.

In [None]:
import sys
from pathlib import Path

# Add src to path
# Assumes the kernel's working directory is a direct child of the project
# root (e.g. a notebooks/ folder), so that its parent contains src/ —
# TODO confirm if the notebook is launched from a different directory.
project_root = Path.cwd().parent
# Insert at index 0 so this src/ directory is searched before every other
# sys.path entry when resolving the spam_detector imports below.
sys.path.insert(0, str(project_root / "src"))

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from spam_detector.config import settings
from spam_detector.predict import SpamDetectorPredictor

%matplotlib inline
sns.set_style('whitegrid')

## 1. Load the Trained Model

In [None]:
# Initialize predictor
# Build the predictor from the model path held in the project settings, then
# call load_model() before any prediction is requested.
# NOTE(review): presumably load_model() reads a serialized model from
# settings.model_path and raises if it is missing — confirm in
# spam_detector.predict.
predictor = SpamDetectorPredictor(settings.model_path)
predictor.load_model()

# Reached only if the two calls above did not raise.
print("Model loaded successfully!")

## 2. Single Email Prediction

In [None]:
# Score one hand-written, obviously promotional message.
email_text = "CONGRATULATIONS! You've won a FREE iPhone! Click here to claim your prize NOW!"

# predict() is unpacked as a (label, confidence) pair.
prediction, confidence = predictor.predict(email_text)

# Report the input, then the label and the confidence to four decimals.
# The leading "\n" on the second item keeps a blank line after the email text.
print(
    f"Email: {email_text}",
    f"\nPrediction: {prediction}",
    f"Confidence: {confidence:.4f}",
    sep="\n",
)

## 3. Batch Predictions

In [None]:
# Sample inbox used by the rest of the notebook: a hand-written mix of
# ordinary work/order messages and promotional ones.
test_emails = [
    "Hi, the quarterly meeting is scheduled for next Tuesday at 3pm. Please confirm your attendance.",
    "WINNER WINNER! You've been selected for a $5000 cash prize! Act now before it expires!",
    "Thanks for sending the project documentation. I've reviewed it and everything looks good.",
    "URGENT: Your credit card has been approved! Click here for instant cash!",
    "Following up on our conversation about the code review. The changes are ready.",
    "FREE VIAGRA! 90% discount! Limited time offer! Buy now!",
    "The delivery confirmation for your order #12345 is attached. Expected arrival: Friday.",
    "Make $10,000 per week working from home! No experience needed! Risk-free guarantee!"
]

# Classify the whole list with a single call.
results = predictor.predict_batch(test_emails)

# Each result is indexed as [0] -> predicted label, [1] -> confidence.
predicted_labels = [outcome[0] for outcome in results]
confidence_scores = [outcome[1] for outcome in results]

# One row per email, in the same order as test_emails.
results_df = pd.DataFrame({
    'email': test_emails,
    'prediction': predicted_labels,
    'confidence': confidence_scores,
})

# Bare last expression -> rendered as a rich table by the notebook.
results_df

## 4. Visualize Predictions

In [None]:
# Prediction distribution
# Two side-by-side panels built from results_df:
#   left  - how many emails received each predicted label
#   right - the confidence of every individual prediction
# Spam is drawn red and everything else green in BOTH panels.
prediction_counts = results_df['prediction'].value_counts()

fig, (count_ax, confidence_ax) = plt.subplots(1, 2, figsize=(10, 5))

# value_counts() orders labels by frequency, not by name, so a fixed
# ['green', 'red'] list would paint spam green whenever spam is the most
# common (or the only) label. Derive each bar's colour from its own label.
count_colors = ['red' if label == 'spam' else 'green' for label in prediction_counts.index]
prediction_counts.plot(kind='bar', color=count_colors, rot=0, ax=count_ax)
count_ax.set_title('Prediction Distribution')
count_ax.set_xlabel('Prediction')
count_ax.set_ylabel('Count')

# One horizontal bar per email, positioned by its row number in results_df.
colors = ['red' if p == 'spam' else 'green' for p in results_df['prediction']]
confidence_ax.barh(range(len(results_df)), results_df['confidence'], color=colors, alpha=0.7)
confidence_ax.set_xlabel('Confidence')
confidence_ax.set_ylabel('Email Index')
confidence_ax.set_title('Prediction Confidence by Email')
# Fixed 0-1 axis so bar lengths are comparable between runs.
confidence_ax.set_xlim(0, 1)

fig.tight_layout()
plt.show()

## 5. Interactive Testing

In [None]:
# Try your own email text
# NOTE: input() waits for the reader to type a value, so this cell pauses a
# "Run All" until it is answered.
custom_email = input("Enter an email text to classify: ")

# Whitespace-only input is treated the same as no input at all.
if custom_email.strip():
    prediction, confidence = predictor.predict(custom_email)

    divider = '=' * 60
    print(f"\n{divider}")
    print(f"Email: {custom_email}")
    print(divider)
    print(f"Prediction: {prediction.upper()}")
    print(f"Confidence: {confidence:.2%}")
    print(divider)

    # Visual indicator. The emoji were previously stored as mis-decoded bytes
    # and printed as garbage; they are written as escapes here so the source
    # stays ASCII and cannot be mangled by another encoding round trip.
    if prediction == 'spam':
        print("\n\U0001F6A8 WARNING: This email appears to be SPAM!")  # police-car-light emoji
    else:
        print("\n\u2705 This email appears to be legitimate (HAM).")  # check-mark emoji
else:
    print("No input provided.")

## 6. Confidence Analysis

In [None]:
# Analyze confidence scores
# Split the batch confidences by predicted label, overlay the two
# distributions in one histogram, then print the mean confidence per label.
predicted_spam = results_df['prediction'] == 'spam'
predicted_ham = results_df['prediction'] == 'ham'
spam_confidences = results_df.loc[predicted_spam, 'confidence']
ham_confidences = results_df.loc[predicted_ham, 'confidence']

fig, ax = plt.subplots(figsize=(10, 6))
# Passing both series in one list draws them as grouped bars over shared bins.
ax.hist([spam_confidences, ham_confidences], bins=10,
        label=['Spam', 'Ham'], color=['red', 'green'], alpha=0.7)
ax.set_xlabel('Confidence Score')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Confidence Scores by Prediction')
ax.legend()
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

# Blank line between the figure and the summary.
print()
for label, confidences in (('spam', spam_confidences), ('ham', ham_confidences)):
    if confidences.empty:
        # The mean of an empty Series is NaN; say why there is no number
        # instead of printing "nan" when the model never predicted this label.
        print(f"Average confidence for {label} predictions: n/a (no {label} predictions)")
    else:
        print(f"Average confidence for {label} predictions: {confidences.mean():.4f}")