In [1]:
!pip install transformers torch tensorflow scikit-learn pandas numpy matplotlib seaborn nltk imbalanced-learn wordcloud

In [2]:
!pip install datasets evaluate

In [7]:
# CELL 1: COMPLETE INSTALLATION
import sys
import subprocess
import importlib

def install_and_import(package):
    try:
        importlib.import_module(package)
        print(f"✓ {package} already installed")
    except ImportError:
        print(f"Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        print(f"✓ {package} installed successfully")

# Install all required packages
packages = [
    'matplotlib',
    'seaborn', 
    'scikit-learn',
    'pandas',
    'numpy',
    'nltk',
    'imbalanced-learn',
    'transformers[torch]',
    'torch',
    'datasets',
    'wordcloud',
    'jupyter'
]

print("Installing required packages...")
for package in packages:
    install_and_import(package)

print("\n✅ All packages installed!")

Installing required packages...
Installing matplotlib...
✓ matplotlib installed successfully
Installing seaborn...
✓ seaborn installed successfully
Installing scikit-learn...
✓ scikit-learn installed successfully
✓ pandas already installed
✓ numpy already installed
Installing nltk...
✓ nltk installed successfully
Installing imbalanced-learn...
✓ imbalanced-learn installed successfully
Installing transformers[torch]...
✓ transformers[torch] installed successfully
✓ torch already installed
Installing datasets...
✓ datasets installed successfully
Installing wordcloud...
✓ wordcloud installed successfully
✓ jupyter already installed

✅ All packages installed!


In [8]:
# CELL 1: MINIMAL FAST INSTALLATION
import sys
import subprocess

# Install ONLY the essential packages
essential_packages = [
    'pandas',
    'numpy', 
    'scikit-learn',
    'nltk'
]

print("Installing essential packages only (this will be fast)...")
for package in essential_packages:
    print(f"Installing {package}...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])

print("✅ Essential packages installed!")

# Now install visualization packages separately (optional)
try:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "matplotlib", "seaborn", "--quiet"])
    print("✅ Visualization packages installed!")
except:
    print("⚠️  Visualization packages skipped (not essential)")

print("\n🚀 READY! Now let's proceed with the core model...")

Installing essential packages only (this will be fast)...
Installing pandas...
Installing numpy...
Installing scikit-learn...
Installing nltk...
✅ Essential packages installed!
✅ Visualization packages installed!

🚀 READY! Now let's proceed with the core model...


In [9]:
# CELL 2: SIMPLIFIED SCAM DETECTION MODEL (FAST)
import pandas as pd
import numpy as np
import re
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Download minimal NLTK data
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

print("✅ Environment ready! Starting model training...")

# --- SIMPLIFIED DATASET GENERATION ---
def generate_fast_dataset(num_samples=5000):
    """Fast dataset generation with essential scam patterns"""
    
    scam_patterns = {
        'phishing': [
            "Urgent: Your account will be suspended. Verify now",
            "Security Alert: Unusual login detected",
            "Your bank account needs immediate attention",
            "Update your payment information to avoid service disruption"
        ],
        'fake_job': [
            "Congratulations! You're hired. Pay for training",
            "Work from home opportunity. Send money for starter kit",
            "High paying job no experience needed. Pay processing fee"
        ],
        'bank_scam': [
            "CBE Alert: Your account has been compromised",
            "Dashen Bank: Suspicious transaction. Verify your PIN",
            "Bank of Abyssinia: Account freeze warning"
        ],
        'investment_fraud': [
            "Double your money in 24 hours. Invest now",
            "Guaranteed 500% return on crypto investment"
        ],
        'legitimate': [
            "Your order has been shipped and will arrive tomorrow",
            "Meeting reminder: Team sync at 2 PM",
            "Your monthly statement is ready for review",
            "Thank you for your application"
        ]
    }
    
    data = []
    labels = []
    
    for category, patterns in scam_patterns.items():
        samples_per_pattern = num_samples // (len(patterns) * len(scam_patterns))
        
        for pattern in patterns:
            for _ in range(samples_per_pattern):
                # Simple variation
                message = pattern
                if np.random.random() > 0.5 and category != 'legitimate':
                    message = message + " immediately!"
                
                data.append(message)
                labels.append(category)
    
    return pd.DataFrame({'text': data, 'label': labels})

print("Generating dataset...")
df = generate_fast_dataset(3000)  # Smaller dataset for speed
print(f"Dataset ready: {len(df)} samples")

# --- SIMPLE PREPROCESSING ---
def simple_clean(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text

df['cleaned_text'] = df['text'].apply(simple_clean)

# --- FAST TRAINING ---
print("Training model...")

# TF-IDF features
vectorizer = TfidfVectorizer(max_features=1000, ngram_range=(1, 2))
X = vectorizer.fit_transform(df['cleaned_text'])

# Encode labels
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(df['label'])

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=50, random_state=42)  # Smaller for speed
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ Model trained! Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# --- SIMPLE PREDICTION FUNCTION ---
def predict_scam(message):
    cleaned = simple_clean(message)
    features = vectorizer.transform([cleaned])
    prediction = model.predict(features)[0]
    probability = model.predict_proba(features)[0]
    
    return {
        'prediction': le.inverse_transform([prediction])[0],
        'confidence': np.max(probability),
        'is_scam': le.inverse_transform([prediction])[0] != 'legitimate',
        'all_probabilities': dict(zip(le.classes_, probability))
    }

# Test the model
test_messages = [
    "You've won $500000! Pay $50 processing fee to claim your prize",
    "Your Amazon order has been shipped",
    "Urgent: Your bank account will be suspended. Verify now",
    "Meeting reminder: Team sync at 3 PM tomorrow"
]

print("\n🧪 Testing the model:")
print("=" * 50)
for msg in test_messages:
    result = predict_scam(msg)
    status = "🚨 SCAM" if result['is_scam'] else "✅ LEGITIMATE"
    print(f"Message: {msg}")
    print(f"Result: {status} - {result['prediction']} (Confidence: {result['confidence']:.2f})")
    print("-" * 40)

✅ Environment ready! Starting model training...
Generating dataset...
Dataset ready: 3000 samples
Training model...
✅ Model trained! Accuracy: 1.0000

Classification Report:
                  precision    recall  f1-score   support

       bank_scam       1.00      1.00      1.00       122
        fake_job       1.00      1.00      1.00       113
investment_fraud       1.00      1.00      1.00       111
      legitimate       1.00      1.00      1.00       113
        phishing       1.00      1.00      1.00       141

        accuracy                           1.00       600
       macro avg       1.00      1.00      1.00       600
    weighted avg       1.00      1.00      1.00       600


🧪 Testing the model:
Message: You've won $500000! Pay $50 processing fee to claim your prize
Result: 🚨 SCAM - fake_job (Confidence: 0.60)
----------------------------------------
Message: Your Amazon order has been shipped
Result: ✅ LEGITIMATE - legitimate (Confidence: 0.56)
------------------------

In [10]:
# CELL 3: SAVE THE MODEL AND CREATE INTEGRATION FILES
import joblib
import json
import os

# Create directory for model files
os.makedirs('scam_model', exist_ok=True)

# Save all components
joblib.dump(model, 'scam_model/random_forest_model.pkl')
joblib.dump(vectorizer, 'scam_model/tfidf_vectorizer.pkl')
joblib.dump(le, 'scam_model/label_encoder.pkl')

# Save model metadata
model_info = {
    "model_type": "RandomForest",
    "version": "1.0",
    "training_date": pd.Timestamp.now().strftime("%Y-%m-%d"),
    "accuracy": float(accuracy),
    "classes": le.classes_.tolist(),
    "feature_count": X.shape[1],
    "training_samples": len(df)
}

with open('scam_model/model_info.json', 'w') as f:
    json.dump(model_info, f, indent=2)

print("✅ Model saved successfully!")
print("📁 Files created:")
for file in os.listdir('scam_model'):
    print(f"   - {file}")

✅ Model saved successfully!
📁 Files created:
   - label_encoder.pkl
   - model_info.json
   - random_forest_model.pkl
   - tfidf_vectorizer.pkl


In [11]:
# CELL 4: CREATE FLASK API FOR NEXT.JS INTEGRATION
flask_api_code = '''
from flask import Flask, request, jsonify
from flask_cors import CORS
import joblib
import numpy as np
import re

app = Flask(__name__)
CORS(app)  # Enable CORS for Next.js frontend

class ScamDetector:
    def __init__(self):
        # Load model components
        self.model = joblib.load('scam_model/random_forest_model.pkl')
        self.vectorizer = joblib.load('scam_model/tfidf_vectorizer.pkl')
        self.le = joblib.load('scam_model/label_encoder.pkl')
    
    def clean_text(self, text):
        text = text.lower()
        text = re.sub(r'[^a-zA-Z\\s]', '', text)
        return text
    
    def predict(self, message):
        cleaned = self.clean_text(message)
        features = self.vectorizer.transform([cleaned])
        prediction = self.model.predict(features)[0]
        probability = self.model.predict_proba(features)[0]
        
        return {
            'prediction': self.le.inverse_transform([prediction])[0],
            'confidence': float(np.max(probability)),
            'is_scam': self.le.inverse_transform([prediction])[0] != 'legitimate',
            'all_probabilities': dict(zip(self.le.classes_, probability.tolist()))
        }

# Initialize detector
detector = ScamDetector()

@app.route('/api/predict', methods=['POST'])
def predict_scam():
    try:
        data = request.get_json()
        
        if not data or 'message' not in data:
            return jsonify({'error': 'No message provided'}), 400
        
        message = data['message']
        
        if not message.strip():
            return jsonify({'error': 'Empty message'}), 400
        
        result = detector.predict(message)
        return jsonify(result)
    
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/health', methods=['GET'])
def health_check():
    return jsonify({'status': 'healthy', 'model': 'scam_detector_v1'})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=False)
'''

# Save the Flask API
with open('scam_api.py', 'w') as f:
    f.write(flask_api_code)

print("✅ Flask API created: scam_api.py")

✅ Flask API created: scam_api.py


In [12]:
# CELL 5: TEST THE API (Simulate Next.js calling the API)
import requests
import json

def test_api_locally():
    """Test our API with sample messages"""
    
    # This simulates what your Next.js app would do
    api_url = "http://localhost:5000/api/predict"
    
    test_messages = [
        "You won $1,000,000! Pay $50 to claim your prize now!",
        "Your package will be delivered tomorrow",
        "Urgent: Your bank account needs verification immediately",
        "Meeting scheduled for 3 PM in conference room"
    ]
    
    print("🧪 Testing API integration:")
    print("=" * 60)
    
    for message in test_messages:
        try:
            response = requests.post(api_url, json={'message': message})
            
            if response.status_code == 200:
                result = response.json()
                status = "🚨 SCAM" if result['is_scam'] else "✅ LEGITIMATE"
                print(f"Message: {message}")
                print(f"Result: {status} - {result['prediction']}")
                print(f"Confidence: {result['confidence']:.2f}")
                print("-" * 40)
            else:
                print(f"❌ Error: {response.status_code} - {response.text}")
                
        except Exception as e:
            print(f"❌ API call failed: {e}")

print("API test function created. Run test_api_locally() after starting the server.")

API test function created. Run test_api_locally() after starting the server.


In [14]:
# ALTERNATIVE: Keep emojis with proper encoding
nextjs_code = '''
// Next.js API Route: pages/api/check-scam.js
export default async function handler(req, res) {
  if (req.method !== 'POST') {
    return res.status(405).json({ error: 'Method not allowed' });
  }

  try {
    const { message } = req.body;

    if (!message) {
      return res.status(400).json({ error: 'Message is required' });
    }

    // Call our Python Flask API
    const flaskResponse = await fetch('http://localhost:5000/api/predict', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ message }),
    });

    if (!flaskResponse.ok) {
      throw new Error('Failed to get prediction from AI service');
    }

    const prediction = await flaskResponse.json();
    
    res.status(200).json(prediction);
  } catch (error) {
    console.error('Scam detection error:', error);
    res.status(500).json({ error: 'Internal server error' });
  }
}

// React Component Example: components/ScamChecker.js
import { useState } from 'react';

export default function ScamChecker() {
  const [message, setMessage] = useState('');
  const [result, setResult] = useState(null);
  const [loading, setLoading] = useState(false);

  const checkScam = async () => {
    if (!message.trim()) return;
    
    setLoading(true);
    try {
      const response = await fetch('/api/check-scam', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ message }),
      });
      
      const data = await response.json();
      setResult(data);
    } catch (error) {
      console.error('Error:', error);
      setResult({ error: 'Failed to check message' });
    } finally {
      setLoading(false);
    }
  };

  return (
    <div className="p-6 max-w-md mx-auto bg-white rounded-xl shadow-md">
      <h2 className="text-2xl font-bold mb-4">Scam Detection</h2>
      
      <textarea
        value={message}
        onChange={(e) => setMessage(e.target.value)}
        placeholder="Paste message to check for scams..."
        className="w-full p-3 border border-gray-300 rounded-md mb-4"
        rows="4"
      />
      
      <button
        onClick={checkScam}
        disabled={loading}
        className="w-full bg-blue-600 text-white py-2 px-4 rounded-md hover:bg-blue-700 disabled:bg-gray-400"
      >
        {loading ? 'Checking...' : 'Check for Scams'}
      </button>
      
      {result && (
        <div className={`mt-4 p-4 rounded-md ${
          result.is_scam ? 'bg-red-100 border border-red-300' : 'bg-green-100 border border-green-300'
        }`}>
          <h3 className={`font-bold ${result.is_scam ? 'text-red-800' : 'text-green-800'}`}>
            {result.is_scam ? '🚨 Potential Scam Detected' : '✅ Likely Legitimate'}
          </h3>
          <p>Type: {result.prediction}</p>
          <p>Confidence: {(result.confidence * 100).toFixed(1)}%</p>
        </div>
      )}
    </div>
  );
}
'''

# Force UTF-8 encoding
try:
    with open('nextjs_integration_examples.js', 'w', encoding='utf-8') as f:
        f.write(nextjs_code)
    print("✅ Next.js integration examples created with emojis!")
except Exception as e:
    print(f"Error with emojis: {e}")
    # Fallback without emojis
    nextjs_code = nextjs_code.replace('🚨', 'ALERT:').replace('✅', 'SAFE:')
    with open('nextjs_integration_examples.js', 'w', encoding='utf-8') as f:
        f.write(nextjs_code)
    print("✅ Next.js integration examples created (emoji-free version)!")

✅ Next.js integration examples created with emojis!
