<a href="https://colab.research.google.com/github/ketanp23/scsd-ddm-class/blob/main/One_Class_SVM_Anomaly_Detection_with_Flask_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""
End-to-End Anomaly Detection System using One-Class SVM and Flask.

This script covers:
1.  Synthetic dataset generation (normal vs. anomaly).
2.  Model Training using One-Class SVM.
3.  Model Testing and Validation with a labeled test set.
4.  Model Evaluation (Accuracy, Precision, Recall, F1-Score, Confusion Matrix).
5.  Model Serving via a Flask API.

Requirements:
- scikit-learn
- numpy
- flask

Install with: pip install scikit-learn numpy flask
"""

import numpy as np
import joblib
from sklearn.svm import OneClassSVM
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)
from flask import Flask, request, jsonify

# --- Global Configuration ---
MODEL_FILE = 'one_class_svm_model.joblib'
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

def create_dataset():
    """
    Creates a synthetic 2D dataset.

    - Training data: 200 points from a "normal" Gaussian distribution.
    - Test data: 100 points (50 normal, 50 anomalies).

    Returns:
        X_train (np.array): Training data (normal points only).
        X_test (np.array): Test data (mixed normal and anomaly).
        y_test (np.array): Labels for test data (0=normal, 1=anomaly).
    """
    print("Generating synthetic dataset...")

    # 1. Training Data (Normal)
    # 200 points centered at (0, 0)
    X_train = 0.5 * np.random.randn(200, 2) + np.array([0, 0])

    # 2. Test Data (Mixed)
    # 50 "normal" points, close to the training cluster
    X_test_normal = 0.5 * np.random.randn(50, 2) + np.array([0, 0])
    y_test_normal = np.zeros(50, dtype=int) # Label 0 for normal

    # 50 "anomaly" points, far from the training cluster
    X_test_anomaly = 4 * np.random.rand(50, 2) - np.array([2, 2])
    # Ensure anomalies are spread out, e.g., in all four quadrants
    X_test_anomaly[12:25] += np.array([4, 0])
    X_test_anomaly[25:37] += np.array([0, 4])
    X_test_anomaly[37:50] += np.array([4, 4])
    y_test_anomaly = np.ones(50, dtype=int) # Label 1 for anomaly

    # Combine test data
    X_test = np.vstack((X_test_normal, X_test_anomaly))
    y_test = np.concatenate((y_test_normal, y_test_anomaly))

    return X_train, X_test, y_test

def train_and_evaluate_model():
    """
    Trains, saves, and evaluates the One-Class SVM model.
    """
    X_train, X_test, y_test = create_dataset()

    # --- 1. Model Training ---
    print("Training One-Class SVM model...")

    # Initialize One-Class SVM.
    # 'nu' is a key parameter:
    # - An upper bound on the fraction of training errors.
    # - A lower bound on the fraction of support vectors.
    # - We set it to 0.1, assuming ~10% of our training data might be noisy.
    svm = OneClassSVM(kernel='rbf', gamma='auto', nu=0.1)

    # Train the model ONLY on the "normal" data.
    svm.fit(X_train)

    print(f"Model trained and saved to {MODEL_FILE}")
    joblib.dump(svm, MODEL_FILE)

    # --- 2. Model Testing and Validation ---
    print("\n--- Model Evaluation ---")

    # Predict on the mixed test set.
    # The model returns:
    #  1 for inliers (predicted as "normal")
    # -1 for outliers (predicted as "anomaly")
    y_pred_raw = svm.predict(X_test)

    # --- 3. Model Evaluation Parameters ---

    # We need to map the model's output to our ground truth labels:
    # Model: -1 (anomaly) -> Our Label: 1 (anomaly)
    # Model:  1 (normal)  -> Our Label: 0 (normal)
    y_pred = [1 if p == -1 else 0 for p in y_pred_raw]

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print(f"Accuracy:  {accuracy * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}% (How many predicted anomalies were real)")
    print(f"Recall:    {recall * 100:.2f}% (How many real anomalies were caught)")
    print(f"F1-Score:  {f1 * 100:.2f}%")

    print("\nConfusion Matrix:")
    print("         Pred Normal  Pred Anomaly")
    print(f"True Normal:   {cm[0][0]:>5}      {cm[0][1]:>10}")
    print(f"True Anomaly:  {cm[1][0]:>5}      {cm[1][1]:>10}")
    print("---------------------------------")


# --- 4. Flask API for Serving ---

app = Flask(__name__)
model = None

def load_model():
    """Load the trained model from disk."""
    global model
    try:
        model = joblib.load(MODEL_FILE)
        print(f"Model {MODEL_FILE} loaded successfully for Flask app.")
    except FileNotFoundError:
        print(f"Error: Model file '{MODEL_FILE}' not found.")
        print("Please run the script directly first to train and save the model.")
        model = None
    except Exception as e:
        print(f"Error loading model: {e}")
        model = None

@app.route('/predict', methods=['POST'])
def predict_anomaly():
    """
    API endpoint to predict if new data is an anomaly.

    Expects JSON payload:
    {
        "features": [1.2, 2.3]
    }

    Returns JSON response:
    {
        "prediction": "normal" | "anomaly"
    }
    or
    {
        "error": "Error message"
    }
    """
    if model is None:
        return jsonify({"error": "Model is not loaded."}), 500

    try:
        data = request.get_json()

        if 'features' not in data:
            return jsonify({"error": "Missing 'features' key in JSON payload."}), 400

        features = data['features']

        # Ensure features are in the correct format (list or 1D array)
        if not isinstance(features, list) or len(features) != 2:
            return jsonify({"error": "Features must be a list of 2 numbers."}), 400

        # Convert to 2D numpy array for sklearn prediction
        features_np = np.array(features).reshape(1, -1)

        # Get prediction (1 for normal, -1 for anomaly)
        prediction_raw = model.predict(features_np)

        # Convert to human-readable string
        result = "anomaly" if prediction_raw[0] == -1 else "normal"

        return jsonify({"prediction": result})

    except Exception as e:
        return jsonify({"error": f"An error occurred during prediction: {str(e)}"}), 500

# --- Main execution ---
if __name__ == '__main__':
    # Step 1: Train, evaluate, and save the model
    train_and_evaluate_model()

    # Step 2: Load the model for the API
    load_model()

    # Step 3: Run the Flask server
    if model is not None:
        print("\nStarting Flask server... Access at http://127.0.0.1:5000")
        print("Test the API with:")
        print("curl -X POST -H \"Content-Type: application/json\" -d \"{\\\"features\\\": [0.1, 0.2]}\" http://127.0.0.1:5000/predict")
        print("curl -X POST -H \"Content-Type: application/json\" -d \"{\\\"features\\\": [8.0, 8.0]}\" http://127.0.0.1:5000/predict")
        app.run(debug=True, port=5000)
    else:
        print("\nFlask server not started due to model loading failure.")

Generating synthetic dataset...
Training One-Class SVM model...
Model trained and saved to one_class_svm_model.joblib

--- Model Evaluation ---
Accuracy:  93.00%
Precision: 89.09% (How many predicted anomalies were real)
Recall:    98.00% (How many real anomalies were caught)
F1-Score:  93.33%

Confusion Matrix:
         Pred Normal  Pred Anomaly
True Normal:      44               6
True Anomaly:      1              49
---------------------------------
Model one_class_svm_model.joblib loaded successfully for Flask app.

Starting Flask server... Access at http://127.0.0.1:5000
Test the API with:
curl -X POST -H "Content-Type: application/json" -d "{\"features\": [0.1, 0.2]}" http://127.0.0.1:5000/predict
curl -X POST -H "Content-Type: application/json" -d "{\"features\": [8.0, 8.0]}" http://127.0.0.1:5000/predict
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with watchdog (inotify)
