### Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix
from sklearn.model_selection import train_test_split
from flask import Flask, request, jsonify

### Data Preparation

In [2]:
def preprocess_data(df, numerical_cols=None, categorical_cols=None, target=None):
    """Impute, encode and standardise a DataFrame for anomaly detection.

    Works on a copy of ``df``: missing values are filled by
    ``handle_missing_values``, the columns in ``categorical_cols`` are one-hot
    encoded by ``encode_categorical_features``, and the numerical columns are
    standardised with a newly created ``StandardScaler``.

    Args:
        df: Input DataFrame. It is copied, not modified.
        numerical_cols: Columns to standardise. When ``None``, every
            ``float64``/``int64`` column present after encoding is used,
            except the ``target`` column(s).
        categorical_cols: Columns to one-hot encode. Skipped when falsy.
        target: Label column name (or list of names). When falsy, no label
            is split off.

    Returns:
        ``(X, y, scaler)`` when ``target`` is given, otherwise ``(X, scaler)``.
        ``X`` and ``y`` are NumPy arrays taken from the processed frame. The
        scaler is left unfitted when there were no numerical columns to scale.
    """
    data = df.copy()
    data = handle_missing_values(data)

    if categorical_cols:
        data = encode_categorical_features(data, categorical_cols)

    if numerical_cols is None:
        numerical_cols = data.select_dtypes(include=['float64', 'int64']).columns.tolist()
        if target:
            # The label must not be standardised: auto-detection would
            # otherwise pick up a numeric target and rescale it.
            target_cols = set(target) if isinstance(target, (list, tuple)) else {target}
            numerical_cols = [col for col in numerical_cols if col not in target_cols]

    scaler = StandardScaler()
    # StandardScaler rejects a zero-column input, so only scale when there is
    # something to scale.
    if len(numerical_cols) > 0:
        data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

    if target:
        X = data.drop(target, axis=1).values
        y = data[target].values
        return X, y, scaler

    X = data.values
    return X, scaler

In [3]:
def handle_missing_values(df):
    """Fill missing values in ``df`` and return it.

    ``float64``/``int64`` columns are filled with the column median and
    ``object`` columns with the column's most frequent value. Columns are
    assigned back onto ``df``, so the frame passed in is modified.

    Args:
        df: DataFrame to impute.

    Returns:
        The same ``df`` object, with the missing values filled.
    """
    num_cols = df.select_dtypes(include=['float64', 'int64']).columns
    for col in num_cols:
        if df[col].isnull().any():
            # Assign the result back instead of ``df[col].fillna(inplace=True)``:
            # the chained in-place form acts on a temporary object and does
            # not update ``df`` under pandas Copy-on-Write.
            df[col] = df[col].fillna(df[col].median())

    cat_cols = df.select_dtypes(include=['object']).columns
    for col in cat_cols:
        if df[col].isnull().any():
            modes = df[col].mode()
            # A column with no observed values has no mode; leave it as is
            # rather than failing on ``mode()[0]``.
            if not modes.empty:
                df[col] = df[col].fillna(modes.iloc[0])

    return df

In [4]:
def encode_categorical_features(df, categorical_cols):
    """One-hot encode the given columns, one column at a time.

    Each column in ``categorical_cols`` is replaced by its dummy columns via
    ``pd.get_dummies`` with ``drop_first=True``, so the first category level
    of every column is dropped.

    Args:
        df: DataFrame holding the columns to encode.
        categorical_cols: Iterable of column names to encode, in order.

    Returns:
        The encoded DataFrame (``df`` itself when ``categorical_cols`` is empty).
    """
    encoded = df
    for column_name in categorical_cols:
        encoded = pd.get_dummies(encoded, columns=[column_name], drop_first=True)
    return encoded

In [5]:
def train_isolation_forest(X, contamination=0.1, random_state=42):
    """Fit an ``IsolationForest`` on ``X`` and return the fitted estimator.

    Args:
        X: Training samples passed straight to ``IsolationForest.fit``.
        contamination: Forwarded to ``IsolationForest`` as ``contamination``.
        random_state: Forwarded to ``IsolationForest`` as ``random_state``.

    Returns:
        The fitted ``IsolationForest`` (100 estimators, ``max_samples='auto'``).
    """
    forest_params = {
        'contamination': contamination,
        'random_state': random_state,
        'n_estimators': 100,
        'max_samples': 'auto',
    }
    forest = IsolationForest(**forest_params)
    forest.fit(X)
    return forest

### Evaluation

In [6]:
def evaluate_anomaly_detection(y_true, y_pred, verbose=True):
    """Score binary anomaly predictions against ground-truth labels.

    Precision, recall and F1 are computed for the positive label ``1``,
    together with the confusion matrix of ``y_true`` versus ``y_pred``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        verbose: When true, print the metrics and the confusion matrix.

    Returns:
        Dict with keys ``precision``, ``recall``, ``f1_score`` and
        ``confusion_matrix``.
    """
    scores = precision_recall_fscore_support(
        y_true, y_pred, average='binary', pos_label=1
    )
    # The fourth element (support) is not reported.
    precision, recall, f1 = scores[0], scores[1], scores[2]
    cm = confusion_matrix(y_true, y_pred)

    metrics = {
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'confusion_matrix': cm,
    }

    if not verbose:
        return metrics

    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("Confusion Matrix:")
    print(cm)
    return metrics

In [7]:
class IsolationForestSystem:
    """Isolation Forest anomaly detector bundled with its own feature scaler.

    ``fit`` standardises the training data with a ``StandardScaler`` and
    trains an ``IsolationForest`` on the scaled values. ``predict`` applies
    the same scaler before scoring. The fitted pieces can be saved to and
    restored from disk with joblib.
    """

    def __init__(self, contamination=0.1, random_state=42):
        """Store the hyper-parameters; nothing is trained until ``fit``.

        Args:
            contamination: Passed to ``IsolationForest`` as ``contamination``.
            random_state: Passed to ``IsolationForest`` as ``random_state``.
        """
        self.contamination = contamination
        self.random_state = random_state
        self.model = None
        self.scaler = None
        self.feature_names = None

    def _require_fitted(self):
        """Raise ``NotFittedError`` unless both model and scaler are set."""
        if self.model is None or self.scaler is None:
            # NotFittedError derives from both ValueError and AttributeError,
            # so callers that caught the former AttributeError still work.
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "IsolationForestSystem is not fitted yet; call fit() or load_model() first"
            )

    def fit(self, X, feature_names=None):
        """Fit the scaler and the Isolation Forest on ``X``.

        Args:
            X: Training data, a DataFrame or an array-like.
            feature_names: Feature names to remember when ``X`` is not a
                DataFrame. Ignored for DataFrames, whose column names are used.

        Returns:
            ``self``, so calls can be chained.
        """
        if isinstance(X, pd.DataFrame):
            self.feature_names = X.columns.tolist()
            X_values = X.values
        else:
            self.feature_names = feature_names
            X_values = X

        self.scaler = StandardScaler()
        X_scaled = self.scaler.fit_transform(X_values)

        self.model = IsolationForest(
            contamination=self.contamination,
            random_state=self.random_state,
            n_estimators=100,
            max_samples='auto'
        )
        self.model.fit(X_scaled)

        return self

    def predict(self, X):
        """Label and score samples.

        Args:
            X: Samples to score, a DataFrame or an array-like.

        Returns:
            ``(y_pred, anomaly_scores)``. ``y_pred`` is 1 where the forest
            predicts ``-1`` (outlier) and 0 otherwise. ``anomaly_scores`` is
            the forest's ``decision_function`` output, where lower values
            mean more abnormal.

        Raises:
            NotFittedError: If called before ``fit`` or ``load_model``.
        """
        self._require_fitted()
        X_values = X.values if isinstance(X, pd.DataFrame) else X
        X_scaled = self.scaler.transform(X_values)

        anomaly_scores = self.model.decision_function(X_scaled)
        predictions = self.model.predict(X_scaled)
        y_pred = np.where(predictions == -1, 1, 0)

        return y_pred, anomaly_scores

    def save_model(self, filepath):
        """Dump model, scaler, feature names and hyper-parameters with joblib.

        Args:
            filepath: Destination passed to ``joblib.dump``.
        """
        import joblib

        model_data = {
            'model': self.model,
            'scaler': self.scaler,
            'feature_names': self.feature_names,
            'contamination': self.contamination,
            'random_state': self.random_state
        }

        joblib.dump(model_data, filepath)
        print(f"Model saved to {filepath}")

    @classmethod
    def load_model(cls, filepath):
        """Rebuild an instance from a file written by ``save_model``.

        Args:
            filepath: Source passed to ``joblib.load``.

        Returns:
            A new instance carrying the stored model, scaler and settings.
        """
        import joblib

        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load files from a trusted source.
        model_data = joblib.load(filepath)

        instance = cls(
            contamination=model_data['contamination'],
            random_state=model_data['random_state']
        )
        instance.model = model_data['model']
        instance.scaler = model_data['scaler']
        instance.feature_names = model_data['feature_names']

        return instance

    def explain_predictions(self, X, n_top_features=3):
        """Rank, for every predicted anomaly, the features furthest from the mean.

        A feature's contribution is the absolute value of its scaled value,
        so this is a heuristic ranking rather than a model attribution.

        Args:
            X: Samples to explain, a DataFrame or an array-like. Array-likes
                need ``feature_names`` to have been recorded.
            n_top_features: Number of highest-contribution features to keep.

        Returns:
            Dict keyed by the integer row position of each anomaly. Each
            value holds ``anomaly_score`` and ``top_contributing_features``,
            a list of ``(feature, contribution, raw_value)`` tuples sorted by
            decreasing contribution.

        Raises:
            ValueError: If ``X`` is not a DataFrame and no feature names are known.
            NotFittedError: If called before ``fit`` or ``load_model``.
        """
        if not isinstance(X, pd.DataFrame):
            if self.feature_names is None:
                raise ValueError("Feature names must be provided for explanation")
            X = pd.DataFrame(X, columns=self.feature_names)

        y_pred, anomaly_scores = self.predict(X)

        explanations = {}
        anomaly_indices = np.where(y_pred == 1)[0]
        if len(anomaly_indices) == 0:
            return explanations

        # Scale the whole batch once instead of re-running the scaler for
        # every anomalous row.
        raw_values = X.values
        scaled_values = self.scaler.transform(raw_values)
        feature_labels = list(X.columns)

        for idx in anomaly_indices:
            # Plain int keys keep the result JSON-friendly. Lookups with a
            # NumPy integer still hit the same entry.
            idx = int(idx)
            feature_contribs = [
                (feature, abs(scaled), raw)
                for feature, scaled, raw in zip(
                    feature_labels, scaled_values[idx], raw_values[idx]
                )
            ]
            feature_contribs.sort(key=lambda item: item[1], reverse=True)

            explanations[idx] = {
                'anomaly_score': anomaly_scores[idx],
                'top_contributing_features': feature_contribs[:n_top_features]
            }

        return explanations

In [8]:
class AnomalyMonitoringService:
    """Scores incoming batches, keeps an anomaly history and raises alerts.

    The wrapped model is expected to behave like ``IsolationForestSystem``:
    it must provide ``predict``, ``explain_predictions`` and
    ``feature_names``, plus the fitted ``scaler`` and ``model`` attributes
    that are used to compute alert severity.
    """

    def __init__(self, model_path=None, model=None):
        """Wrap an in-memory model or load one from disk.

        Args:
            model_path: File to load with ``IsolationForestSystem.load_model``
                when ``model`` is not given.
            model: Ready-to-use model. Takes precedence over ``model_path``.

        Raises:
            ValueError: If neither ``model`` nor ``model_path`` is provided.
        """
        if model is not None:
            self.model = model
        elif model_path is not None:
            self.model = IsolationForestSystem.load_model(model_path)
        else:
            raise ValueError("Either model or model_path must be provided")

        self.alert_threshold = 0.9
        self.anomalies_detected = []
        self.last_check_time = None

    def _severity_scores(self, new_data):
        """Return per-sample anomaly severity, where higher is more abnormal.

        Severity is the negated ``score_samples`` output of the underlying
        forest, the scale on which ``alert_threshold`` (0 to 1) is defined.
        """
        values = new_data.values if isinstance(new_data, pd.DataFrame) else new_data
        scaled = self.model.scaler.transform(values)
        return -self.model.model.score_samples(scaled)

    def check_data(self, new_data):
        """Score a batch, record its anomalies and alert on severe ones.

        Args:
            new_data: Batch to check, a DataFrame or an indexable array.

        Returns:
            List of the anomaly records created by this call (empty when the
            batch has no anomalies). Each record holds ``timestamp``,
            ``index``, ``data``, ``anomaly_score``, ``severity`` and
            ``explanation``.
        """
        import time
        self.last_check_time = time.time()

        y_pred, anomaly_scores = self.model.predict(new_data)
        anomaly_indices = np.where(y_pred == 1)[0]

        new_anomalies = []
        if len(anomaly_indices) == 0:
            return new_anomalies

        explanations = self.model.explain_predictions(new_data)
        # ``anomaly_scores`` come from decision_function and are negative
        # for every flagged sample, so they can never exceed a threshold in
        # [0, 1]. Alerts are decided on the severity scale instead.
        severities = self._severity_scores(new_data)
        is_frame = isinstance(new_data, pd.DataFrame)

        for idx in anomaly_indices:
            # Plain int so the record can be serialised to JSON.
            idx = int(idx)
            if is_frame:
                row = new_data.iloc[idx].to_dict()
            else:
                row = dict(zip(self.model.feature_names, new_data[idx]))

            anomaly_data = {
                'timestamp': self.last_check_time,
                'index': idx,
                'data': row,
                'anomaly_score': anomaly_scores[idx],
                'severity': float(severities[idx]),
                'explanation': explanations.get(idx)
            }
            self.anomalies_detected.append(anomaly_data)
            new_anomalies.append(anomaly_data)

            if anomaly_data['severity'] > self.alert_threshold:
                self._send_alert(anomaly_data)

        return new_anomalies

    def _send_alert(self, anomaly_data):
        """Print an alert describing one anomaly record."""
        print(f"ALERT: Critical anomaly detected!")
        print(f"Anomaly score: {anomaly_data['anomaly_score']:.4f}")
        print("Data:", anomaly_data['data'])
        if anomaly_data['explanation']:
            print("Contributing factors:")
            for feature, contribution, value in anomaly_data['explanation']['top_contributing_features']:
                print(f"  - {feature}: Value = {value}, Contribution = {contribution:.4f}")

    def get_anomaly_history(self, start_time=None, end_time=None):
        """Return recorded anomalies, optionally limited to a time window.

        Args:
            start_time: Earliest timestamp to include, or ``None`` for no
                lower bound.
            end_time: Latest timestamp to include, or ``None`` for no upper
                bound.

        Returns:
            List of anomaly records. Without bounds this is the internal
            history list itself.
        """
        if not self.anomalies_detected:
            return []

        if start_time is None and end_time is None:
            return self.anomalies_detected

        # Compare against None explicitly so a bound of 0 is still applied.
        return [
            anomaly
            for anomaly in self.anomalies_detected
            if (start_time is None or anomaly['timestamp'] >= start_time)
            and (end_time is None or anomaly['timestamp'] <= end_time)
        ]

    def set_alert_threshold(self, threshold):
        """Set the severity above which ``check_data`` sends an alert.

        Raises:
            ValueError: If ``threshold`` is outside ``[0, 1]``.
        """
        if 0 <= threshold <= 1:
            self.alert_threshold = threshold
        else:
            raise ValueError("Threshold must be between 0 and 1")

    def clear_history(self):
        """Forget all recorded anomalies."""
        self.anomalies_detected = []

In [None]:
def create_flask_api(model_path):
    """Build a Flask app exposing the anomaly model over HTTP.

    Routes (both POST, expecting a JSON object with a ``data`` field that
    ``pd.DataFrame`` can consume):

    * ``/predict`` returns predictions, scores and explanations for the batch.
    * ``/monitor`` runs the batch through the monitoring service, which
      records anomalies, and returns the ones found.

    Any failure while handling a request is reported as
    ``{"error": message}`` with HTTP status 400.

    Args:
        model_path: Path of the saved model handed to
            ``AnomalyMonitoringService``.

    Returns:
        The configured ``Flask`` application.
    """
    app = Flask(__name__)
    monitoring_service = AnomalyMonitoringService(model_path=model_path)

    def _frame_from_request():
        """Parse the request body into a DataFrame or raise ValueError."""
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict) or 'data' not in payload:
            raise ValueError("Request body must be a JSON object with a 'data' field")
        return pd.DataFrame(payload['data'])

    @app.route('/predict', methods=['POST'])
    def predict():
        try:
            df = _frame_from_request()

            y_pred, anomaly_scores = monitoring_service.model.predict(df)

            result = {
                'predictions': y_pred.tolist(),
                'anomaly_scores': anomaly_scores.tolist(),
                'anomaly_indices': np.where(y_pred == 1)[0].tolist()
            }

            if np.sum(y_pred) > 0:
                explanations = monitoring_service.model.explain_predictions(df)
                # Cast every value to a built-in type: NumPy scalars are not
                # JSON serialisable.
                result['explanations'] = {
                    str(idx): {
                        'anomaly_score': float(details['anomaly_score']),
                        'contributing_features': [
                            {
                                'feature': str(feature),
                                'contribution': float(contribution),
                                'value': float(value)
                            }
                            for feature, contribution, value in details['top_contributing_features']
                        ]
                    }
                    for idx, details in explanations.items()
                }

            return jsonify(result)

        except Exception as e:
            return jsonify({'error': str(e)}), 400

    @app.route('/monitor', methods=['POST'])
    def monitor():
        try:
            df = _frame_from_request()

            anomalies = monitoring_service.check_data(df)

            result = {
                'anomalies_detected': len(anomalies),
                'anomalies': [
                    {
                        # The index may be a NumPy integer, which jsonify
                        # rejects, so convert it explicitly.
                        'index': int(anomaly['index']),
                        'anomaly_score': float(anomaly['anomaly_score']),
                        'timestamp': anomaly['timestamp']
                    }
                    for anomaly in anomalies
                ]
            }

            return jsonify(result)

        except Exception as e:
            return jsonify({'error': str(e)}), 400

    return app