In [None]:
"""
Simple Anomaly Detection Model Training
Logs the model to MLflow and promotes the newest version via the 'production' alias
"""

import os
import numpy as np
from datetime import datetime
import mlflow
import requests
from datetime import datetime
from sklearn.ensemble import IsolationForest

# Base URLs of the services this notebook talks to.
INFERENCE_URL = "http://localhost:8000"
TRAINING_URL = "http://localhost:8030"
MLFLOW_URL = 'http://localhost:5000'
MINIO_URL = 'http://localhost:9000'

# MinIO Configuration
# Each variable is written into the process environment, overwriting any
# value that was already set there.
# NOTE(review): the access key and secret are hard-coded; confirm these are
# local development credentials only.
os.environ['AWS_ACCESS_KEY_ID'] = 'minio'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'minio123'
os.environ['MLFLOW_S3_ENDPOINT_URL'] = MINIO_URL
os.environ['MLFLOW_S3_IGNORE_TLS'] = 'true'

In [None]:
# Point the MLflow client at the tracking server configured in MLFLOW_URL.
mlflow.set_tracking_uri(MLFLOW_URL)

### Functions

In [None]:
import numpy as np

def generate_fake_vibration_data(n_points=100, start_timestamp=1700000000, sampling_rate_hz=100):
    """Generate fake vibration sensor data with Unix timestamps.

    The signal is the sum of three sinusoids (nominally 50 Hz, 120 Hz and
    10 Hz), Gaussian noise with standard deviation 0.01, and a constant
    offset of 0.1. Noise is drawn from NumPy's global RNG, so call
    ``np.random.seed`` beforehand for reproducible output.

    Args:
        n_points: Number of samples to generate.
        start_timestamp: Unix timestamp (seconds) of the first sample.
        sampling_rate_hz: Samples per second. Defaults to 100, which
            reproduces the previously hard-coded behaviour.

    Returns:
        dict with two equally long lists: ``"timestamps"`` (ints) and
        ``"values"`` (floats).

    Raises:
        ValueError: If ``sampling_rate_hz`` is not positive.

    Notes:
        Timestamps are truncated to whole seconds, so any rate above 1 Hz
        yields repeated timestamps (at the default 100 Hz, every 100
        consecutive samples share one value). Pass ``sampling_rate_hz=1``
        when distinct timestamps are required.
        At the default rate the 50 Hz term equals ``sin(pi * t)`` (about zero
        for integer ``t``) and the 120 Hz term lies above the Nyquist
        frequency, so neither shows up at its nominal frequency.
    """
    if sampling_rate_hz <= 0:
        raise ValueError("sampling_rate_hz must be positive")

    # Sample i is taken i / sampling_rate_hz seconds after start_timestamp;
    # int() truncates the result to whole seconds.
    timestamps = [int(start_timestamp + i / sampling_rate_hz) for i in range(n_points)]

    # Sample indices used as the time axis of every sinusoid below.
    t = np.arange(n_points)

    # Base vibration (e.g., machine operating frequency ~50Hz)
    base_freq = 0.05 * np.sin(2 * np.pi * t * 50 / sampling_rate_hz)

    # Harmonic component (bearing defect ~120Hz)
    harmonic = 0.03 * np.sin(2 * np.pi * t * 120 / sampling_rate_hz)

    # Low frequency component (imbalance ~10Hz)
    low_freq = 0.02 * np.sin(2 * np.pi * t * 10 / sampling_rate_hz)

    # Random noise
    noise = np.random.normal(0, 0.01, n_points)

    # Combine components (values in m/s² or g's)
    values = (base_freq + harmonic + low_freq + noise + 0.1).tolist()

    return {
        "timestamps": timestamps,
        "values": values
    }

def test_prediction(series_id: str, value: float, timeout: float = 10.0):
    """Send one observation to the inference service and return its reply.

    Posts ``{"timestamp": <now, whole seconds>, "value": value}`` as JSON to
    ``{INFERENCE_URL}/predict/{series_id}``.

    Args:
        series_id: Identifier of the series, used as the last URL segment.
        value: Observation to score.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook if the service is down.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f"{INFERENCE_URL}/predict/{series_id}"

    payload = {
        "timestamp": int(datetime.now().timestamp()),
        "value": value
    }

    response = requests.post(url, json=payload, timeout=timeout)
    response.raise_for_status()  # Raise exception for bad status codes

    return response.json()

def send_training_request(series_id, data=None, timeout=60):
    """Submit training data for a series to the training service.

    Posts ``data`` as JSON to ``{TRAINING_URL}/fit/{series_id}``.

    Args:
        series_id: Identifier of the series, used as the last URL segment.
        data: JSON-serialisable payload; must be non-empty.
        timeout: Seconds to wait for the service before giving up. Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If ``data`` is ``None`` or empty.
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    if not data:
        raise ValueError("Data should not be empty")

    url = f"{TRAINING_URL}/fit/{series_id}"
    response = requests.post(url, json=data, timeout=timeout)
    response.raise_for_status()  # Raise exception for bad status codes

    return response.json()

def restore_deleted_experiment(exp_name):
    """Restore the MLflow experiment ``exp_name`` if it is marked as deleted.

    Looks the experiment up by name and, when its lifecycle stage is
    ``"deleted"``, restores it. Does nothing when the experiment does not
    exist or is not deleted.

    This is a best-effort helper: any error raised by the MLflow client is
    reported on stdout and not re-raised, so the notebook keeps running.

    Args:
        exp_name: Name of the experiment to check.
    """
    client = mlflow.tracking.MlflowClient()

    try:
        exp = client.get_experiment_by_name(exp_name)
        if exp and exp.lifecycle_stage == "deleted":
            print("   ⚠️  Restoring deleted experiment...")
            client.restore_experiment(exp.experiment_id)
            print("   ✓ Experiment restored\n")
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and surface the failure instead of hiding it.
        print(f"   Could not check/restore experiment {exp_name!r}: {exc}")

### Configuration

In [None]:
# Configuration
# Identifier of the series this notebook trains and serves a model for.
SERIES_ID = "sensor_001_radial"
# Registered-model name, built from the default "model_od_<series id>" pattern.
MODEL_NAME = "model_od_" + SERIES_ID
# MLflow experiment under which the runs for this series are logged.
EXP_NAME = "modeling-" + SERIES_ID

### Generate synthetic training data


In [None]:
# Seed NumPy's global RNG so both draws below are reproducible.
np.random.seed(42)
# Training set: 32678 single-feature samples from a normal distribution
# with mean 0.315 and standard deviation 0.087.
X_train = np.random.normal(loc=0.315, scale=0.087, size=(32678, 1))
# One extra sample from the same distribution, used to probe the service.
X_test = np.random.normal(loc=0.315, scale=0.087, size=(1, 1))

### Train model

In [None]:
# Isolation forest with a fixed seed; contamination is the fraction of the
# training data the model should treat as anomalous.
model = IsolationForest(random_state=42, contamination=0.003)
# Kept as the cell's last expression so the fitted estimator is displayed.
model.fit(X_train)

### Log model

In [None]:
# Just in case the experiment was deleted earlier: restore it before selecting it.
restore_deleted_experiment(EXP_NAME)
mlflow.set_experiment(EXP_NAME)

# Logging to MLflow: everything inside the block belongs to one run.
with mlflow.start_run() as run:
    # Parameters describing what was trained.
    mlflow.log_params({
        "series_id": SERIES_ID,
        "algorithm": "isolation_forest",
        "training_points": len(X_train)
    })

    # Metrics: the contamination setting and summary statistics of the
    # training data.
    mlflow.log_metrics({
        "contamination": 0.003,
        "mean": float(X_train.mean()),
        "std": float(X_train.std())
    })

    # Store the fitted model and register it under MODEL_NAME.
    mlflow.sklearn.log_model(
        sk_model=model,
        name="model",
        registered_model_name=MODEL_NAME
    )

    run_id = run.info.run_id
    print(f"Run ID: {run_id}\n")

### Try to get a prediction

In [None]:
# Score the synthetic test sample; use SERIES_ID so the call follows the
# configuration cell instead of repeating the literal series name.
test_prediction(SERIES_ID, X_test[0][0])

In [None]:
# 0 lies well below the training distribution (mean 0.315, std 0.087).
# SERIES_ID replaces the repeated literal series name.
test_prediction(SERIES_ID, 0)

In [None]:
# 100 lies far above the training distribution (mean 0.315, std 0.087).
# SERIES_ID replaces the repeated literal series name.
test_prediction(SERIES_ID, 100)

### Promote to Production

In [None]:
# Query the registry for every version registered under MODEL_NAME.
client = mlflow.tracking.MlflowClient()
versions = client.search_model_versions(filter_string=f"name='{MODEL_NAME}'")
# Last expression of the cell, so the version list is shown as its output.
versions

In [None]:
if versions:
    # Compare versions as integers so that e.g. 10 ranks above 9.
    latest_version = max(versions, key=lambda v: int(v.version))

    # Point the "production" alias of MODEL_NAME at the newest version.
    client.set_registered_model_alias(
        name=MODEL_NAME,
        alias="production",
        version=latest_version.version
    )

    # Report inside the branch: latest_version only exists when versions
    # is non-empty.
    print(f"Version {latest_version.version} → production\n")
else:
    print(f"No registered versions found for {MODEL_NAME}; nothing to promote.\n")

### Verify if the new production model is being served

In [None]:
# Re-score the synthetic test sample after the alias change; SERIES_ID
# replaces the repeated literal series name.
test_prediction(SERIES_ID, X_test[0][0])

In [None]:
# Re-score 0 (well below the training mean of 0.315) after the alias change.
# SERIES_ID replaces the repeated literal series name.
test_prediction(SERIES_ID, 0)

In [None]:
# Re-score 100 (far above the training mean of 0.315) after the alias change.
# SERIES_ID replaces the repeated literal series name.
test_prediction(SERIES_ID, 100)

### Using the Training API

In [None]:
# Series trained through the training service (distinct from SERIES_ID).
series_id = 'sensor_010_horizontal'
# Default call: 100 synthetic samples with timestamps.
data = generate_fake_vibration_data()
# Last expression of the cell, so the service's JSON reply is displayed.
send_training_request(series_id, data)

In [None]:
# Build the links from MLFLOW_URL so they follow the configured tracking
# server instead of a second hard-coded copy of the address.
# NOTE(review): experiment id 1 is hard-coded, and the experiment name below
# was corrected from "anomlaly-..." on the assumption it was a typo — confirm
# both against the training service.
print(f"🧪 View experiment at: {MLFLOW_URL}/#/experiments/1")
print(f"Or search manually by {MLFLOW_URL}/ → anomaly-detection-training → train_{series_id}_xxxxxxx")