In [12]:
import numpy as np
from sklearn.cluster import KMeans, DBSCAN
from sklearn.neighbors import LocalOutlierFactor


def generate_sensor_data(n_normal=180, n_anomaly=20, random_state=42):
    """Generate synthetic tram sensor readings with labelled anomalies.

    Every row holds three features in the order (temperature, vibration,
    speed). Normal rows are drawn around three operating-mode centres and
    anomalous rows around two fault centres (overheating, engine failure).

    Parameters
    ----------
    n_normal : int
        Number of normal rows. They are split as evenly as possible across
        the three operating clusters; earlier clusters get the remainder.
    n_anomaly : int
        Number of anomalous rows, split the same way across the two fault
        clusters.
    random_state : int or None
        Seed handed to ``np.random.seed``. Note that this reseeds NumPy's
        global generator.

    Returns
    -------
    X : ndarray of shape (n_normal + n_anomaly, 3)
        Normal rows first, anomalous rows last.
    y_true : ndarray of shape (n_normal + n_anomaly,)
        Ground-truth labels: 0 = normal, 1 = anomaly.

    Raises
    ------
    ValueError
        If ``n_normal`` or ``n_anomaly`` is negative.
    """
    if n_normal < 0 or n_anomaly < 0:
        raise ValueError("n_normal and n_anomaly must be non-negative")

    np.random.seed(random_state)

    def _draw_clusters(total, centers, scale):
        # Split `total` rows over the centres and sample them in order, so the
        # default sizes consume the random stream exactly as 60/60/60 and 10/10.
        base, extra = divmod(total, len(centers))
        sizes = [base + (i < extra) for i in range(len(centers))]
        return np.vstack([
            np.random.randn(size, 3) * scale + center
            for size, center in zip(sizes, centers)
        ])

    # Normal trams - three operating clusters (temp, vibration, speed)
    normal = _draw_clusters(
        n_normal,
        [[50, 10, 30], [55, 12, 35], [48, 8, 28]],
        0.5,
    )

    # Anomalies - atypical behaviour: overheating, then engine failure
    anomalies = _draw_clusters(
        n_anomaly,
        [[80, 25, 15], [30, 30, 5]],
        0.3,
    )

    X = np.vstack([normal, anomalies])
    # Labels are derived from the same counts as the data, so they always align.
    y_true = np.repeat([0, 1], [n_normal, n_anomaly])  # 0 = normal, 1 = anomaly
    return X, y_true


def detect_anomalies_kmeans(X, k=3, percentile=95, random_state=42):
    """Detect anomalies as the points farthest from their nearest K-Means centroid.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to ``KMeans.fit`` and ``KMeans.transform``.
    k : int
        Number of clusters (``n_clusters``).
    percentile : float
        Percentile of the nearest-centroid distances used as the cut-off;
        rows strictly above it are reported.
    random_state : int or None
        Seed forwarded to ``KMeans``. The default of 42 matches the value
        that was previously hard-coded.

    Returns
    -------
    ndarray
        Row indices of ``X`` whose nearest-centroid distance exceeds the
        threshold.
    """
    # n_init is deliberately left at the library default.
    kmeans = KMeans(n_clusters=k, random_state=random_state)
    kmeans.fit(X)
    # transform() yields the distance to every centroid; keep the smallest per row.
    distances = np.min(kmeans.transform(X), axis=1)
    threshold = np.percentile(distances, percentile)
    return np.where(distances > threshold)[0]


def detect_anomalies_dbscan(X, eps=5, min_samples=5):
    """Detect anomalies as the rows DBSCAN assigns the label -1.

    ``eps`` and ``min_samples`` are forwarded unchanged to ``DBSCAN``.
    Returns the row indices of ``X`` that received the label -1.
    """
    cluster_ids = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X)
    return np.flatnonzero(cluster_ids == -1)

def detect_anomalies_lof(X, n_neighbors=20, contamination=0.1):
    """Detect anomalies with the Local Outlier Factor estimator.

    ``n_neighbors`` and ``contamination`` are forwarded unchanged to
    ``LocalOutlierFactor``. Returns the row indices of ``X`` that the
    estimator's ``fit_predict`` labels -1.
    """
    detector = LocalOutlierFactor(
        n_neighbors=n_neighbors,
        contamination=contamination,
    )
    predictions = detector.fit_predict(X)
    outlier_mask = predictions == -1
    return np.flatnonzero(outlier_mask)

from sklearn.metrics import precision_recall_fscore_support

def eval(y_true, anomalies):
    """Score detected anomaly indices against ground-truth labels.

    NOTE: this name shadows Python's built-in ``eval``; it is kept because
    existing cells call it under this name.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels; 1 is treated as the positive (anomaly) class by
        the binary averaging.
    anomalies : array-like of int
        Row indices that a detector flagged as anomalous.

    Returns
    -------
    tuple
        ``(precision, recall, f1)`` for the positive class.
    """
    # Turn the index list into a 0/1 prediction vector aligned with y_true.
    y_pred = np.zeros_like(y_true)
    y_pred[anomalies] = 1

    # zero_division=0 keeps the scores at 0 when a detector flags nothing,
    # without emitting UndefinedMetricWarning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='binary', zero_division=0
    )
    return precision, recall, f1
    

In [None]:
# Build the labelled synthetic data set with the default sizes and seed.
X, y_true = generate_sensor_data()

# Stage 1: run each detector; every call returns the row indices it flags.
anom_kmeans = detect_anomalies_kmeans(X, k=3, percentile=95)
anom_dbscan = detect_anomalies_dbscan(X, eps=15, min_samples=20)
anom_lof = detect_anomalies_lof(X, n_neighbors=20, contamination=0.1)

# Stage 2: score each set of flagged indices against the ground truth.
p_km, r_km, f_km = eval(y_true, anom_kmeans)
p_db, r_db, f_db = eval(y_true, anom_dbscan)
p_lof, r_lof, f_lof = eval(y_true, anom_lof)

# Stage 3: report precision / recall / F1 per method.
print("K-Means:  precision =", p_km, " recall =", r_km, " F1 =", f_km)
print("DBSCAN:   precision =", p_db, " recall =", r_db, " F1 =", f_db)
print("LOF:      precision =", p_lof, " recall =", r_lof, " F1 =", f_lof)


K-Means:  precision = 1.0  recall = 0.5  F1 = 0.6666666666666666
DBSCAN:   precision = 1.0  recall = 1.0  F1 = 1.0
LOF:      precision = 1.0  recall = 1.0  F1 = 1.0
