In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, DBSCAN
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler

In [None]:
## TODO: import the embedders from main.py (e.g. TfidfEmbedder, which the embedding cell below relies on)

In [None]:
# Raw log lines; each entry is one log message that the later cells embed,
# cluster and scan for anomalies.
# NOTE(review): only three sample entries are listed, while the KMeans cell
# requests 5 clusters — the real list must contain at least that many lines.
logs = [
    "ERROR: failed to connect to database",
    "INFO: user login successful",
    "WARN: disk space low",
    # ... many more log lines ...
]

In [None]:
# Convert every raw log line into a feature vector.
# NOTE(review): TfidfEmbedder is neither defined nor imported in the visible
# cells — presumably it lives in main.py; confirm it is in scope before running.
embedder = TfidfEmbedder(max_features=5000)
# NOTE(review): whether embed() returns a dense array or a sparse matrix is not
# visible here — TODO confirm, since the scaler and PCA cells consume this value.
log_embeddings = embedder.embed(logs)

In [None]:
# Partition the log embeddings into a fixed number of clusters with k-means.
n_clusters = 5
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# "auto" in 1.4 and emits a FutureWarning in 1.2-1.3 when it is omitted, so
# leaving it out makes the clustering depend on the installed version.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
# One integer cluster id per log line, in the same order as `logs`.
clusters = kmeans.fit_predict(log_embeddings)

In [None]:
# Density-based clustering of the (unscaled) log embeddings.
# fit() stores the per-sample cluster ids on the estimator as `labels_`;
# scikit-learn marks points it considers noise with the label -1.
dbscan = DBSCAN(eps=0.5, min_samples=5).fit(log_embeddings)
dbscan_labels = dbscan.labels_

In [None]:
# Standardise every embedding feature; the anomaly detectors below are fitted
# on this scaled copy rather than on the raw embeddings.
# NOTE(review): if log_embeddings is a scipy sparse matrix, StandardScaler's
# default with_mean=True refuses to centre it — confirm the embedder output is dense.
scaler = StandardScaler().fit(log_embeddings)
embeddings_scaled = scaler.transform(log_embeddings)

In [None]:
# Isolation Forest fitted on the scaled embeddings, with 5% of the samples
# expected to be outliers and a fixed seed.
iso_forest = IsolationForest(contamination=0.05, random_state=42).fit(embeddings_scaled)
# Despite the variable name, these are predict() labels (+1 inlier, -1 outlier),
# not continuous anomaly scores.
anomaly_scores_if = iso_forest.predict(embeddings_scaled)

In [None]:
# Local Outlier Factor on the scaled embeddings: each sample is compared with
# its 20 nearest neighbours, and 5% of the samples are expected to be outliers.
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
# Despite the variable name, fit_predict() returns labels (+1 inlier, -1 outlier),
# not continuous scores; print_anomalies reports the entries equal to -1.
anomaly_scores_lof = lof.fit_predict(embeddings_scaled)

In [None]:
# One-Class SVM with an RBF kernel, fitted and evaluated on the same scaled
# embeddings; nu=0.05 bounds the fraction of training samples treated as outliers.
ocsvm = OneClassSVM(nu=0.05, kernel='rbf', gamma='scale')
# fit_predict() fits the model and then predicts on the same data, yielding
# +1 (inlier) / -1 (outlier) labels — not continuous scores, despite the name.
anomaly_scores_ocsvm = ocsvm.fit_predict(embeddings_scaled)

In [None]:
def plot_clusters(embeddings, labels, title='Кластеры', random_state=42):
    """Project embeddings to 2-D with PCA and show a scatter plot coloured by label.

    Parameters
    ----------
    embeddings : array-like
        One row per sample; passed unchanged to ``PCA.fit_transform``.
    labels : sequence
        One value per row of ``embeddings``; used as the colour of each point
        and to build the legend entries.
    title : str
        Title drawn above the plot.
    random_state : int or None
        Seed forwarded to PCA so the projection is reproducible when a
        randomised SVD solver is chosen. Pass ``None`` for unseeded behaviour.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    from sklearn.decomposition import PCA

    # PCA's "auto" solver may switch to randomised SVD on large inputs; seeding
    # it keeps the 2-D layout identical across Restart & Run All.
    pca = PCA(n_components=2, random_state=random_state)
    emb_2d = pca.fit_transform(embeddings)

    # Explicit figure/axes instead of the implicit pyplot state machine.
    fig, ax = plt.subplots(figsize=(8, 6))
    scatter = ax.scatter(emb_2d[:, 0], emb_2d[:, 1], c=labels, cmap='tab10')
    ax.legend(*scatter.legend_elements(), title="Кластеры")
    # Axis labels so the figure can be read on its own.
    ax.set_xlabel("PC 1")
    ax.set_ylabel("PC 2")
    ax.set_title(title)
    plt.show()

In [None]:
# Draw both clusterings of the same (unscaled) embeddings, one figure each.
for plot_title, cluster_labels in (
    ('KMeans Кластеры логов', clusters),
    ('DBSCAN Кластеры логов', dbscan_labels),
):
    plot_clusters(log_embeddings, cluster_labels, title=plot_title)

In [None]:
def print_anomalies(logs, scores, method_name):
    """Print a header for ``method_name`` followed by every log flagged as anomalous.

    ``logs`` and ``scores`` are walked in parallel (stopping at the shorter
    one); a log line is printed, indented by two spaces, when its paired
    score equals -1.
    """
    print(f"\nАномалии по методу {method_name}:")
    # Lazily select the entries whose paired label marks them as outliers.
    flagged = (entry for entry, flag in zip(logs, scores) if flag == -1)
    for entry in flagged:
        print(f"  {entry}")

In [None]:
# Report the log lines each detector labelled as outliers, one section per method.
for detector_name, detector_labels in (
    ("Isolation Forest", anomaly_scores_if),
    ("Local Outlier Factor", anomaly_scores_lof),
    ("One-Class SVM", anomaly_scores_ocsvm),
):
    print_anomalies(logs, detector_labels, detector_name)