In [9]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
import mlflow


In [10]:
df = pd.read_csv("../data/logs.csv")
df.head()


Unnamed: 0,timestamp,device_id,error_code,cpu_temp,bandwidth,status
0,2025-06-11 14:52:30.318750,device_0,366,54.87,150.88,OK
1,2025-06-11 14:52:20.318750,device_1,613,43.19,936.3,WARNING
2,2025-06-11 14:52:10.318750,device_2,292,42.88,558.25,OK
3,2025-06-11 14:52:00.318750,device_3,352,80.43,663.77,WARNING
4,2025-06-11 14:51:50.318750,device_4,837,46.22,600.56,OK


In [11]:
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Label encoding (status: OK → 0, WARNING → 1, ERROR → 2)
le = LabelEncoder()
df['status_encoded'] = le.fit_transform(df['status'])

# Özellikler
features = ['cpu_temp', 'bandwidth']
X = df[features]
y = df['status_encoded']

# Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [12]:
iso_model = IsolationForest(contamination=0.1, random_state=42)
df['anomaly'] = iso_model.fit_predict(X_scaled)
# -1 = anomali, 1 = normal


In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

clf = XGBClassifier()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(classification_report(y_test, y_pred, target_names=le.classes_))


              precision    recall  f1-score   support

       ERROR       0.00      0.00      0.00      1970
          OK       0.70      1.00      0.83     14066

    accuracy                           0.70     20000
   macro avg       0.29      0.33      0.28     20000
weighted avg       0.53      0.70      0.58     20000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [14]:
# XGBoost modelini logladık
mlflow.set_experiment("OpsGuardian-ML")

with mlflow.start_run():
    mlflow.log_param("model", "XGBoost")
    mlflow.log_metric("accuracy", clf.score(X_test, y_test))
    mlflow.sklearn.log_model(clf, "xgb_model")




In [15]:
# Isolation Forest modelini logla
with mlflow.start_run(run_name="isolation_forest"):
    mlflow.log_param("model", "IsolationForest")
    mlflow.log_param("contamination", 0.1)
    mlflow.sklearn.log_model(iso_model, "anomaly_detector_model")


