# In [None]:
# ml_anomaly_detection.ipynb
# Esempio con Isolation Forest su dataset NSL-KDD (o simile)

import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# Load the train and test splits (for example nsl_kdd_train.csv and nsl_kdd_test.csv).
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('nsl_kdd_train.csv', 'nsl_kdd_test.csv')
)

# Preprocessing: integer-encode the categorical columns (protocol_type, service, flag).
# Each encoder is fitted on the training split only. LabelEncoder.transform raises
# ValueError for any value it did not see during fit, so the test split is mapped
# through the fitted classes by hand and test-only categories get the sentinel
# code -1 instead of aborting the run.
cat_cols = ['protocol_type', 'service', 'flag']
for c in cat_cols:
    le = LabelEncoder()
    train_df[c] = le.fit_transform(train_df[c])
    # le.classes_ is ordered so that position i is the code assigned to that category.
    code_by_category = {category: code for code, category in enumerate(le.classes_)}
    test_df[c] = test_df[c].map(code_by_category).fillna(-1).astype('int64')

# Split the targets from the features.
# The "label" column is assumed to hold the string "normal" for benign rows;
# every other value is treated as an attack (0 = normal, 1 = attack).
y_train = train_df['label'].ne('normal').astype('int64')
y_test = test_df['label'].ne('normal').astype('int64')

X_train = train_df.drop(columns=['label'])
X_test = test_df.drop(columns=['label'])

# Standardisation: the scaling statistics are learned from the training split
# only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline IsolationForest, fitted on the scaled training features (labels are not used).
# NOTE(review): contamination=0.1 fixes the share of training rows flagged as
# anomalous; confirm it matches the real attack ratio of the training split.
iso_forest = IsolationForest(
    n_estimators=100,
    contamination=0.1,
    random_state=42,
).fit(X_train_scaled)

# Prediction convention: -1 = anomaly, 1 = normal.
y_pred_train, y_pred_test = (
    iso_forest.predict(features)
    for features in (X_train_scaled, X_test_scaled)
)

# Map the IsolationForest output onto the convention used by y_test:
# -1 (anomaly) becomes 1 (attack) and 1 (normal) becomes 0.
y_pred_test_converted = (y_pred_test == -1).astype(int).tolist()

# Print the confusion matrix first, then the per-class precision/recall report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_test, y_pred_test_converted))
