# SVM
Laden des auf IDS18 trainierten Modells  
Test auf IDS17

In [None]:
import os
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

In [None]:
# Display DataFrames in full: never truncate rows or columns
for _option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_option, None)

# Logging setup: INFO level with a timestamped line format.
# No `filename` is passed, so records go to the default stream handler.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Base paths of the Parquet data sets (IDS17 and IDS18)
parquet_verzeichnis_ids17 = '../01_Datensaetze/improved_cic-ids-2017/ids17_parquet'
parquet_verzeichnis_ids18 = '../01_Datensaetze/improved_cse-cic-ids-2018/ids18_parquet'

### Laden von IDS17

In [None]:
# Read the IDS17 data set; its location is the IDS17 base path with the
# suffix '_prep_0' appended.
ids17 = pd.read_parquet(parquet_verzeichnis_ids17 + '_prep_0')
# Log how many rows each value of the `Label` column has
logging.info(f"Class distribution\n{ids17.Label.value_counts()}")

In [None]:
# Quick sanity check: dimensions first, then the column index
print(ids17.shape, ids17.columns, sep="\n")

### Trennen von Features und Labels

In [None]:
# Features: every column except the last one
X = ids17.iloc[:, :-1]
# Target: the last column
y = ids17.iloc[:, -1]

print(f"Form von X: {X.shape}")
print(f"Form von y: {y.shape}")

### Label Encoding für y

In [None]:
# Load the previously persisted label encoder (presumably the one fitted
# when the model was trained on IDS18 — confirm against the training notebook).
label_encoder = joblib.load('label_encoder.pkl')

# Only *apply* the stored mapping. Calling fit_transform here would refit the
# encoder on the IDS17 labels and reassign the integer codes, so the encoded
# test labels would no longer correspond to the classes the model predicts.
# transform() raises a ValueError if IDS17 contains a label the encoder has
# never seen, which makes such a mismatch visible instead of silently wrong.
y_encoded = label_encoder.transform(y)

print(f"Einzigartige Labels: {label_encoder.classes_}")
print(f"Kodierte Labels: {np.unique(y_encoded)}")

### Skalierung von X

In [None]:
# Load the previously persisted scaler (presumably the one fitted on the
# training data — confirm against the training notebook).
scaler = joblib.load('scaler.pkl')

# Only *apply* the stored scaling parameters. Calling fit_transform here would
# re-estimate them from the test data, so the model would receive features on
# a different scale than the one it was trained with.
X_scaled = scaler.transform(X)

print(f"Form von X: {X.shape}")
print(f"Form von X_scaled: {X_scaled.shape}")

In [None]:
# The whole data set is used as the test set; no train/test split happens here
X_test = X_scaled
y_test = y_encoded
(X_test.shape, y_test.shape)

### Überprüfen der Klassenverteilung

In [None]:
def print_class_distribution(y, dataset_name):
    """Print the absolute and relative frequency of each class in ``y``.

    Args:
        y: Array-like of class labels.
        dataset_name: Name shown in the header line of the output.

    The output is a header line, one line per distinct class (in the order
    returned by ``np.unique``) and a trailing blank line.
    """
    classes, frequencies = np.unique(y, return_counts=True)
    n_samples = len(y)

    report = [f"Klassenverteilung in {dataset_name}:"]
    report.extend(
        f"  Klasse {cls}: {freq} Beispiele ({(freq/n_samples)*100:.2f}%)"
        for cls, freq in zip(classes, frequencies)
    )
    # The empty last entry yields the blank line that ends the report
    report.append("")
    print("\n".join(report))

# Show how the encoded labels of the test set are distributed across classes
print_class_distribution(y_test, "Testdatensatz")

### Laden des Modells

In [None]:
# Load the persisted classifier from disk.
# NOTE(review): the notebook title says "SVM", but the file loaded here is
# named 'decision_tree_model.pkl' — confirm that this is the intended model.
model = joblib.load('decision_tree_model.pkl')

### Evaluierung des Modells auf dem Testdatensatz

In [None]:
# Predict once and derive both metrics from the same predictions
y_test_pred = model.predict(X_test)

test_report = classification_report(y_test, y_test_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

logging.info("Test-Accuracy: {:.2f}%".format(test_accuracy * 100))
logging.info(f"\nTest Classification Report:\n{test_report}")

### Klassifikationsbericht und Konfusionsmatrix

In [None]:
# Pin the label set to every class the encoder knows. Without an explicit
# `labels` argument scikit-learn infers the classes from y_test / y_test_pred;
# if a class is absent from (or extra in) the data, the number of inferred
# classes differs from len(label_encoder.classes_), so classification_report
# raises and the heatmap tick labels no longer line up with the matrix.
class_ids = np.arange(len(label_encoder.classes_))

# Classification report with readable class names
print(classification_report(
    y_test,
    y_test_pred,
    labels=class_ids,
    target_names=label_encoder.classes_,
))

# Confusion matrix: rows are true classes, columns are predicted classes
cm = confusion_matrix(y_test, y_test_pred, labels=class_ids)

plt.figure(figsize=(14, 12))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Vorhergesagte Klasse')
plt.ylabel('Wahre Klasse')
plt.title('Konfusionsmatrix')
plt.show()
