# CICIDS2017 - Détection avec Random Forest

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

## 🔹 Chargement des données simulées

In [None]:
# Small hand-written sample of network-flow records (five rows), one list per
# column. 'Label' is the target column; the other columns are the features.
sample_data = {
    'Protocol': ['TCP', 'UDP', 'TCP', 'ICMP', 'UDP'],
    'Src Port': [12345, 53, 443, 0, 67],
    'Dst Port': [80, 80, 22, 8, 53],
    'Flow Duration': [10000, 5000, 20000, 15000, 8000],
    'Tot Fwd Pkts': [10, 5, 20, 15, 8],
    'Tot Bwd Pkts': [15, 3, 25, 10, 9],
    'Label': ['BENIGN', 'BENIGN', 'DoS', 'BENIGN', 'DoS'],
}
df = pd.DataFrame(data=sample_data)
# Trailing expression so the notebook cell displays the first rows.
df.head()

## 🔹 Encodage des variables catégorielles

In [None]:
# Use one encoder per column: a LabelEncoder only remembers the classes of its
# most recent fit, so sharing a single instance between 'Protocol' and 'Label'
# would discard the Protocol mapping and make it impossible to decode later.
protocol_enc = LabelEncoder()
df['Protocol'] = protocol_enc.fit_transform(df['Protocol'])

# label_enc is fitted on the target only, so label_enc.classes_ and
# label_enc.inverse_transform map encoded predictions back to class names.
label_enc = LabelEncoder()
df['Label'] = label_enc.fit_transform(df['Label'])

## 🔹 Séparation des features et de la cible + normalisation

In [None]:
# Target vector: the encoded 'Label' column.
y = df['Label']
# Feature matrix: every column except the target.
X = df.drop('Label', axis=1)

# Standardise each feature column using statistics computed over all rows of X.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

## 🔹 Séparation des données en train/test

In [None]:
# Split the *raw* features first, then fit the scaler on the training rows only.
# Splitting X_scaled (standardised with the mean/variance of every row) would
# leak test-set statistics into preprocessing and bias the evaluation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Refit a fresh scaler on the training part; the test part is only transformed
# with the training statistics, as unseen data would be.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

## 🔹 Entraînement du modèle Random Forest

In [None]:
# Random forest with 100 trees; random_state is pinned so repeated runs of the
# notebook train the same model.
forest_params = {'n_estimators': 100, 'random_state': 42}
rf = RandomForestClassifier(**forest_params)
# Trailing expression so the notebook cell displays the fitted estimator.
rf.fit(X_train, y_train)

## 🔹 Prédiction et évaluation du modèle

In [None]:
# Predict the encoded class of every held-out sample, then print the text
# report that compares those predictions with the true labels.
y_pred = rf.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

## 🔹 Affichage de la matrice de confusion

In [None]:
# Take the class names from the target encoder instead of hard-coding them, and
# pin `labels` so the matrix always has one row/column per known class. Without
# `labels`, confusion_matrix only covers classes that occur in y_test or y_pred,
# so a small test split can yield a matrix that no longer matches the ticks.
class_names = list(label_enc.classes_)
class_ids = list(range(len(class_names)))  # LabelEncoder codes are 0..n_classes-1
conf_mat = confusion_matrix(y_test, y_pred, labels=class_ids)

# Rows are the true classes, columns the predicted classes.
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()