**DEP TASK 4**


---


**Data Collection And preprocessing**

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report

In [None]:
# Load the network traffic data
data = pd.read_csv('/content/network_traffic_data.csv')

In [None]:
numerical_cols = data.select_dtypes(include=['number']).columns

# Scale numerical columns only
scaler = StandardScaler()
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

In [None]:
# Example feature extraction
data['packet_size_diff'] = data['PacketCount'].diff().fillna(0)
data['time_interval'] = data['Duration'].diff().fillna(0)

**Using Isolalation Forest Algo**

In [None]:

# Train Isolation Forest model
model = IsolationForest(contamination=0.01)
model.fit(data[numerical_cols])

# Predict anomalies
data['anomaly'] = model.predict(data[numerical_cols])

In [None]:
data['anomaly'] = data['anomaly'].map({-1: 'Attack', 1: 'Normal'})

ground_truth = data['Label']

print(classification_report(ground_truth, data['anomaly']))

              precision    recall  f1-score   support

      Attack       0.55      0.01      0.02      1022
      Normal       0.49      0.99      0.66       978

    accuracy                           0.49      2000
   macro avg       0.52      0.50      0.34      2000
weighted avg       0.52      0.49      0.33      2000



**Using Autoencoder Algo**

In [33]:
pip install tensorflow



In [41]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.model_selection import train_test_split
import numpy as np

In [38]:
X_train, X_test = train_test_split(data[numerical_cols], test_size=0.2, random_state=42)

In [45]:
# Define the autoencoder architecture
input_dim = X_train.shape[1]
encoding_dim = 14  # Dimension of the latent space

input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation="relu")(input_layer)
decoder = Dense(input_dim, activation="sigmoid")(encoder)

autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# Train the autoencoder
history = autoencoder.fit(X_train, X_train,
                          epochs=50,
                          batch_size=16,
                          validation_data=(X_test, X_test),
                          shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [46]:
# Reconstruct the test data
X_test_pred = autoencoder.predict(X_test)

# Calculate the reconstruction error
mse = np.mean(np.power(X_test - X_test_pred, 2), axis=1)

# Determine a threshold for anomaly detection
threshold = np.percentile(mse, 95)

# Identify anomalies
anomalies = mse > threshold



In [47]:
ground_truth = data['Label'][len(X_train):]  # Use the test set portion

anomalies_str = np.where(anomalies, 'Attack', 'Normal')

print(classification_report(ground_truth, anomalies_str))

              precision    recall  f1-score   support

      Attack       0.50      0.05      0.09       198
      Normal       0.51      0.95      0.66       202

    accuracy                           0.51       400
   macro avg       0.50      0.50      0.38       400
weighted avg       0.50      0.51      0.38       400

