In [1]:
!pip install pandas tensorflow scikit-learn




In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [6]:
from sklearn.datasets import fetch_kddcup99
import pandas as pd

# Load KDDCup99 dataset from sklearn
kddcup99 = fetch_kddcup99(subset='SA', shuffle=True, percent10=True)

# Column names (remove 'label' from here since it's provided separately)
cols = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes',
        'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins',
        'logged_in', 'num_compromised', 'root_shell', 'su_attempted',
        'num_root', 'num_file_creations', 'num_shells', 'num_access_files',
        'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
        'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate',
        'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
        'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate',
        'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
        'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
        'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate']

# Create DataFrame from the data (41 features)
df = pd.DataFrame(kddcup99.data, columns=cols)

# Add the label column separately
df['label'] = kddcup99.target

# Display first few rows
df.head()


Unnamed: 0,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
0,0,b'tcp',b'http',b'SF',203,2674,0,0,0,0,...,255,1.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,b'normal.'
1,0,b'udp',b'private',b'SF',105,147,0,0,0,0,...,238,0.93,0.01,0.01,0.0,0.0,0.0,0.0,0.0,b'normal.'
2,0,b'tcp',b'http',b'SF',209,1262,0,0,0,0,...,255,1.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,b'normal.'
3,0,b'tcp',b'smtp',b'SF',1118,333,0,0,0,0,...,170,0.62,0.02,0.0,0.01,0.0,0.0,0.04,0.05,b'normal.'
4,0,b'tcp',b'http',b'SF',366,16222,0,0,0,0,...,255,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,b'normal.'


In [7]:
# Encoding categorical columns
le_protocol = LabelEncoder()
df['protocol_type'] = le_protocol.fit_transform(df['protocol_type'])

le_service = LabelEncoder()
df['service'] = le_service.fit_transform(df['service'])

le_flag = LabelEncoder()
df['flag'] = le_flag.fit_transform(df['flag'])

# Encode labels (attack types)
df['label'] = df['label'].apply(lambda x: 0 if x == 'normal.' else 1)

# Split features and labels
X = df.drop('label', axis=1)
y = df['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [8]:
# Build neural network model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))  # Input layer
model.add(Dense(32, activation='relu'))  # Hidden layer
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9931 - loss: 0.0517 - val_accuracy: 1.0000 - val_loss: 4.2473e-05
Epoch 2/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 1.0000 - loss: 2.8807e-05 - val_accuracy: 1.0000 - val_loss: 9.4130e-06
Epoch 3/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 1.0000 - loss: 7.2125e-06 - val_accuracy: 1.0000 - val_loss: 3.3723e-06
Epoch 4/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 1.0000 - loss: 2.7140e-06 - val_accuracy: 1.0000 - val_loss: 1.4337e-06
Epoch 5/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 1.0000 - loss: 1.1775e-06 - val_accuracy: 1.0000 - val_loss: 6.5995e-07
Epoch 6/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 1.0000 - loss: 5.4906e-07 - val_accuracy: 1.

<keras.src.callbacks.history.History at 0x7d04aceb85b0>

In [9]:
# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_accuracy:.4f}')

# Make predictions
y_pred = (model.predict(X_test) > 0.5).astype("int32")

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Model accuracy: {accuracy:.4f}')


[1m630/630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 2.0859e-08
Test accuracy: 1.0000
[1m630/630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Model accuracy: 1.0000
