<a href="https://colab.research.google.com/github/singh-azad/project/blob/main/ddos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Detection of DDoS Attacks Using Attention-Based Machine Learning**

## **Libraries imported**

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

## **Dataset**

In [None]:
# Read the Friday-afternoon DDoS flow CSV into a DataFrame. The /content/
# prefix assumes the file sits in the runtime's working area (e.g. Colab).
DATASET_PATH = '/content/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv'
df = pd.read_csv(DATASET_PATH)

## **Data** **Preprocessing**

**Converting IP addresses into int**

In [None]:
import ipaddress

In [None]:
def _ipv4_to_int(address):
    """Return the integer value of an IPv4 address given in text form."""
    return int(ipaddress.IPv4Address(address))


# Add numeric counterparts of both endpoint address columns.
df['NumericalSourceIP'] = df[' Source IP'].apply(_ipv4_to_int)
df['NumericalDestinationIP'] = df[' Destination IP'].apply(_ipv4_to_int)

**Converting timestamp to Unix epoch seconds and storing in 'Timestamp'**

In [None]:
# Parse the raw timestamp strings and store them as (float) seconds since the
# Unix epoch in a new 'Timestamp' column (no leading space in the name).
# The cast names 'int64' explicitly: a bare `int` means the platform integer,
# and pandas only supports casting datetime64 values to int64.
# NOTE(review): the 10**9 divisor assumes the parsed values have nanosecond
# resolution - confirm for the pandas version in use.
df['Timestamp'] = pd.to_datetime(df[' Timestamp']).astype('int64') / 10**9

**Before dropping the columns in dataframe**

In [None]:
# Display the column labels as they are before any columns are dropped.
df.columns

**Dropping the columns Flow ID, Source IP, Destination IP, Timestamp**

In [None]:
# Raw identifier, address and time-string columns; the numeric replacements
# ('NumericalSourceIP', 'NumericalDestinationIP', 'Timestamp') are kept.
columns_to_drop = [
    'Flow ID',
    ' Source IP',
    ' Destination IP',
    ' Timestamp',
]
df = df.drop(columns=columns_to_drop)

**After dropping the columns**

In [None]:
# Display the column labels again to confirm which columns remain.
df.columns

**Deleting infinite values and null values**

In [None]:
# Remove every row that holds an infinite or missing value in ANY column.
# Previously only 'Flow Bytes/s' was checked for infinities, so an inf in
# another column would survive and break the later feature-scoring step.
# Building a new frame (instead of `dropna(inplace=True)` on a filtered slice)
# also avoids pandas' chained-assignment warning.
df = df.replace([np.inf, -np.inf], np.nan).dropna()

In [None]:
# Split the frame into the target (' Label') and the feature matrix
# (every remaining column).
y = df[' Label']
X = df.drop(columns=[' Label'])

**Splitting the dataset into *train* and *test* sets**

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

**Checking the shape of the data set**

In [None]:
# Number of feature columns available before feature selection.
_, num_columns = X.shape
print(num_columns)

### **Features selection**


>**The best 15 features are selected and stored in X_train_selected and X_test_selected**



In [None]:
# Keep the k features with the highest ANOVA F-score against the label.
# The selector is fitted on the training split only and then applied to both
# splits, so the test split does not influence which features are chosen.
k = 15
selector = SelectKBest(f_classif, k=k)
selector.fit(X_train, y_train)
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)



> **Selected features**



In [None]:
# Map the selector's boolean support mask back onto the column labels.
support_mask = selector.get_support()
selected_feature_names = X.columns[support_mask]
selected_feature_names

## **KNN**

---



> **Model**


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline

# K-Nearest Neighbors compares samples by distance, so the features are
# standardised first; otherwise whichever selected column has the largest
# numeric range would dominate the distance. The scaler sits inside the
# pipeline, so cross-validation re-fits it on each training fold and no
# statistics leak from validation or test data.
res1 = time.time()

knn = make_pipeline(StandardScaler(), KNeighborsClassifier())
knn = knn.fit(X_train_selected, y_train)

res2 = time.time()

print('KNN took ',res2-res1,'seconds')

> **Validation**

In [None]:
# K-fold cross-validation of the KNN model on the training split.
# Number of folds. Note: this rebinds `k`, which the feature-selection cell
# used for the number of selected features.
k = 5

# Unshuffled K-fold splitter over the training rows.
kf = KFold(n_splits=k)

# One score per fold (the estimator's default score, i.e. accuracy).
scores = cross_val_score(knn, X_train_selected, y_train, cv=kf)

for fold_idx, score in enumerate(scores):
    print(f"Fold {fold_idx + 1} accuracy: {score}")

# Mean and spread across folds; the standard deviation shows how stable the
# model is from fold to fold.
mean_accuracy = np.mean(scores)
std_accuracy = np.std(scores)

print(f"\nMean accuracy: {mean_accuracy}")
print(f"Standard deviation: {std_accuracy}")

> **Testing Model**

In [None]:
# Predict once on the held-out split and score those predictions directly.
# `knn.score(...)` would run the (expensive) KNN prediction a second time to
# produce the same accuracy value.
y_pred1 = knn.predict(X_test_selected)

print('Accuracy score= {:.8f}'.format(accuracy_score(y_test, y_pred1)))

> **Precision**

In [None]:
# Precision for the 'DDoS' class. The recall and F1 cells are computed with
# pos_label='DDoS'; using the same setting here (rather than a weighted
# average over both classes) keeps the three figures mutually consistent.
precision = precision_score(y_test, y_pred1, pos_label='DDoS')
print("Precision score:", precision)

> **Recall**

In [None]:
# Recall for the 'DDoS' class: share of true 'DDoS' samples predicted as such.
recall = recall_score(y_true=y_test, y_pred=y_pred1, pos_label='DDoS')
print("Recall:", recall)

>**F1** **Score**

In [None]:
# F1 score with 'DDoS' treated as the positive class.
f1 = f1_score(y_true=y_test, y_pred=y_pred1, pos_label='DDoS')
print("F1 Score:", f1)


> **Confusion Matrix**



In [None]:
# Confusion matrix of the KNN test predictions
# (scikit-learn convention: rows = true class, columns = predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred1)
print("Confusion Matrix:", cm, sep="\n")

## **Random Forest**



> **Model** 



In [None]:
res1 = time.time()

# Random forests are randomised (bootstrap samples, feature sub-sampling).
# Seeding the estimator makes the fitted model, and every metric reported
# below, reproducible across runs; 42 matches the seed used for the split.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_selected, y_train)

res2 = time.time()
print('RandomForest  took ',res2-res1,'seconds')



> **Validation**



In [None]:
# K-fold cross-validation of the Random Forest model on the training split.
# Number of folds. Note: this rebinds `k`, which the feature-selection cell
# used for the number of selected features.
k = 5

# Unshuffled K-fold splitter over the training rows.
kf = KFold(n_splits=k)

# One score per fold (the estimator's default score, i.e. accuracy).
scores = cross_val_score(rf, X_train_selected, y_train, cv=kf)

for fold_idx, score in enumerate(scores):
    print(f"Fold {fold_idx + 1} accuracy: {score}")

# Mean and spread across folds; the standard deviation shows how stable the
# model is from fold to fold.
mean_accuracy = np.mean(scores)
std_accuracy = np.std(scores)

print(f"\nMean accuracy: {mean_accuracy}")
print(f"Standard deviation: {std_accuracy}")



> **Testing** **Model**



In [None]:
# Score the fitted Random Forest on the held-out split.
y_pred1 = rf.predict(X_test_selected)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred1)
print("Accuracy score:", accuracy)




> **Precision**



In [None]:
# Precision for the 'DDoS' class. The recall and F1 cells are computed with
# pos_label='DDoS'; using the same setting here (rather than a weighted
# average over both classes) keeps the three figures mutually consistent.
precision = precision_score(y_test, y_pred1, pos_label='DDoS')
print("Precision score:", precision)




> **Recall**



In [None]:
# Recall for the 'DDoS' class: share of true 'DDoS' samples predicted as such.
recall = recall_score(y_true=y_test, y_pred=y_pred1, pos_label='DDoS')
print("Recall:", recall)



> **F1** **Score**



In [None]:
# F1 score with 'DDoS' treated as the positive class.
f1 = f1_score(y_true=y_test, y_pred=y_pred1, pos_label='DDoS')
print("F1 Score: ", f1)



> **Confusion Matrix**



In [None]:
# Confusion matrix of the Random Forest test predictions
# (scikit-learn convention: rows = true class, columns = predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred1)
print("Confusion Matrix:", cm, sep="\n")

## **NB**



> **Model** 



In [None]:
from sklearn.naive_bayes import GaussianNB

# Time the construction and fitting of a Gaussian Naive Bayes classifier on
# the selected training features.
res1 = time.time()
gnb = GaussianNB()
gnb.fit(X=X_train_selected, y=y_train)
res2 = time.time()

print('GNB  took ',res2-res1,'seconds')



> **Validation**



In [None]:
# K-fold cross-validation of the Gaussian Naive Bayes model on the training
# split.
# Number of folds. Note: this rebinds `k`, which the feature-selection cell
# used for the number of selected features.
k = 5

# Unshuffled K-fold splitter over the training rows.
kf = KFold(n_splits=k)

# One score per fold (the estimator's default score, i.e. accuracy).
scores = cross_val_score(gnb, X_train_selected, y_train, cv=kf)

for fold_idx, score in enumerate(scores):
    print(f"Fold {fold_idx + 1} accuracy: {score}")

# Mean and spread across folds; the standard deviation shows how stable the
# model is from fold to fold.
mean_accuracy = np.mean(scores)
std_accuracy = np.std(scores)

print(f"\nMean accuracy: {mean_accuracy}")
print(f"Standard deviation: {std_accuracy}")



> **Testing** **Model**



In [None]:
# Score the fitted Gaussian Naive Bayes model on the held-out split.
y_pred1 = gnb.predict(X_test_selected)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred1)
print("Accuracy score:", accuracy)




> **Precision**



In [None]:
# Precision for the 'DDoS' class. The recall and F1 cells are computed with
# pos_label='DDoS'; using the same setting here (rather than a weighted
# average over both classes) keeps the three figures mutually consistent.
precision = precision_score(y_test, y_pred1, pos_label='DDoS')
print("Precision score:", precision)




> **Recall**



In [None]:
# Recall for the 'DDoS' class: share of true 'DDoS' samples predicted as such.
recall = recall_score(y_true=y_test, y_pred=y_pred1, pos_label='DDoS')
print("Recall:", recall)



> **F1** **Score**



In [None]:
# F1 score with 'DDoS' treated as the positive class.
f1 = f1_score(y_true=y_test, y_pred=y_pred1, pos_label='DDoS')
print("F1 Score: ", f1)



> **Confusion Matrix**



In [None]:
# Confusion matrix of the Naive Bayes test predictions
# (scikit-learn convention: rows = true class, columns = predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred1)
print("Confusion Matrix:", cm, sep="\n")

## **SVM**



> **Model** 



In [None]:
from sklearn import svm
from sklearn.pipeline import make_pipeline

res1 = time.time()

# Linear-kernel SVM. Support vector machines are not scale-invariant, so the
# features are standardised first. The scaler sits inside the pipeline, so
# cross-validation re-fits it on each training fold and no statistics leak
# from validation or test data.
clf = make_pipeline(StandardScaler(), svm.SVC(kernel='linear'))

clf.fit(X_train_selected, y_train)

res2 = time.time()

print('SVM  took ',res2-res1,'seconds')



> **Validation**



In [None]:
# K-fold cross-validation of the SVM model on the training split.
# Number of folds. Note: this rebinds `k`, which the feature-selection cell
# used for the number of selected features.
k = 5

# Unshuffled K-fold splitter over the training rows.
kf = KFold(n_splits=k)

# One score per fold (the estimator's default score, i.e. accuracy).
scores = cross_val_score(clf, X_train_selected, y_train, cv=kf)

for fold_idx, score in enumerate(scores):
    print(f"Fold {fold_idx + 1} accuracy: {score}")

# Mean and spread across folds; the standard deviation shows how stable the
# model is from fold to fold.
mean_accuracy = np.mean(scores)
std_accuracy = np.std(scores)

print(f"\nMean accuracy: {mean_accuracy}")
print(f"Standard deviation: {std_accuracy}")



> **Testing** **Model**



In [None]:
# Score the fitted SVM on the held-out split.
y_pred1 = clf.predict(X_test_selected)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred1)
print("Accuracy score:", accuracy)




> **Precision**



In [None]:
# Precision for the 'DDoS' class. The recall and F1 cells are computed with
# pos_label='DDoS'; using the same setting here (rather than a weighted
# average over both classes) keeps the three figures mutually consistent.
precision = precision_score(y_test, y_pred1, pos_label='DDoS')
print("Precision score:", precision)




> **Recall**



In [None]:
# Recall for the 'DDoS' class: share of true 'DDoS' samples predicted as such.
recall = recall_score(y_true=y_test, y_pred=y_pred1, pos_label='DDoS')
print("Recall:", recall)



> **F1** **Score**



In [None]:
# F1 score with 'DDoS' treated as the positive class.
f1 = f1_score(y_true=y_test, y_pred=y_pred1, pos_label='DDoS')
print("F1 Score: ", f1)



> **Confusion Matrix**



In [None]:
# Confusion matrix of the SVM test predictions
# (scikit-learn convention: rows = true class, columns = predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred1)
print("Confusion Matrix:", cm, sep="\n")