# **ARP Spoof Detection - ML MODEL**

## Reading the CSV file and installing importing dependencies

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, TimeDistributed, Flatten

In [None]:
import tensorflow
from tensorflow.keras.layers import InputLayer, GRU, BatchNormalization, Conv1D, MaxPooling1D, Bidirectional, Dense, Flatten, PReLU, LSTM, ReLU, LeakyReLU
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import keras

In [40]:
df = pd.read_csv('/content/drive/MyDrive/Datasets/arpcode.csv')

In [None]:
print(df.head())

       Time                 Source        Destination Protocol  Length  Info
0  0.000000  PCSSystemtec_3a:59:87  7e:57:ab:7a:2b:f5      ARP      42     0
1  2.001191  PCSSystemtec_3a:59:87  7e:57:ab:7a:2b:f5      ARP      42     0
2  4.002254  PCSSystemtec_3a:59:87  7e:57:ab:7a:2b:f5      ARP      42     0
3  6.002604  PCSSystemtec_3a:59:87  7e:57:ab:7a:2b:f5      ARP      42     0
4  8.003296  PCSSystemtec_3a:59:87  7e:57:ab:7a:2b:f5      ARP      42     0


## Making the Model (Wrapping CNN in Time Distributed Layers using CNN-LSTM Architecture)

In [41]:
sequence_length = 10
window_size = 1
feature_count = 3
lrcn_model = Sequential()

# first convolutional block
lrcn_model.add(InputLayer(input_shape=(window_size, feature_count)))
lrcn_model.add(Conv1D(filters=32, kernel_size=3, padding='causal', activation='ReLU'))
lrcn_model.add(LeakyReLU())
lrcn_model.add(MaxPooling1D(pool_size=1))
lrcn_model.add(BatchNormalization())

#Adding LSTM layers
lrcn_model.add(LSTM(30, activation='relu', dropout=0.3))
lrcn_model.add(BatchNormalization())

# output dense layer
lrcn_model.add(Dense(1, activation='sigmoid'))
lrcn_model.compile(optimizer='adagrad', loss='binary_crossentropy', metrics=['accuracy'])
lrcn_model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_10 (Conv1D)          (None, 1, 32)             320       
                                                                 
 leaky_re_lu_6 (LeakyReLU)   (None, 1, 32)             0         
                                                                 
 max_pooling1d_6 (MaxPoolin  (None, 1, 32)             0         
 g1D)                                                            
                                                                 
 batch_normalization_12 (Ba  (None, 1, 32)             128       
 tchNormalization)                                               
                                                                 
 lstm_6 (LSTM)               (None, 30)                7560      
                                                                 
 batch_normalization_13 (Ba  (None, 30)              

## Splitting the Dataset into Testing and Training

In [46]:
df['Time'] = pd.to_datetime(df['Time'], unit='s')

df['HourOfDay'] = df['Time'].dt.hour + df['Time'].dt.minute / 60 + df['Time'].dt.second / 3600

df['TimeSin'] = np.sin(2 * np.pi * df['HourOfDay']/24.0)
df['TimeCos'] = np.cos(2 * np.pi * df['HourOfDay']/24.0)

y = df['Info']
X = df[['Length', 'TimeSin', 'TimeCos']]

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = X_train.reshape((2528,1,3))
X_test = X_test.reshape((632, 1, 3))

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2528, 1, 3) (632, 1, 3) (2528,) (632,)


## Training and Testing the data

In [47]:
history = lrcn_model.fit(X_train, y_train, epochs=200, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
model_preds = lrcn_model(X_test)

In [45]:
len(model_preds), len(y_test)

(632, 632)

In [49]:
for x, y in zip(model_preds, y_test):
  if y==1:
    print(x, y)

tf.Tensor([[0.28170305]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.2264389]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.29738575]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.11445358]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.2858905]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.2914685]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.28292933]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.27509218]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.23596704]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.30708066]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.20760183]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.28070927]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.18611397]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.27509218]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.30373457]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.29308727]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.15655237]], shape=(1, 1), dtype=float32) 1
tf.Tensor([[0.25217062]], shape=(1

In [None]:
a = df[['Length', 'TimeSin', 'TimeCos', 'Info']]
a['Info'].value_counts()

0    2894
1     266
Name: Info, dtype: int64

## Result

In [53]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

clf = RandomForestClassifier(n_estimators=100, random_state=42)

X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

clf.fit(X_train_flat, y_train)
y_pred = clf.predict(X_test_flat)

accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:",100*accuracy,"%")


Accuracy: 91.93037974683544 %
