# Internet Firewall

## Isometric mapping (Isomap)

In [3]:
# IMPORT DEPENDENCIES 
import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import sklearn
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression 
from matplotlib.colors import ListedColormap
import mpl_toolkits.mplot3d 
import plotly.express as px
import plotly.graph_objects as go
import plotly as plty
from sklearn.manifold import Isomap
from scipy import sparse 
from sklearn.neighbors import radius_neighbors_graph

### Load data

In [4]:
# READ DATA
# Load the firewall log CSV into a DataFrame; the bare name on the last line
# makes the notebook render a preview of the frame.
internet_firewall = pd.read_csv(filepath_or_buffer='/content/log2.csv')
internet_firewall

Unnamed: 0,Source Port,Destination Port,NAT Source Port,NAT Destination Port,Action,Bytes,Bytes Sent,Bytes Received,Packets,Elapsed Time (sec),pkts_sent,pkts_received
0,57222,53,54587,53,allow,177,94,83,2,30,1,1
1,56258,3389,56258,3389,allow,4768,1600,3168,19,17,10,9
2,6881,50321,43265,50321,allow,238,118,120,2,1199,1,1
3,50553,3389,50553,3389,allow,3327,1438,1889,15,17,8,7
4,50002,443,45848,443,allow,25358,6778,18580,31,16,13,18
...,...,...,...,...,...,...,...,...,...,...,...,...
65527,63691,80,13237,80,allow,314,192,122,6,15,4,2
65528,50964,80,13485,80,allow,4680740,67312,4613428,4675,77,985,3690
65529,54871,445,0,0,drop,70,70,0,1,0,1,0
65530,54870,445,0,0,drop,70,70,0,1,0,1,0


In [5]:
# SUBSET DATA
# Draw a fixed-size random subset of the log. The seed is pinned so that the
# same 20,000 rows come back on every Restart & Run All; without it each run
# sampled different rows and none of the downstream figures or scores could be
# reproduced. Seed 0 matches the random_state used by the train/test split.
internet_firewall_S = internet_firewall.sample(n=20000, random_state=0)

In [6]:
# SPLIT DATA INTO TEST AND TRAIN SETS
# Four predictor columns (marked "top 4" by the author) and the firewall
# 'Action' label as the target.
features = [
    'Destination Port',
    'NAT Source Port',
    'Elapsed Time (sec)',
    'pkts_received',
]
X = internet_firewall_S.loc[:, features]
y = internet_firewall_S.loc[:, ['Action']]  # kept as a one-column DataFrame

# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [7]:
# Sanity check: (rows, columns) of the training feature frame.
X_train.shape

(16000, 4)

### Preprocess data

In [8]:
# MIN-MAX SCALING OF FEATURES
# The scaler is fitted on the training split only and then applied unchanged to
# the test split, so no test-set statistics leak into the preprocessing.
sc = MinMaxScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

### Isomap: 3 components

In [7]:
# ISOMAP (3 components)
# The scaled training matrix is wrapped in a SciPy LIL sparse matrix before it
# is handed to Isomap.
# NOTE(review): the scaled data is a dense array; confirm the sparse wrapper is
# still wanted, since the test split is later passed to transform() as dense.
X_train_sparse = sparse.lil_matrix(X_train_sc)
isomap = Isomap(n_neighbors=150, n_components=3)

# fit_transform() fits the model and returns the training embedding in one
# pass. Calling fit() and then transform() on the same rows repeated the
# neighbour search and built a second (n_samples x n_samples) geodesic kernel
# just to re-project points the model had already embedded. The embedding
# should agree with the old two-step result up to numerical noise.
iso = isomap.fit_transform(X_train_sparse)

In [8]:
# Report the dimensionality before and after the Isomap projection.
for label, arr in (
    ('Shape before Isomap: ', X_train_sc),
    ('Shape after Isomap: ', iso),
):
    print(label, arr.shape)

Shape before Isomap:  (16000, 4)
Shape after Isomap:  (16000, 3)


In [9]:
# NUM FEATURES SEEN DURING FIT
# Reads the fitted estimator's n_features_in_ attribute; it should equal the
# number of columns in the scaled training matrix.
isomap.n_features_in_

4

In [10]:
# Give the labels a fresh 0..n-1 index so they line up row-for-row with the
# embedding, which comes back as a plain array with no index.
y_train = y_train.reset_index(drop=True)

# One column per Isomap component, with the Action label appended alongside.
df1 = pd.DataFrame(iso, columns=['IC1', 'IC2', 'IC3'])
df2 = pd.concat((df1, y_train), axis='columns')
df2

Unnamed: 0,IC1,IC2,IC3,Action
0,-0.051053,-0.209014,0.021633,allow
1,-0.002924,0.602935,0.072265,allow
2,0.091563,-0.132994,0.025543,allow
3,-0.054890,-0.211841,0.022085,allow
4,-0.168449,-0.302818,0.019540,deny
...,...,...,...,...
15995,0.316264,-0.002034,0.005861,allow
15996,0.344004,0.018102,-0.004798,allow
15997,0.616247,0.143374,0.047735,allow
15998,-0.190433,-0.273678,-0.029507,drop


In [14]:
# PLOT COMPONENTS
# Interactive 3-D scatter of the three Isomap components, coloured by Action.
fig = px.scatter_3d(
    df2,
    x='IC1',
    y='IC2',
    z='IC3',
    color='Action',
    opacity=0.5,
)
fig.update_layout(
    # Anchor the legend inside the plotting area.
    legend={'yanchor': 'top', 'y': 0.89, 'xanchor': 'left', 'x': 0.6},
    # Centred title near the top of the figure.
    title=dict(text='Isomap Components', y=0.9, x=0.5, xanchor='center', yanchor='top'),
)
fig.show()

## Prediction

In [15]:
# PREDICTION: RF
# Project the scaled test split into the fitted Isomap space.
X_test_iso = isomap.transform(X_test_sc)

# Labels get a fresh 0..n-1 index (same effect as the in-place reset).
y_train = y_train.reset_index(drop=True)

# Shallow random forest trained on the embedded training points; np.squeeze
# collapses the one-column label frame to the 1-D target sklearn expects.
classifier = RandomForestClassifier(max_depth=3, random_state=0).fit(
    iso, np.squeeze(y_train)
)
y_pred = classifier.predict(X_test_iso)  # predicted Action for each test row

In [16]:
# EVALUATE PERFORMANCE
# Confusion matrix and overall accuracy of the random forest on the test split.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

acc21 = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy: ', acc21)

[[2248   67    0    0]
 [   8  881    3    0]
 [   0    0  790    0]
 [   0    3    0    0]]
Accuracy:  0.97975


In [17]:
# F1-SCORE
# Three averaging modes, computed and printed in this order:
# micro (f2a), macro (f2b), weighted (f2c).
f2a, f2b, f2c = (
    f1_score(y_test, y_pred, average=mode)
    for mode in ('micro', 'macro', 'weighted')
)
for score in (f2a, f2b, f2c):
    print('F1-score: ', score)

F1-score:  0.97975
F1-score:  0.7344367486406868
F1-score:  0.9795788350879786


In [18]:
# PREDICTION: LR
# Logistic regression on the same Isomap embedding; the label frame is
# flattened to a 1-D array for fitting.
train_labels = y_train.to_numpy().ravel()
classifier = LogisticRegression(random_state=0, max_iter=1000).fit(iso, train_labels)
y_pred = classifier.predict(X_test_iso)  # predicted Action for each test row

In [19]:
# EVALUATE PERFORMANCE
# Confusion matrix and overall accuracy of the logistic regression on the
# test split.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

acc22 = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy: ', acc22)

[[2231   36   48    0]
 [   2  828   62    0]
 [   0    0  790    0]
 [   0    3    0    0]]
Accuracy:  0.96225


In [20]:
# F1-SCORES
# Three averaging modes, computed and printed in this order:
# micro (f2d), macro (f2e), weighted (f2f).
f2d, f2e, f2f = (
    f1_score(y_test, y_pred, average=mode)
    for mode in ('micro', 'macro', 'weighted')
)
for score in (f2d, f2e, f2f):
    print('F1-score: ', score)

F1-score:  0.96225
F1-score:  0.7143614585368946
F1-score:  0.962393161461325


### Isomap: 2 components

In [9]:
# ISOMAP (2 components)
# The scaled training matrix is wrapped in a SciPy LIL sparse matrix before it
# is handed to Isomap.
# NOTE(review): the scaled data is a dense array; confirm the sparse wrapper is
# still wanted.
X_train_sparse = sparse.lil_matrix(X_train_sc)

# NOTE: this rebinds `isomap` and `iso`, replacing the 3-component model and
# embedding; prediction cells re-run after this point will use the 2-D model.
isomap = Isomap(n_neighbors=90, n_components=2)

# fit_transform() fits the model and returns the training embedding in one
# pass. Calling fit() and then transform() on the same rows repeated the
# neighbour search and built a second (n_samples x n_samples) geodesic kernel
# just to re-project points the model had already embedded. The embedding
# should agree with the old two-step result up to numerical noise.
iso = isomap.fit_transform(X_train_sparse)

In [10]:
# NUM FEATURES SEEN DURING FIT
# Reads the fitted estimator's n_features_in_ attribute; it should equal the
# number of columns in the scaled training matrix.
isomap.n_features_in_

4

In [13]:
# Give the labels a fresh 0..n-1 index so they line up row-for-row with the
# embedding, which comes back as a plain array with no index.
y_train = y_train.reset_index(drop=True)

# One column per Isomap component, with the Action label appended alongside.
df1 = pd.DataFrame(iso, columns=['IC1', 'IC2'])
df2 = pd.concat((df1, y_train), axis='columns')
df2

Unnamed: 0,IC1,IC2,Action
0,0.306495,-0.023005,allow
1,0.162610,-0.099325,allow
2,0.566546,0.114808,allow
3,0.707663,0.186925,allow
4,0.381052,0.016559,allow
...,...,...,...
15995,-0.217800,-0.279841,drop
15996,-0.164055,-0.310893,deny
15997,0.015234,-0.181556,allow
15998,0.085514,-0.138787,allow


In [19]:
# PLOT COMPONENTS
# 2-D scatter of the two Isomap components, coloured by Action.
fig = px.scatter(
    df2,
    x='IC1',
    y='IC2',
    color='Action',
    opacity=0.5,
)
fig.update_layout(
    # Centred title near the top of the figure.
    title=dict(text='Isomap Components', y=0.95, x=0.5, xanchor='center', yanchor='top'),
)
fig.show()