<a href="https://colab.research.google.com/github/phananh285/IDSusingMachineLearning/blob/main/IDS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# dataset CSVs read by later cells (under /content/drive/MyDrive/...) are
# reachable. If the drive is already mounted, drive.mount only reports that
# fact; pass force_remount=True to force a remount.

from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score



In [None]:
# Read the two UNSW_NB15 CSV partitions from the mounted Google Drive.
# NOTE(review): the file named "testing-set" is loaded into train_dl and the
# file named "training-set" is loaded into test_dl, i.e. the roles are the
# opposite of what the file names suggest. Confirm that this swap is
# intentional; if it is not, exchange the two paths.
file1_path = '/content/drive/MyDrive/IDS_using_ML/UNSW_NB15_testing-set.csv'  # Update with correct path
file2_path = '/content/drive/MyDrive/IDS_using_ML/UNSW_NB15_training-set.csv'  # Update with correct path
train_dl = pd.read_csv(file1_path)
test_dl = pd.read_csv(file2_path)

In [None]:
# The 20 columns kept for the rest of the notebook; 'proto' and 'state' are
# string-valued here and are converted to integer codes by encode_data().
columns_to_keep = [
    'ct_dst_sport_ltm', 'ct_src_dport_ltm', 'dur', 'proto', 'state',
    'spkts', 'dpkts', 'sbytes', 'dbytes', 'sttl', 'dttl', 'sload', 'dload',
    'sloss', 'dloss', 'synack', 'ackdat', 'smean', 'dmean', 'tcprtt',
]

# .copy() detaches each subset from the frame it was sliced from. Without it,
# the later column assignments (df['proto'] = ...) are performed on a slice
# and pandas emits SettingWithCopyWarning.
train_dl = train_dl[columns_to_keep].copy()
test_dl = test_dl[columns_to_keep].copy()

# Only the last expression of a cell is rendered, so the head() preview below
# is evaluated but not displayed; the cell output is test_dl.shape.
train_dl.head()
test_dl.shape

(175341, 20)

In [None]:

# Lookup table from the dataset's 'proto' string to an integer code. It is
# consumed by encode_data() through Series.map; names that are absent from the
# table are encoded as -1 there.
# NOTE(review): the codes look like they were modelled on the IANA IP protocol
# numbers - TODO confirm each entry against the IANA registry before relying
# on the numeric values.
# NOTE(review): the table is NOT one-to-one. Several different protocol names
# share a code, for example:
#   0   -> zero, any, ip            4   -> iso-ip, ipip, ipnip
#   20  -> pvp, hmp                 36  -> ddp, xtp
#   76  -> sun-nd, br-sat-mon, crudp
#   81  -> wb-mon, vmtp             102 -> sat-mon, pnni
#   108 -> pipe, ipcomp, aris       128 -> sccopmce, scps, merit-inp
# Protocols that share a code cannot be told apart after encoding; confirm
# that this loss of information is acceptable for the model.
proto_mapping = {
'tcp': 6,
'udp': 17,
'arp': 2054,
'ospf': 89,
'icmp': 1,
'igmp': 2,
                 'rtp': 200,
                  'ddp': 36,
                  'ipv6-frag': 44,
                  'cftp': 62,
                  'wsn': 78,
                  'pvp': 20,
                  'wb-expak': 79,
                  'mtp': 92,
                  'pri-enc': 66,
                  'sat-mon': 102,
                  'cphb': 73,
                  'sun-nd': 76,
                  'iso-ip': 4,
                  'xtp': 36,
                  'il': 40,
                  'unas': 80,
                  'mfe-nsp': 31,
                  '3pc': 34,
                  'ipv6-route': 43,
                  'idrp': 45,
                  'bna': 49,
                  'swipe': 53,
                  'kryptolan': 41,
                  'cpnx': 67,
                  'rsvp': 46,
                  'wb-mon': 81,
                  'vmtp': 81,
                  'ib': 35,
                  'dgp': 86,
                  'eigrp': 88,
                  'ax.25': 93,
                  'gmtp': 100,
                  'pnni': 102,
                  'sep': 65,
                  'pgm': 113,
                  'idpr-cmtp': 39,
                  'zero': 0,
                  'rvd': 66,
                  'mobile': 55,
                  'narp': 54,
                  'fc': 133,
                  'pipe': 108,
                  'ipcomp': 108,
                  'ipv6-no': 59,
                  'sat-expak': 64,
                  'ipv6-opts': 60,
                  'snp': 109,
                  'ipcv': 111,
                  'br-sat-mon': 76,
                  'ttp': 84,
                  'tcf': 87,
                  'nsfnet-igp': 58,
                  'sprite-rpc': 90,
                  'aes-sp3-d': 104,
                  'sccopmce': 128,
                  'sctp': 132,
                  'qnx': 105,
                  'scps': 128,
                  'etherip': 97,
                  'aris': 108,
                  'pim': 103,
                  'compaq-peer': 110,
                  'vrrp': 112,
                  'iatp': 117,
                  'stp': 118,
                  'l2tp': 115,
                  'srp': 119,
                  'sm': 122,
                  'isis': 124,
                  'smp': 121,
                  'fire': 125,
                  'ptp': 123,
                  'crtp': 126,
                  'sps': 130,
                  'merit-inp': 128,
                  'idpr': 28,
                  'skip': 57,
                  'any': 0,
                  'larp': 91,
                  'ipip': 4,
                  'micp': 95,
                  'encap': 98,
                  'ifmp': 101,
                  'tp++': 39,
                  'a/n': 47,
                  'ipv6': 41, 'i-nlsp': 52, 'ipx-n-ip': 111, 'sdrp': 42, 'tlsp': 56, 'gre': 47, 'mhrp': 48,
                  'ddx': 103, 'ippc': 121, 'visa': 70, 'secure-vmtp': 82, 'uti': 120, 'vines': 53, 'crudp': 76,
                  'iplt': 129, 'ggp': 3, 'ip': 0, 'ipnip': 4, 'st2': 5, 'argus': 11, 'bbn-rcc': 10, 'egp': 8, 'emcon': 14,
                  'igp': 9, 'nvp': 11, 'pup': 12, 'xnet': 15, 'chaos': 16, 'mux': 18, 'dcn': 19, 'hmp': 20, 'prm': 21,
                  'trunk-1': 23, 'xns-idp': 22, 'leaf-1': 24, 'leaf-2': 25, 'rdp': 27, 'irtp': 28, 'iso-tp4': 29,
                  'netblt': 30, 'trunk-2': 31, 'cbt': 7

}

# Integer code for every known 'state' label. A label's position in the tuple
# is its code, so append new states at the end to keep existing codes stable.
_state_labels = ('FIN', 'INT', 'CON', 'ECO', 'REQ', 'RST', 'PAR', 'URN', 'no')
state_mapping = {label: code for code, label in enumerate(_state_labels)}


def encode_data(df, proto_codes=None, state_codes=None):
  """Return a copy of ``df`` with 'proto' and 'state' encoded as integers.

  Args:
    df: DataFrame that contains 'proto' and 'state' columns.
    proto_codes: Optional dict mapping protocol names to integer codes.
      Defaults to the module-level ``proto_mapping``.
    state_codes: Optional dict mapping state names to integer codes.
      Defaults to the module-level ``state_mapping``.

  Returns:
    A new DataFrame; the frame passed in is left untouched. Labels that are
    missing from a mapping are encoded as -1, and both encoded columns are
    int64. A column that is already numeric is passed through unchanged, so
    calling the function again on its own output (e.g. when the calling cell
    is re-run) does not collapse every value to -1.

  Raises:
    KeyError: if 'proto' or 'state' is not a column of ``df``.
  """
  if proto_codes is None:
    proto_codes = proto_mapping
  if state_codes is None:
    state_codes = state_mapping

  # Work on a copy so the caller's frame (possibly a slice of a larger one)
  # is never modified behind its back.
  encoded = df.copy()
  for column, codes in (('proto', proto_codes), ('state', state_codes)):
    if pd.api.types.is_numeric_dtype(encoded[column]):
      # Already encoded: mapping integers through string-keyed dicts would
      # turn every value into NaN and then into -1.
      continue
    # Unknown labels become NaN after map(); replace them with the -1
    # sentinel and cast so the dtype does not depend on whether any label
    # was unknown.
    encoded[column] = encoded[column].map(codes).fillna(-1).astype('int64')
  return encoded


# Encode 'proto' and 'state' in both splits (train first, then test) and
# rebind the notebook-level names to the encoded frames.
train_dl, test_dl = [encode_data(split) for split in (train_dl, test_dl)]

# Only the last expression of a cell is rendered, so the preview shown in the
# cell output is test_dl's.
train_dl.head()
test_dl.head()

Unnamed: 0,ct_dst_sport_ltm,ct_src_dport_ltm,dur,proto,state,spkts,dpkts,sbytes,dbytes,sttl,dttl,sload,dload,sloss,dloss,synack,ackdat,smean,dmean,tcprtt
0,1,1,0.121478,6,0,6,4,258,172,252,254,14158.94238,8495.365234,0,0,0.0,0.0,43,43,0.0
1,1,1,0.649902,6,0,14,38,734,42014,62,252,8395.112305,503571.3125,2,17,0.0,0.0,52,1106,0.0
2,1,1,1.623129,6,0,8,16,364,13186,62,252,1572.271851,60929.23047,1,6,0.061458,0.050439,46,824,0.111897
3,1,1,1.681642,6,0,12,12,628,770,62,252,2740.178955,3358.62207,1,3,0.0,0.0,52,64,0.0
4,1,2,0.449454,6,0,10,6,534,268,254,252,8561.499023,3987.059814,2,1,0.071147,0.057234,53,45,0.128381
