In [2]:
import pandas as pd
import os
import numpy as np
# import qgrid

# Use the public IPython.display API; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))


# Do not truncate displayed frames: show every row that is requested.
pd.set_option("display.max_rows", None)

# Data locations, relative to the current working directory.
data_path = './data/snort/'
extracted_path = './extracted/'
# Alert file to analyse. An earlier assignment of 'alerts.csv' was removed
# because it was overwritten immediately and never took effect.
alerts_csv = 'alerts_with_IP_class.csv'
# File mapping generator id / alert id pairs to messages.
gen_map = 'gen-msg.map'


### Generate a dictionary that maps generator ID and signature ID to alert messages

In [3]:
# Build a nested lookup from the generator map file:
#   generator_dict[generator id][alert id] -> message
# All keys and messages are kept as strings, exactly as read from the file.
generator_dict = {}
with open(os.path.join(data_path, gen_map), 'r') as fp:
    # Stream the file line by line instead of buffering it with readlines().
    for line in fp:
        entry = line.strip()
        # Skip blank lines, comment lines and form-feed separators
        # (strip() removes '\f' and '\n', leaving an empty string).
        if not entry or entry.startswith('#'):
            continue
        # maxsplit=2 keeps any '||' inside the message intact; stripping each
        # field removes the padding around the delimiter and the trailing
        # newline that used to remain attached to the message.
        generatorid, alertid, MSG = (field.strip() for field in entry.split('||', 2))
        generator_dict.setdefault(generatorid, {})[alertid] = MSG

### Load the alerts CSV (`alerts_with_IP_class.csv`)

In [4]:
# Read the alert file into a DataFrame (comma is read_csv's default separator).
alerts_file = os.path.join(data_path, alerts_csv)
df = pd.read_csv(alerts_file)

# Optional interactive grid view (needs the qgrid import to be enabled):
# qgrid_widget = qgrid.show_grid(df, show_toolbar=True)
# qgrid_widget

# Preview the first 100 alerts.
df.head(100)

Unnamed: 0.1,Unnamed: 0,sig_generator,sig_id,timestamp,src,srcport,dst,dstport,proto,msg,src_ip_class,dst_ip_class
0,0,122,4,04/26-02:03:06.136363,10.7.3.154,,52.73.172.8,,,(portscan) TCP Distributed Portscan,internal,external
1,1,129,8,04/26-02:03:07.533370,78.187.72.121,39636.0,151.216.7.36,443.0,TCP,Data sent on stream after TCP Reset sent,external,internal
2,2,129,15,04/26-02:03:07.533371,78.187.72.121,39636.0,151.216.7.36,443.0,TCP,Reset outside window,external,internal
3,3,122,3,04/26-02:03:11.892463,10.7.3.156,,52.73.172.8,,,(portscan) TCP Portsweep,internal,external
4,4,122,3,04/26-02:03:46.305879,10.7.3.154,,52.73.172.8,,,(portscan) TCP Portsweep,internal,external
5,5,129,15,04/26-02:03:48.723141,2a07:1182:300:1:ee65:ccd6:6581:b22,37343.0,2a07:1182:7:1::5,443.0,TCP,Reset outside window,unknown internal,internal
6,6,129,15,04/26-02:03:48.724507,2a07:1182:7:1::5,443.0,2a07:1182:300:1:ee65:ccd6:6581:b22,37343.0,TCP,Reset outside window,internal,unknown internal
7,7,122,4,04/26-02:04:07.115619,10.7.3.154,,52.73.172.8,,,(portscan) TCP Distributed Portscan,internal,external
8,8,129,15,04/26-02:04:13.989437,2a07:1182:7:1::4,995.0,2a07:1182:300:1:ee65:ccd6:6581:b22,52611.0,TCP,Reset outside window,internal,unknown internal
9,9,129,15,04/26-02:04:25.938756,2a07:1182:300:1:ee65:ccd6:6581:b22,38746.0,2a07:1182:7:1::5,443.0,TCP,Reset outside window,unknown internal,internal


### Count different alert types

In [5]:
# Tally how often each alert message occurs. value_counts() is computed once
# and reused for both lists (it was previously evaluated twice); by default it
# orders the result from most to least frequent.
msg_counts = df['msg'].value_counts()
messages = msg_counts.index.tolist()
counts = msg_counts.tolist()

print('count - alert')
for msg, cnt in zip(messages, counts):
    print('{} - {}'.format(cnt, msg))
    
# df_f = df.drop_duplicates(['sig_generator', 'sig_id'])
# sig_gens = df_f['sig_generator'].value_counts().keys().tolist()
# sig_ids = df_f['sig_id'].value_counts().keys().tolist()
# df_f


count - alert
81500 - Reset outside window
76219 - (spp_sip) Maximum dialogs within a session reached
21356 - Consecutive TCP small segments exceeding threshold
15301 - (portscan) TCP Portsweep
10882 - (spp_sip) Content length mismatch
3892 - (portscan) UDP Portsweep
3862 - (http_inspect) NO CONTENT-LENGTH OR TRANSFER-ENCODING IN HTTP RESPONSE
3414 - (http_inspect) INVALID CONTENT-LENGTH OR CHUNK SIZE
3378 - TCP Timestamp is missing
2083 - (portscan) TCP Distributed Portscan
1606 - (portscan) UDP Portscan
1238 - (portscan) TCP Portscan
1148 - (http_inspect) UNESCAPED SPACE IN HTTP URI
1146 - Data sent on stream after TCP Reset sent
835 - TCP Timestamp is outside of PAWS window
637 - (http_inspect) LONG HEADER
205 - Bad segment, adjusted size <= 0
183 - (spp_ssh) Protocol mismatch
117 - (portscan) UDP Distributed Portscan
90 - ACK number is greater than prior FIN
79 - Limit on number of overlapping TCP packets reached
66 - (portscan) TCP Decoy Portscan
49 - (http_inspect) NON-RFC DEFINE

In [6]:
# Select TCP portsweep alerts whose source class matches 'external' or
# 'unknown' and whose destination class contains 'internal'.
# na=False treats missing values as "no match", so the mask is always strictly
# boolean and safe to index with.
# NOTE(review): `filter` shadows the Python builtin; the name is kept because
# other cells may reference it.
# NOTE(review): these are substring matches, so a destination classed
# 'unknown internal' also passes the 'internal' test - confirm this is intended.
filter = (
    df['src_ip_class'].str.contains('external|unknown', na=False)
    & df['dst_ip_class'].str.contains('internal', na=False)
    & df['msg'].str.contains('TCP Portsweep', na=False)
)
df[filter].head(100)

Unnamed: 0.1,Unnamed: 0,sig_generator,sig_id,timestamp,src,srcport,dst,dstport,proto,msg,src_ip_class,dst_ip_class
3705,3705,122,3,04/26-03:17:42.691601,2a07:1182:300:1:a26c:f9eb:c4af:e15b,,2a07:1182:7:1::8,,,(portscan) TCP Portsweep,unknown internal,internal
9934,9934,122,3,04/26-04:26:45.993265,2a07:1182:7:2:497d:2231:a73:6896,,2a07:1182:1000:1002::114,,,(portscan) TCP Portsweep,unknown internal,unknown internal
15719,15719,122,3,04/26-05:14:03.283518,2a07:1182:7:2:39fd:70fb:5afe:fe99,,2a07:1182:1000:1002::114,,,(portscan) TCP Portsweep,unknown internal,unknown internal
15806,15806,122,3,04/26-05:14:29.403722,2a07:1182:7:2:b9fd:c261:c58f:3c84,,2a07:1182:1000:1002::114,,,(portscan) TCP Portsweep,unknown internal,unknown internal
21737,21737,122,3,04/26-05:56:05.872726,2a07:1182:7:2:e8f9:c0f2:5b73:2c9e,,2a07:1182:1000:1002::114,,,(portscan) TCP Portsweep,unknown internal,unknown internal
23206,23206,122,3,04/26-06:05:09.211147,2a07:1182:7:2:8c0c:7458:591:f48b,,2a07:1182:1000:1002::114,,,(portscan) TCP Portsweep,unknown internal,unknown internal
24774,24774,122,3,04/26-06:14:15.777270,2a07:1182:7:2:2d48:f9b8:d770:5142,,2a07:1182:7:2:20b0:1a78:535:2e80,,,(portscan) TCP Portsweep,unknown internal,unknown internal
25791,25791,122,3,04/26-06:19:38.828367,2a07:1182:300:123:349b:500c:d0b6:f70c,,2a07:1182:7:1::8,,,(portscan) TCP Portsweep,unknown internal,internal
25835,25835,122,3,04/26-06:20:01.652310,2a07:1182:7:2:9c50:c57b:cb06:8450,,2a07:1182:1000:1002::114,,,(portscan) TCP Portsweep,unknown internal,unknown internal
26523,26523,122,3,04/26-06:23:57.720802,2a07:1182:300:123:349b:500c:d0b6:f70c,,2a07:1182:7:1::8,,,(portscan) TCP Portsweep,unknown internal,internal
