## Query Output Preprocessing

The graph database Dgraph returns query results in JSON fromat. The queries consist of getting all `originated` and all `responded` connections of a specified host. The `query_handler` tool converts these JSON outputs to csv files (two csv files for each host with some IP address, one for each connection direction (`originated`, `responded`)). 

This Jupyter notebook is used to:

1. Compute the neighbourhoods of these hosts. *(For each connection compute its neighbourhood from connections in a given time interval.)*
2. Assign labels. *(All connections from day 1 are considered to be "normal traffic", in second day the labels are assigned based on a combination of attacker IP and a timestamp corresponding to a particular attack. The timestamps were discovered using an automated network intrusion detection tool [Snort](https://www.snort.org/).)*
3. Concat DataFrames to one final `df` & write the result to a single file, ready for ML preprocessing (feature engineering).

## Neighbourhood Computation

### 1. Load the data

In [1]:
import pandas as pd
import numpy as np
import os

PREFIX = '/home/sramkova/diploma_thesis_data/test_stats/'
DAY_1_DIR_PATH_ORIG = PREFIX + 'day_1/originated'
DAY_1_DIR_PATH_RESP = PREFIX + 'day_1/responded'
DAY_2_DIR_PATH_ORIG = PREFIX + 'day_2/originated'
DAY_2_DIR_PATH_RESP = PREFIX + 'day_2/responded'


file_list_day1_orig = []
file_list_day1_resp = []
file_list_day2_orig = []
file_list_day2_resp = []

def get_file_names(file_list, dir_path):
    for filename in os.listdir(dir_path):
        # only IPv4: 
        if 'f' not in filename and filename.endswith('.csv'):
            # (if there is an 'f' present in the name of the file, it means that the file contains 
            # connections of a host with IPv6 address)
            file_list.append(filename)

# load filenames to lists:
get_file_names(file_list_day1_orig, DAY_1_DIR_PATH_ORIG)
get_file_names(file_list_day1_resp, DAY_1_DIR_PATH_RESP)
get_file_names(file_list_day2_orig, DAY_2_DIR_PATH_ORIG)
get_file_names(file_list_day2_resp, DAY_2_DIR_PATH_RESP)

print(len(file_list_day1_orig))
print(len(file_list_day1_resp))
print(len(file_list_day2_orig))
print(len(file_list_day2_resp))

218
1483
253
2139


In [2]:
# load as dataframes to a dictionary for easier processing:

# elements of the dictionary are in a form: { host.ip -> df with connections of corresponding host }
dfs_day1_orig = {}
dfs_day1_resp = {}
dfs_day2_orig = {}
dfs_day2_resp = {}

def load_files_to_dfs(dfs_dict, file_list, dir_path, prefix):
    prefix_name = 'output-' + prefix
    for filename in file_list:
        file_ip = filename
        file_ip = file_ip.replace(prefix_name, '').replace('.csv', '')
        df_conns = pd.read_csv(dir_path + '/' + filename)

        df_conns['connection.time'] = pd.to_datetime(df_conns['connection.ts'])
        
        # missing connection.service value means that Zeek wasn't able to extract the service => nulls can 
        # be treated as a new category
        df_conns['connection.service'].fillna('none', inplace = True)

        dfs_dict[file_ip] = df_conns

load_files_to_dfs(dfs_day1_orig, file_list_day1_orig, DAY_1_DIR_PATH_ORIG, 'o-')
load_files_to_dfs(dfs_day1_resp, file_list_day1_resp, DAY_1_DIR_PATH_RESP, 'r-')
load_files_to_dfs(dfs_day2_orig, file_list_day2_orig, DAY_2_DIR_PATH_ORIG, 'o-')
load_files_to_dfs(dfs_day2_resp, file_list_day2_resp, DAY_2_DIR_PATH_RESP, 'r-')

print(len(dfs_day1_orig))
print(len(dfs_day1_resp))
print(len(dfs_day2_orig))
print(len(dfs_day2_resp))

218
1483
253
2139


In [3]:
# check first 5 originated connections of a specified host from day 1:
pd.set_option('display.max_columns', None)
dfs_day1_orig['9.66.44.14'].head()

Unnamed: 0,originated_ip,uid,connection.uid,connection.conn_state,connection.duration,connection.orig_bytes,connection.orig_ip_bytes,connection.orig_p,connection.orig_pkts,connection.proto,connection.resp_bytes,connection.resp_ip_bytes,connection.resp_p,connection.resp_pkts,connection.service,connection.ts,responded_ip,dns_count,ssh_count,http_count,ssl_count,files_count,dns_qtype,dns_rcode,ssh_auth_attempts,ssh_host_key,http_method,http_status_code,http_user_agent,ssl_version,ssl_cipher,ssl_curve,ssl_validation_status,files_source,file_md5,dns_dicts,ssh_dicts,http_dicts,ssl_dicts,files_dicts,connection.time
0,9.66.44.14,0x16da,Cwvmv42aPClat5oybg,SF,0.024747,1586,1838,49741,6,tcp,300,552,80,6,http,2019-03-19T15:44:42.499711Z,104.103.90.39,0,0,1,0,1,[],[],[],[],['POST'],[302],[None],[],[],[],[],['HTTP'],['ea9308cc373d2a262676999bbba6c922'],[],[],"[{'http.method': 'POST', 'http.status_code': 3...",[],[{'file.md5s': {'ea9308cc373d2a262676999bbba6c...,2019-03-19 15:44:42.499711+00:00
1,9.66.44.14,0x176c,CfIPYj19F1yUoE2Yvl,RSTO,17.835625,713,1085,49167,9,tcp,7284,5028,80,4,http,2019-03-19T15:54:05.194616Z,104.18.25.243,0,0,1,0,2,[],[],[],[],['GET'],[0],[None],[],[],[],[],['HTTP'],"['', 'ff4864cfea7f9734be00146e55021a8d']",[],[],"[{'http.method': 'GET', 'http.status_code': 0,...",[],"[{'file.md5s': {'', 'ff4864cfea7f9734be00146e5...",2019-03-19 15:54:05.194616+00:00
2,9.66.44.14,0x17d1,CbEMi92Gx6JsU5zZbc,SF,108.769572,7611,8263,49282,16,tcp,2168,2580,80,10,http,2019-03-19T15:54:38.082582Z,104.40.210.32,0,0,1,0,4,[],[],[],[],['POST'],[200],[None],[],[],[],[],['HTTP'],"['32023bb33cfb2a1990a4ef2d85b6ac16', '217273a7...",[],[],"[{'http.method': 'POST', 'http.status_code': 2...",[],[{'file.md5s': {'32023bb33cfb2a1990a4ef2d85b6a...,2019-03-19 15:54:38.082582+00:00
3,9.66.44.14,0x17e1,CPqVHM0uObvaUnmF4,SF,111.905415,232,484,49296,6,tcp,868,1040,80,4,http,2019-03-19T15:54:38.889633Z,195.113.232.75,0,0,1,0,1,[],[],[],[],['GET'],[200],[None],[],[],[],[],['HTTP'],['c88a4046b83d858fb75deafc58d654df'],[],[],"[{'http.method': 'GET', 'http.status_code': 20...",[],[{'file.md5s': {'c88a4046b83d858fb75deafc58d65...,2019-03-19 15:54:38.889633+00:00
4,9.66.44.14,0x1824,C9l4Tr2TMCP0gAIwda,SF,109.275102,697,1109,55182,10,tcp,2364,2616,80,6,http,2019-03-19T15:54:41.530321Z,93.184.220.29,0,0,1,0,3,[],[],[],[],['GET'],[200],[None],[],[],[],[],['HTTP'],"['d1bc1c4ff1007d2ab8bae0dd1ddf2458', '0f0f1172...",[],[],"[{'http.method': 'GET', 'http.status_code': 20...",[],[{'file.md5s': {'d1bc1c4ff1007d2ab8bae0dd1ddf2...,2019-03-19 15:54:41.530321+00:00


In [4]:
dfs_day2_orig['9.66.44.14'].head()

Unnamed: 0,originated_ip,uid,connection.uid,connection.conn_state,connection.duration,connection.orig_bytes,connection.orig_ip_bytes,connection.orig_p,connection.orig_pkts,connection.proto,connection.resp_bytes,connection.resp_ip_bytes,connection.resp_p,connection.resp_pkts,connection.service,connection.ts,responded_ip,dns_count,ssh_count,http_count,ssl_count,files_count,dns_qtype,dns_rcode,ssh_auth_attempts,ssh_host_key,http_method,http_status_code,http_user_agent,ssl_version,ssl_cipher,ssl_curve,ssl_validation_status,files_source,file_md5,dns_dicts,ssh_dicts,http_dicts,ssl_dicts,files_dicts,connection.time
0,9.66.44.14,0x1d52,CRLKZR3kjOL9569MMe,SF,151.136956,4707,5359,56562,16,tcp,4854,4877,443,8,ssl,2019-03-20T08:32:44.639334Z,40.115.119.185,0,0,0,1,2,[],[],[],[],[],[],[],['TLSv12'],['TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384'],[None],[None],['SSL'],"['62455357dd57cb80c32ab295743cccc0', 'b5704cd8...",[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[{'file.md5s': {'62455357dd57cb80c32ab295743cc...,2019-03-20 08:32:44.639334+00:00
1,9.66.44.14,0x1be54,CPQEr21jC7wcD8tDqk,RSTR,127.179843,4351,4683,56648,8,tcp,4236,4528,443,7,ssl,2019-03-20T14:15:48.459073Z,40.85.78.63,0,0,0,1,2,[],[],[],[],[],[],[],['TLSv12'],['TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384'],[None],[None],['SSL'],"['62455357dd57cb80c32ab295743cccc0', 'b5704cd8...",[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[{'file.md5s': {'62455357dd57cb80c32ab295743cc...,2019-03-20 14:15:48.459073+00:00
2,9.66.44.14,0x1d339,CbJA5m48U1gfv6czgh,S0,2.997738,0,104,56640,2,tcp,0,0,443,0,none,2019-03-20T13:27:51.707239Z,65.55.252.93,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 13:27:51.707239+00:00
3,9.66.44.14,0x34be2,C9dqeg4JbYwpPDTTci,S0,0.0,0,48,56524,1,tcp,0,0,443,0,none,2019-03-20T08:16:27.95407Z,65.55.252.93,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 08:16:27.954070+00:00
4,9.66.44.14,0x4e082,CNqJfu3lYUrBHqg6Pl,S0,3.001753,0,104,56637,2,tcp,0,0,443,0,none,2019-03-20T13:27:09.656278Z,65.55.252.93,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 13:27:09.656278+00:00


### 2. Compute neighbourhoods for each row based on a time interval

(e.g. time interval: +- 5 minutes)

In [5]:
# various stat functions on attributes from neighbourhood:

def get_counts(df, prefix):
    # counts (overall + counts of different protocols): 
    proto_tcp_count = 0
    proto_udp_count = 0
    proto_icmp_count = 0
            
    if 'connection.proto' in df:
        proto_counts = df['connection.proto'].value_counts()
        proto_tcp_count = proto_counts['tcp'] if 'tcp' in proto_counts else 0
        proto_udp_count = proto_counts['udp'] if 'udp' in proto_counts else 0
        proto_icmp_count = proto_counts['icmp'] if 'icmp' in proto_counts else 0
    
    return {prefix + '_total': len(df.index),
            prefix + '_proto_tcp_count': proto_tcp_count,
            prefix + '_proto_udp_count': proto_udp_count,
            prefix + '_proto_icmp_count': proto_icmp_count
           }

def get_modes(df, prefix):
    # .mode()[0] return the value of a categorical variable that appeared the most times
    return {prefix + '_connection.protocol_mode': df['connection.proto'].mode()[0] if 'connection.proto' in df else '-',
            prefix + '_connection.service_mode': df['connection.service'].mode()[0] if 'connection.service' in df else '-',
            prefix + '_connection.conn_state_mode': df['connection.conn_state'].mode()[0] if 'connection.conn_state' in df else '-'
           }

def get_means(df, prefix):
    # .mean() returns mean of the corresponding numerical attribute variable values
    return {prefix + '_connection.time_mean': df['connection.time'].mean() if 'connection.time' in df else cur_time,
            prefix + '_connection.duration_mean': df['connection.duration'].mean() if 'connection.duration' in df else 0, 
            # prefix + '_connection.orig_p_mean': df['connection.orig_p'].mean() if 'connection.orig_p' in df else 0, 
            prefix + '_connection.orig_bytes_mean': df['connection.orig_bytes'].mean() if 'connection.orig_bytes' in df else 0,
            prefix + '_connection.orig_pkts_mean': df['connection.orig_pkts'].mean() if 'connection.orig_pkts' in df else 0, 
            # prefix + '_connection.resp_p_mean': df['connection.resp_p'].mean() if 'connection.resp_p' in df else 0,
            prefix + '_connection.resp_bytes_mean': df['connection.resp_bytes'].mean() if 'connection.resp_bytes' in df else 0,
            prefix + '_connection.resp_pkts_mean': df['connection.resp_pkts'].mean() if 'connection.resp_pkts' in df else 0
           }

def get_stats_means(df, prefix):
    # .mean() returns mean of the corresponding numerical attribute variable values
    return {prefix + '_dns_count_mean': df['dns_count'].mean() if 'dns_count' in df else 0,
            prefix + '_ssh_count_mean': df['ssh_count'].mean() if 'ssh_count' in df else 0, 
            prefix + '_http_count_mean': df['http_count'].mean() if 'http_count' in df else 0,
            prefix + '_ssl_count_mean': df['ssl_count'].mean() if 'ssl_count' in df else 0,
            prefix + '_files_count_mean': df['files_count'].mean() if 'files_count' in df else 0
           }

def get_medians(df, prefix):
    # .median() returns median of the corresponding numerical attribute variable values
    return {prefix + '_connection.time_median': df['connection.time'].median() if 'connection.time' in df else cur_time,
            prefix + '_connection.duration_median': df['connection.duration'].median() if 'connection.duration' in df else 0, 
            # prefix + '_connection.orig_p_median': df['connection.orig_p'].median() if 'connection.orig_p' in df else 0,
            prefix + '_connection.orig_bytes_median': df['connection.orig_bytes'].median() if 'connection.orig_bytes' in df else 0,
            prefix + '_connection.orig_pkts_median': df['connection.orig_pkts'].median() if 'connection.orig_pkts' in df else 0, 
            # prefix + '_connection.resp_p_median': df['connection.resp_p'].median() if 'connection.resp_p' in df else 0,
            prefix + '_connection.resp_bytes_median': df['connection.resp_bytes'].median() if 'connection.resp_bytes' in df else 0,
            prefix + '_connection.resp_pkts_median': df['connection.resp_pkts'].median() if 'connection.resp_pkts' in df else 0
           }

def get_orig_ports(df, prefix):
    # count orig_p categories:
    orig_well_known_count = 0
    orig_reg_or_dyn_count = 0
    unique_orig_p_list = df['connection.orig_p'].unique().tolist()
    values_orig_p = df['connection.orig_p'].value_counts()
    
    for uniq_p in unique_orig_p_list:
        if uniq_p < 1024:
            orig_well_known_count += values_orig_p[uniq_p]
        else:
            orig_reg_or_dyn_count += values_orig_p[uniq_p]
            
    return {prefix + '_orig_p_well_known_count': orig_well_known_count,
            prefix + '_orig_p_reg_or_dyn_count': orig_reg_or_dyn_count}

def get_resp_ports(df, prefix):
    # count resp_p categories:
    common_ports = {21: 0, 
                    22: 0, 
                    53: 0, 
                    80: 0, 
                    123: 0, 
                    443: 0, 
                    3389: 0}
    resp_well_known = 0
    resp_reg = 0
    resp_dyn = 0
    unique_resp_p_list = df['connection.resp_p'].unique().tolist()
    values_resp_p = df['connection.resp_p'].value_counts()
    
    for uniq_p in unique_resp_p_list:
        if uniq_p in common_ports.keys():
            common_ports[uniq_p] += values_resp_p[uniq_p]
        elif uniq_p < 1024:
            resp_well_known += values_resp_p[uniq_p]
        elif uniq_p < 49152:
            resp_reg += values_resp_p[uniq_p]
        else:
            resp_dyn += values_resp_p[uniq_p]
            
    return {prefix + '_resp_p_21_count': common_ports[21],
            prefix + '_resp_p_22_count': common_ports[22],
            prefix + '_resp_p_53_count': common_ports[53], 
            prefix + '_resp_p_80_count': common_ports[80],
            prefix + '_resp_p_123_count': common_ports[123],
            prefix + '_resp_p_443_count': common_ports[443],
            prefix + '_resp_p_3389_count': common_ports[3389],
            prefix + '_resp_p_well_known_count': resp_well_known,
            prefix + '_resp_p_reg_count': resp_reg,
            prefix + '_resp_p_dyn_count': resp_dyn}

In [6]:
def generate_duration_filter(duration_val):
    # based on constants from data_exploration.ipynb
    if duration_val <= 0.0:
        return 0.000001, None
    elif duration_val <= 0.0001:
        return 0.000001, 0.001
    elif duration_val <= 0.009:
        return 0.001, 0.05
    elif duration_val <= 0.5:
        return 0.05, 1.5
    elif duration_val <= 5:
        return 1.5, 10
    elif duration_val <= 15:
        return 10, 20
    elif duration_val <= 30:
        return 20, 40
    elif duration_val <= 50:
        return 40, 60
    elif duration_val <= 75:
        return 60, 90
    elif duration_val <= 100:
        return 75, 110
    return None, 100

def generate_bytes_filter(bytes_val):
    if bytes_val == 0:
        return 0, 0
    elif bytes_val <= 1450:
        return bytes_val - 50, bytes_val + 50
    elif bytes_val <= 35000:
        return bytes_val - 500, bytes_val + 500
    else:
        return None, bytes_val - 1000

In [7]:
def get_similar_count(df, row, prefix):
    # protocol filter
    mask = (df['connection.proto'] == row['connection.proto'])
    df_filtered = df.loc[mask]
    
    # service filter
    mask = (df_filtered['connection.service'] == row['connection.service'])
    df_filtered = df_filtered.loc[mask]
    
    # conn_state filter
    mask = (df_filtered['connection.conn_state'] == row['connection.conn_state'])
    df_filtered = df_filtered.loc[mask]
    
    # duration filter
    lower, upper = generate_duration_filter(row['connection.duration'])
    if lower:
        mask = df_filtered['connection.duration'] >= lower
        df_filtered = df_filtered.loc[mask]
    if upper:
        mask = df_filtered['connection.duration'] <= upper
        df_filtered = df_filtered.loc[mask]
        
    # _bytes filter
    lower, upper = generate_duration_filter(row['connection.orig_bytes'])
    if lower:
        mask = df_filtered['connection.orig_bytes'] >= lower
        df_filtered = df_filtered.loc[mask]
    if upper:
        mask = df_filtered['connection.orig_bytes'] <= upper
        df_filtered = df_filtered.loc[mask]
        
    lower, upper = generate_duration_filter(row['connection.resp_bytes'])
    if lower:
        mask = df_filtered['connection.resp_bytes'] >= lower
        df_filtered = df_filtered.loc[mask]
    if upper:
        mask = df_filtered['connection.resp_bytes'] <= upper
        df_filtered = df_filtered.loc[mask]
    
    # _ip_bytes filter
    mask = (df_filtered['connection.orig_ip_bytes'] >= row['connection.orig_ip_bytes'] - 50) & (df_filtered['connection.orig_ip_bytes'] <= row['connection.orig_ip_bytes'] + 50)
    df_filtered = df_filtered.loc[mask]
    mask = (df_filtered['connection.resp_ip_bytes'] >= row['connection.resp_ip_bytes'] - 50) & (df_filtered['connection.resp_ip_bytes'] <= row['connection.resp_ip_bytes'] + 50)
    df_filtered = df_filtered.loc[mask]
    
    # remove original connection from neighbourhood (empty will have size 0 instead of 1)
    mask = (df_filtered['connection.uid'] != row['connection.uid'])
    df_filtered = df_filtered.loc[mask]

    return {prefix + '_similar_conns_count': df_filtered.shape[0]}

In [8]:
def check_attr_value(x, attr_str, row_attr_vals_list):
    if isinstance(x, float) and np.isnan(x):
        return False
    
    if isinstance(x, list) and len(x) < 1:
        return False
    
    if isinstance(x, str) and x == '[]':
        return False
    
    if isinstance(row_attr_vals_list, list) and len(row_attr_vals_list) > 0:
        for attribute in x:
            if attribute in row_attr_vals_list:
                return True
    return False

def get_similar_attributes_count(df, row, prefix):
    neighbourhood_attributes_dict = {}
    attributes = ['dns_qtype', 'dns_rcode', 'ssh_auth_attempts', 'ssh_host_key', 'http_method', 'http_status_code', 
                  'http_user_agent', 'ssl_version', 'ssl_cipher', 'ssl_curve', 'ssl_validation_status', 'files_source',
                  'file_md5']
    
    for attr in attributes:
        if not row[attr]:
            # attribute value list is empty, no similarity is counted
            attr_dict = {prefix + '_similar_' + attr + '_count': 0}
            neighbourhood_attributes_dict.update(attr_dict)
        else:
            # filter
            mask = df[attr].apply(lambda x: check_attr_value(x, attr, row[attr]))
            df_filtered = df.loc[mask]

            # remove original connection from neighbourhood (empty will have size 0 instead of 1)
            mask = (df_filtered['connection.uid'] != row['connection.uid'])
            df_filtered = df_filtered.loc[mask]

            # add attribute count to dictionary that contains all counts
            attr_dict = {prefix + '_similar_' + attr + '_count': df_filtered.shape[0]}
            neighbourhood_attributes_dict.update(attr_dict)
    
    return neighbourhood_attributes_dict

In [9]:
def compute_time_neighbourhood(host_ip, dfs_list, time_col_name, cur_time, time_start, time_end, row, prefix):
    if host_ip in dfs_list:
        ip_df = dfs_list[host_ip]
        mask = (ip_df[time_col_name] > time_start) & (ip_df[time_col_name] <= time_end)
        df = ip_df.loc[mask]

        if len(df) > 0:
            neighbourhood_dict = {}

            neighbourhood_counts = get_counts(df, prefix)
            neighbourhood_modes = get_modes(df, prefix)
            neighbourhood_means = get_means(df, prefix)
            # neighbourhood_medians = get_medians(df, prefix)
            neighbourhood_orig_ports = get_orig_ports(df, prefix)
            neighbourhood_resp_ports = get_resp_ports(df, prefix)
            neighbourhood_stats_means = get_stats_means(df, prefix)
            neighbourhood_similar_count = get_similar_count(df, row, prefix)
            neighbourhood_similar_attributes_count = get_similar_attributes_count(df, row, prefix)
            
            neighbourhood_dict.update(neighbourhood_counts)
            neighbourhood_dict.update(neighbourhood_modes)
            neighbourhood_dict.update(neighbourhood_means)
            # neighbourhood_dict.update(neighbourhood_medians)
            neighbourhood_dict.update(neighbourhood_orig_ports)
            neighbourhood_dict.update(neighbourhood_resp_ports)
            neighbourhood_dict.update(neighbourhood_stats_means)
            neighbourhood_dict.update(neighbourhood_similar_count)
            neighbourhood_dict.update(neighbourhood_similar_attributes_count)
            
            return neighbourhood_dict

    return {prefix + '_total': 0,
            prefix + '_proto_tcp_count': 0,
            prefix + '_proto_udp_count': 0,
            prefix + '_proto_icmp_count': 0,
            prefix + '_connection.protocol_mode': '-',
            prefix + '_connection.service_mode': '-',
            prefix + '_connection.conn_state_mode': '-',
            prefix + '_connection.time_mean': cur_time, # time_mean: 0 could not be here => problem later with time conversion (missing year) 
                                                        # (but does it make sense as a default value?)
            prefix + '_connection.duration_mean': 0, 
            # prefix + '_connection.orig_p_mean': 0,
            prefix + '_connection.orig_bytes_mean': 0,
            prefix + '_connection.orig_pkts_mean': 0,
            # prefix + '_connection.resp_p_mean': 0,
            prefix + '_connection.resp_bytes_mean': 0,
            prefix + '_connection.resp_pkts_mean': 0,
            # prefix + '_connection.time_median': cur_time,
            # prefix + '_connection.duration_median': 0, 
            # prefix + '_connection.orig_p_median': 0,
            # prefix + '_connection.orig_bytes_median': 0,
            # prefix + '_connection.orig_pkts_median': 0, 
            # prefix + '_connection.resp_p_median': 0,
            # prefix + '_connection.resp_bytes_median': 0,
            # prefix + '_connection.resp_pkts_median': 0, 
            prefix + '_orig_p_well_known_count': 0,
            prefix + '_orig_p_reg_or_dyn_count': 0,
            prefix + '_resp_p_21_count': 0,
            prefix + '_resp_p_22_count': 0,
            prefix + '_resp_p_53_count': 0, 
            prefix + '_resp_p_80_count': 0,
            prefix + '_resp_p_123_count': 0,
            prefix + '_resp_p_443_count': 0,
            prefix + '_resp_p_3389_count': 0,
            prefix + '_resp_p_well_known_count': 0,
            prefix + '_resp_p_reg_count': 0,
            prefix + '_resp_p_dyn_count': 0,
            prefix + '_dns_count_mean': 0,
            prefix + '_ssh_count_mean': 0,
            prefix + '_http_count_mean': 0,
            prefix + '_ssl_count_mean': 0,
            prefix + '_files_count_mean': 0,
            prefix + '_similar_conns_count': 0,
            prefix + '_similar_dns_qtype_count': 0,
            prefix + '_similar_dns_rcode_count': 0,
            prefix + '_similar_ssh_auth_attempts_count': 0,
            prefix + '_similar_ssh_host_key_count': 0,
            prefix + '_similar_http_method_count': 0,
            prefix + '_similar_http_status_code_count': 0,
            prefix + '_similar_http_user_agent_count': 0,
            prefix + '_similar_ssl_version_count': 0,
            prefix + '_similar_ssl_cipher_count': 0,
            prefix + '_similar_ssl_curve_count': 0,
            prefix + '_similar_ssl_validation_status_count': 0,
            prefix + '_similar_files_source_count': 0,
            prefix + '_similar_file_md5_count': 0
           }

In [10]:
NEIGHBOURHOOD_TIME_WINDOW_MINUTES_ORIG_DIRECTION = 5
NEIGHBOURHOOD_TIME_WINDOW_MINUTES_RESP_DIRECTION = 2

def compute_neighbourhoods(cur_orig_ip, dfs_list_orig, dfs_list_resp):
    df_result = pd.DataFrame()
    print('[{}]: Computing neighbourhood for connections of originator {:15} ({})'.format(datetime.now().strftime("%H:%M:%S"), cur_orig_ip, str(len(dfs_list_orig[cur_orig_ip]))))
    # iterate over rows in originated connections df of host with cur_orig_ip IP address:
    for index, row in dfs_list_orig[cur_orig_ip].iterrows():
        cur_row_dict = row.to_dict()
        cur_time = row['connection.time']
        
        time_start_orig = cur_time - pd.Timedelta(minutes=NEIGHBOURHOOD_TIME_WINDOW_MINUTES_ORIG_DIRECTION)
        time_end_orig = cur_time + pd.Timedelta(minutes=NEIGHBOURHOOD_TIME_WINDOW_MINUTES_ORIG_DIRECTION)
        time_start_resp = cur_time - pd.Timedelta(minutes=NEIGHBOURHOOD_TIME_WINDOW_MINUTES_RESP_DIRECTION)
        time_end_resp = cur_time + pd.Timedelta(minutes=NEIGHBOURHOOD_TIME_WINDOW_MINUTES_RESP_DIRECTION)
        ip_responder = row['responded_ip']
        try:
            # compute neighbourhoods (from originated connections for originator, from responded connections for responder):
            originator_neighbourhood = compute_time_neighbourhood(cur_orig_ip, dfs_list_orig, 'connection.time', cur_time, time_start_orig, time_end_orig, row, 'orig_orig')
            originator_neighbourhood2 = compute_time_neighbourhood(cur_orig_ip, dfs_list_resp, 'connection.time', cur_time, time_start_resp, time_end_resp, row, 'orig_resp')
            responder_neighbourhood = compute_time_neighbourhood(ip_responder, dfs_list_orig, 'connection.time', cur_time, time_start_orig, time_end_orig, row, 'resp_orig')
            responder_neighbourhood2 = compute_time_neighbourhood(ip_responder, dfs_list_resp, 'connection.time', cur_time, time_start_resp, time_end_resp, row, 'resp_resp')

            cur_row_dict.update(originator_neighbourhood)
            cur_row_dict.update(originator_neighbourhood2)
            cur_row_dict.update(responder_neighbourhood)
            cur_row_dict.update(responder_neighbourhood2)
            
            # concat to one long row and to df_result:
            row_df = pd.DataFrame([cur_row_dict])
            df_result = pd.concat([df_result, row_df], axis=0, ignore_index=True)
        except: 
            print('Problem with originator {} and responder {} ({})'.format(cur_orig_ip, ip_responder, row['connection.uid']))
            pass
    return df_result

In [11]:
# small input for development purposes:
smaller_dfs_orig = {}
smaller_dfs_resp = {}

# smaller_dfs_orig['4.122.55.115'] = dfs_day2_orig['4.122.55.115'].loc[dfs_day2_orig['4.122.55.115']['connection.uid'] == 'CNXOFP2e8fzmzjEjP6']
# # smaller_dfs_orig['10.2.4.44'] = dfs_day2_orig['10.2.4.44'].sample(20)
# smaller_dfs_resp['4.122.55.115'] = dfs_day2_resp['4.122.55.115']
# smaller_dfs_resp['10.5.5.2'] = dfs_day2_resp['10.5.5.2']

smaller_dfs_orig['4.122.55.115'] = dfs_day1_orig['4.122.55.115'].loc[dfs_day1_orig['4.122.55.115']['connection.uid'] == 'CRVwBM2CVD3ZPKvGF6']
smaller_dfs_resp['4.122.55.115'] = dfs_day1_resp['4.122.55.115']
smaller_dfs_resp['10.5.5.2'] = dfs_day1_resp['10.5.5.2']

In [12]:
from datetime import datetime
import multiprocessing
from multiprocessing import Pool
from functools import partial
from contextlib import contextmanager

@contextmanager
def poolcontext(*args, **kwargs):
    pool = multiprocessing.Pool(*args, **kwargs)
    yield pool
    pool.terminate()

# compute neighbourhoods using multiple threads (time optimalization):
print('Start at ' + datetime.now().strftime("%H:%M:%S") + '.')
with poolcontext(processes=32) as pool:
    ### for development purposes:
#     dfs_with_neighbourhoods = pool.map(partial(compute_neighbourhoods, 
#                                                dfs_list_orig=smaller_dfs_orig, 
#                                                dfs_list_resp=smaller_dfs_resp), 
#                                        smaller_dfs_orig.keys())
    ###
    
    dfs_with_neighbourhoods_day1 = pool.map(
        partial(compute_neighbourhoods, 
                dfs_list_orig=dfs_day1_orig, 
                dfs_list_resp=dfs_day1_resp), 
        dfs_day1_orig.keys())
    
    dfs_with_neighbourhoods_day2 = pool.map(
        partial(compute_neighbourhoods, 
                dfs_list_orig=dfs_day2_orig, 
                dfs_list_resp=dfs_day2_resp), 
        dfs_day2_orig.keys())

print('Done at ' + datetime.now().strftime("%H:%M:%S") + '.')

Start at 11:53:12.
[11:53:16]: Computing neighbourhood for connections of originator 10.2.4.44       (1246)
[11:53:18]: Computing neighbourhood for connections of originator 10.0.4.44       (7)
[11:53:18]: Computing neighbourhood for connections of originator 10.2.4.49       (3318)
[11:53:19]: Computing neighbourhood for connections of originator 10.4.2.26       (602)
[11:53:21]: Computing neighbourhood for connections of originator 10.1.4.46       (3584)
[11:53:22]: Computing neighbourhood for connections of originator 4.122.55.115    (14)
[11:53:24]: Computing neighbourhood for connections of originator 10.1.4.49       (214)
[11:53:24]: Computing neighbourhood for connections of originator 193.150.14.191  (2)
[11:53:25]: Computing neighbourhood for connections of originator 10.3.4.49       (299)
[11:53:27]: Computing neighbourhood for connections of originator 10.5.3.34       (11)
[11:53:28]: Computing neighbourhood for connections of originator 78.108.102.237  (12)
[11:53:28]: Compu

In [13]:
# pd.set_option('display.max_columns', None)
# dfs_with_neighbourhoods[0].head()

In [14]:
# for col in dfs_with_neighbourhoods[0].columns:
#     print(col)

In [15]:
# print(type(dfs_with_neighbourhoods))
print(type(dfs_with_neighbourhoods_day1))
print(type(dfs_with_neighbourhoods_day2))

<class 'list'>
<class 'list'>


In [16]:
# print(len(dfs_with_neighbourhoods))
print(len(dfs_with_neighbourhoods_day1))
print(len(dfs_with_neighbourhoods_day2))

218
253


In [17]:
dfs_with_neighbourhoods_day1[0].head()

Unnamed: 0,originated_ip,uid,connection.uid,connection.conn_state,connection.duration,connection.orig_bytes,connection.orig_ip_bytes,connection.orig_p,connection.orig_pkts,connection.proto,connection.resp_bytes,connection.resp_ip_bytes,connection.resp_p,connection.resp_pkts,connection.service,connection.ts,responded_ip,dns_count,ssh_count,http_count,ssl_count,files_count,dns_qtype,dns_rcode,ssh_auth_attempts,ssh_host_key,http_method,http_status_code,http_user_agent,ssl_version,ssl_cipher,ssl_curve,ssl_validation_status,files_source,file_md5,dns_dicts,ssh_dicts,http_dicts,ssl_dicts,files_dicts,connection.time,orig_orig_total,orig_orig_proto_tcp_count,orig_orig_proto_udp_count,orig_orig_proto_icmp_count,orig_orig_connection.protocol_mode,orig_orig_connection.service_mode,orig_orig_connection.conn_state_mode,orig_orig_connection.time_mean,orig_orig_connection.duration_mean,orig_orig_connection.orig_bytes_mean,orig_orig_connection.orig_pkts_mean,orig_orig_connection.resp_bytes_mean,orig_orig_connection.resp_pkts_mean,orig_orig_orig_p_well_known_count,orig_orig_orig_p_reg_or_dyn_count,orig_orig_resp_p_21_count,orig_orig_resp_p_22_count,orig_orig_resp_p_53_count,orig_orig_resp_p_80_count,orig_orig_resp_p_123_count,orig_orig_resp_p_443_count,orig_orig_resp_p_3389_count,orig_orig_resp_p_well_known_count,orig_orig_resp_p_reg_count,orig_orig_resp_p_dyn_count,orig_orig_dns_count_mean,orig_orig_ssh_count_mean,orig_orig_http_count_mean,orig_orig_ssl_count_mean,orig_orig_files_count_mean,orig_orig_similar_conns_count,orig_orig_similar_dns_qtype_count,orig_orig_similar_dns_rcode_count,orig_orig_similar_ssh_auth_attempts_count,orig_orig_similar_ssh_host_key_count,orig_orig_similar_http_method_count,orig_orig_similar_http_status_code_count,orig_orig_similar_http_user_agent_count,orig_orig_similar_ssl_version_count,orig_orig_similar_ssl_cipher_count,orig_orig_similar_ssl_curve_count,orig_orig_similar_ssl_validation_status_count,orig_orig_similar_files_source_count,orig_orig_similar_file_md5_count,orig_resp_total,orig_resp_proto_tcp_count,orig_resp_proto_udp_count,orig_resp_proto_icmp_count,orig_resp_connection.protocol_mode,orig_resp_connection.service_mode,orig_resp_connection.conn_state_mode,orig_resp_connection.time_mean,orig_resp_connection.duration_mean,orig_resp_connection.orig_bytes_mean,orig_resp_connection.orig_pkts_mean,orig_resp_connection.resp_bytes_mean,orig_resp_connection.resp_pkts_mean,orig_resp_orig_p_well_known_count,orig_resp_orig_p_reg_or_dyn_count,orig_resp_resp_p_21_count,orig_resp_resp_p_22_count,orig_resp_resp_p_53_count,orig_resp_resp_p_80_count,orig_resp_resp_p_123_count,orig_resp_resp_p_443_count,orig_resp_resp_p_3389_count,orig_resp_resp_p_well_known_count,orig_resp_resp_p_reg_count,orig_resp_resp_p_dyn_count,orig_resp_dns_count_mean,orig_resp_ssh_count_mean,orig_resp_http_count_mean,orig_resp_ssl_count_mean,orig_resp_files_count_mean,orig_resp_similar_conns_count,orig_resp_similar_dns_qtype_count,orig_resp_similar_dns_rcode_count,orig_resp_similar_ssh_auth_attempts_count,orig_resp_similar_ssh_host_key_count,orig_resp_similar_http_method_count,orig_resp_similar_http_status_code_count,orig_resp_similar_http_user_agent_count,orig_resp_similar_ssl_version_count,orig_resp_similar_ssl_cipher_count,orig_resp_similar_ssl_curve_count,orig_resp_similar_ssl_validation_status_count,orig_resp_similar_files_source_count,orig_resp_similar_file_md5_count,resp_orig_total,resp_orig_proto_tcp_count,resp_orig_proto_udp_count,resp_orig_proto_icmp_count,resp_orig_connection.protocol_mode,resp_orig_connection.service_mode,resp_orig_connection.conn_state_mode,resp_orig_connection.time_mean,resp_orig_connection.duration_mean,resp_orig_connection.orig_bytes_mean,resp_orig_connection.orig_pkts_mean,resp_orig_connection.resp_bytes_mean,resp_orig_connection.resp_pkts_mean,resp_orig_orig_p_well_known_count,resp_orig_orig_p_reg_or_dyn_count,resp_orig_resp_p_21_count,resp_orig_resp_p_22_count,resp_orig_resp_p_53_count,resp_orig_resp_p_80_count,resp_orig_resp_p_123_count,resp_orig_resp_p_443_count,resp_orig_resp_p_3389_count,resp_orig_resp_p_well_known_count,resp_orig_resp_p_reg_count,resp_orig_resp_p_dyn_count,resp_orig_dns_count_mean,resp_orig_ssh_count_mean,resp_orig_http_count_mean,resp_orig_ssl_count_mean,resp_orig_files_count_mean,resp_orig_similar_conns_count,resp_orig_similar_dns_qtype_count,resp_orig_similar_dns_rcode_count,resp_orig_similar_ssh_auth_attempts_count,resp_orig_similar_ssh_host_key_count,resp_orig_similar_http_method_count,resp_orig_similar_http_status_code_count,resp_orig_similar_http_user_agent_count,resp_orig_similar_ssl_version_count,resp_orig_similar_ssl_cipher_count,resp_orig_similar_ssl_curve_count,resp_orig_similar_ssl_validation_status_count,resp_orig_similar_files_source_count,resp_orig_similar_file_md5_count,resp_resp_total,resp_resp_proto_tcp_count,resp_resp_proto_udp_count,resp_resp_proto_icmp_count,resp_resp_connection.protocol_mode,resp_resp_connection.service_mode,resp_resp_connection.conn_state_mode,resp_resp_connection.time_mean,resp_resp_connection.duration_mean,resp_resp_connection.orig_bytes_mean,resp_resp_connection.orig_pkts_mean,resp_resp_connection.resp_bytes_mean,resp_resp_connection.resp_pkts_mean,resp_resp_orig_p_well_known_count,resp_resp_orig_p_reg_or_dyn_count,resp_resp_resp_p_21_count,resp_resp_resp_p_22_count,resp_resp_resp_p_53_count,resp_resp_resp_p_80_count,resp_resp_resp_p_123_count,resp_resp_resp_p_443_count,resp_resp_resp_p_3389_count,resp_resp_resp_p_well_known_count,resp_resp_resp_p_reg_count,resp_resp_resp_p_dyn_count,resp_resp_dns_count_mean,resp_resp_ssh_count_mean,resp_resp_http_count_mean,resp_resp_ssl_count_mean,resp_resp_files_count_mean,resp_resp_similar_conns_count,resp_resp_similar_dns_qtype_count,resp_resp_similar_dns_rcode_count,resp_resp_similar_ssh_auth_attempts_count,resp_resp_similar_ssh_host_key_count,resp_resp_similar_http_method_count,resp_resp_similar_http_status_code_count,resp_resp_similar_http_user_agent_count,resp_resp_similar_ssl_version_count,resp_resp_similar_ssl_cipher_count,resp_resp_similar_ssl_curve_count,resp_resp_similar_ssl_validation_status_count,resp_resp_similar_files_source_count,resp_resp_similar_file_md5_count
0,10.2.4.44,0x10e,C91B543mPc9uqyAmZg,SF,0.523709,603,1727,52240,21,tcp,32091,34507,443,20,ssl,2019-03-19T10:10:49.194278Z,4.122.55.2,0,0,0,1,1,[],[],[],[],[],[],[],['TLSv12'],['TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384'],[None],[None],['SSL'],['1ded0213d77efa85abc3593e21593bea'],[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[{'file.md5s': {'1ded0213d77efa85abc3593e21593...,2019-03-19 10:10:49.194278+00:00,26,12,14,0,udp,ntp,SF,2019-03-19 10:11:06.261824+00:00,0.170888,226.230769,8.730769,20550.961538,8.5,11,15,0,0,3,4,11,8,0,0,0,0,0.115385,0.0,0.153846,0.307692,0.461538,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 10:10:49.194278+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,icmp,none,OTH,2019-03-19 10:08:07.080669952+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,313,291,0,22,tcp,ssl,RSTO,2019-03-19 10:11:09.731063552+00:00,0.384386,304.115016,19.648562,41534.805112,14.619808,22,291,0,0,0,98,0,193,0,22,0,0,0.0,0.0,0.309904,0.603834,0.891374,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,10.2.4.44,0x37a,CJDeQn1RA4RpXDJi76,SF,0.010425,116,384,51558,5,tcp,531,851,80,6,http,2019-03-19T10:58:49.150898Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-19 10:58:49.150898+00:00,29,12,17,0,udp,ntp,SF,2019-03-19 10:59:05.964887552+00:00,0.148223,206.137931,8.0,18428.310345,7.310345,14,15,0,0,3,4,14,8,0,0,0,0,0.103448,0.0,0.137931,0.275862,0.413793,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 10:58:49.150898+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,icmp,none,OTH,2019-03-19 10:55:57.169973504+00:00,0.0,0.0,1.0,0.0,0.0,2,0,0,0,0,0,0,0,0,2,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,304,284,0,20,tcp,ssl,RSTO,2019-03-19 10:59:13.658917632+00:00,0.390309,308.203947,20.230263,41725.9375,14.730263,20,284,0,0,0,94,0,190,0,20,0,0,0.0,0.0,0.305921,0.598684,0.875,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,10.2.4.44,0x3c0,CPm0gG6Cmt4bLshDh,SF,0.012546,116,384,51822,5,tcp,531,799,80,5,http,2019-03-19T11:23:34.113264Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-19 11:23:34.113264+00:00,26,12,14,0,udp,ntp,SF,2019-03-19 11:23:02.978089216+00:00,0.342311,226.269231,9.115385,20551.0,8.384615,11,15,0,0,3,4,11,8,0,0,0,0,0.115385,0.0,0.153846,0.307692,0.461538,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 11:23:34.113264+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,icmp,none,OTH,2019-03-19 11:21:07.077130240+00:00,0.0,0.0,1.0,0.0,0.0,2,0,0,0,0,0,0,0,0,2,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,343,324,0,19,tcp,ssl,RSTO,2019-03-19 11:23:54.756743424+00:00,0.401875,316.174927,21.717201,42060.705539,15.562682,19,324,0,0,0,108,0,216,0,19,0,0,0.0,0.0,0.311953,0.606414,0.883382,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,10.2.4.44,0x7db,CFPUgMgxMkSXwpF0l,S1,0.014672,114,277,52580,3,tcp,523,164,80,3,http,2019-03-19T12:35:34.077791Z,4.122.55.2,0,0,1,0,0,[],[],[],[],['GET'],[0],[None],[],[],[],[],[],[],[],[],"[{'http.method': 'GET', 'http.status_code': 0,...",[],[],2019-03-19 12:35:34.077791+00:00,25,12,13,0,udp,ntp,SF,2019-03-19 12:35:38.427494912+00:00,0.174436,231.52,8.96,21369.2,8.36,10,15,0,0,3,4,10,8,0,0,0,0,0.12,0.0,0.16,0.32,0.4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 12:35:34.077791+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,icmp,none,OTH,2019-03-19 12:32:47.466493952+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,358,338,0,20,tcp,ssl,RSTO,2019-03-19 12:35:51.960755456+00:00,0.421736,314.600559,20.539106,41960.0,14.843575,20,338,0,0,0,112,0,226,0,20,0,0,0.0,0.0,0.307263,0.628492,0.907821,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,10.2.4.44,0x80c,C93t9a4VGVuS6lYHD8,SF,0.009,116,384,52498,5,tcp,531,799,80,5,http,2019-03-19T12:28:49.134506Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-19 12:28:49.134506+00:00,27,12,15,0,udp,ntp,SF,2019-03-19 12:29:06.160511232+00:00,0.16178,219.62963,8.444444,19791.592593,8.074074,12,15,0,0,3,4,12,8,0,0,0,0,0.111111,0.0,0.148148,0.296296,0.444444,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 12:28:49.134506+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,3,icmp,none,OTH,2019-03-19 12:28:30.531671040+00:00,0.0,0.0,1.0,0.0,0.0,3,0,0,0,0,0,0,0,0,3,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,321,302,0,19,tcp,ssl,RSTO,2019-03-19 12:29:11.714937088+00:00,0.544281,306.358255,19.638629,41950.841121,14.654206,19,302,0,0,0,101,0,201,0,19,0,0,0.0,0.0,0.308411,0.607477,0.903427,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### 3. Concat

In [18]:
def concat_dfs(df_neighourhoods):
    df_result = pd.DataFrame()
    for i in range(0, len(df_neighourhoods)):
        df_i = df_neighourhoods[i]
        df_result = df_result.append(df_i)
        # print('Appending ' + str(i) + ', len = ' + str(len(df_i)) + ', df_result len = ' + str(len(df_result)))
    return df_result

df_result_day1 = concat_dfs(dfs_with_neighbourhoods_day1)
df_result_day2 = concat_dfs(dfs_with_neighbourhoods_day2)

In [19]:
df_result_day1

Unnamed: 0,originated_ip,uid,connection.uid,connection.conn_state,connection.duration,connection.orig_bytes,connection.orig_ip_bytes,connection.orig_p,connection.orig_pkts,connection.proto,connection.resp_bytes,connection.resp_ip_bytes,connection.resp_p,connection.resp_pkts,connection.service,connection.ts,responded_ip,dns_count,ssh_count,http_count,ssl_count,files_count,dns_qtype,dns_rcode,ssh_auth_attempts,ssh_host_key,http_method,http_status_code,http_user_agent,ssl_version,ssl_cipher,ssl_curve,ssl_validation_status,files_source,file_md5,dns_dicts,ssh_dicts,http_dicts,ssl_dicts,files_dicts,connection.time,orig_orig_total,orig_orig_proto_tcp_count,orig_orig_proto_udp_count,orig_orig_proto_icmp_count,orig_orig_connection.protocol_mode,orig_orig_connection.service_mode,orig_orig_connection.conn_state_mode,orig_orig_connection.time_mean,orig_orig_connection.duration_mean,orig_orig_connection.orig_bytes_mean,orig_orig_connection.orig_pkts_mean,orig_orig_connection.resp_bytes_mean,orig_orig_connection.resp_pkts_mean,orig_orig_orig_p_well_known_count,orig_orig_orig_p_reg_or_dyn_count,orig_orig_resp_p_21_count,orig_orig_resp_p_22_count,orig_orig_resp_p_53_count,orig_orig_resp_p_80_count,orig_orig_resp_p_123_count,orig_orig_resp_p_443_count,orig_orig_resp_p_3389_count,orig_orig_resp_p_well_known_count,orig_orig_resp_p_reg_count,orig_orig_resp_p_dyn_count,orig_orig_dns_count_mean,orig_orig_ssh_count_mean,orig_orig_http_count_mean,orig_orig_ssl_count_mean,orig_orig_files_count_mean,orig_orig_similar_conns_count,orig_orig_similar_dns_qtype_count,orig_orig_similar_dns_rcode_count,orig_orig_similar_ssh_auth_attempts_count,orig_orig_similar_ssh_host_key_count,orig_orig_similar_http_method_count,orig_orig_similar_http_status_code_count,orig_orig_similar_http_user_agent_count,orig_orig_similar_ssl_version_count,orig_orig_similar_ssl_cipher_count,orig_orig_similar_ssl_curve_count,orig_orig_similar_ssl_validation_status_count,orig_orig_similar_files_source_count,orig_orig_similar_file_md5_count,orig_resp_total,orig_resp_proto_tcp_count,orig_resp_proto_udp_count,orig_resp_proto_icmp_count,orig_resp_connection.protocol_mode,orig_resp_connection.service_mode,orig_resp_connection.conn_state_mode,orig_resp_connection.time_mean,orig_resp_connection.duration_mean,orig_resp_connection.orig_bytes_mean,orig_resp_connection.orig_pkts_mean,orig_resp_connection.resp_bytes_mean,orig_resp_connection.resp_pkts_mean,orig_resp_orig_p_well_known_count,orig_resp_orig_p_reg_or_dyn_count,orig_resp_resp_p_21_count,orig_resp_resp_p_22_count,orig_resp_resp_p_53_count,orig_resp_resp_p_80_count,orig_resp_resp_p_123_count,orig_resp_resp_p_443_count,orig_resp_resp_p_3389_count,orig_resp_resp_p_well_known_count,orig_resp_resp_p_reg_count,orig_resp_resp_p_dyn_count,orig_resp_dns_count_mean,orig_resp_ssh_count_mean,orig_resp_http_count_mean,orig_resp_ssl_count_mean,orig_resp_files_count_mean,orig_resp_similar_conns_count,orig_resp_similar_dns_qtype_count,orig_resp_similar_dns_rcode_count,orig_resp_similar_ssh_auth_attempts_count,orig_resp_similar_ssh_host_key_count,orig_resp_similar_http_method_count,orig_resp_similar_http_status_code_count,orig_resp_similar_http_user_agent_count,orig_resp_similar_ssl_version_count,orig_resp_similar_ssl_cipher_count,orig_resp_similar_ssl_curve_count,orig_resp_similar_ssl_validation_status_count,orig_resp_similar_files_source_count,orig_resp_similar_file_md5_count,resp_orig_total,resp_orig_proto_tcp_count,resp_orig_proto_udp_count,resp_orig_proto_icmp_count,resp_orig_connection.protocol_mode,resp_orig_connection.service_mode,resp_orig_connection.conn_state_mode,resp_orig_connection.time_mean,resp_orig_connection.duration_mean,resp_orig_connection.orig_bytes_mean,resp_orig_connection.orig_pkts_mean,resp_orig_connection.resp_bytes_mean,resp_orig_connection.resp_pkts_mean,resp_orig_orig_p_well_known_count,resp_orig_orig_p_reg_or_dyn_count,resp_orig_resp_p_21_count,resp_orig_resp_p_22_count,resp_orig_resp_p_53_count,resp_orig_resp_p_80_count,resp_orig_resp_p_123_count,resp_orig_resp_p_443_count,resp_orig_resp_p_3389_count,resp_orig_resp_p_well_known_count,resp_orig_resp_p_reg_count,resp_orig_resp_p_dyn_count,resp_orig_dns_count_mean,resp_orig_ssh_count_mean,resp_orig_http_count_mean,resp_orig_ssl_count_mean,resp_orig_files_count_mean,resp_orig_similar_conns_count,resp_orig_similar_dns_qtype_count,resp_orig_similar_dns_rcode_count,resp_orig_similar_ssh_auth_attempts_count,resp_orig_similar_ssh_host_key_count,resp_orig_similar_http_method_count,resp_orig_similar_http_status_code_count,resp_orig_similar_http_user_agent_count,resp_orig_similar_ssl_version_count,resp_orig_similar_ssl_cipher_count,resp_orig_similar_ssl_curve_count,resp_orig_similar_ssl_validation_status_count,resp_orig_similar_files_source_count,resp_orig_similar_file_md5_count,resp_resp_total,resp_resp_proto_tcp_count,resp_resp_proto_udp_count,resp_resp_proto_icmp_count,resp_resp_connection.protocol_mode,resp_resp_connection.service_mode,resp_resp_connection.conn_state_mode,resp_resp_connection.time_mean,resp_resp_connection.duration_mean,resp_resp_connection.orig_bytes_mean,resp_resp_connection.orig_pkts_mean,resp_resp_connection.resp_bytes_mean,resp_resp_connection.resp_pkts_mean,resp_resp_orig_p_well_known_count,resp_resp_orig_p_reg_or_dyn_count,resp_resp_resp_p_21_count,resp_resp_resp_p_22_count,resp_resp_resp_p_53_count,resp_resp_resp_p_80_count,resp_resp_resp_p_123_count,resp_resp_resp_p_443_count,resp_resp_resp_p_3389_count,resp_resp_resp_p_well_known_count,resp_resp_resp_p_reg_count,resp_resp_resp_p_dyn_count,resp_resp_dns_count_mean,resp_resp_ssh_count_mean,resp_resp_http_count_mean,resp_resp_ssl_count_mean,resp_resp_files_count_mean,resp_resp_similar_conns_count,resp_resp_similar_dns_qtype_count,resp_resp_similar_dns_rcode_count,resp_resp_similar_ssh_auth_attempts_count,resp_resp_similar_ssh_host_key_count,resp_resp_similar_http_method_count,resp_resp_similar_http_status_code_count,resp_resp_similar_http_user_agent_count,resp_resp_similar_ssl_version_count,resp_resp_similar_ssl_cipher_count,resp_resp_similar_ssl_curve_count,resp_resp_similar_ssl_validation_status_count,resp_resp_similar_files_source_count,resp_resp_similar_file_md5_count
0,10.2.4.44,0x10e,C91B543mPc9uqyAmZg,SF,0.523709,603,1727,52240,21,tcp,32091,34507,443,20,ssl,2019-03-19T10:10:49.194278Z,4.122.55.2,0,0,0,1,1,[],[],[],[],[],[],[],['TLSv12'],['TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384'],[None],[None],['SSL'],['1ded0213d77efa85abc3593e21593bea'],[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[{'file.md5s': {'1ded0213d77efa85abc3593e21593...,2019-03-19 10:10:49.194278+00:00,26,12,14,0,udp,ntp,SF,2019-03-19 10:11:06.261824+00:00,0.170888,226.230769,8.730769,20550.961538,8.500000,11,15,0,0,3,4,11,8,0,0,0,0,0.115385,0.0,0.153846,0.307692,0.461538,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 10:10:49.194278+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,icmp,none,OTH,2019-03-19 10:08:07.080669952+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,313,291,0,22,tcp,ssl,RSTO,2019-03-19 10:11:09.731063552+00:00,0.384386,304.115016,19.648562,4.153481e+04,14.619808,22,291,0,0,0,98,0,193,0,22,0,0,0.0,0.0,0.309904,0.603834,0.891374,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,10.2.4.44,0x37a,CJDeQn1RA4RpXDJi76,SF,0.010425,116,384,51558,5,tcp,531,851,80,6,http,2019-03-19T10:58:49.150898Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-19 10:58:49.150898+00:00,29,12,17,0,udp,ntp,SF,2019-03-19 10:59:05.964887552+00:00,0.148223,206.137931,8.000000,18428.310345,7.310345,14,15,0,0,3,4,14,8,0,0,0,0,0.103448,0.0,0.137931,0.275862,0.413793,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 10:58:49.150898+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,icmp,none,OTH,2019-03-19 10:55:57.169973504+00:00,0.0,0.0,1.0,0.0,0.0,2,0,0,0,0,0,0,0,0,2,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,304,284,0,20,tcp,ssl,RSTO,2019-03-19 10:59:13.658917632+00:00,0.390309,308.203947,20.230263,4.172594e+04,14.730263,20,284,0,0,0,94,0,190,0,20,0,0,0.0,0.0,0.305921,0.598684,0.875000,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,10.2.4.44,0x3c0,CPm0gG6Cmt4bLshDh,SF,0.012546,116,384,51822,5,tcp,531,799,80,5,http,2019-03-19T11:23:34.113264Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-19 11:23:34.113264+00:00,26,12,14,0,udp,ntp,SF,2019-03-19 11:23:02.978089216+00:00,0.342311,226.269231,9.115385,20551.000000,8.384615,11,15,0,0,3,4,11,8,0,0,0,0,0.115385,0.0,0.153846,0.307692,0.461538,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 11:23:34.113264+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,icmp,none,OTH,2019-03-19 11:21:07.077130240+00:00,0.0,0.0,1.0,0.0,0.0,2,0,0,0,0,0,0,0,0,2,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,343,324,0,19,tcp,ssl,RSTO,2019-03-19 11:23:54.756743424+00:00,0.401875,316.174927,21.717201,4.206071e+04,15.562682,19,324,0,0,0,108,0,216,0,19,0,0,0.0,0.0,0.311953,0.606414,0.883382,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,10.2.4.44,0x7db,CFPUgMgxMkSXwpF0l,S1,0.014672,114,277,52580,3,tcp,523,164,80,3,http,2019-03-19T12:35:34.077791Z,4.122.55.2,0,0,1,0,0,[],[],[],[],['GET'],[0],[None],[],[],[],[],[],[],[],[],"[{'http.method': 'GET', 'http.status_code': 0,...",[],[],2019-03-19 12:35:34.077791+00:00,25,12,13,0,udp,ntp,SF,2019-03-19 12:35:38.427494912+00:00,0.174436,231.520000,8.960000,21369.200000,8.360000,10,15,0,0,3,4,10,8,0,0,0,0,0.120000,0.0,0.160000,0.320000,0.400000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 12:35:34.077791+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,icmp,none,OTH,2019-03-19 12:32:47.466493952+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,358,338,0,20,tcp,ssl,RSTO,2019-03-19 12:35:51.960755456+00:00,0.421736,314.600559,20.539106,4.196000e+04,14.843575,20,338,0,0,0,112,0,226,0,20,0,0,0.0,0.0,0.307263,0.628492,0.907821,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,10.2.4.44,0x80c,C93t9a4VGVuS6lYHD8,SF,0.009000,116,384,52498,5,tcp,531,799,80,5,http,2019-03-19T12:28:49.134506Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-19 12:28:49.134506+00:00,27,12,15,0,udp,ntp,SF,2019-03-19 12:29:06.160511232+00:00,0.161780,219.629630,8.444444,19791.592593,8.074074,12,15,0,0,3,4,12,8,0,0,0,0,0.111111,0.0,0.148148,0.296296,0.444444,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 12:28:49.134506+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,3,icmp,none,OTH,2019-03-19 12:28:30.531671040+00:00,0.0,0.0,1.0,0.0,0.0,3,0,0,0,0,0,0,0,0,3,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,321,302,0,19,tcp,ssl,RSTO,2019-03-19 12:29:11.714937088+00:00,0.544281,306.358255,19.638629,4.195084e+04,14.654206,19,302,0,0,0,101,0,201,0,19,0,0,0.0,0.0,0.308411,0.607477,0.903427,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,10.0.3.32,0x2defc5,C8ham51Y624KjmmZx5,OTH,8.117698,0,464,60085,11,tcp,0,0,80,0,none,2019-03-19T13:07:34.615334Z,195.113.232.72,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-19 13:07:34.615334+00:00,29,29,0,0,tcp,none,OTH,2019-03-19 13:07:35.439048192+00:00,6.777367,0.000000,7.724138,0.000000,0.000000,0,29,0,0,0,29,0,0,0,0,0,0,0.000000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 13:07:34.615334+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 13:07:34.615334+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,59,59,0,0,tcp,none,OTH,2019-03-19 13:07:32.377989632+00:00,122.736941,390.118644,2931.728814,1.507623e+07,2746.050847,0,59,0,0,0,59,0,0,0,0,0,0,0.0,0.0,0.322034,0.000000,0.152542,0,0,0,0,0,0,0,0,0,0,0,0,0,0
87,10.0.3.32,0x2defc6,CKoctd4N3bUt0JM2ib,OTH,8.783433,0,640,60083,16,tcp,0,0,80,0,none,2019-03-19T13:07:33.96789Z,195.113.232.72,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-19 13:07:33.967890+00:00,29,29,0,0,tcp,none,OTH,2019-03-19 13:07:35.439048192+00:00,6.777367,0.000000,7.724138,0.000000,0.000000,0,29,0,0,0,29,0,0,0,0,0,0,0.000000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 13:07:33.967890+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 13:07:33.967890+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,59,59,0,0,tcp,none,OTH,2019-03-19 13:07:32.377989632+00:00,122.736941,390.118644,2931.728814,1.507623e+07,2746.050847,0,59,0,0,0,59,0,0,0,0,0,0,0.0,0.0,0.322034,0.000000,0.152542,0,0,0,0,0,0,0,0,0,0,0,0,0,0
88,10.0.3.32,0x2f77ac,C8PDAx46OeMkciygY,OTH,0.000079,0,80,59900,2,tcp,0,0,443,0,none,2019-03-19T14:01:27.02194Z,184.51.10.54,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-19 14:01:27.021940+00:00,34,34,0,0,tcp,none,OTH,2019-03-19 14:02:22.980669696+00:00,0.644135,0.000000,2.088235,0.000000,0.000000,0,34,0,0,0,4,0,30,0,0,0,0,0.000000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 14:01:27.021940+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 14:01:27.021940+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,20,0,0,tcp,ssl,OTH,2019-03-19 14:01:14.161938176+00:00,42.017002,1554.750000,39.400000,6.659095e+04,22.800000,0,20,0,0,0,0,0,20,0,0,0,0,0.0,0.0,0.000000,0.700000,1.200000,0,0,0,0,0,0,0,0,0,0,0,0,0,0
89,10.0.3.32,0x2f77e1,CQlDb93QcSqSY69DE3,OTH,0.000004,0,80,49387,2,tcp,0,0,443,0,none,2019-03-19T14:13:30.598744Z,2.18.70.48,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-19 14:13:30.598744+00:00,23,22,1,0,tcp,none,OTH,2019-03-19 14:13:44.238760192+00:00,0.729351,9.956522,2.086957,19.913043,0.086957,1,22,0,0,0,3,0,19,0,1,0,0,0.043478,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 14:13:30.598744+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-19 14:13:30.598744+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,11,0,0,tcp,ssl,SF,2019-03-19 14:12:10.548428544+00:00,127.215720,3649.909091,172.363636,4.513898e+05,104.636364,0,11,0,0,0,1,0,10,0,0,0,0,0.0,0.0,0.090909,0.818182,2.727273,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [20]:
df_result_day2

Unnamed: 0,originated_ip,uid,connection.uid,connection.conn_state,connection.duration,connection.orig_bytes,connection.orig_ip_bytes,connection.orig_p,connection.orig_pkts,connection.proto,connection.resp_bytes,connection.resp_ip_bytes,connection.resp_p,connection.resp_pkts,connection.service,connection.ts,responded_ip,dns_count,ssh_count,http_count,ssl_count,files_count,dns_qtype,dns_rcode,ssh_auth_attempts,ssh_host_key,http_method,http_status_code,http_user_agent,ssl_version,ssl_cipher,ssl_curve,ssl_validation_status,files_source,file_md5,dns_dicts,ssh_dicts,http_dicts,ssl_dicts,files_dicts,connection.time,orig_orig_total,orig_orig_proto_tcp_count,orig_orig_proto_udp_count,orig_orig_proto_icmp_count,orig_orig_connection.protocol_mode,orig_orig_connection.service_mode,orig_orig_connection.conn_state_mode,orig_orig_connection.time_mean,orig_orig_connection.duration_mean,orig_orig_connection.orig_bytes_mean,orig_orig_connection.orig_pkts_mean,orig_orig_connection.resp_bytes_mean,orig_orig_connection.resp_pkts_mean,orig_orig_orig_p_well_known_count,orig_orig_orig_p_reg_or_dyn_count,orig_orig_resp_p_21_count,orig_orig_resp_p_22_count,orig_orig_resp_p_53_count,orig_orig_resp_p_80_count,orig_orig_resp_p_123_count,orig_orig_resp_p_443_count,orig_orig_resp_p_3389_count,orig_orig_resp_p_well_known_count,orig_orig_resp_p_reg_count,orig_orig_resp_p_dyn_count,orig_orig_dns_count_mean,orig_orig_ssh_count_mean,orig_orig_http_count_mean,orig_orig_ssl_count_mean,orig_orig_files_count_mean,orig_orig_similar_conns_count,orig_orig_similar_dns_qtype_count,orig_orig_similar_dns_rcode_count,orig_orig_similar_ssh_auth_attempts_count,orig_orig_similar_ssh_host_key_count,orig_orig_similar_http_method_count,orig_orig_similar_http_status_code_count,orig_orig_similar_http_user_agent_count,orig_orig_similar_ssl_version_count,orig_orig_similar_ssl_cipher_count,orig_orig_similar_ssl_curve_count,orig_orig_similar_ssl_validation_status_count,orig_orig_similar_files_source_count,orig_orig_similar_file_md5_count,orig_resp_total,orig_resp_proto_tcp_count,orig_resp_proto_udp_count,orig_resp_proto_icmp_count,orig_resp_connection.protocol_mode,orig_resp_connection.service_mode,orig_resp_connection.conn_state_mode,orig_resp_connection.time_mean,orig_resp_connection.duration_mean,orig_resp_connection.orig_bytes_mean,orig_resp_connection.orig_pkts_mean,orig_resp_connection.resp_bytes_mean,orig_resp_connection.resp_pkts_mean,orig_resp_orig_p_well_known_count,orig_resp_orig_p_reg_or_dyn_count,orig_resp_resp_p_21_count,orig_resp_resp_p_22_count,orig_resp_resp_p_53_count,orig_resp_resp_p_80_count,orig_resp_resp_p_123_count,orig_resp_resp_p_443_count,orig_resp_resp_p_3389_count,orig_resp_resp_p_well_known_count,orig_resp_resp_p_reg_count,orig_resp_resp_p_dyn_count,orig_resp_dns_count_mean,orig_resp_ssh_count_mean,orig_resp_http_count_mean,orig_resp_ssl_count_mean,orig_resp_files_count_mean,orig_resp_similar_conns_count,orig_resp_similar_dns_qtype_count,orig_resp_similar_dns_rcode_count,orig_resp_similar_ssh_auth_attempts_count,orig_resp_similar_ssh_host_key_count,orig_resp_similar_http_method_count,orig_resp_similar_http_status_code_count,orig_resp_similar_http_user_agent_count,orig_resp_similar_ssl_version_count,orig_resp_similar_ssl_cipher_count,orig_resp_similar_ssl_curve_count,orig_resp_similar_ssl_validation_status_count,orig_resp_similar_files_source_count,orig_resp_similar_file_md5_count,resp_orig_total,resp_orig_proto_tcp_count,resp_orig_proto_udp_count,resp_orig_proto_icmp_count,resp_orig_connection.protocol_mode,resp_orig_connection.service_mode,resp_orig_connection.conn_state_mode,resp_orig_connection.time_mean,resp_orig_connection.duration_mean,resp_orig_connection.orig_bytes_mean,resp_orig_connection.orig_pkts_mean,resp_orig_connection.resp_bytes_mean,resp_orig_connection.resp_pkts_mean,resp_orig_orig_p_well_known_count,resp_orig_orig_p_reg_or_dyn_count,resp_orig_resp_p_21_count,resp_orig_resp_p_22_count,resp_orig_resp_p_53_count,resp_orig_resp_p_80_count,resp_orig_resp_p_123_count,resp_orig_resp_p_443_count,resp_orig_resp_p_3389_count,resp_orig_resp_p_well_known_count,resp_orig_resp_p_reg_count,resp_orig_resp_p_dyn_count,resp_orig_dns_count_mean,resp_orig_ssh_count_mean,resp_orig_http_count_mean,resp_orig_ssl_count_mean,resp_orig_files_count_mean,resp_orig_similar_conns_count,resp_orig_similar_dns_qtype_count,resp_orig_similar_dns_rcode_count,resp_orig_similar_ssh_auth_attempts_count,resp_orig_similar_ssh_host_key_count,resp_orig_similar_http_method_count,resp_orig_similar_http_status_code_count,resp_orig_similar_http_user_agent_count,resp_orig_similar_ssl_version_count,resp_orig_similar_ssl_cipher_count,resp_orig_similar_ssl_curve_count,resp_orig_similar_ssl_validation_status_count,resp_orig_similar_files_source_count,resp_orig_similar_file_md5_count,resp_resp_total,resp_resp_proto_tcp_count,resp_resp_proto_udp_count,resp_resp_proto_icmp_count,resp_resp_connection.protocol_mode,resp_resp_connection.service_mode,resp_resp_connection.conn_state_mode,resp_resp_connection.time_mean,resp_resp_connection.duration_mean,resp_resp_connection.orig_bytes_mean,resp_resp_connection.orig_pkts_mean,resp_resp_connection.resp_bytes_mean,resp_resp_connection.resp_pkts_mean,resp_resp_orig_p_well_known_count,resp_resp_orig_p_reg_or_dyn_count,resp_resp_resp_p_21_count,resp_resp_resp_p_22_count,resp_resp_resp_p_53_count,resp_resp_resp_p_80_count,resp_resp_resp_p_123_count,resp_resp_resp_p_443_count,resp_resp_resp_p_3389_count,resp_resp_resp_p_well_known_count,resp_resp_resp_p_reg_count,resp_resp_resp_p_dyn_count,resp_resp_dns_count_mean,resp_resp_ssh_count_mean,resp_resp_http_count_mean,resp_resp_ssl_count_mean,resp_resp_files_count_mean,resp_resp_similar_conns_count,resp_resp_similar_dns_qtype_count,resp_resp_similar_dns_rcode_count,resp_resp_similar_ssh_auth_attempts_count,resp_resp_similar_ssh_host_key_count,resp_resp_similar_http_method_count,resp_resp_similar_http_status_code_count,resp_resp_similar_http_user_agent_count,resp_resp_similar_ssl_version_count,resp_resp_similar_ssl_cipher_count,resp_resp_similar_ssl_curve_count,resp_resp_similar_ssl_validation_status_count,resp_resp_similar_files_source_count,resp_resp_similar_file_md5_count
0,10.2.4.44,0x3f7,C5C1Tx3XxXleua0GB1,SF,0.511765,603,1495,37392,17,tcp,32091,24310,443,15,ssl,2019-03-20T07:17:34.097588Z,4.122.55.2,0,0,0,1,1,[],[],[],[],[],[],[],['TLSv12'],['TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384'],[None],[None],['SSL'],['1ded0213d77efa85abc3593e21593bea'],[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[{'file.md5s': {'1ded0213d77efa85abc3593e21593...,2019-03-20 07:17:34.097588+00:00,26,12,14,0,udp,ntp,SF,2019-03-20 07:17:35.424322304+00:00,0.165681,224.384615,8.653846,20549.115385,7.961538,11,15,0,0,3,4,11,8,0,0,0,0,0.115385,0.0,0.153846,0.307692,0.461538,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:17:34.097588+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:17:34.097588+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,312,290,0,22,tcp,ssl,RSTO,2019-03-20 07:17:53.358038784+00:00,0.351425,316.423077,20.349359,41449.080128,15.057692,22,290,0,0,0,97,0,193,0,22,0,0,0.0,0.0,0.304487,0.592949,0.878205,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,10.2.4.44,0x441,CUd78C1JRdV928bl1l,S2,0.013029,116,384,36438,5,tcp,532,164,80,3,http,2019-03-20T07:41:20.223739Z,4.122.55.2,0,0,1,0,0,[],[],[],[],['GET'],[0],[None],[],[],[],[],[],[],[],[],"[{'http.method': 'GET', 'http.status_code': 0,...",[],[],2019-03-20 07:41:20.223739+00:00,22,12,10,0,tcp,ntp,SF,2019-03-20 07:41:17.899228160+00:00,0.188653,257.409091,10.000000,24275.318182,8.727273,8,14,0,0,2,4,8,8,0,0,0,0,0.090909,0.0,0.136364,0.363636,0.409091,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 07:43:00.579407872+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:41:20.223739+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,281,262,0,19,tcp,ssl,RSTO,2019-03-20 07:41:44.674563328+00:00,0.692335,390.455516,20.270463,61645.174377,16.064057,19,262,0,0,0,85,0,177,0,19,0,0,0.0,0.0,0.288256,0.622776,0.825623,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,10.2.4.44,0x652,CIbz4V2PJ98UIA7aR6,S2,0.510075,600,2408,37568,33,tcp,100775,65262,443,24,ssl,2019-03-20T07:34:49.202535Z,4.122.55.2,0,0,0,1,0,[],[],[],[],[],[],[],[''],[''],[None],[None],[],[],[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[],2019-03-20 07:34:49.202535+00:00,24,12,12,0,tcp,ntp,SF,2019-03-20 07:34:53.149330688+00:00,0.176395,242.000000,8.625000,22281.833333,8.416667,10,14,0,0,2,4,10,8,0,0,0,0,0.083333,0.0,0.166667,0.333333,0.333333,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:34:49.202535+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:34:49.202535+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,293,273,0,20,tcp,ssl,RSTO,2019-03-20 07:35:07.234555136+00:00,0.389865,311.798635,19.665529,40885.914676,14.699659,20,273,0,0,0,91,0,182,0,20,0,0,0.0,0.0,0.300341,0.607509,0.849829,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,10.2.4.44,0x15a8,Cr2XXL30vP6lBN3WBg,SF,0.026865,116,384,37872,5,tcp,531,799,80,5,http,2019-03-20T10:53:34.142505Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-20 10:53:34.142505+00:00,20,6,14,0,udp,ntp,SF,2019-03-20 10:53:23.689937408+00:00,0.056519,160.400000,4.250000,6434.650000,4.400000,11,9,0,0,3,2,11,4,0,0,0,0,0.150000,0.0,0.100000,0.200000,0.300000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:53:34.142505+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:53:34.142505+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,158,141,0,17,tcp,ssl,RSTO,2019-03-20 10:53:44.476082176+00:00,0.358566,302.537975,21.639241,39214.689873,14.537975,17,141,0,0,0,47,0,94,0,17,0,0,0.0,0.0,0.297468,0.575949,0.841772,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,10.2.4.44,0x198a,CqpQanEJOw21gTPw1,SF,0.013462,113,381,38900,5,tcp,522,790,80,5,http,2019-03-20T12:47:34.128794Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b700ab2377cc6e65f31018d76f477ccd'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b700ab2377cc6e65f31018d76f477...,2019-03-20 12:47:34.128794+00:00,33,18,15,0,tcp,ntp,SF,2019-03-20 12:47:40.670394624+00:00,0.205852,259.575758,8.666667,24144.909091,8.636364,12,21,0,0,3,6,12,12,0,0,0,0,0.090909,0.0,0.181818,0.333333,0.515152,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:47:34.128794+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:47:34.128794+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,134,122,0,12,tcp,ssl,RSTO,2019-03-20 12:47:22.392037376+00:00,0.386866,300.813433,19.373134,40447.895522,14.335821,12,122,0,0,0,41,0,81,0,12,0,0,0.0,0.0,0.305970,0.582090,0.850746,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,10.1.4.48,0x2f91e1,CNdEBk2wkWMyECmMhg,SF,15.786600,48,76,123,1,udp,96,152,123,2,ntp,2019-03-20T09:13:42.804225Z,81.0.208.219,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 09:13:42.804225+00:00,8,0,8,0,udp,dns,SF,2019-03-20 09:13:46.242949888+00:00,1.977325,38.000000,1.000000,68.500000,1.125000,4,4,0,0,4,0,4,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:13:42.804225+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:13:42.804225+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,0,udp,ntp,SF,2019-03-20 09:14:11.593086720+00:00,5.267240,48.000000,1.000000,64.000000,1.333333,3,0,0,0,0,0,3,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0
266,10.1.4.48,0x2f9237,C1vvK01SBXeDpOVbK2,SF,0.003983,48,76,123,1,udp,48,76,123,1,ntp,2019-03-20T09:48:42.804561Z,147.251.48.140,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 09:48:42.804561+00:00,6,0,6,0,udp,dns,SF,2019-03-20 09:48:27.652052992+00:00,0.004423,33.333333,0.833333,49.666667,1.000000,3,3,0,0,3,0,3,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 09:49:23.816986112+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:48:42.804561+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,udp,ntp,SF,2019-03-20 09:49:07.495205376+00:00,0.003241,48.000000,1.000000,48.000000,1.000000,2,0,0,0,0,0,2,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0
267,10.1.4.48,0x2f928f,Clekl93UX9CjnaZy43,SF,0.004308,48,76,123,1,udp,48,76,123,1,ntp,2019-03-20T10:35:44.805286Z,195.113.144.201,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 10:35:44.805286+00:00,5,0,5,0,udp,dns,SF,2019-03-20 10:37:06.618049792+00:00,0.003887,36.000000,1.000000,65.400000,1.000000,2,3,0,0,3,0,2,0,0,0,0,0,0.600000,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:35:44.805286+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 10:32:18.800468992+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,udp,ntp,SF,2019-03-20 10:36:07.447244800+00:00,0.005925,48.000000,1.000000,48.000000,1.000000,2,0,0,0,0,0,2,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0
268,10.1.4.48,0x2f9644,CeZpVHXDH6ULU5U9a,S0,0.000000,0,76,123,1,udp,0,0,123,0,ntp,2019-03-20T12:25:51.805222Z,147.251.48.140,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 12:25:51.805222+00:00,8,0,8,0,udp,dns,SF,2019-03-20 12:26:26.867720192+00:00,0.101388,38.000000,1.125000,62.500000,1.000000,4,4,0,0,4,0,4,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:25:51.805222+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 12:22:20.868156928+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,10,0,udp,ntp,SF,2019-03-20 12:26:04.410238208+00:00,0.003456,43.200000,1.000000,43.200000,0.900000,10,0,0,0,0,0,10,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [22]:
df_result_day2

Unnamed: 0,originated_ip,uid,connection.uid,connection.conn_state,connection.duration,connection.orig_bytes,connection.orig_ip_bytes,connection.orig_p,connection.orig_pkts,connection.proto,connection.resp_bytes,connection.resp_ip_bytes,connection.resp_p,connection.resp_pkts,connection.service,connection.ts,responded_ip,dns_count,ssh_count,http_count,ssl_count,files_count,dns_qtype,dns_rcode,ssh_auth_attempts,ssh_host_key,http_method,http_status_code,http_user_agent,ssl_version,ssl_cipher,ssl_curve,ssl_validation_status,files_source,file_md5,dns_dicts,ssh_dicts,http_dicts,ssl_dicts,files_dicts,connection.time,orig_orig_total,orig_orig_proto_tcp_count,orig_orig_proto_udp_count,orig_orig_proto_icmp_count,orig_orig_connection.protocol_mode,orig_orig_connection.service_mode,orig_orig_connection.conn_state_mode,orig_orig_connection.time_mean,orig_orig_connection.duration_mean,orig_orig_connection.orig_bytes_mean,orig_orig_connection.orig_pkts_mean,orig_orig_connection.resp_bytes_mean,orig_orig_connection.resp_pkts_mean,orig_orig_orig_p_well_known_count,orig_orig_orig_p_reg_or_dyn_count,orig_orig_resp_p_21_count,orig_orig_resp_p_22_count,orig_orig_resp_p_53_count,orig_orig_resp_p_80_count,orig_orig_resp_p_123_count,orig_orig_resp_p_443_count,orig_orig_resp_p_3389_count,orig_orig_resp_p_well_known_count,orig_orig_resp_p_reg_count,orig_orig_resp_p_dyn_count,orig_orig_dns_count_mean,orig_orig_ssh_count_mean,orig_orig_http_count_mean,orig_orig_ssl_count_mean,orig_orig_files_count_mean,orig_orig_similar_conns_count,orig_orig_similar_dns_qtype_count,orig_orig_similar_dns_rcode_count,orig_orig_similar_ssh_auth_attempts_count,orig_orig_similar_ssh_host_key_count,orig_orig_similar_http_method_count,orig_orig_similar_http_status_code_count,orig_orig_similar_http_user_agent_count,orig_orig_similar_ssl_version_count,orig_orig_similar_ssl_cipher_count,orig_orig_similar_ssl_curve_count,orig_orig_similar_ssl_validation_status_count,orig_orig_similar_files_source_count,orig_orig_similar_file_md5_count,orig_resp_total,orig_resp_proto_tcp_count,orig_resp_proto_udp_count,orig_resp_proto_icmp_count,orig_resp_connection.protocol_mode,orig_resp_connection.service_mode,orig_resp_connection.conn_state_mode,orig_resp_connection.time_mean,orig_resp_connection.duration_mean,orig_resp_connection.orig_bytes_mean,orig_resp_connection.orig_pkts_mean,orig_resp_connection.resp_bytes_mean,orig_resp_connection.resp_pkts_mean,orig_resp_orig_p_well_known_count,orig_resp_orig_p_reg_or_dyn_count,orig_resp_resp_p_21_count,orig_resp_resp_p_22_count,orig_resp_resp_p_53_count,orig_resp_resp_p_80_count,orig_resp_resp_p_123_count,orig_resp_resp_p_443_count,orig_resp_resp_p_3389_count,orig_resp_resp_p_well_known_count,orig_resp_resp_p_reg_count,orig_resp_resp_p_dyn_count,orig_resp_dns_count_mean,orig_resp_ssh_count_mean,orig_resp_http_count_mean,orig_resp_ssl_count_mean,orig_resp_files_count_mean,orig_resp_similar_conns_count,orig_resp_similar_dns_qtype_count,orig_resp_similar_dns_rcode_count,orig_resp_similar_ssh_auth_attempts_count,orig_resp_similar_ssh_host_key_count,orig_resp_similar_http_method_count,orig_resp_similar_http_status_code_count,orig_resp_similar_http_user_agent_count,orig_resp_similar_ssl_version_count,orig_resp_similar_ssl_cipher_count,orig_resp_similar_ssl_curve_count,orig_resp_similar_ssl_validation_status_count,orig_resp_similar_files_source_count,orig_resp_similar_file_md5_count,resp_orig_total,resp_orig_proto_tcp_count,resp_orig_proto_udp_count,resp_orig_proto_icmp_count,resp_orig_connection.protocol_mode,resp_orig_connection.service_mode,resp_orig_connection.conn_state_mode,resp_orig_connection.time_mean,resp_orig_connection.duration_mean,resp_orig_connection.orig_bytes_mean,resp_orig_connection.orig_pkts_mean,resp_orig_connection.resp_bytes_mean,resp_orig_connection.resp_pkts_mean,resp_orig_orig_p_well_known_count,resp_orig_orig_p_reg_or_dyn_count,resp_orig_resp_p_21_count,resp_orig_resp_p_22_count,resp_orig_resp_p_53_count,resp_orig_resp_p_80_count,resp_orig_resp_p_123_count,resp_orig_resp_p_443_count,resp_orig_resp_p_3389_count,resp_orig_resp_p_well_known_count,resp_orig_resp_p_reg_count,resp_orig_resp_p_dyn_count,resp_orig_dns_count_mean,resp_orig_ssh_count_mean,resp_orig_http_count_mean,resp_orig_ssl_count_mean,resp_orig_files_count_mean,resp_orig_similar_conns_count,resp_orig_similar_dns_qtype_count,resp_orig_similar_dns_rcode_count,resp_orig_similar_ssh_auth_attempts_count,resp_orig_similar_ssh_host_key_count,resp_orig_similar_http_method_count,resp_orig_similar_http_status_code_count,resp_orig_similar_http_user_agent_count,resp_orig_similar_ssl_version_count,resp_orig_similar_ssl_cipher_count,resp_orig_similar_ssl_curve_count,resp_orig_similar_ssl_validation_status_count,resp_orig_similar_files_source_count,resp_orig_similar_file_md5_count,resp_resp_total,resp_resp_proto_tcp_count,resp_resp_proto_udp_count,resp_resp_proto_icmp_count,resp_resp_connection.protocol_mode,resp_resp_connection.service_mode,resp_resp_connection.conn_state_mode,resp_resp_connection.time_mean,resp_resp_connection.duration_mean,resp_resp_connection.orig_bytes_mean,resp_resp_connection.orig_pkts_mean,resp_resp_connection.resp_bytes_mean,resp_resp_connection.resp_pkts_mean,resp_resp_orig_p_well_known_count,resp_resp_orig_p_reg_or_dyn_count,resp_resp_resp_p_21_count,resp_resp_resp_p_22_count,resp_resp_resp_p_53_count,resp_resp_resp_p_80_count,resp_resp_resp_p_123_count,resp_resp_resp_p_443_count,resp_resp_resp_p_3389_count,resp_resp_resp_p_well_known_count,resp_resp_resp_p_reg_count,resp_resp_resp_p_dyn_count,resp_resp_dns_count_mean,resp_resp_ssh_count_mean,resp_resp_http_count_mean,resp_resp_ssl_count_mean,resp_resp_files_count_mean,resp_resp_similar_conns_count,resp_resp_similar_dns_qtype_count,resp_resp_similar_dns_rcode_count,resp_resp_similar_ssh_auth_attempts_count,resp_resp_similar_ssh_host_key_count,resp_resp_similar_http_method_count,resp_resp_similar_http_status_code_count,resp_resp_similar_http_user_agent_count,resp_resp_similar_ssl_version_count,resp_resp_similar_ssl_cipher_count,resp_resp_similar_ssl_curve_count,resp_resp_similar_ssl_validation_status_count,resp_resp_similar_files_source_count,resp_resp_similar_file_md5_count
0,10.2.4.44,0x3f7,C5C1Tx3XxXleua0GB1,SF,0.511765,603,1495,37392,17,tcp,32091,24310,443,15,ssl,2019-03-20T07:17:34.097588Z,4.122.55.2,0,0,0,1,1,[],[],[],[],[],[],[],['TLSv12'],['TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384'],[None],[None],['SSL'],['1ded0213d77efa85abc3593e21593bea'],[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[{'file.md5s': {'1ded0213d77efa85abc3593e21593...,2019-03-20 07:17:34.097588+00:00,26,12,14,0,udp,ntp,SF,2019-03-20 07:17:35.424322304+00:00,0.165681,224.384615,8.653846,20549.115385,7.961538,11,15,0,0,3,4,11,8,0,0,0,0,0.115385,0.0,0.153846,0.307692,0.461538,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:17:34.097588+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:17:34.097588+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,312,290,0,22,tcp,ssl,RSTO,2019-03-20 07:17:53.358038784+00:00,0.351425,316.423077,20.349359,41449.080128,15.057692,22,290,0,0,0,97,0,193,0,22,0,0,0.0,0.0,0.304487,0.592949,0.878205,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,10.2.4.44,0x441,CUd78C1JRdV928bl1l,S2,0.013029,116,384,36438,5,tcp,532,164,80,3,http,2019-03-20T07:41:20.223739Z,4.122.55.2,0,0,1,0,0,[],[],[],[],['GET'],[0],[None],[],[],[],[],[],[],[],[],"[{'http.method': 'GET', 'http.status_code': 0,...",[],[],2019-03-20 07:41:20.223739+00:00,22,12,10,0,tcp,ntp,SF,2019-03-20 07:41:17.899228160+00:00,0.188653,257.409091,10.000000,24275.318182,8.727273,8,14,0,0,2,4,8,8,0,0,0,0,0.090909,0.0,0.136364,0.363636,0.409091,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 07:43:00.579407872+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:41:20.223739+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,281,262,0,19,tcp,ssl,RSTO,2019-03-20 07:41:44.674563328+00:00,0.692335,390.455516,20.270463,61645.174377,16.064057,19,262,0,0,0,85,0,177,0,19,0,0,0.0,0.0,0.288256,0.622776,0.825623,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,10.2.4.44,0x652,CIbz4V2PJ98UIA7aR6,S2,0.510075,600,2408,37568,33,tcp,100775,65262,443,24,ssl,2019-03-20T07:34:49.202535Z,4.122.55.2,0,0,0,1,0,[],[],[],[],[],[],[],[''],[''],[None],[None],[],[],[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[],2019-03-20 07:34:49.202535+00:00,24,12,12,0,tcp,ntp,SF,2019-03-20 07:34:53.149330688+00:00,0.176395,242.000000,8.625000,22281.833333,8.416667,10,14,0,0,2,4,10,8,0,0,0,0,0.083333,0.0,0.166667,0.333333,0.333333,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:34:49.202535+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:34:49.202535+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,293,273,0,20,tcp,ssl,RSTO,2019-03-20 07:35:07.234555136+00:00,0.389865,311.798635,19.665529,40885.914676,14.699659,20,273,0,0,0,91,0,182,0,20,0,0,0.0,0.0,0.300341,0.607509,0.849829,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,10.2.4.44,0x15a8,Cr2XXL30vP6lBN3WBg,SF,0.026865,116,384,37872,5,tcp,531,799,80,5,http,2019-03-20T10:53:34.142505Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-20 10:53:34.142505+00:00,20,6,14,0,udp,ntp,SF,2019-03-20 10:53:23.689937408+00:00,0.056519,160.400000,4.250000,6434.650000,4.400000,11,9,0,0,3,2,11,4,0,0,0,0,0.150000,0.0,0.100000,0.200000,0.300000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:53:34.142505+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:53:34.142505+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,158,141,0,17,tcp,ssl,RSTO,2019-03-20 10:53:44.476082176+00:00,0.358566,302.537975,21.639241,39214.689873,14.537975,17,141,0,0,0,47,0,94,0,17,0,0,0.0,0.0,0.297468,0.575949,0.841772,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,10.2.4.44,0x198a,CqpQanEJOw21gTPw1,SF,0.013462,113,381,38900,5,tcp,522,790,80,5,http,2019-03-20T12:47:34.128794Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b700ab2377cc6e65f31018d76f477ccd'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b700ab2377cc6e65f31018d76f477...,2019-03-20 12:47:34.128794+00:00,33,18,15,0,tcp,ntp,SF,2019-03-20 12:47:40.670394624+00:00,0.205852,259.575758,8.666667,24144.909091,8.636364,12,21,0,0,3,6,12,12,0,0,0,0,0.090909,0.0,0.181818,0.333333,0.515152,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:47:34.128794+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:47:34.128794+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,134,122,0,12,tcp,ssl,RSTO,2019-03-20 12:47:22.392037376+00:00,0.386866,300.813433,19.373134,40447.895522,14.335821,12,122,0,0,0,41,0,81,0,12,0,0,0.0,0.0,0.305970,0.582090,0.850746,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,10.1.4.48,0x2f91e1,CNdEBk2wkWMyECmMhg,SF,15.786600,48,76,123,1,udp,96,152,123,2,ntp,2019-03-20T09:13:42.804225Z,81.0.208.219,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 09:13:42.804225+00:00,8,0,8,0,udp,dns,SF,2019-03-20 09:13:46.242949888+00:00,1.977325,38.000000,1.000000,68.500000,1.125000,4,4,0,0,4,0,4,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:13:42.804225+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:13:42.804225+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,0,udp,ntp,SF,2019-03-20 09:14:11.593086720+00:00,5.267240,48.000000,1.000000,64.000000,1.333333,3,0,0,0,0,0,3,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0
266,10.1.4.48,0x2f9237,C1vvK01SBXeDpOVbK2,SF,0.003983,48,76,123,1,udp,48,76,123,1,ntp,2019-03-20T09:48:42.804561Z,147.251.48.140,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 09:48:42.804561+00:00,6,0,6,0,udp,dns,SF,2019-03-20 09:48:27.652052992+00:00,0.004423,33.333333,0.833333,49.666667,1.000000,3,3,0,0,3,0,3,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 09:49:23.816986112+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:48:42.804561+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,udp,ntp,SF,2019-03-20 09:49:07.495205376+00:00,0.003241,48.000000,1.000000,48.000000,1.000000,2,0,0,0,0,0,2,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0
267,10.1.4.48,0x2f928f,Clekl93UX9CjnaZy43,SF,0.004308,48,76,123,1,udp,48,76,123,1,ntp,2019-03-20T10:35:44.805286Z,195.113.144.201,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 10:35:44.805286+00:00,5,0,5,0,udp,dns,SF,2019-03-20 10:37:06.618049792+00:00,0.003887,36.000000,1.000000,65.400000,1.000000,2,3,0,0,3,0,2,0,0,0,0,0,0.600000,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:35:44.805286+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 10:32:18.800468992+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,udp,ntp,SF,2019-03-20 10:36:07.447244800+00:00,0.005925,48.000000,1.000000,48.000000,1.000000,2,0,0,0,0,0,2,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0
268,10.1.4.48,0x2f9644,CeZpVHXDH6ULU5U9a,S0,0.000000,0,76,123,1,udp,0,0,123,0,ntp,2019-03-20T12:25:51.805222Z,147.251.48.140,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 12:25:51.805222+00:00,8,0,8,0,udp,dns,SF,2019-03-20 12:26:26.867720192+00:00,0.101388,38.000000,1.125000,62.500000,1.000000,4,4,0,0,4,0,4,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:25:51.805222+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 12:22:20.868156928+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,10,0,udp,ntp,SF,2019-03-20 12:26:04.410238208+00:00,0.003456,43.200000,1.000000,43.200000,0.900000,10,0,0,0,0,0,10,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Assign attacker labels

In [23]:
df_result_day1['attacker_label'] = 'No'
df_result_day2['attacker_label'] = 'No'

In [24]:
# Red Team CIDR ranges:
ATTACKER_IPS = ["4.122.55.0/24", 
                "1.9.0.0/16", "5.23.128.0/17", "5.172.192.0/20", "27.3.0.0/19", "27.111.240.0/20", "37.6.0.0/16", "37.32.0.0/19",
                "66.231.64.0/20", "77.51.0.0/16", "78.177.0.0/16", "80.79.0.0/20", "80.93.176.0/20", "81.17.0.0/20", "92.53.192.0/19", 
                "110.5.80.0/20", "111.66.0.0/16", "129.90.0.0/16", "130.255.32.0/19", "181.118.144.0/20", "188.40.0.0/16", "193.151.128.0/19", 
                "200.110.240.0/20", "202.2.96.0/19", "212.5.0.0/19", "212.96.96.0/19", "213.5.0.0/21", "217.25.208.0/20", "219.15.224.0/20"]

# from netaddr import IPNetwork, IPAddress
import ipaddress

ips_cache = {} # optimalizatiion

def is_attacker(ip_address):
  for attacker_ip in ATTACKER_IPS:
    #if IPAddress(ip_address) in IPNetwork(attacker_ip):
    if ip_address in ips_cache:
      return ips_cache[ip_address]
    try:
      if ipaddress.ip_address(ip_address) in ipaddress.ip_network(attacker_ip):
        #print("IP address " + ip_address + " is from Red team (" + attacker_ip + ").")
        ips_cache[ip_address] = True
        return True
    except:
      pass # IPv6
  ips_cache[ip_address] = False
  return False

In [25]:
# assign labels to input data ('No' not from/ to attacker, 'Yes' originated from/ responded to attacker):
df_result_day2.loc[df_result_day2['responded_ip'].apply(is_attacker),'attacker_label'] = 'Yes'
df_result_day2.loc[df_result_day2['originated_ip'].apply(is_attacker),'attacker_label'] = 'Yes'

In [26]:
df_result_day2

Unnamed: 0,originated_ip,uid,connection.uid,connection.conn_state,connection.duration,connection.orig_bytes,connection.orig_ip_bytes,connection.orig_p,connection.orig_pkts,connection.proto,connection.resp_bytes,connection.resp_ip_bytes,connection.resp_p,connection.resp_pkts,connection.service,connection.ts,responded_ip,dns_count,ssh_count,http_count,ssl_count,files_count,dns_qtype,dns_rcode,ssh_auth_attempts,ssh_host_key,http_method,http_status_code,http_user_agent,ssl_version,ssl_cipher,ssl_curve,ssl_validation_status,files_source,file_md5,dns_dicts,ssh_dicts,http_dicts,ssl_dicts,files_dicts,connection.time,orig_orig_total,orig_orig_proto_tcp_count,orig_orig_proto_udp_count,orig_orig_proto_icmp_count,orig_orig_connection.protocol_mode,orig_orig_connection.service_mode,orig_orig_connection.conn_state_mode,orig_orig_connection.time_mean,orig_orig_connection.duration_mean,orig_orig_connection.orig_bytes_mean,orig_orig_connection.orig_pkts_mean,orig_orig_connection.resp_bytes_mean,orig_orig_connection.resp_pkts_mean,orig_orig_orig_p_well_known_count,orig_orig_orig_p_reg_or_dyn_count,orig_orig_resp_p_21_count,orig_orig_resp_p_22_count,orig_orig_resp_p_53_count,orig_orig_resp_p_80_count,orig_orig_resp_p_123_count,orig_orig_resp_p_443_count,orig_orig_resp_p_3389_count,orig_orig_resp_p_well_known_count,orig_orig_resp_p_reg_count,orig_orig_resp_p_dyn_count,orig_orig_dns_count_mean,orig_orig_ssh_count_mean,orig_orig_http_count_mean,orig_orig_ssl_count_mean,orig_orig_files_count_mean,orig_orig_similar_conns_count,orig_orig_similar_dns_qtype_count,orig_orig_similar_dns_rcode_count,orig_orig_similar_ssh_auth_attempts_count,orig_orig_similar_ssh_host_key_count,orig_orig_similar_http_method_count,orig_orig_similar_http_status_code_count,orig_orig_similar_http_user_agent_count,orig_orig_similar_ssl_version_count,orig_orig_similar_ssl_cipher_count,orig_orig_similar_ssl_curve_count,orig_orig_similar_ssl_validation_status_count,orig_orig_similar_files_source_count,orig_orig_similar_file_md5_count,orig_resp_total,orig_resp_proto_tcp_count,orig_resp_proto_udp_count,orig_resp_proto_icmp_count,orig_resp_connection.protocol_mode,orig_resp_connection.service_mode,orig_resp_connection.conn_state_mode,orig_resp_connection.time_mean,orig_resp_connection.duration_mean,orig_resp_connection.orig_bytes_mean,orig_resp_connection.orig_pkts_mean,orig_resp_connection.resp_bytes_mean,orig_resp_connection.resp_pkts_mean,orig_resp_orig_p_well_known_count,orig_resp_orig_p_reg_or_dyn_count,orig_resp_resp_p_21_count,orig_resp_resp_p_22_count,orig_resp_resp_p_53_count,orig_resp_resp_p_80_count,orig_resp_resp_p_123_count,orig_resp_resp_p_443_count,orig_resp_resp_p_3389_count,orig_resp_resp_p_well_known_count,orig_resp_resp_p_reg_count,orig_resp_resp_p_dyn_count,orig_resp_dns_count_mean,orig_resp_ssh_count_mean,orig_resp_http_count_mean,orig_resp_ssl_count_mean,orig_resp_files_count_mean,orig_resp_similar_conns_count,orig_resp_similar_dns_qtype_count,orig_resp_similar_dns_rcode_count,orig_resp_similar_ssh_auth_attempts_count,orig_resp_similar_ssh_host_key_count,orig_resp_similar_http_method_count,orig_resp_similar_http_status_code_count,orig_resp_similar_http_user_agent_count,orig_resp_similar_ssl_version_count,orig_resp_similar_ssl_cipher_count,orig_resp_similar_ssl_curve_count,orig_resp_similar_ssl_validation_status_count,orig_resp_similar_files_source_count,orig_resp_similar_file_md5_count,resp_orig_total,resp_orig_proto_tcp_count,resp_orig_proto_udp_count,resp_orig_proto_icmp_count,resp_orig_connection.protocol_mode,resp_orig_connection.service_mode,resp_orig_connection.conn_state_mode,resp_orig_connection.time_mean,resp_orig_connection.duration_mean,resp_orig_connection.orig_bytes_mean,resp_orig_connection.orig_pkts_mean,resp_orig_connection.resp_bytes_mean,resp_orig_connection.resp_pkts_mean,resp_orig_orig_p_well_known_count,resp_orig_orig_p_reg_or_dyn_count,resp_orig_resp_p_21_count,resp_orig_resp_p_22_count,resp_orig_resp_p_53_count,resp_orig_resp_p_80_count,resp_orig_resp_p_123_count,resp_orig_resp_p_443_count,resp_orig_resp_p_3389_count,resp_orig_resp_p_well_known_count,resp_orig_resp_p_reg_count,resp_orig_resp_p_dyn_count,resp_orig_dns_count_mean,resp_orig_ssh_count_mean,resp_orig_http_count_mean,resp_orig_ssl_count_mean,resp_orig_files_count_mean,resp_orig_similar_conns_count,resp_orig_similar_dns_qtype_count,resp_orig_similar_dns_rcode_count,resp_orig_similar_ssh_auth_attempts_count,resp_orig_similar_ssh_host_key_count,resp_orig_similar_http_method_count,resp_orig_similar_http_status_code_count,resp_orig_similar_http_user_agent_count,resp_orig_similar_ssl_version_count,resp_orig_similar_ssl_cipher_count,resp_orig_similar_ssl_curve_count,resp_orig_similar_ssl_validation_status_count,resp_orig_similar_files_source_count,resp_orig_similar_file_md5_count,resp_resp_total,resp_resp_proto_tcp_count,resp_resp_proto_udp_count,resp_resp_proto_icmp_count,resp_resp_connection.protocol_mode,resp_resp_connection.service_mode,resp_resp_connection.conn_state_mode,resp_resp_connection.time_mean,resp_resp_connection.duration_mean,resp_resp_connection.orig_bytes_mean,resp_resp_connection.orig_pkts_mean,resp_resp_connection.resp_bytes_mean,resp_resp_connection.resp_pkts_mean,resp_resp_orig_p_well_known_count,resp_resp_orig_p_reg_or_dyn_count,resp_resp_resp_p_21_count,resp_resp_resp_p_22_count,resp_resp_resp_p_53_count,resp_resp_resp_p_80_count,resp_resp_resp_p_123_count,resp_resp_resp_p_443_count,resp_resp_resp_p_3389_count,resp_resp_resp_p_well_known_count,resp_resp_resp_p_reg_count,resp_resp_resp_p_dyn_count,resp_resp_dns_count_mean,resp_resp_ssh_count_mean,resp_resp_http_count_mean,resp_resp_ssl_count_mean,resp_resp_files_count_mean,resp_resp_similar_conns_count,resp_resp_similar_dns_qtype_count,resp_resp_similar_dns_rcode_count,resp_resp_similar_ssh_auth_attempts_count,resp_resp_similar_ssh_host_key_count,resp_resp_similar_http_method_count,resp_resp_similar_http_status_code_count,resp_resp_similar_http_user_agent_count,resp_resp_similar_ssl_version_count,resp_resp_similar_ssl_cipher_count,resp_resp_similar_ssl_curve_count,resp_resp_similar_ssl_validation_status_count,resp_resp_similar_files_source_count,resp_resp_similar_file_md5_count,attacker_label
0,10.2.4.44,0x3f7,C5C1Tx3XxXleua0GB1,SF,0.511765,603,1495,37392,17,tcp,32091,24310,443,15,ssl,2019-03-20T07:17:34.097588Z,4.122.55.2,0,0,0,1,1,[],[],[],[],[],[],[],['TLSv12'],['TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384'],[None],[None],['SSL'],['1ded0213d77efa85abc3593e21593bea'],[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[{'file.md5s': {'1ded0213d77efa85abc3593e21593...,2019-03-20 07:17:34.097588+00:00,26,12,14,0,udp,ntp,SF,2019-03-20 07:17:35.424322304+00:00,0.165681,224.384615,8.653846,20549.115385,7.961538,11,15,0,0,3,4,11,8,0,0,0,0,0.115385,0.0,0.153846,0.307692,0.461538,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:17:34.097588+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:17:34.097588+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,312,290,0,22,tcp,ssl,RSTO,2019-03-20 07:17:53.358038784+00:00,0.351425,316.423077,20.349359,41449.080128,15.057692,22,290,0,0,0,97,0,193,0,22,0,0,0.0,0.0,0.304487,0.592949,0.878205,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Yes
1,10.2.4.44,0x441,CUd78C1JRdV928bl1l,S2,0.013029,116,384,36438,5,tcp,532,164,80,3,http,2019-03-20T07:41:20.223739Z,4.122.55.2,0,0,1,0,0,[],[],[],[],['GET'],[0],[None],[],[],[],[],[],[],[],[],"[{'http.method': 'GET', 'http.status_code': 0,...",[],[],2019-03-20 07:41:20.223739+00:00,22,12,10,0,tcp,ntp,SF,2019-03-20 07:41:17.899228160+00:00,0.188653,257.409091,10.000000,24275.318182,8.727273,8,14,0,0,2,4,8,8,0,0,0,0,0.090909,0.0,0.136364,0.363636,0.409091,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 07:43:00.579407872+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:41:20.223739+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,281,262,0,19,tcp,ssl,RSTO,2019-03-20 07:41:44.674563328+00:00,0.692335,390.455516,20.270463,61645.174377,16.064057,19,262,0,0,0,85,0,177,0,19,0,0,0.0,0.0,0.288256,0.622776,0.825623,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Yes
2,10.2.4.44,0x652,CIbz4V2PJ98UIA7aR6,S2,0.510075,600,2408,37568,33,tcp,100775,65262,443,24,ssl,2019-03-20T07:34:49.202535Z,4.122.55.2,0,0,0,1,0,[],[],[],[],[],[],[],[''],[''],[None],[None],[],[],[],[],[],"[{'ssl.curve': None, 'ssl.validation_status': ...",[],2019-03-20 07:34:49.202535+00:00,24,12,12,0,tcp,ntp,SF,2019-03-20 07:34:53.149330688+00:00,0.176395,242.000000,8.625000,22281.833333,8.416667,10,14,0,0,2,4,10,8,0,0,0,0,0.083333,0.0,0.166667,0.333333,0.333333,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:34:49.202535+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 07:34:49.202535+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,293,273,0,20,tcp,ssl,RSTO,2019-03-20 07:35:07.234555136+00:00,0.389865,311.798635,19.665529,40885.914676,14.699659,20,273,0,0,0,91,0,182,0,20,0,0,0.0,0.0,0.300341,0.607509,0.849829,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Yes
3,10.2.4.44,0x15a8,Cr2XXL30vP6lBN3WBg,SF,0.026865,116,384,37872,5,tcp,531,799,80,5,http,2019-03-20T10:53:34.142505Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b78b1c61f692fd8175bf471406c5469b'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b78b1c61f692fd8175bf471406c54...,2019-03-20 10:53:34.142505+00:00,20,6,14,0,udp,ntp,SF,2019-03-20 10:53:23.689937408+00:00,0.056519,160.400000,4.250000,6434.650000,4.400000,11,9,0,0,3,2,11,4,0,0,0,0,0.150000,0.0,0.100000,0.200000,0.300000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:53:34.142505+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:53:34.142505+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,158,141,0,17,tcp,ssl,RSTO,2019-03-20 10:53:44.476082176+00:00,0.358566,302.537975,21.639241,39214.689873,14.537975,17,141,0,0,0,47,0,94,0,17,0,0,0.0,0.0,0.297468,0.575949,0.841772,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Yes
4,10.2.4.44,0x198a,CqpQanEJOw21gTPw1,SF,0.013462,113,381,38900,5,tcp,522,790,80,5,http,2019-03-20T12:47:34.128794Z,4.122.55.2,0,0,1,0,1,[],[],[],[],['GET'],[301],[None],[],[],[],[],['HTTP'],['b700ab2377cc6e65f31018d76f477ccd'],[],[],"[{'http.method': 'GET', 'http.status_code': 30...",[],[{'file.md5s': {'b700ab2377cc6e65f31018d76f477...,2019-03-20 12:47:34.128794+00:00,33,18,15,0,tcp,ntp,SF,2019-03-20 12:47:40.670394624+00:00,0.205852,259.575758,8.666667,24144.909091,8.636364,12,21,0,0,3,6,12,12,0,0,0,0,0.090909,0.0,0.181818,0.333333,0.515152,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:47:34.128794+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:47:34.128794+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,134,122,0,12,tcp,ssl,RSTO,2019-03-20 12:47:22.392037376+00:00,0.386866,300.813433,19.373134,40447.895522,14.335821,12,122,0,0,0,41,0,81,0,12,0,0,0.0,0.0,0.305970,0.582090,0.850746,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,10.1.4.48,0x2f91e1,CNdEBk2wkWMyECmMhg,SF,15.786600,48,76,123,1,udp,96,152,123,2,ntp,2019-03-20T09:13:42.804225Z,81.0.208.219,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 09:13:42.804225+00:00,8,0,8,0,udp,dns,SF,2019-03-20 09:13:46.242949888+00:00,1.977325,38.000000,1.000000,68.500000,1.125000,4,4,0,0,4,0,4,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:13:42.804225+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:13:42.804225+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,0,udp,ntp,SF,2019-03-20 09:14:11.593086720+00:00,5.267240,48.000000,1.000000,64.000000,1.333333,3,0,0,0,0,0,3,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,No
266,10.1.4.48,0x2f9237,C1vvK01SBXeDpOVbK2,SF,0.003983,48,76,123,1,udp,48,76,123,1,ntp,2019-03-20T09:48:42.804561Z,147.251.48.140,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 09:48:42.804561+00:00,6,0,6,0,udp,dns,SF,2019-03-20 09:48:27.652052992+00:00,0.004423,33.333333,0.833333,49.666667,1.000000,3,3,0,0,3,0,3,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 09:49:23.816986112+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 09:48:42.804561+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,udp,ntp,SF,2019-03-20 09:49:07.495205376+00:00,0.003241,48.000000,1.000000,48.000000,1.000000,2,0,0,0,0,0,2,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0,No
267,10.1.4.48,0x2f928f,Clekl93UX9CjnaZy43,SF,0.004308,48,76,123,1,udp,48,76,123,1,ntp,2019-03-20T10:35:44.805286Z,195.113.144.201,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 10:35:44.805286+00:00,5,0,5,0,udp,dns,SF,2019-03-20 10:37:06.618049792+00:00,0.003887,36.000000,1.000000,65.400000,1.000000,2,3,0,0,3,0,2,0,0,0,0,0,0.600000,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 10:35:44.805286+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 10:32:18.800468992+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,udp,ntp,SF,2019-03-20 10:36:07.447244800+00:00,0.005925,48.000000,1.000000,48.000000,1.000000,2,0,0,0,0,0,2,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,1,0,0,0,0,0,0,0,0,0,0,0,0,0,No
268,10.1.4.48,0x2f9644,CeZpVHXDH6ULU5U9a,S0,0.000000,0,76,123,1,udp,0,0,123,0,ntp,2019-03-20T12:25:51.805222Z,147.251.48.140,0,0,0,0,0,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],2019-03-20 12:25:51.805222+00:00,8,0,8,0,udp,dns,SF,2019-03-20 12:26:26.867720192+00:00,0.101388,38.000000,1.125000,62.500000,1.000000,4,4,0,0,4,0,4,0,0,0,0,0,0.500000,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-,-,-,2019-03-20 12:25:51.805222+00:00,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,udp,ntp,S0,2019-03-20 12:22:20.868156928+00:00,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,10,0,udp,ntp,SF,2019-03-20 12:26:04.410238208+00:00,0.003456,43.200000,1.000000,43.200000,0.900000,10,0,0,0,0,0,10,0,0,0,0,0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,No


## Assign attack labels

In [40]:
df_result_day1['attack_label'] = 'Normal'
df_result_day2['attack_label'] = 'Normal'

In [41]:
print(df_result_day2['connection.time'].min())
print(df_result_day2['connection.time'].max())

2019-03-20 07:00:00.037142
2019-03-20 14:25:20.072063


In [42]:
import csv
import socket

LABEL_TIME_WINDOW_SECONDS = 4

CSV_LABELS_FILE_PATH = '/home/sramkova/diploma_thesis_data/labels.csv'

# Snort alerts mapper:
label_mapper = {'SYN Scan': 'Scan',
                'POP3 SSLv3 invalid timestamp attempt': 'Pop3',
                'WEB-MISC Invalid HTTP Version String': 'Web',
                'COMMUNITY WEB-MISC mod_jrun overflow attempt': 'Web',
                'SNMP AgentX/tcp request': 'Snmp',
                'SNMP request tcp': 'Snmp',
                'SCAN UPnP service discover attempt': 'Scan',
                'LOCAL DOS SYN packet flood outbound': 'DoS',
                'LOCAL DOS SYN packet flood inbound': 'DoS',
                'WEB-MISC PCT Client_Hello overflow attempt': 'Web',
                'INFO web bug 0x0 gif attempt': 'Web',
                'POP3 PCT Client_Hello overflow attempt': 'Pop3',
                'ATTACK-RESPONSES 403 Forbidden': 'Web',
                'MISC MS Terminal server request': 'Else',
                'SCAN nmap XMAS': 'Scan',
                'VIRUS OUTBOUND bad file attachment': 'Virus',
                'DNS named version attempt': 'Web',
                'MISC MS Terminal server request RDP': 'Else',
                'WEB-MISC Lotus Notes .exe script source download attempt': 'Web',
                'INFO TELNET login failed': 'Else',
                'WEB-MISC Chunked-Encoding transfer attempt': 'Web',
                'DNS zone transfer UDP': 'Web',
                'WEB-ATTACKS id command attempt': 'Web',
                'WEB-MISC SSLv2 Client_Hello with pad Challenge Length overflow attempt': 'Web',
                'POP3 PCT Client_Hello overflow attempt': 'Pop3',
                'WEB-MISC cat%20 access': 'Web',
                'WEB-MISC robots.txt access': 'Web',
                'MS-SQL probe response overflow attempt': 'Else',
                'WEB-ATTACKS netcat command attempt': 'Web',
                'WEB-MISC .htaccess access': 'Web',
                'WEB-MISC http directory traversal': 'Web'}

labels_list = []


with open(CSV_LABELS_FILE_PATH) as labels_file:
    csv_file = csv.reader(labels_file, delimiter=',')
    for splitted_csv_line in csv_file:
        label = label_mapper[splitted_csv_line[1].strip()] if splitted_csv_line[1].strip() in label_mapper else ''
        
        if label:
            time_start = pd.Timestamp(splitted_csv_line[0].replace('-', '/19 ')).tz_localize(None)
            time_start = time_start - pd.Timedelta(seconds=LABEL_TIME_WINDOW_SECONDS)
            time_end = pd.Timestamp(splitted_csv_line[0].replace('-', '/19 ')).tz_localize(None)
            time_end = time_end + pd.Timedelta(seconds=LABEL_TIME_WINDOW_SECONDS) #, milliseconds=30)
                
            res = (time_start, time_end, label,  splitted_csv_line[2].strip(), splitted_csv_line[3].strip())
            labels_list.append(res)

# sort based on starting time:
labels_list.sort(key=lambda x: x[0])
            
print(len(labels_list))
for i in range(10):
    print(labels_list[i])

47981
(Timestamp('2019-03-20 07:00:18.063298'), Timestamp('2019-03-20 07:00:26.063298'), 'Web', '4.122.55.5', '9.66.11.12')
(Timestamp('2019-03-20 07:00:18.118776'), Timestamp('2019-03-20 07:00:26.118776'), 'Web', '10.1.4.45', '4.122.55.2')
(Timestamp('2019-03-20 07:00:18.124460'), Timestamp('2019-03-20 07:00:26.124460'), 'Web', '10.1.4.45', '4.122.55.2')
(Timestamp('2019-03-20 07:00:18.124477'), Timestamp('2019-03-20 07:00:26.124477'), 'Web', '10.1.4.44', '4.122.55.2')
(Timestamp('2019-03-20 07:00:18.131511'), Timestamp('2019-03-20 07:00:26.131511'), 'Web', '4.122.55.5', '9.66.11.14')
(Timestamp('2019-03-20 07:00:18.132016'), Timestamp('2019-03-20 07:00:26.132016'), 'Web', '10.1.4.44', '4.122.55.2')
(Timestamp('2019-03-20 07:00:26.108608'), Timestamp('2019-03-20 07:00:34.108608'), 'Scan', '4.122.55.5', '9.66.44.12')
(Timestamp('2019-03-20 07:00:26.112921'), Timestamp('2019-03-20 07:00:34.112921'), 'Web', '4.122.55.5', '9.66.44.12')
(Timestamp('2019-03-20 07:00:26.126307'), Timestamp('

In [43]:
labels_list[0]

(Timestamp('2019-03-20 07:00:18.063298'),
 Timestamp('2019-03-20 07:00:26.063298'),
 'Web',
 '4.122.55.5',
 '9.66.11.12')

In [44]:
def is_attack(attacker_label, time, orig_ip, resp_ip):
    if attacker_label == 'Yes':
        attack_labels = []
        
        for attack_start, attack_end, attack_label, ip_originator, ip_responder in labels_list:

            time_localized = time.tz_localize(None)
            
            # time optimalization in one direction (TODO)
            if attack_start > time_localized and attack_end > time_localized: 
                break

            if time_localized >= attack_start and time_localized <= attack_end:
                if attack_label not in attack_labels and orig_ip == ip_originator and resp_ip == ip_responder: 
                    attack_labels.append(attack_label)
        
        if attack_labels:
            final_label  = ''
            for label in attack_labels:
                if final_label:
                    final_label += ','
                final_label += label
            return final_label
        
        return 'Not_Specified'
        
    return 'Normal'

In [45]:
print('Start at ' + datetime.now().strftime("%H:%M:%S") + '.')
df_result_day2['attack_label'] = df_result_day2.apply(lambda row : is_attack(row['attacker_label'], 
                                                                             row['connection.time'],
                                                                             row['originated_ip'], 
                                                                             row['responded_ip']), 
                                                      axis = 1)
end_time = datetime.now().strftime("%H:%M:%S")
print('Done at ' + end_time + '.')

Start at 08:15:40.
Done at 15:19:17.


In [46]:
df_result_day2['attack_label'].value_counts()

Scan,Snmp         83118
Scan              82751
Not_Specified     72997
Normal            64359
Scan,Pop3          8985
Web                7047
Pop3,Scan          6023
Scan,Pop3,Snmp     1242
Web,Scan           1227
Snmp,Scan          1038
Pop3                547
Scan,Web            298
Scan,Snmp,Pop3      235
Snmp                155
Snmp,Scan,Pop3       43
Scan,Else            21
Else                 21
Else,Scan            14
Pop3,Scan,Snmp        8
Snmp,Pop3,Scan        5
Virus                 2
Name: attack_label, dtype: int64

## Concat to one DF and write to file

In [47]:
df_result = df_result_day1.append(df_result_day2)

In [48]:
print(len(df_result_day1) + len(df_result_day2))
print(len(df_result))

472345
472345


In [49]:
df_result.to_csv('/home/sramkova/diploma_thesis_data/neighbourhood_both_days.csv', index=False, header=True)