# Ingress Detection Algorithm

```
cidr_max = 28   # max split cidr mask
t=60            # seconds
e=120           # expire time of IPs (seconds)
q = 0.95        # needed ingress fraction
c = 64          # c * sqrt(2^(IPv_max - cidr)) -> min number of samples

start with no knowledge (only /0 is known) 
loop
    collect IP from Netflow
        Filter IPs for ingress
        Mask them to cidrmax
        Insert IP into corresponding range

Every t seconds 
    Check all ranges
        Remove IPs older than e seconds 
        Prevalent color still valid (s_color >= q)
            YES → join siblings ? (join(s_color ) >= q) 
                YES → join siblings and check again 
                NO → do nothing
            NO → remove all information

        Check if enough samples have been collected (s_ipcount >= n_cidr ) 
            YES → is a single color prevalent ? (s_color >=q)
                YES → color range with link color
                NO → split subnet if s_cidr < cidrmax
            NO → join siblings ? (join(s_color) >= q or join(s_ipcount) < n_{cidr-1})
                YES → join siblings and check again 
                NO → do nothing
```

In [34]:
import os
import gzip
import csv
import ipaddress
import math
from collections import defaultdict

cidr_max = 28   # max split cidr mask (source IPs are masked to this prefix length)
t=60            # seconds between calculations; flow timestamps are binned to multiples of t
e= 2*t          # 120  # expire time of IPs (seconds)
q = 0.95        # needed ingress fraction
c = 64          # sample factor: min samples = c * sqrt(2^(IPv_max - cidr))


# full netflow archives (not used below, the dummy file is read instead):
# /data/slow/mehner/netflow/parser_[00..25]/archived/[@000000000000001605639660.gz|@000000000000001605643260.gz]
netflow_path="/data/slow/mehner/netflow/dummy_netflow.gz"
ingresslink_file = "/data/slow/mehner/ingresslink/1605571200.gz"                # if we get more netflow, we should adjust the file
router_ip_mapping_file="/data/slow/mehner/router_lookup_tables/1605571200.txt"
# output folders
output_folder="results"

# create the output folder; exist_ok avoids an error when it is already there
os.makedirs(output_folder, exist_ok=True)



############## RANGE STATE CLASS ##############
class IPDRange:
    """Range state of the ingress detection, stored as nested dicts.

    ``ipd_range[ip_version][cidr][network]`` holds one record per masked
    source prefix: ``{"last_seen": <timestamp>, "ingress": <link name>}``.
    """

    ### range as hierarchical dicts
    
    # |----------------------|
    # |                      | /0
    # |----------------------|
    #
    # 0.0.0.0/1  128.0.0.0/1
    # |----------------------|
    # |          |           | /1
    # |----------------------|
    # 0./2  64./2 128./2 192./2
    # |----------------------|
    # |     |    |     |     | /2
    # |----------------------|
    #
    # ....
    # ipd_range[ip_version][cidr][prefix]
    #

    def __init__(self):
        # Levels: ip_version -> cidr -> network. Leaf records are plain dicts
        # that add_to_range() overwrites on every sighting.
        self.ipd_range = self.__multi_dict(3, dict)

    def __multi_dict(self, depth, factory):
        """Return a ``defaultdict`` nested ``depth`` levels deep.

        Missing values on the innermost level are created with ``factory``.
        """
        if depth == 1:
            return defaultdict(factory)
        return defaultdict(lambda: self.__multi_dict(depth - 1, factory))

    @staticmethod
    def __get_min_samples(cidr, ip_version=4, factor=None):
        """Return the minimum number of samples required for a /``cidr`` range.

        Computes ``int(factor * sqrt(2 ** (ipv_max - cidr)))`` where
        ``ipv_max`` is 128 for ``ip_version == 6`` and 32 otherwise.

        Args:
            cidr: prefix length of the range.
            ip_version: 4 or 6; any value other than 6 is treated as IPv4.
            factor: sample factor; defaults to the module-level constant ``c``.
        """
        if factor is None:
            factor = c
        ipv_max = 128 if ip_version == 6 else 32
        return int(factor * math.sqrt(math.pow(2, ipv_max - cidr)))

    def add_to_range(self, ip_version, prefix, last_seen, ingress):
        """Record the latest sighting of a masked source prefix.

        Args:
            ip_version: first-level key of ``ipd_range`` (4 or 6 in the caller).
            prefix: an ``ipaddress`` network object, or anything
                ``ipaddress.ip_network`` can parse (host bits are masked off).
            last_seen: timestamp of the sighting, stored as given.
            ingress: name of the ingress link, stored as given.

        Raises:
            ValueError: if ``prefix`` cannot be parsed as a network.
        """
        print(ip_version, prefix, last_seen, ingress)

        if isinstance(prefix, (ipaddress.IPv4Network, ipaddress.IPv6Network)):
            network = prefix
        else:
            network = ipaddress.ip_network(prefix, strict=False)

        # The cidr level is taken from the prefix itself, so the caller's
        # masking decides where the record lands.
        self.ipd_range[ip_version][network.prefixlen][network] = {
            "last_seen": last_seen,
            "ingress": ingress,
        }

    def check_range(self):
        """Placeholder for the periodic range check (not implemented yet)."""
        # TODO: check if enough samples in range
        # TODO: check if there is a prevalent color in range
        pass

    
    

###################################################
########### ROUTER NAME <--> IP MAPPING ###########
###################################################
# Load the space-separated lookup table: first column -> second column.
# newline='' lets the csv module do its own line-ending handling, and blank
# lines (which csv.reader yields as empty rows) are skipped instead of
# raising IndexError.
with open(router_ip_mapping_file, 'r', newline='') as csv_file:
    router_ip_mapping_csv = csv.reader(csv_file, delimiter=' ')
    router_ip_lookup_dict = {rows[0]: rows[1] for rows in router_ip_mapping_csv if rows}

###################################################
###########     INGRESS LINK FILE       ###########
###################################################

print("> load ingresslink file")

# Keys are "<router>.<in_iface>"; the value is always True, so the dict is
# only used for membership lookups.
ingresslink_dict = {}
with gzip.open(str(ingresslink_file), 'rb') as f:
    for line in f:
        # each line is comma separated; the first two fields carry the
        # router and the inbound interface behind a "NAME=" tag
        line = line.decode('utf-8').split(",")
        router = line[0].replace("PEER_SRC_IP=", "")
        in_iface = line[1].replace("IN_IFACE=", "")

        ingresslink_dict[router + "." + in_iface] = True
print("  ...done\n")

###################################################
###########     READ NETFLOW             ###########
###################################################

# reads a netflow file
# filter for ingress only
# mask ip to cidr_max
# insert ip into corresponding range

def read_netflow_file(netflow_file):
    """Feed the ingress flows of one gzip-compressed netflow dump into ``ipd``.

    Each line is decoded as UTF-8 and split on ",". The header row (third
    field from the end equals "TIMESTAMP_END") is skipped, as is every flow
    whose "<router>.<in_iface>" key is not in ``ingresslink_dict``. For the
    remaining flows the source IP is masked to ``cidr_max``, the end
    timestamp is binned down to a multiple of ``t``, and the result is passed
    to ``ipd.add_to_range``.

    Args:
        netflow_file: path of the gzip-compressed netflow file.

    Raises:
        IndexError: if a line has fewer fields than the parser reads.
        ValueError: if an ingress flow has a malformed source IP or timestamp.
    """
    ## netflow files are hourly, e.g. @000000000000001605639660.gz and @000000000000001605643260.gz
    #
    # NOTE(review): the sample below is shown whitespace-aligned, but the
    # parser splits on "," -- confirm the on-disk format.
    #
    # TAG     PEER_SRC_IP  IN IFACE OUT_IFACE SRC_IP          DST_NET        SRC_PORT DST_PORT PROTO  _       _       TS_START        TS_END    PKTS    BYTES
    # 0       194.25.7.141    13      1571    91.127.69.122   31.13.84.4      40730   443     tcp     0       i       1605639641      1605639641 1       121
    # 0       194.25.7.141    13      1530    91.228.166.91   66.171.29.67    80      45659   tcp     0       i       1605639641      1605639641 1       46
    # 0       194.25.7.141    646     17      46.125.249.104  5.187.227.210   42681   27046   udp     0       i       1605639641      1605639642 2       2184
    # 0       194.25.7.141    633     21      216.58.212.175  178.165.131.117 443     3462    udp     0       i       1605639641      1605639641 1       56
    #

    with gzip.open(netflow_file, 'rb') as f:
        for raw_line in f:
            fields = raw_line.decode('utf-8').split(",")
            router_name = router_ip_lookup_dict.get(fields[1])
            if fields[-3] == "TIMESTAMP_END":
                continue  # header row

            in_iface = fields[2]
            ingress = "{}.{}".format(router_name, in_iface)

            # Filter first: only ingress traffic is of interest, so the
            # address parsing below is not spent on flows that get dropped.
            if not ingresslink_dict.get(ingress, False):
                continue

            ip_version = 6 if ":" in fields[4] else 4

            # mask to cidr_max
            # NOTE(review): IPv6 sources are masked with the same cidr_max as
            # IPv4 -- confirm this is intended.
            src_ip = ipaddress.ip_network("{}/{}".format(fields[4], cidr_max), strict=False)

            # ts is binned to t (~ time between calculations); integer
            # division keeps the arithmetic exact
            cur_ts = str(int(fields[-3]) // t * t)

            ### here are only valid ingressing ips that are masked to cidr_max ###

            # insert IP into corresponding range
            ipd.add_to_range(ip_version, src_ip, cur_ts, ingress)

        


# Module-level range state that read_netflow_file() writes into via
# ipd.add_to_range(); then process the configured netflow dump.
ipd = IPDRange()
read_netflow_file(netflow_path)

> load ingresslink file
  ...done

4 213.81.220.0/28 1605639600 VIE-SB5.13
4 143.244.58.208/28 1605639600 VIE-SB5.1605
4 91.127.66.48/28 1605639600 VIE-SB5.13
4 95.103.170.144/28 1605639600 VIE-SB5.13
4 62.178.57.160/28 1605639600 VIE-SB5.1605
4 143.244.58.208/28 1605639600 VIE-SB5.1605
4 152.195.34.144/28 1605639600 VIE-SB5.20
4 185.133.60.144/28 1605639600 VIE-SB5.13
4 80.110.125.80/28 1605639600 VIE-SB5.1605
4 78.99.213.128/28 1605639600 VIE-SB5.13
4 143.244.58.16/28 1605639600 VIE-SB5.1605
4 185.246.210.48/28 1605639600 VIE-SB5.1605
4 95.103.149.176/28 1605639600 VIE-SB5.13
4 152.195.34.144/28 1605639600 VIE-SB5.20
4 143.244.59.0/28 1605639600 VIE-SB5.1605
4 195.91.18.224/28 1605639600 VIE-SB5.13
4 93.184.221.128/28 1605639600 VIE-SB5.20
4 91.236.82.64/28 1605639600 VIE-SB5.13
4 212.17.74.16/28 1605639600 VIE-SB5.1605
4 213.81.199.0/28 1605639600 VIE-SB5.13
4 194.154.244.16/28 1605639600 VIE-SB5.13
4 78.98.78.224/28 1605639600 VIE-SB5.13
4 143.244.58.16/28 1605639600 VIE-SB5.1605
4

In [32]:
# example from lecture video

import math
c = 64  # alternative value tried: 0.00025
ip_version = 4

# address width in bits of the selected IP version
ipv_max = 128 if ip_version == 6 else 32


def get_min_samples(cidr):
    """Return int(c * sqrt(2 ** (ipv_max - cidr))) for the given prefix length.

    Uses the module-level ``c`` and ``ipv_max`` at call time.
    """
    host_bits = ipv_max - cidr
    range_size = math.pow(2, host_bits)
    return int(c * math.sqrt(range_size))


# With c=64 and IPv4 this prints, one value per line:
# 370727, 1024, 724, 512, 362, 256
print(*(get_min_samples(mask) for mask in (7, 24, 25, 26, 27, 28)), sep="\n")


370727
1024
724
512
362
256


In [65]:
class IPDRange:
    """Range state of the ingress detection, stored as nested dicts.

    ``ipd_range[ip_version][cidr][prefix]`` is created on first access and
    starts as ``{'last_seen': 0, 'match_counter': 0, 'miss_counter': 0}``.
    """

    ### range as hierarchical dicts
    
    # |----------------------|
    # |                      | /0
    # |----------------------|
    #
    # 0.0.0.0/1  128.0.0.0/1
    # |----------------------|
    # |          |           | /1
    # |----------------------|
    # 0./2  64./2 128./2 192./2
    # |----------------------|
    # |     |    |     |     | /2
    # |----------------------|
    #
    # ....
    # ipd_range[ip_version][cidr][prefix]
    #

    def __init__(self):
        # Levels: ip_version -> cidr -> prefix -> attribute record.
        self.ipd_range = self.__multi_dict(3, self.__atts)

    def __atts(self):
        """Return a fresh attribute record for a prefix seen for the first time."""
        return {'last_seen': 0, 'match_counter': 0, 'miss_counter': 0}

    def __multi_dict(self, depth, factory):
        """Return a ``defaultdict`` nested ``depth`` levels deep.

        Missing values on the innermost level are created with ``factory``.
        """
        if depth == 1:
            return defaultdict(factory)
        return defaultdict(lambda: self.__multi_dict(depth - 1, factory))

    @staticmethod
    def __get_min_samples(cidr, ip_version=4, factor=None):
        """Return the minimum number of samples required for a /``cidr`` range.

        Computes ``int(factor * sqrt(2 ** (ipv_max - cidr)))`` where
        ``ipv_max`` is 128 for ``ip_version == 6`` and 32 otherwise.

        Args:
            cidr: prefix length of the range.
            ip_version: 4 or 6; any value other than 6 is treated as IPv4.
            factor: sample factor; defaults to the module-level constant ``c``.
        """
        if factor is None:
            factor = c
        ipv_max = 128 if ip_version == 6 else 32
        return int(factor * math.sqrt(math.pow(2, ipv_max - cidr)))

    def add_to_range(self, ip_version, prefix, last_seen, ingress):
        """Print the observation; nothing is stored yet."""
        print(ip_version, prefix, last_seen, ingress)
        # TODO: insert into self.ipd_range[ip_version][cidr][prefix]

    def add_to_range_test(self):
        """Fill ``ipd_range`` with two hard-coded sample entries."""
        first = self.ipd_range[4][12]["123.0.0.0"]
        first["match_counter"] += 1
        first["last_seen"] = 123

        second = self.ipd_range[4][21]["80.12.15.0"]
        second["last_seen"] = 123
        # six misses in total for this prefix
        for _ in range(6):
            second["miss_counter"] += 1

    def show_ipd_range(self):
        """Print the complete nested range dict."""
        print(self.ipd_range)

    def check_range(self):
        """Placeholder for the periodic range check (not implemented yet)."""
        # TODO: check if enough samples in range
        # TODO: check if there is a prevalent color in range
        pass



# Smoke test: fill the range state with the hard-coded sample entries and print it.
i = IPDRange()

i.add_to_range_test()
i.show_ipd_range()



defaultdict(<function IPDRange.__multi_dict.<locals>.<lambda> at 0x7ff896f29160>, {4: defaultdict(<function IPDRange.__multi_dict.<locals>.<lambda> at 0x7ff896f29670>, {12: defaultdict(<bound method IPDRange.__atts of <__main__.IPDRange object at 0x7ff896f498e0>>, {'123.0.0.0': {'last_seen': 123, 'match_counter': 1, 'miss_counter': 0}}), 21: defaultdict(<bound method IPDRange.__atts of <__main__.IPDRange object at 0x7ff896f498e0>>, {'80.12.15.0': {'last_seen': 123, 'match_counter': 0, 'miss_counter': 6}})})})


In [50]:
# Raises TypeError: IPDRange defines no __getitem__, so the instance itself
# cannot be indexed; the nested dict lives in i.ipd_range.
i[4][12]["123.0.0.0"]['last_seen'] = 123

TypeError: 'IPDRange' object is not subscriptable

In [47]:
# Display k. It is not defined in the visible part of this file; presumably a
# nested defaultdict left over from an earlier experiment -- TODO confirm.
k

defaultdict(<function __main__.IPDRange.multi_dict.<locals>.<lambda>()>,
            {4: defaultdict(<function __main__.IPDRange.multi_dict.<locals>.<lambda>()>,
                         {12: defaultdict(<function __main__.IPDRange.multi_dict.<locals>.<lambda>()>,
                                      {'123.0.0.0': defaultdict(<function __main__.atts()>,
                                                   {'last_seen': 123})})})})