In [1]:
import numpy as np
import tensorflow as tf
from sklearn.cluster import DBSCAN, KMeans
from matplotlib import pyplot as plt
from math import inf

In [2]:
# Report the library versions this notebook was executed with (one per line).
print(
    f' TensorFlow version: {tf.__version__}',
    f' NumPy versio: {np.__version__}',
    sep='\n',
)

 TensorFlow version: 2.10.0
 NumPy versio: 1.23.4


## Algorithm implementation

### Stage 1 - Flow clustering

In [3]:
def subset_of_sets(_set: set, sets):
    """
        Check whether a set is contained in at least one set of a collection.

        Parameters
        _set : set
            Candidate set
        sets : iterable of sets
            Sets the candidate is tested against

        Returns:
            True if `_set` is a subset of any element of `sets`, otherwise False
            (always False when `sets` is empty)
    """
    # any() stops at the first match and builds no intermediate list.
    return any(_set.issubset(s) for s in sets)


def tp_cluster(clusters: set, noise: set, tp_ratio: float, tp_deviation: float):
    """
        Merge DBSCAN clusters into TPClusters and attach every noise flow to one of them.

        Parameters
        clusters : set
            Collection of DBSCAN cluster sets (numeric flow values), iterated in
            descending throughput order. The input sets are not modified.
        noise : set
            Set of DBSCAN noise flows
        tp_ratio : float
            Ratio used to determine if two DBSCAN clusters can be combined into one TPCluster
        tp_deviation : float
            The relative distance a noise flow can be away from a TPCluster to be assigned to that cluster

        Returns:
            List of TPCluster sets in the order they were created. The list is
            empty when `clusters` is empty, because noise flows then have no
            cluster to join.
    """
    # Materialise once: the collection is traversed in a nested loop below.
    clusters = list(clusters)

    cs = []
    for cluster in clusters:
        if subset_of_sets(cluster, cs):
            # Already absorbed into an earlier TPCluster.
            continue
        # Work on a copy so merging never mutates the caller's cluster.
        merged = set(cluster)
        m = max(cluster)
        for cluster_k in clusters:
            if cluster_k is cluster:
                continue
            m_prim = max(cluster_k)
            # Absorb clusters whose maximum lies strictly below m but within
            # the tp_ratio band underneath it.
            if (1 - tp_ratio) * m < m_prim < m:
                merged.update(cluster_k)
        cs.append(merged)

    if not cs:
        return cs

    for n_j in noise:
        delta_min = inf
        # Index 0 is the fallback when no TPCluster satisfies the condition.
        a = 0
        for i, tp in enumerate(cs):
            m = max(tp)
            # The flow may exceed the cluster maximum by at most tp_deviation * m,
            # and its signed gap must not be larger than the best gap seen so far.
            if (-tp_deviation * m) <= (m - n_j) <= delta_min:
                delta_min = m - n_j
                a = i
        cs[a].add(n_j)

    return cs


### Stage 2 - FOF computation

In [7]:
def compute_fof(clusters):
    """
        Compute the FOF score of every flow (work in progress).

        NOTE(review): the implementation is unfinished. It fits one KMeans
        model per cluster, then calls ``max()`` without arguments (a
        ``TypeError`` at runtime) and has no ``return`` statement.

        Parameters
        clusters : set
            Set of TPCluster sets

        Returns:
            FOF score for each flow in each cluster (intended; nothing is
            returned by the current body)
    """
    # The number of TPClusters is reused as the KMeans cluster count for every fit.
    k = len(clusters)
    for c_i in clusters:
        # NOTE(review): KMeans.fit expects a 2-D (n_samples, n_features) array --
        # confirm that np.array(c_i) has that shape for the cluster type passed in.
        s = KMeans(n_clusters=k).fit(np.array(c_i))
        # FIXME: max() is called with no arguments, which raises TypeError.
        c_i_prim = max()

## Experiment