In [9]:
# 1) Setup path to include webapp/ as a package
import sys, os
project_root = os.getcwd()
sys.path.insert(0, os.path.join(project_root, 'webapp'))
print('Added to PYTHONPATH:', sys.path[0])

Added to PYTHONPATH: /home/tferreira/Documents/Clustering/flexible-clustering/webapp/webapp


In [47]:
# 1. Make sure your project modules are on PYTHONPATH.
# If this notebook lives outside webapp/, adjust the path below:
# import sys; sys.path.append("/full/path/to/flexible-clustering/webapp")

from clustering.clustering_algorithms import fetch_cowrie_data, run_clustering
from clustering.preprocessing      import is_real_command, abstract_command_line_substitution
from clustering.similarity                     import distance_func
from fish.fishdbc                   import FISHDBC

import numpy as np
import pandas as pd

from sklearn.metrics import silhouette_score, silhouette_samples


In [73]:
# 2. Configure your data range and honeypot type.
honeypot_type = "cowrie"
from_date    = "2025-01-01T00:00:00"  # ISO format, inclusive
to_date      = "2025-07-17T23:59:59"  # up to yesterday

# 3. Fetch raw logs and filter/abstract
df = fetch_cowrie_data(honeypot_type, from_date, to_date, size=1000)
df = df[df['input'].notna()]

# Keep only “real” shell commands
filtered = [(i, cmd) for i, cmd in enumerate(df['input'].values)]
            # if is_real_command(cmd)]
indices, raw_cmds = zip(*filtered)
abstracts = [abstract_command_line_substitution(cmd) for cmd in raw_cmds]

print(f"Loaded {len(abstracts)} commands for clustering.")


Loaded 1000 commands for clustering.


In [13]:
# 4. Run FISHDBC on your abstracts
dist_fn = distance_func()          # your semantic distance
fish   = FISHDBC(dist_fn)
fish.update(abstracts)

# cluster() returns (labels, probs, stabilities, condensed_tree, slt, mst)
labels, *_ = fish.cluster()
labels = np.array(labels)
print(f"Cluster labels: {np.unique(labels)}")


Cluster labels: [-1  0  1  2  3  4  5  6  7  8  9 10 11]


In [14]:
labels

array([-1, 10,  7, ...,  6, -1,  4], shape=(9958,))

In [16]:
# 5. Build the pairwise distance matrix (O(n²) cost!)
n = len(abstracts)
D = np.zeros((n, n))

for i in range(n):
    for j in range(i+1, n):
        d = dist_fn(abstracts[i], abstracts[j])
        D[i, j] = D[j, i] = d

D = D - D.min()
# 6. Compute overall silhouette score
sil_avg = silhouette_score(D, labels, metric="precomputed")
print(f"Average silhouette score: {sil_avg:.3f}")


KeyboardInterrupt: 

In [24]:
S_df = pd.read_csv("databases/UpdatedSimilarity.csv", index_col=0)

In [27]:
from clustering.load_data import load_command_resources

# this gives you (… , similarity_matrix, …)
_, similarity_matrix, _, _ = load_command_resources()
dist_fn = distance_func()   # wraps geometric_distance(cmd1, cmd2, similarity_matrix)


In [111]:
from clustering.preprocessing import is_pure_string, group_commands_and_flags
from clustering.load_data import load_command_resources

_, similarity_matrix, _, _ = load_command_resources()

__all__ = ["geometric_distance", "distance_func"]

SIMILARITY_THRESHOLD = 1e-9

def geometric_distance1(cmd1, cmd2, sim_matrix):
    """
    Computes a semantic distance between two abstracted command lines using geometric mean
    of pairwise token similarities from a precomputed similarity matrix.

    - If either command is a pure string payload, distance is 1.0 (maximal).
    - Commands are tokenized and grouped using `group_commands_and_flags`.
    - Each token pair's similarity is retrieved and combined via geometric mean.
    - If no valid similarities are found, defaults to 1.0.

    Args:
        cmd1 (str): First abstracted command string.
        cmd2 (str): Second abstracted command string.
        sim_matrix (dict): Nested dictionary of pairwise token similarity scores.

    Returns:
        float: Distance in [0.0, 1.0], where 0 means highly similar, 1 means dissimilar.
    """

    if is_pure_string(cmd1) or is_pure_string(cmd2):
        return 1.0

    units1 = group_commands_and_flags(cmd1.strip())
    units2 = group_commands_and_flags(cmd2.strip())
    n = min(len(units1), len(units2))
    sims = []

    for u1, u2 in zip(units1[:n], units2[:n]):
        if u1.isupper() or u2.isupper() or '(' in u1 or '(' in u2:
            continue
        sims.append(sim_matrix.get(u1, {}).get(u2, 0.0))

    if not sims:
        return 1.0

    product = 1.0
    for s in sims:
        product *= max(s, SIMILARITY_THRESHOLD)

    geometric_mean = product ** (1.0 / len(sims))
    return np.clip(1.0 - geometric_mean, 0.0, 1.0)


def distance_func():
    """
    Returns a lambda function that computes the distance between two abstracted command strings
    using the global similarity matrix loaded at module-level.

    Returns:
        Callable: A two-argument function (cmd1, cmd2) → distance (float).
    """

    return lambda x, y: geometric_distance(x, y, similarity_matrix)


In [89]:
import numpy as np
from sklearn.metrics import pairwise_distances, silhouette_score

# abstracts is your list of N=9958 abstracted commands, in the same order as labels
X = np.array(abstracts)[:, None]   # shape (N,1), each entry is a string

# compute the N×N distance matrix in parallel
D = pairwise_distances(
    X,
    metric=lambda x, y: geometric_distance(x, y, similarity_matrix),
    n_jobs=-1
)

# sanity: zero out the diagonal
np.fill_diagonal(D, 0)


TypeError: cannot use a string pattern on a bytes-like object

In [31]:
import numpy as np
from sklearn.metrics import silhouette_score

# Number of samples
N = len(abstracts)

# 1) Build a numeric “feature” array of shape (N,1) whose only feature is the sample index
X_idx = np.arange(N, dtype=int).reshape(-1, 1)

# 2) Define a metric that maps back to your command strings
def idx_dist(a, b):
    # a and b are 1‑d numpy arrays of length 1, dtype float
    i = int(a[0])
    j = int(b[0])
    return dist_fn(abstracts[i], abstracts[j])

# 3) Call silhouette_score without precomputing the full D
sil = silhouette_score(
    X_idx,
    labels,
    metric=idx_dist,
    sample_size=1000,    # sample up to 1000 points to speed things up
    random_state=42,
    n_jobs=-1
)

print(f"Approximate silhouette (1 000 samples): {sil:.3f}")


Approximate silhouette (1 000 samples): 0.502


In [34]:
import numpy as np
from sklearn.metrics import silhouette_score
from clustering.similarity import distance_func
from fish.fishdbc         import FISHDBC

# -----------------------------------------------------------------------------
# 1) Prepare the “index” feature array and the idx→command metric
# -----------------------------------------------------------------------------
N = len(abstracts)
X_idx = np.arange(N, dtype=int).reshape(-1, 1)

def idx_dist(a, b):
    i = int(a[0])
    j = int(b[0])
    return dist_fn(abstracts[i], abstracts[j])

# -----------------------------------------------------------------------------
# 2) Sweep FISHDBC parameters and record the best silhouette
# -----------------------------------------------------------------------------
best = {
    'min_samples': None,
    'min_cluster_size': None,
    'method': None,
    'sil_score': -1.0
}

for min_s in [3, 5, 10, 20]:
    # you can also try different m, ef here:
    fish = FISHDBC(distance_func(),
                   min_samples=min_s,
                   m=5, ef=50,
                   vectorized=False)
    # build the index+MST once per setting
    fish.update(abstracts)
    
    for min_cs in [min_s, max(min_s*2, 1), max(min_s*5, 1)]:
        for method in ['eom', 'leaf']:
            labels, *_ = fish.cluster(
                min_cluster_size=min_cs,
                cluster_selection_method=method,
            )
            
            # compute (sampled) silhouette
            sil = silhouette_score(
                X_idx,
                labels,
                metric=idx_dist,
                sample_size=1000,
                random_state=42,
                n_jobs=-1
            )
            
            print(f"min_samples={min_s:2d}, min_cluster_size={min_cs:2d}, "
                  f"method={method:5s} → silhouette={sil:.3f}")
            
            if sil > best['sil_score']:
                best.update({
                    'min_samples':      min_s,
                    'min_cluster_size': min_cs,
                    'method':           method,
                    'sil_score':        sil
                })

print("\n🏆 Best setting:", best)


min_samples= 3, min_cluster_size= 3, method=eom   → silhouette=0.495
min_samples= 3, min_cluster_size= 3, method=leaf  → silhouette=0.495
min_samples= 3, min_cluster_size= 6, method=eom   → silhouette=0.495
min_samples= 3, min_cluster_size= 6, method=leaf  → silhouette=0.495
min_samples= 3, min_cluster_size=15, method=eom   → silhouette=0.495
min_samples= 3, min_cluster_size=15, method=leaf  → silhouette=0.495
min_samples= 5, min_cluster_size= 5, method=eom   → silhouette=0.502
min_samples= 5, min_cluster_size= 5, method=leaf  → silhouette=0.502
min_samples= 5, min_cluster_size=10, method=eom   → silhouette=0.502
min_samples= 5, min_cluster_size=10, method=leaf  → silhouette=0.502
min_samples= 5, min_cluster_size=25, method=eom   → silhouette=0.502
min_samples= 5, min_cluster_size=25, method=leaf  → silhouette=0.502
min_samples=10, min_cluster_size=10, method=eom   → silhouette=0.502
min_samples=10, min_cluster_size=10, method=leaf  → silhouette=0.502
min_samples=10, min_cluster_size=2

In [35]:
import numpy as np
from sklearn.metrics import silhouette_score

# Number of samples
N = len(abstracts)

# 1) Build a numeric “feature” array of shape (N,1) whose only feature is the sample index
X_idx = np.arange(N, dtype=int).reshape(-1, 1)

# 2) Define a metric that maps back to your command strings
def idx_dist(a, b):
    # a and b are 1‑d numpy arrays of length 1, dtype float
    i = int(a[0])
    j = int(b[0])
    return dist_fn(abstracts[i], abstracts[j])




Approximate silhouette all samples :) : 0.026


In [37]:
import numpy as np
from sklearn.metrics import silhouette_score

# Number of samples
N = len(abstracts)

# 1) Build a numeric “feature” array of shape (N,1) whose only feature is the sample index
X_idx = np.arange(N, dtype=int).reshape(-1, 1)

# 2) Define a metric that maps back to your command strings
def idx_dist(a, b):
    # a and b are 1‑d numpy arrays of length 1, dtype float
    i = int(a[0])
    j = int(b[0])
    print(i, " -> ", a)
    print(j, " -> ", b)
    return dist_fn(abstracts[i], abstracts[j])




In [None]:
# 3) Call silhouette_score without precomputing the full D
sil = silhouette_score(
    X_idx,
    fishdbc_labels,
    metric=idx_dist,
    random_state=42,
    n_jobs=-1
)

print(f"Approximate silhouette all samples :) : {sil:.3f}")

In [36]:
import numpy as np
import pandas as pd
import hdbscan
from sklearn.metrics import silhouette_score

# 1) Assume you already have:
#    - D: an (N×N) numpy array of pairwise distances (all ≥0, zero diagonal)
#    - abstracts: list of N commands
#    - labels_hdb: you’ll compute these below

# 2) Run HDBSCAN on the precomputed distances
#    Note: HDBSCAN (the python package) accepts a sparse or dense precomputed matrix:
clusterer = hdbscan.HDBSCAN(
    metric='precomputed',
    min_cluster_size=5,       # tweak as you like
    cluster_selection_method='eom'
)
labels_hdb = clusterer.fit_predict(D)  # labels in [-1 (noise), 0,1,2,…]

# 3) (Optional) filter out noise points if you don’t want them in the silhouette
mask = labels_hdb != -1
D_clean      = D[np.ix_(mask, mask)]
labels_clean = labels_hdb[mask]

# 4) Compute the silhouette score
sil = silhouette_score(
    D_clean,
    labels_clean,
    metric='precomputed'
)
print(f"HDBSCAN silhouette (no noise): {sil:.3f}")


ValueError: Negative values in data passed to `pairwise_distances`. Precomputed distance  need to have non-negative values..

In [25]:
sil = silhouette_score(D_clean, labels_clean, metric="precomputed")
print(f"Silhouette = {sil:.3f}")


IndexError: index 178 is out of bounds for axis 0 with size 178

In [18]:
D

array([[2.22044605e-16, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 2.22044605e-16, 9.99999999e-01, ...,
        9.99999999e-01, 9.99999999e-01, 9.99999999e-01],
       [1.00000000e+00, 9.99999999e-01, 2.22044605e-16, ...,
        9.99999999e-01, 9.99999999e-01, 9.99999999e-01],
       ...,
       [1.00000000e+00, 9.99999999e-01, 9.99999999e-01, ...,
        2.22044605e-16, 2.22044605e-16, 2.22044605e-16],
       [1.00000000e+00, 9.99999999e-01, 9.99999999e-01, ...,
        2.22044605e-16, 2.22044605e-16, 2.22044605e-16],
       [1.00000000e+00, 9.99999999e-01, 9.99999999e-01, ...,
        2.22044605e-16, 2.22044605e-16, 2.22044605e-16]],
      shape=(9958, 9958))

In [19]:
# 7. Per‐sample silhouette and plot
sil_vals = silhouette_samples(D, labels, metric="precomputed")

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 5))
y_lower = 10

for cluster_id in np.unique(labels):
    ith_vals = np.sort(sil_vals[labels == cluster_id])
    size    = ith_vals.shape[0]
    y_upper = y_lower + size

    ax.fill_betweenx(np.arange(y_lower, y_upper), 0, ith_vals, alpha=0.7)
    ax.text(-0.05, y_lower + 0.5 * size, str(cluster_id))
    y_lower = y_upper + 10

ax.axvline(x=sil_avg, color='red', linestyle='--')
ax.set_title("Silhouette Plot for FISHDBC Clusters")
ax.set_xlabel("Silhouette Coefficient")
ax.set_ylabel("Cluster Label")
ax.set_yticks([])
ax.set_xlim([-0.1, 1])
plt.show()


ModuleNotFoundError: No module named 'matplotlib'

In [90]:
import pandas as pd
import numpy as np
import time
from sklearn.metrics import silhouette_score
from scipy.stats import entropy
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json

# ====== Load Resources ======
similarity_matrix = pd.read_csv("databases/UpdatedSimilarity.csv", index_col=0)
purpose_db = pd.read_csv("databases/UpdatedCommandDB.csv")
purpose_lookup = dict(zip(purpose_db["label"], purpose_db["simplified_purpose"]))

# ====== Evaluation Functions ======
def safe_distance_matrix(sim_matrix):
    return np.clip(1 - sim_matrix, 0.0, 1.0)

def compute_silhouette(sim_matrix, labels):
    distance_matrix = safe_distance_matrix(sim_matrix)
    return silhouette_score(distance_matrix, labels, metric="precomputed")

def cluster_size_stats(labels):
    unique, counts = np.unique(labels, return_counts=True)
    return {
        "num_clusters": len(unique),
        "avg_size": np.mean(counts),
        "min_size": np.min(counts),
        "max_size": np.max(counts),
        "noise_points": np.sum(labels == -1)
    }

def intra_cluster_similarity(sim_matrix, labels):
    cluster_sims = {}
    for label in np.unique(labels):
        if label == -1:
            continue
        indices = np.where(labels == label)[0]
        if len(indices) < 2:
            continue
        sims = sim_matrix.iloc[indices, indices].values
        upper_triangle = sims[np.triu_indices_from(sims, k=1)]
        cluster_sims[label] = np.mean(upper_triangle)
    return cluster_sims

def compute_entropy_per_cluster(labels, commands):
    cluster_purposes = defaultdict(list)
    for i, label in enumerate(labels):
        if label == -1:
            continue
        cmd = commands[i]
        base_key = cmd.split()[0]
        purpose = purpose_lookup.get(base_key, "Unknown")
        cluster_purposes[label].append(purpose)

    entropy_per_cluster = {}
    for cid, plist in cluster_purposes.items():
        counts = pd.Series(plist).value_counts()
        probs = counts / counts.sum()
        entropy_per_cluster[cid] = entropy(probs)
    return entropy_per_cluster

# ====== Main Evaluation Runner ======
def evaluate_clustering(labels, commands, method_name, sim_matrix, output_dir="results"):
    start = time.time()
    
    sil_score = compute_silhouette(sim_matrix, labels)
    sizes = cluster_size_stats(labels)
    intra_sim = intra_cluster_similarity(sim_matrix, labels)
    entropy_vals = compute_entropy_per_cluster(labels, commands)
    
    duration = time.time() - start
    
    print(f"[✓] {method_name} done in {duration:.2f}s")
    
    # Save results
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, f"{method_name}_summary.json"), "w") as f:
        json.dump({
            "silhouette": sil_score,
            "runtime_sec": duration,
            **sizes
        }, f, indent=2)

    pd.Series(intra_sim).to_csv(os.path.join(output_dir, f"{method_name}_intra_similarity.csv"))
    pd.Series(entropy_vals).to_csv(os.path.join(output_dir, f"{method_name}_entropy.csv"))

    # Optional: plot
    for name, series in [("intra_similarity", intra_sim), ("entropy", entropy_vals)]:
        sns.histplot(list(series.values()), bins=10)
        plt.title(f"{method_name} – {name.replace('_', ' ').title()}")
        plt.xlabel(name.replace('_', ' ').title())
        plt.ylabel("Cluster Count")
        plt.savefig(os.path.join(output_dir, f"{method_name}_{name}.png"))
        plt.clf()


In [127]:
def extract_leaf_labels(filtered_commands, ctree):
    """
    Assigns each filtered command to the leaf cluster it belongs to.
    Each `child` in the tree with `child_size == 1` is treated as a leaf cluster.

    Returns:
        labels: list[int], where labels[i] = cluster_id for command i
    """
    n = len(filtered_commands)
    labels = [-1] * n  # default to noise

    for parent, child, _, size in ctree:
        if size == 1 and child < n:  # child is a data point index
            labels[child] = child  # or assign parent if you want coarser granularity

    return labels


In [92]:
honeypot_type = "cowrie"
from_date    = "2025-01-01T00:00:00"  # ISO format, inclusive
to_date      = "2025-07-17T23:59:59"  # up to yesterday

In [54]:
clusters, ctree = run_clustering(honeypot_type,from_date,to_date,1000)  # from your main code

In [128]:
fishdbc_labels = extract_leaf_labels(filtered, ctree)

In [139]:
# from clustering.similarity import geometric_distance
from clustering.load_data import load_command_resources

# Load token-level similarity matrix
_, sim_matrix, _, _ = load_command_resources()

def compute_pairwise_distance_matrix(abstracts):
    n = len(abstracts)
    dist = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            d = geometric_distance1(abstracts[i], abstracts[j], sim_matrix)
            dist[i, j] = dist[j, i] = d
    return dist

In [140]:
distance_matrix = compute_pairwise_distance_matrix(abstracts)

In [141]:
len(fishdbc_labels)

1000

In [142]:
len(distance_matrix)

1000

In [143]:
has_negatives = np.any(distance_matrix < 0)

print("Matrix has negative values:", has_negatives)

Matrix has negative values: False


In [121]:
negatives = distance_matrix[distance_matrix < 0]
print("Number of negative entries:", len(negatives))
print("Most negative value:", negatives.min())


Number of negative entries: 0


ValueError: zero-size array to reduction operation minimum which has no identity

In [116]:
distance_matrix = np.clip(distance_matrix, 0.0, 1.0)


In [131]:
print(set(fishdbc_labels))


{np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29), np.int64(30), np.int64(31), np.int64(32), np.int64(33), np.int64(34), np.int64(35), np.int64(36), np.int64(37), np.int64(38), np.int64(39), np.int64(40), np.int64(41), np.int64(42), np.int64(43), np.int64(44), np.int64(45), np.int64(46), np.int64(47), np.int64(48), np.int64(49), np.int64(50), np.int64(51), np.int64(52), np.int64(53), np.int64(54), np.int64(55), np.int64(56), np.int64(57), np.int64(58), np.int64(59), np.int64(60), np.int64(61), np.int64(62), np.int64(63), np.int64(64), np.int64(65), np.int64(66), np.int64(67), np.int64(68), np.int64(69), np.int64(70), np.int64(71), n

In [133]:
print("Number of nodes in tree:", len(ctree))
print("Example rows:", ctree[:5])


Number of nodes in tree: 1020
Example rows: [(998, 987, 1., 1) (998, 983, 1., 1) (998, 941, 1., 1) (998, 932, 1., 1)
 (998, 924, 1., 1)]


In [136]:
for i, label in enumerate(labels):
    print(f"Command {i}: {filtered[i][1]} → Cluster {label}")


Command 0: /ip cloud print → Cluster -1
Command 1: uname -a → Cluster 0
Command 2: cat /proc/cpuinfo → Cluster 2
Command 3: ifconfig → Cluster 0
Command 4: ps -ef | grep '[Mm]iner' → Cluster -1
Command 5: echo Hi | cat -n → Cluster 1
Command 6: /ip cloud print → Cluster -1
Command 7: ls -la /dev/ttyGSM* /dev/ttyUSB-mod* /var/spool/sms/* /var/log/smsd.log /etc/smsd.conf* /usr/bin/qmuxd /var/qmux_connect_socket /etc/config/simman /dev/modem* /var/config/sms/* → Cluster 2
Command 8: ps | grep '[Mm]iner' → Cluster 0
Command 9: cd ~; chattr -ia .ssh; lockr -ia .ssh → Cluster -1
Command 10: cat /proc/cpuinfo | grep name | wc -l → Cluster 2
Command 11: rm -rf /tmp/secure.sh; rm -rf /tmp/auth.sh; pkill -9 secure.sh; pkill -9 auth.sh; echo > /etc/hosts.deny; pkill -9 sleep; → Cluster 2
Command 12: pkill -9 sleep → Cluster 0
Command 13: pkill -9 secure.sh → Cluster 0
Command 14: pkill -9 auth.sh → Cluster 0
Command 15: lockr -ia .ssh → Cluster -1
Command 16: free -m | grep Mem | awk '{print $2 ,

IndexError: list index out of range

In [122]:
distance_matrix

array([[0.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 0.        , 1.        , ..., 0.47212614, 1.        ,
        0.        ],
       [1.        , 1.        , 0.        , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [1.        , 0.47212614, 1.        , ..., 0.        , 1.        ,
        0.47212614],
       [1.        , 1.        , 1.        , ..., 1.        , 0.        ,
        1.        ],
       [1.        , 0.        , 1.        , ..., 0.47212614, 1.        ,
        0.        ]], shape=(1000, 1000))

In [144]:
from sklearn.metrics import silhouette_score
score = silhouette_score(distance_matrix, fishdbc_labels, metric="precomputed")
print("Silhouette Score:", score)

Silhouette Score: -0.0010009999990010002


In [61]:
# hdbscan_sem_labels = np.load("labels/hdbscan_sem_labels.npy")
# hdbscan_lex_labels = np.load("labels/hdbscan_lex_labels.npy")

# Run evaluation per pipeline
evaluate_clustering(fishdbc_labels, abstracts, "FISHDBC_semantic", similarity_matrix)
# evaluate_clustering(hdbscan_sem_labels, commands, "HDBSCAN_semantic", similarity_matrix)
# evaluate_clustering(hdbscan_lex_labels, commands, "HDBSCAN_lexical", similarity_matrix)  # optional if aligned

ValueError: Found input variables with inconsistent numbers of samples: [178, 10000]