<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/%5B20251126%5D%20Confusion%20matrices%20of%20RF-LSTM%20from%20your%20CSV%20files%20on%203%20datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
    import csv
    import numpy as np

    def read_confusion_matrix(filepath):
        """Load a confusion matrix of integer counts from a CSV file.

        The first line of the file is treated as a header and skipped. For
        every remaining line, a trailing cell that cannot be parsed as an
        integer (for example a class-name column) is dropped, and the rest
        of the cells are converted with ``int``. Blank lines are ignored.

        Args:
            filepath: Path of the CSV file to read.

        Returns:
            A NumPy array with one row per non-blank data line. The array is
            empty when the file has no data lines.

        Raises:
            ValueError: If a cell other than the trailing one is not an
                integer.
        """
        def _is_int(cell):
            # Use the same parser as the conversion below, so that cells such
            # as " 12" or "-3" (valid for int(), but rejected by
            # str.isdigit()) are not mistaken for a trailing label.
            try:
                int(cell)
            except ValueError:
                return False
            return True

        # newline='' is what the csv module asks for when opening files.
        with open(filepath, 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header (tolerate an empty file)
            data = []
            for row in reader:
                if not row:
                    # Blank line: appending [] would make the array ragged.
                    continue
                # Drop a trailing non-numeric cell (e.g. a label column).
                if not _is_int(row[-1]):
                    row = row[:-1]
                if not row:
                    continue
                # Convert the remaining cells to integers.
                data.append([int(x) for x in row])
        return np.array(data)

In [None]:
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np
    import csv

    # Draw the three RF-LSTM confusion matrices side by side as grayscale
    # heatmaps. Cell shading is the row-normalized value; each cell is
    # annotated with "raw count (normalized value)". The figure background
    # stays white; only the colormap is grayscale.

    # Read the matrices
    cm_wsnbfsf = read_confusion_matrix('cm_rf_lstm_wsnbfsf.csv')
    cm_wsnds = read_confusion_matrix('cm_rf_lstm_wsnds.csv')
    cm_unsw = read_confusion_matrix('cm_rf_lstm_unsw.csv')

    # Class labels (must be listed in the same order as the matrix indices)
    classes_wsnbfsf = ['Normal', 'Blackhole', 'Forwarding', 'Flooding']
    classes_wsnds = ['Normal', 'Blackhole', 'Flooding', 'Grayhole', 'TDMA']
    classes_unsw = ['Normal', 'Analysis', 'Backdoor', 'DoS', 'Exploits', 'Fuzzers', 'Generic', 'Recon', 'Shellcode', 'Worms']

    # Create the figure; the last panel is wider because it has the most classes
    fig, axes = plt.subplots(1, 3, figsize=(20, 5), gridspec_kw={'width_ratios': [6, 6, 10]})

    # One (matrix, labels, title) entry per panel, left to right
    datasets = [
        (cm_wsnbfsf, classes_wsnbfsf, 'WSN-BFSF'),
        (cm_wsnds, classes_wsnds, 'WSN-DS'),
        (cm_unsw, classes_unsw, 'UNSW-NB15')
    ]

    # Common color scale so shades are comparable across panels and match
    # the shared colorbar
    vmin, vmax = 0, 1

    for ax, (raw_cm, classes, title) in zip(axes, datasets):
        n = len(classes)
        if raw_cm.shape != (n, n):
            raise ValueError(
                f"{title}: confusion matrix has shape {raw_cm.shape}, "
                f"expected {(n, n)} for {n} class labels"
            )

        # Normalize the confusion matrix by row. Rows whose sum is zero are
        # left as all zeros instead of being divided by zero.
        row_sums = raw_cm.sum(axis=1, keepdims=True)
        normalized_cm = np.divide(
            raw_cm,
            row_sums,
            out=np.zeros(raw_cm.shape, dtype=float),
            where=row_sums != 0,
        )

        # Cell annotations in the form: raw (normalized)
        annotations = [
            [f"{raw_cm[i, j]}\n({normalized_cm[i, j]:.4f})" for j in range(n)]
            for i in range(n)
        ]

        # Plot the heatmap; fmt='' because the annotations are already strings
        sns.heatmap(normalized_cm, annot=annotations, fmt='', cmap='gray_r', cbar=False,
                    xticklabels=classes, yticklabels=classes, ax=ax, vmin=vmin, vmax=vmax,
                    linewidths=0.5, linecolor='gray')
        ax.set_title(title)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")

    # Adjust the layout to make space for the colorbar
    plt.subplots_adjust(right=0.9)

    # Add a single colorbar, shared by all panels, in its own axes on the right
    cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
    sm = plt.cm.ScalarMappable(cmap='gray_r', norm=plt.Normalize(vmin=vmin, vmax=vmax))
    sm.set_array([])
    cbar = fig.colorbar(sm, cax=cbar_ax)
    cbar.set_label('Normalized Value (Recall)')

    # Save the figure
    plt.savefig('confusion_matrices.png', dpi=450, bbox_inches='tight')
    plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---------------------------------------------------
# 1) Load confusion matrices from your CSV files
# ---------------------------------------------------
# Using the custom function to correctly read confusion matrices
# Read the three matrices in a fixed order: WSN-BFSF, WSN-DS, UNSW-NB15.
cm_wsnbfsf, cm_wsnds, cm_unsw = (
    read_confusion_matrix(_csv_path)
    for _csv_path in (
        "cm_rf_lstm_wsnbfsf.csv",
        "cm_rf_lstm_wsnds.csv",
        "cm_rf_lstm_unsw.csv",
    )
)

# Sanity check: print the shape of every loaded matrix
for _tag, _matrix in (
    ("WSN-BFSF CM shape:", cm_wsnbfsf),
    ("WSN-DS   CM shape:", cm_wsnds),
    ("UNSW     CM shape:", cm_unsw),
):
    print(_tag, _matrix.shape)

# ---------------------------------------------------
# 2) Class labels, listed in matrix index order
# ---------------------------------------------------
labels_wsnbfsf = [
    "Normal",
    "Blackhole",
    "Forwarding",
    "Flooding",
]
labels_wsnds = [
    "Normal",
    "Blackhole",
    "Flooding",
    "Grayhole",
    "TDMA",
]
labels_unsw = [
    "Normal",
    "Analysis",
    "Backdoor",
    "DoS",
    "Exploits",
    "Fuzzers",
    "Generic",
    "Recon.",
    "Shellcode",
    "Worms",
]

# ---------------------------------------------------
# 3) Helper: row-normalize + plot confusion matrix
# ---------------------------------------------------
def normalize_rows(cm):
    """Return ``cm`` with every row divided by that row's sum.

    Rows that sum to zero are divided by 1 instead, so they come back as
    all zeros rather than NaN. The input array is not modified.
    """
    totals = cm.sum(axis=1, keepdims=True)
    # Substitute 1 for zero totals so the division below is always defined.
    divisors = np.where(totals == 0, 1, totals)
    return cm / divisors

def plot_confusion_matrix(ax, cm, class_names, title, show_colorbar=False):
    """Draw a row-normalized confusion matrix as a grayscale image on ``ax``.

    Each cell is shaded by its row-normalized value and annotated with that
    value expressed as a percentage with one decimal place.

    Args:
        ax: Matplotlib axes to draw on.
        cm: Raw confusion matrix (counts); it is row-normalized with
            ``normalize_rows`` before plotting.
        class_names: List of labels, used for both axes' tick labels. Its
            length sets how many rows and columns are annotated.
        title: Subplot title.
        show_colorbar: If True, attach a colorbar for this panel to the
            figure that owns ``ax``.
    """
    cm_norm = normalize_rows(cm)  # values in [0,1]

    # Pin the color scale to the full 0..1 range. Without vmin/vmax each
    # panel would be autoscaled to its own min/max, so the same shade could
    # mean different values in different panels.
    im = ax.imshow(
        cm_norm, interpolation="nearest", cmap="Greys", vmin=0.0, vmax=1.0
    )

    # Optional colorbar (attach only once outside if needed). Use the axes'
    # own figure rather than whichever figure pyplot considers current.
    if show_colorbar:
        ax.figure.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

    ax.set_title(title, fontsize=9)
    ax.set_xlabel("Predicted class", fontsize=8)
    ax.set_ylabel("True class", fontsize=8)

    num_classes = len(class_names)
    ax.set_xticks(np.arange(num_classes))
    ax.set_yticks(np.arange(num_classes))
    ax.set_xticklabels(class_names, rotation=45, ha="right", fontsize=7)
    ax.set_yticklabels(class_names, fontsize=7)

    # Annotate with percentages; panels with more than 6 classes get
    # slightly smaller text.
    cell_fontsize = 6 if num_classes > 6 else 7
    for i in range(num_classes):
        for j in range(num_classes):
            fraction = cm_norm[i, j]
            value = fraction * 100.0
            text = f"{value:.1f}"
            ax.text(
                j, i, text,
                ha="center", va="center",
                # "Greys" maps high values to near-black, so default black
                # text would be unreadable on the darkest cells.
                color="white" if fraction > 0.5 else "black",
                fontsize=cell_fontsize
            )

    ax.set_xlim(-0.5, num_classes - 0.5)
    ax.set_ylim(num_classes - 0.5, -0.5)  # keep origin at top-left
    ax.grid(False)

# ---------------------------------------------------
# 4) Create a 3-panel ACM-style grayscale figure
# ---------------------------------------------------
fig, axes = plt.subplots(1, 3, figsize=(9.0, 3.0))

# One (matrix, labels, title) entry per panel, in left-to-right order.
_panels = (
    (cm_wsnds, labels_wsnds, "(a) WSN-DS (5 classes)"),
    (cm_wsnbfsf, labels_wsnbfsf, "(b) WSN-BFSF (4 classes)"),
    (cm_unsw, labels_unsw, "(c) UNSW-NB15 (10 classes)"),
)

for _panel_ax, (_panel_cm, _panel_labels, _panel_title) in zip(axes, _panels):
    plot_confusion_matrix(
        _panel_ax,
        _panel_cm,
        _panel_labels,
        title=_panel_title,
        show_colorbar=False,
    )

# Tighten the layout, write the PNG to disk, then display the figure.
plt.tight_layout()
plt.savefig("cm_rf_lstm_all_datasets_acm.png", dpi=300, bbox_inches="tight")
plt.show()