In [2]:
# %%
# ====================================================================
# FINAL SETUP CELL: Imports, Corrected Classes, and Data Loading
# ====================================================================

import os
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings

# --- TensorFlow and Keras ---
try:
    from tensorflow._api.v2.v2 import keras
except ImportError:
    from tensorflow import keras
import tensorflow as tf
from keras import layers
from keras.layers import Dense, Conv1D, Dropout, Add, LayerNormalization, MultiHeadAttention

# --- Your Project's Custom Framework Components ---
from framework.dataset_specification import NamedDatasetSpecifications
from framework.enumerations import EvaluationDatasetSampling, CategoricalFormat
from framework.flow_transformer import FlowTransformer
from framework.flow_transformer_parameters import FlowTransformerParameters
from framework.framework_component import FunctionalComponent
from implementations.classification_heads import *
from implementations.input_encodings import *
from implementations.pre_processings import StandardPreProcessing
# Note: We are NOT importing BasicTransformer from the file anymore

def print_header(title):
    """Print *title* centred inside a 60-column banner.

    The banner is a blank line, a rule of ``=`` characters, the centred
    title, and a closing rule.
    """
    rule = "=" * 60
    print(f"\n{rule}\n{title:^60}\n{rule}")

# ===============================================================================
# CORRECTED CLASS DEFINITIONS - Defined directly in the notebook
# ===============================================================================

class TransformerEncoderBlock(layers.Layer):
    """Self-attention encoder block with a two-layer feed-forward network.

    The block applies multi-head self-attention, dropout, a residual add and
    layer normalisation, followed by a feed-forward pair (Dense or 1x1 Conv1D),
    dropout, a second residual add and a second layer normalisation.

    Args:
        input_dimension: Width of the second feed-forward layer. Because its
            output is added back onto the block input, this must match the
            last dimension of the tensor passed to ``call``.
        inner_dimension: Used both as the attention ``key_dim`` and as the
            width of the first feed-forward layer.
        num_heads: Number of attention heads.
        dropout_rate: Rate shared by both dropout layers.
        use_conv: When true the feed-forward pair is built from ``Conv1D``
            layers with ``kernel_size=1`` instead of ``Dense`` layers.
        attn_implementation: Stored on the instance and round-tripped through
            ``get_config``; nothing else in this class reads it.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, input_dimension, inner_dimension, num_heads, dropout_rate=0.1, use_conv=False, attn_implementation="Keras", **kwargs):
        super().__init__(**kwargs)
        self.input_dimension = input_dimension
        self.inner_dimension = inner_dimension
        self.num_heads = num_heads
        self.dropout_rate = dropout_rate
        self.use_conv = use_conv
        self.attn_implementation = attn_implementation

        # Attention sub-block.
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=inner_dimension, name="multi_head_attention")
        self.dropout_1 = Dropout(dropout_rate)
        self.add_1 = Add()
        self.layer_norm_1 = LayerNormalization(epsilon=1e-6)

        # Feed-forward sub-block: expand to inner_dimension, project back to
        # input_dimension so the residual add below is shape-compatible.
        if use_conv:
            self.feed_forward_1 = Conv1D(filters=inner_dimension, kernel_size=1, activation="relu")
            self.feed_forward_2 = Conv1D(filters=input_dimension, kernel_size=1)
        else:
            self.feed_forward_1 = Dense(inner_dimension, activation="relu")
            self.feed_forward_2 = Dense(input_dimension)

        self.dropout_2 = Dropout(dropout_rate)
        self.add_2 = Add()
        self.layer_norm_2 = LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Run the block on ``inputs`` and return a tensor of the same width."""
        x = inputs
        # Self-attention: query, key and value are all the block input.
        attn_output = self.attention(query=x, value=x, key=x)
        attn_output = self.dropout_1(attn_output)
        x = self.add_1([x, attn_output])
        x = self.layer_norm_1(x)
        ff_output = self.feed_forward_1(x)
        ff_output = self.feed_forward_2(ff_output)
        ff_output = self.dropout_2(ff_output)
        x = self.add_2([x, ff_output])
        x = self.layer_norm_2(x)
        return x

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Cloning and serialization (``clone_model``, model saving, the
        quantization wrappers) rebuild layers via ``from_config``; without an
        explicit config a layer with custom constructor arguments can only be
        rebuilt on Keras versions that auto-generate one.
        """
        config = super().get_config()
        config.update({
            "input_dimension": self.input_dimension,
            "inner_dimension": self.inner_dimension,
            "num_heads": self.num_heads,
            "dropout_rate": self.dropout_rate,
            "use_conv": self.use_conv,
            "attn_implementation": self.attn_implementation,
        })
        return config

class BasicTransformer(FunctionalComponent):
    """Stack of ``TransformerEncoderBlock`` layers applied one after another.

    Defined in the notebook so that it uses the ``TransformerEncoderBlock``
    declared here.

    Args:
        n_layers: Number of encoder blocks to stack.
        internal_size: Passed to each block as its ``inner_dimension``.
        n_heads: Number of attention heads per block.
        dropout_rate: Dropout rate passed to each block.
        use_conv: Passed to each block; selects Conv1D over Dense feed-forward.
        is_decoder: Decoder stacks are not implemented here; ``apply`` raises
            when this is true.
    """

    def __init__(self, n_layers, internal_size, n_heads=8, dropout_rate=0.1, use_conv=False, is_decoder=False):
        super().__init__()
        self.n_layers = n_layers
        self.internal_size = internal_size
        self.n_heads = n_heads
        self.dropout_rate = dropout_rate
        self.use_conv = use_conv
        self.is_decoder = is_decoder

    def apply(self, X: tf.Tensor, prefix: str = "") -> tf.Tensor:
        """Apply ``n_layers`` encoder blocks to ``X`` and return the result.

        Each block is named ``f"{prefix}block_{i}_"`` and is built with the
        last dimension of ``X`` as its ``input_dimension``.

        Raises:
            NotImplementedError: If ``is_decoder`` is true. Previously this
                path did nothing, so the input was returned without passing
                through any transformer layer.
        """
        if self.is_decoder:
            raise NotImplementedError(
                "BasicTransformer(is_decoder=True) is not supported: no "
                "TransformerDecoderBlock is defined alongside this class."
            )

        m_x = X
        # Feature width of the incoming tensor; each block projects back to it.
        real_size = m_x.shape[-1]
        for layer_i in range(self.n_layers):
            m_x = TransformerEncoderBlock(real_size, self.internal_size, self.n_heads, dropout_rate=self.dropout_rate, use_conv=self.use_conv, name=f"{prefix}block_{layer_i}_")(m_x)
        return m_x

# ===============================================================================

print_header("Loading Config and Recreating Framework")

# --- Load Model Config ---
models_dir = "saved_models"
model_name = "FlowTransformer_BERT_CSE_CIC_IDS_ws8_bs128_20250722_143415"
config_path = os.path.join(models_dir, f"{model_name}_config.json")
# JSON is UTF-8 by specification; be explicit so the platform default
# encoding (e.g. cp1252 on Windows) is never used to decode it.
with open(config_path, 'r', encoding='utf-8') as f:
    config = json.load(f)

# --- Recreate the FlowTransformer Instance ---
model_config = config['model_config']
dataset_config = config['dataset']
# Lookup tables from the component names stored in the config to instances.
# NOTE(review): the BasicTransformer hyper-parameters (2 layers, size 128,
# 2 heads) are hard-coded here rather than read from the config — confirm
# they match the architecture the saved model was trained with.
all_components = {
    "input_encoding": {"NoInputEncoder": NoInputEncoder()},
    "sequential_model": {"BasicTransformer": BasicTransformer(2, 128, n_heads=2)},
    "classification_head": {"LastTokenClassificationHead": LastTokenClassificationHead()},
}
dataset_spec_map = { "CSE_CIC_IDS": NamedDatasetSpecifications.unified_flow_format }

ft = FlowTransformer(
    pre_processing=StandardPreProcessing(n_categorical_levels=32),
    input_encoding=all_components["input_encoding"][model_config['input_encoding']],
    sequential_model=all_components["sequential_model"][model_config['sequential_model']],
    classification_head=all_components["classification_head"][model_config['classification_head']],
    params=FlowTransformerParameters(
        window_size=model_config['window_size'],
        mlp_layer_sizes=model_config['mlp_layer_sizes'],
        mlp_dropout=model_config['mlp_dropout']
    )
)

print("Loading dataset via the FlowTransformer framework...")
ft.load_dataset(
    dataset_config['name'], "datasets.csv", dataset_spec_map[dataset_config['name']],
    evaluation_dataset_sampling=EvaluationDatasetSampling.LastRows, evaluation_percent=0.2
)
print("Dataset loaded and processed by the framework.")

# --- Build Model ---
print_header("Building Model with Corrected Classes")
fp32_model_tf = ft.build_model()
print("Model built successfully!")
fp32_model_tf.summary()


          Loading Config and Recreating Framework           
Loading dataset via the FlowTransformer framework...
Using cache file path: cache\CSE_CIC_IDS_0_QdLmZHuh8yOmlGcKBEkf7hepImY0_A6N00gtYIhwW1x05bzV0RseOHrU0.feather
Reading directly from cache cache\CSE_CIC_IDS_0_QdLmZHuh8yOmlGcKBEkf7hepImY0_A6N00gtYIhwW1x05bzV0RseOHrU0.feather...
Dataset loaded and processed by the framework.

           Building Model with Corrected Classes            
Model built successfully!
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_NUM_PKTS_UP_TO_128_BYTES  [(None, 8, 1)]      0           []                               
  (InputLayer)                                                                                    
                                                                                                  
 input_SRC_T

In [3]:
# %%
# ====================================================================
# Step 2: Prepare Data and Establish FP32 Baseline (Corrected)
# ====================================================================
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score, accuracy_score

print_header("Preparing Evaluation Data")

# --- This function extracts the processed data for evaluation ---
def prepare_eval_data(flow_transformer_instance):
    """Build per-feature evaluation windows and binary labels from a loaded framework.

    Rows are eligible when they are not flagged in ``training_mask`` and lie at
    least ``window_size`` rows away from either end of ``X``. For each eligible
    row ``i`` the window is the ``window_size`` rows ending at ``i``.

    Returns:
        A tuple ``(eval_X_list, eval_y)``: one 3-D array per entry of
        ``model_input_spec.feature_names`` (windows first, then window rows,
        then feature columns), and the labels for the eligible rows with
        1 meaning "not the benign label".
    """
    ft = flow_transformer_instance
    window = ft.parameters.window_size
    spec = ft.model_input_spec

    # Exclude the first and last `window` rows, then keep non-training rows.
    eligible = np.zeros(len(ft.X), dtype=bool)
    eligible[window:-window] = True
    indices_test = np.argwhere(~ft.training_mask & eligible).reshape(-1)

    # One DataFrame slice per evaluation row: the `window` rows ending at it.
    eval_windows = []
    for row in indices_test:
        first = (row - window) + 1
        eval_windows.append(ft.X.iloc[first:row + 1])

    steps = len(eval_windows[0])
    stacked = pd.concat(eval_windows)

    def columns_for(feature):
        # Numeric features (and integer-coded categoricals) are a single
        # column; otherwise collect every column whose name starts with the
        # feature name.
        if feature in spec.numeric_feature_names or spec.categorical_format == CategoricalFormat.Integers:
            return feature
        return [c for c in eval_windows[0].columns if str(c).startswith(feature)]

    eval_X_list = []
    for feature in spec.feature_names:
        flat = stacked[columns_for(feature)].values
        # Cut the stacked rows back into consecutive windows of `steps` rows.
        per_window = np.array([flat[lo:lo + steps] for lo in range(0, len(flat), steps)])

        # Single-column features come out 2-D; add a trailing axis so every
        # array is 3-D.
        if per_window.ndim == 2:
            per_window = np.expand_dims(per_window, axis=2)

        eval_X_list.append(per_window)

    benign = str(ft.dataset_specification.benign_label)
    is_benign = ft.y.astype('str') == benign
    eval_y = (~is_benign)[indices_test].astype(int)

    return eval_X_list, eval_y

# Extract the evaluation windows (one array per model input) and 0/1 labels.
X_processed_list, y_processed = prepare_eval_data(ft)
print(f"Extracted processed data for evaluation: {len(X_processed_list)} feature arrays, {len(y_processed)} labels.")

print_header("FP32 Model Baseline Evaluation")

# NOTE(review): in the cells visible here `fp32_model_tf` comes straight from
# `ft.build_model()` and no weights are loaded into it before this predict
# call — confirm the baseline is not being measured on an untrained model.
y_pred_probs = fp32_model_tf.predict(X_processed_list)
# Threshold the predicted probabilities at 0.5 to get hard 0/1 predictions.
y_pred_classes = (y_pred_probs > 0.5).astype(int)

# Metrics are computed with scikit-learn defaults; label 1 is the
# non-benign class produced by prepare_eval_data.
accuracy = accuracy_score(y_processed, y_pred_classes)
f1 = f1_score(y_processed, y_pred_classes)
precision = precision_score(y_processed, y_pred_classes)
recall = recall_score(y_processed, y_pred_classes)

print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f} (Malicious Class)")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print("\n" + "="*25)

print("\nClassification Report:")
print(classification_report(y_processed, y_pred_classes, target_names=['Benign', 'Malicious']))

# `results` accumulates one entry per model variant; later cells add to it.
results = {}
results['FP32'] = {
    'Accuracy': accuracy,
    'F1 Score': f1,
    'Precision': precision,
    'Recall': recall,
    'Params': fp32_model_tf.count_params()
}


                 Preparing Evaluation Data                  
Extracted processed data for evaluation: 37 feature arrays, 99992 labels.

               FP32 Model Baseline Evaluation               
Accuracy:  0.8697
F1 Score:  0.1048 (Malicious Class)
Precision: 0.2984
Recall:    0.0636


Classification Report:
              precision    recall  f1-score   support

      Benign       0.88      0.98      0.93     87991
   Malicious       0.30      0.06      0.10     12001

    accuracy                           0.87     99992
   macro avg       0.59      0.52      0.52     99992
weighted avg       0.81      0.87      0.83     99992



In [None]:
# %%
# ====================================================================
# Step 3: Post-Training Quantization (PTQ) - Final Version
# ====================================================================

print_header("Performing Post-Training Quantization")

# --- 1. Create a simpler, single-input model for robust quantization ---
# Concatenate the per-feature arrays along the feature axis so the rebuilt
# model takes one tensor instead of one input per feature.
X_processed_single_array = np.concatenate(X_processed_list, axis=2)
input_shape = X_processed_single_array.shape[1:]
new_input = tf.keras.Input(shape=input_shape, name="single_input")

# Re-wire the trained layers of the FP32 model onto the new single input.
x = fp32_model_tf.get_layer('block_0_')(new_input)
x = fp32_model_tf.get_layer('block_1_')(x)
x = fp32_model_tf.get_layer('slice_last')(x)
x = fp32_model_tf.get_layer('classification_mlp_0_128')(x)
# The MLP dropout layer is intentionally not re-applied here. Its
# auto-generated name ('dropout_9' in one session) depends on how many
# Dropout layers were created earlier in the process, so looking it up by
# name fails after a kernel restart. Dropout is the identity at inference
# time, which is the only mode this conversion model is used in.
output = fp32_model_tf.get_layer('binary_classification_out')(x)
quant_friendly_model = tf.keras.Model(inputs=new_input, outputs=output)
print("Created a quantization-friendly model.")

# --- 2. Convert the model using a representative dataset ---
def representative_dataset_gen():
    """Yield up to 200 single-window float32 batches for calibration."""
    # Clamp to the data actually available so no empty batch is yielded.
    n_calibration = min(200, len(X_processed_single_array))
    for i in range(n_calibration):
        yield [X_processed_single_array[i:i+1].astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_keras_model(quant_friendly_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
quantized_ptq_model = converter.convert()

ptq_model_path = os.path.join(models_dir, "quantized_ptq_model.tflite")
with open(ptq_model_path, 'wb') as f:
    f.write(quantized_ptq_model)
print("PTQ model converted and saved successfully.")

# --- 3. SKIPPING full evaluation due to TFLite interpreter performance issues ---
print_header("PTQ Model Evaluation")
print("Evaluation is being skipped for PTQ due to a severe performance bug in the TFLite interpreter for this model.")
print("Proceeding to QAT, which is the primary goal.")

# --- 4. Compare file sizes ---
fp32_model_path = os.path.join(models_dir, "fp32_model.keras")
# NOTE(review): an existing file is reused as-is, so the reported FP32 size
# may come from an earlier run — delete the file to force a fresh save.
if not os.path.exists(fp32_model_path):
    fp32_model_tf.save(fp32_model_path)
fp32_size = os.path.getsize(fp32_model_path) / (1024*1024)
ptq_size = os.path.getsize(ptq_model_path) / (1024*1024)

print_header("Model Size Comparison")
print(f"FP32 Model Size: {fp32_size:.2f} MB")
print(f"PTQ INT8 Model Size: {ptq_size:.2f} MB")
print(f"Size Reduction: {(1 - ptq_size / fp32_size) * 100:.2f}%")

# Store results, noting that accuracy metrics are not available for PTQ
results['PTQ'] = {'F1 Score': 'N/A', 'Precision': 'N/A', 'Recall': 'N/A', 'Size (MB)': ptq_size}
results['FP32']['Size (MB)'] = fp32_size


           Performing Post-Training Quantization            
Created a quantization-friendly model.




INFO:tensorflow:Assets written to: C:\Users\maila\AppData\Local\Temp\tmpztkkducx\assets


INFO:tensorflow:Assets written to: C:\Users\maila\AppData\Local\Temp\tmpztkkducx\assets


PTQ model converted and saved successfully.

                    PTQ Model Evaluation                    


In [6]:
# %%
# ====================================================================
# Step 4: Quantization-Aware Training (QAT)
# ====================================================================
import tensorflow_model_optimization as tfmot

print_header("Applying Quantization-Aware Training (QAT)")

# --- 1. Create a "Quantization Recipe" for the custom TransformerEncoderBlock ---
class DefaultQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    """Quantize config that only quantizes the wrapped layer's output.

    No weights or activations are registered for quantization; a single
    8-bit moving-average quantizer is attached to the layer output.
    """

    def get_weights_and_quantizers(self, layer):
        """Register no weights for quantization."""
        return []

    def get_activations_and_quantizers(self, layer):
        """Register no activations for quantization."""
        return []

    def set_quantize_weights(self, layer, quantize_weights):
        """Nothing to set because no weights are registered."""
        pass

    def set_quantize_activations(self, layer, quantize_activations):
        """Nothing to set because no activations are registered."""
        pass

    def get_output_quantizers(self, layer):
        """Return a fresh 8-bit, per-tensor, asymmetric output quantizer."""
        output_quantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
            num_bits=8,
            per_axis=False,
            symmetric=False,
            narrow_range=False,
        )
        return [output_quantizer]

    def get_config(self):
        """The config takes no constructor arguments, so there is nothing to serialize."""
        return {}

# --- 2. Define a function that applies the recipe to our custom layer ---
def apply_quantization_to_custom_layer(layer):
    """Annotate ``TransformerEncoderBlock`` layers for quantization.

    Any other layer is returned unchanged. Intended as a ``clone_function``
    for ``tf.keras.models.clone_model``.
    """
    if not isinstance(layer, TransformerEncoderBlock):
        return layer
    quantize_config = DefaultQuantizeConfig()
    return tfmot.quantization.keras.quantize_annotate_layer(layer, quantize_config)

# --- 3. Create the Quantization-Aware model ---
# We clone the original model, applying our custom function to every layer.
# Clone the FP32 model; the clone function wraps each TransformerEncoderBlock
# in a quantize annotation and passes every other layer through unchanged.
annotated_model = tf.keras.models.clone_model(
    fp32_model_tf,
    clone_function=apply_quantization_to_custom_layer,
)

# Apply quantization to the annotated model within a custom object scope.
# The scope lets Keras resolve the notebook-defined classes by name.
with tf.keras.utils.custom_object_scope({'DefaultQuantizeConfig': DefaultQuantizeConfig, 'TransformerEncoderBlock': TransformerEncoderBlock}):
    quant_aware_model = tfmot.quantization.keras.quantize_apply(annotated_model)

# --- 4. Compile and Fine-Tune the QAT Model ---
quant_aware_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])
print("QAT model created and compiled successfully.")

print_header("Fine-Tuning QAT Model")
# NOTE(review): X_processed_list / y_processed are the arrays returned by
# prepare_eval_data (rows where training_mask is False), and the same arrays
# are passed to predict() below. The QAT metrics are therefore measured on
# data the model was just fitted on — fine-tune on the training split, or
# hold out part of this data, before comparing against the FP32 baseline.
quant_aware_model.fit(X_processed_list, y_processed, batch_size=64, epochs=1, validation_split=0.1, verbose=1)
print("QAT model fine-tuning complete.")

# --- 5. Evaluate the QAT Model ---
print_header("QAT Model Evaluation")
y_pred_probs_qat = quant_aware_model.predict(X_processed_list)
# Threshold the predicted probabilities at 0.5 to get hard 0/1 predictions.
y_pred_classes_qat = (y_pred_probs_qat > 0.5).astype(int)

f1_qat = f1_score(y_processed, y_pred_classes_qat)
precision_qat = precision_score(y_processed, y_pred_classes_qat)
recall_qat = recall_score(y_processed, y_pred_classes_qat)

print(f"\nQAT Model F1 Score (Malicious): {f1_qat:.4f}")
print(classification_report(y_processed, y_pred_classes_qat, target_names=['Benign', 'Malicious']))

# Record the QAT metrics next to the other variants in the shared results dict.
results['QAT'] = {'F1 Score': f1_qat, 'Precision': precision_qat, 'Recall': recall_qat}


         Applying Quantization-Aware Training (QAT)         
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
QAT model created and compiled successfully.

                   Fine-Tuning QAT Model                    
QAT model fine-tuning complete.

                    QAT Model Evaluation                    

QAT Model F1 Score (Malicious): 0.9636
              precision    recall  f1-score   support

      Benign       0.99      1.00      1.00     87991
   Malicious       0.98      0.95      0.96     12001

    accuracy                           0.99     99992
   macro avg       0.98      0.97      0.98     99992
weighted avg       0.99      0.99      0.99     99992



In [17]:
# %%
# ====================================================================
# Step 5: Final Comparison and ONNX Export (Corrected)
# ====================================================================

print_header("Converting Final QAT Model to INT8 TFLite")

# The TFLite converter can directly handle the QAT model.
# The custom_object_scope is essential for this conversion.
custom_objects = {
    'DefaultQuantizeConfig': DefaultQuantizeConfig, 
    'TransformerEncoderBlock': TransformerEncoderBlock
}
with tf.keras.utils.custom_object_scope(custom_objects):
    converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
qat_tflite_model = converter.convert()

qat_model_path = os.path.join(models_dir, "quantized_qat_model.tflite")
with open(qat_model_path, 'wb') as f: f.write(qat_tflite_model)
print(f"QAT TFLite model saved to: {qat_model_path}")
results['QAT']['Size (MB)'] = os.path.getsize(qat_model_path) / (1024*1024)

# --- Final Results Summary ---
print_header("Final Results Summary")
summary_df = pd.DataFrame(results).T

for col in ['F1 Score', 'Precision', 'Recall', 'Size (MB)']:
    if col in summary_df.columns:
        summary_df[col] = pd.to_numeric(summary_df[col], errors='coerce')

summary_df['F1 Drop'] = summary_df['F1 Score'] - results['FP32']['F1 Score']
print(summary_df[['F1 Score', 'F1 Drop', 'Precision', 'Recall', 'Size (MB)']])

# --- Export to ONNX ---
print_header("Exporting Best Model to ONNX")
try:
    import tf2onnx
except ImportError:
    !pip install -U tf2onnx
    import tf2onnx

onnx_model_path = os.path.join(models_dir, "final_int8_model.onnx")

# Convert the .tflite model to ONNX. This is the most robust conversion path.
!python -m tf2onnx.convert --tflite "{qat_model_path}" --output "{onnx_model_path}" --opset 13

print(f"\nSuccessfully exported the final INT8 model to: {onnx_model_path}")
print("Your task is now complete.")


         Converting Final QAT Model to INT8 TFLite          




INFO:tensorflow:Assets written to: C:\Users\maila\AppData\Local\Temp\tmpr14rb806\assets


INFO:tensorflow:Assets written to: C:\Users\maila\AppData\Local\Temp\tmpr14rb806\assets


QAT TFLite model saved to: saved_models\quantized_qat_model.tflite

                   Final Results Summary                    
      F1 Score   F1 Drop  Precision    Recall  Size (MB)
FP32  0.104822  0.000000   0.298397  0.063578        NaN
PTQ        NaN       NaN        NaN       NaN        NaN
QAT   0.963576  0.858754   0.976309  0.951171   3.031448

                Exporting Best Model to ONNX                


  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.11.0 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.


Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl.metadata (1.3 kB)
Collecting onnx>=1.4.1 (from tf2onnx)
  Downloading onnx-1.18.0-cp310-cp310-win_amd64.whl.metadata (7.0 kB)
Collecting protobuf~=3.20 (from tf2onnx)
  Downloading protobuf-3.20.3-cp310-cp310-win_amd64.whl.metadata (698 bytes)
INFO: pip is looking at multiple versions of onnx to determine which version is compatible with other requirements. This could take a while.
Collecting onnx>=1.4.1 (from tf2onnx)
  Downloading onnx-1.17.0-cp310-cp310-win_amd64.whl.metadata (16 kB)
Downloading tf2onnx-1.16.1-py3-none-any.whl (455 kB)
Downloading protobuf-3.20.3-cp310-cp310-win_amd64.whl (904 kB)
   ---------------------------------------- 0.0/904.0 kB ? eta -:--:--
   ----------- ---------------------------- 262.1/904.0 kB ? eta -:--:--
   ---------------------------------- ----- 786.4/904.0 kB 1.8 MB/s eta 0:00:01
   ---------------------------------------- 904.0/904.0 kB 1.6 MB/s eta 0:00:00
Downloading onnx-1.17.0

2025-07-23 22:31:11,567 - INFO - Using tensorflow=2.11.0, onnx=1.17.0, tf2onnx=1.16.1/15c810
2025-07-23 22:31:11,567 - INFO - Using opset <onnx, 13>
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
2025-07-23 22:31:11,779 - INFO - Optimizing ONNX model
2025-07-23 22:31:12,795 - INFO - After optimization: Cast -12 (20->8), Concat -4 (9->5), Const -183 (238->55), DequantizeLinear -38 (41->3), Gather -4 (8->4), GlobalAveragePool +8 (0->8), Identity -7 (7->0), QuantizeLinear -2 (41->39), ReduceMean -8 (8->0), ReduceProd -8 (8->0), Transpose -6 (16->10), Unsqueeze -8 (8->0)
2025-07-23 22:31:12,812 - INFO - 
2025-07-23 22:31:12,812 - INFO - Successfully converted TensorFlow model saved_models\quantized_qat_model.tflite to ONNX
2025-07-23 22:31:12,812 - INFO - Model inputs: ['serving_default_input_RETRANSMITTED_OUT_PKTS:0', 'serving_default_input_TCP_WIN_MAX_IN:0', 'serving_default_input_FLOW_DURATION_MILLISECONDS:0', 'serving_default_input_DST_TO_SRC_SECOND_BYTES:0', 'serving_default_