In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef
import matplotlib.pyplot as plt
from keras.losses import mean_squared_error
from tensorflow.keras import layers, Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Layer
import tensorflow.keras.ops as ops

In [4]:
# Load the benign samples CSV (the path is relative to the notebook's working directory).
benign_samples = pd.read_csv("../../Dataset_prep/CNN+VAE/benign_samples.csv")

In [5]:
# Load the XSS payloads CSV (the path is relative to the notebook's working directory).
xss_payloads = pd.read_csv("../../Dataset_prep/CNN+VAE/xss_payloads.csv")

In [6]:
# Non-null count of the second column of `xss_payloads`.
# `count()` returns a Series indexed by column name, so an integer key
# (`count()[1]`) only works through a deprecated positional fallback;
# `.iloc[1]` states the positional lookup explicitly.
# NOTE(review): `for_split` is not referenced again in the visible notebook; the
# hard-coded 775974 used to truncate `benign_samples` may be meant to be this value — confirm.
for_split = xss_payloads.count().iloc[1]



In [7]:
benign_samples.head()



In [8]:
benign_samples.info()



In [9]:
# Keep only the first 775974 rows of the benign samples.
# NOTE(review): 775974 is a magic number — presumably chosen to match the size of the XSS set; confirm.
benign_samples = benign_samples.iloc[:775974]

In [10]:
xss_payloads.head()



In [11]:
# Drop the 'Unnamed: 0' column (presumably a row index written out with the CSV — confirm).
# Reassigning instead of using `inplace=True`, together with `errors='ignore'`,
# keeps this cell idempotent: re-running it once the column is gone is a no-op
# rather than a KeyError.
benign_samples = benign_samples.drop(columns=['Unnamed: 0'], errors='ignore')

In [12]:
benign_samples.head()



In [13]:
benign_samples.info()



In [14]:
xss_payloads.info()



In [15]:
# Stack the benign samples on top of the XSS payloads under a fresh 0..n-1
# index, then shuffle all rows with a fixed seed so the order is reproducible.
cnn_dataset = pd.concat(
    [benign_samples, xss_payloads], axis=0, ignore_index=True
).sample(frac=1, random_state=42)

In [16]:
cnn_dataset.info()



In [17]:
cnn_dataset.head()



In [18]:
def preprocess(texts, max_len=250, normalize=False):
    """Encode strings as fixed-length rows of character codes.

    Each text is truncated to ``max_len`` characters, every character is mapped
    to ``ord(c) % 256`` (so code points above 255 wrap into the 0-255 range),
    and the row is right-padded with zeros.

    Args:
        texts: Iterable of strings (list, pandas Series, ...). Missing entries
            (``None`` / NaN) are encoded as an empty string, i.e. an all-zero
            row; any other non-string value is converted with ``str()``.
        max_len: Number of positions per encoded row.
        normalize: If True, divide every code by 255.0 so values lie in [0, 1].

    Returns:
        A 2-D array of shape ``(len(texts), max_len)``: integer codes by
        default, floats when ``normalize`` is True. Empty input yields shape
        ``(0, max_len)`` rather than a 1-D empty array.
    """
    texts = list(texts)
    # Pre-allocated zeros provide the padding for free and keep the result 2-D
    # even when `texts` is empty.
    encoded = np.zeros((len(texts), max_len), dtype=int)
    for row, text in enumerate(texts):
        if not isinstance(text, str):
            # e.g. the NaN pandas produces for an empty CSV field.
            text = "" if text is None or pd.isna(text) else str(text)
        codes = [ord(c) % 256 for c in text[:max_len]]
        if codes:
            encoded[row, :len(codes)] = codes
    if normalize:
        return encoded / 255.0
    return encoded

In [19]:
cnn_dataset["payload"].shape



In [20]:
# Encode the shuffled payloads for the CNN: raw (un-normalized) character codes,
# 100 positions per sample, plus a trailing channel axis.
cnn_X = preprocess(cnn_dataset["payload"], max_len=100, normalize=False)
cnn_X = cnn_X.reshape(-1, 100, 1)
# Labels from the "xss" column as a column vector.
cnn_y = np.array(cnn_dataset["xss"]).reshape(-1, 1)

In [21]:
# Encode the benign samples only, with codes divided by 255.0 (normalize=True),
# as 100-position sequences with a trailing channel axis.
# NOTE(review): presumably the training input for the VAE named in the dataset path — confirm.
vae_X = preprocess(benign_samples["payload"], max_len=100, normalize=True).reshape(-1, 100, 1)

In [22]:
print(f"CNN dataset shape: {cnn_X.shape}, Labels shape: {cnn_y.shape}")
print(f"VAE dataset shape: {vae_X.shape}")



In [23]:
# Hold out 20% of the CNN samples as a test set (seeded for reproducibility).
cnn_X_train, cnn_X_test, cnn_y_train, cnn_y_test = train_test_split(
    cnn_X,
    cnn_y,
    test_size=0.2,
    random_state=42,
)

In [24]:
# Split the VAE data 80/20. Only the inputs are needed, so `vae_X` is passed
# once: the split depends only on the number of samples and the seed, so the
# partition is unchanged, but no discarded duplicate arrays are built and `_`
# (IPython's "last output" variable) is no longer rebound.
vae_X_train, vae_X_test = train_test_split(vae_X, test_size=0.2, random_state=42)

### Training the CNN (input length 100)

In [25]:
# Small 1-D CNN over sequences of 100 character codes with a single channel.
cnn_model = tf.keras.Sequential([
    # Explicit Input layer instead of passing `input_shape=` to the first layer,
    # which recent Keras versions warn about for Sequential models.
    tf.keras.layers.Input(shape=(100, 1)),
    tf.keras.layers.DepthwiseConv1D(5, depth_multiplier=8, activation='relu'),
    tf.keras.layers.Conv1D(16, 1, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

cnn_model.summary()
# Metrics are named explicitly: unnamed metric instances receive auto-numbered
# names ('precision_1', 'recall_1', ...) once more than one has been created in
# a kernel session, which would break the 'val_precision' / 'val_recall'
# history lookups after this cell is re-run.
cnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ],
)













In [26]:
# Stop once val_loss has gone 10 epochs without improving, restoring the best weights.
early_stopping_cnn = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
# NOTE(review): not referenced anywhere else in the visible notebook.
checkpoint_filepath_cnn = './'
# Only the checkpoint with the lowest val_loss is kept in "best_CNN.keras".
model_checkpoint_callback_cnn = tf.keras.callbacks.ModelCheckpoint(
    filepath="best_CNN.keras",
    save_best_only=True,
    monitor='val_loss',
    mode='min',
)

In [27]:
# Train for at most 100 epochs, holding out 20% of the training data for
# validation; the two callbacks handle early stopping and checkpointing.
history_cnn = cnn_model.fit(
    cnn_X_train,
    cnn_y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping_cnn, model_checkpoint_callback_cnn],
)



In [28]:
# Prefer the best checkpoint written during training. If it cannot be loaded,
# the cells below keep using the in-memory model — say so explicitly so the
# fallback is not mistaken for a successful reload.
try:
    cnn_model = tf.keras.models.load_model("best_CNN.keras")
except Exception as e:
    print(f"Could not load 'best_CNN.keras' ({e}); keeping the in-memory model.")

In [29]:
# Sigmoid outputs for every test sample, then hard 0/1 labels with a 0.5 cut-off.
y_pred_probs_cnn = cnn_model.predict(cnn_X_test)
y_pred_cnn = np.where(y_pred_probs_cnn > 0.5, 1, 0)



In [30]:
# Test-set metrics for the model evaluated in the previous cell.
precision_cnn = precision_score(cnn_y_test, y_pred_cnn)
recall_cnn = recall_score(cnn_y_test, y_pred_cnn)
f1_cnn = f1_score(cnn_y_test, y_pred_cnn)
roc_auc_cnn = roc_auc_score(cnn_y_test, y_pred_probs_cnn)
mcc_cnn = matthews_corrcoef(cnn_y_test, y_pred_cnn)

print("CNN Evaluation:")
print(f"Precision: {precision_cnn}")
print(f"Recall: {recall_cnn}")
print(f"F1-score: {f1_cnn}")
print(f"ROC AUC: {roc_auc_cnn}")
print(f"MCC: {mcc_cnn}")

# Training/validation figures are read at the epoch with the lowest val_loss —
# the epoch that the checkpoint (save_best_only on val_loss) kept. The last
# history entry can be up to `patience` epochs past that point and would not
# describe the evaluated weights.
best_epoch_cnn = int(np.argmin(history_cnn.history['val_loss']))
print(f"Best epoch (lowest val_loss): {best_epoch_cnn + 1}")
print(f"Training Accuracy: {history_cnn.history['accuracy'][best_epoch_cnn]}")
print(f"Training Loss: {history_cnn.history['loss'][best_epoch_cnn]}")
print(f"Validation Accuracy: {history_cnn.history['val_accuracy'][best_epoch_cnn]}")
print(f"Validation Precision: {history_cnn.history['val_precision'][best_epoch_cnn]}")
print(f"Validation Recall: {history_cnn.history['val_recall'][best_epoch_cnn]}")
print(f"Validation Loss: {history_cnn.history['val_loss'][best_epoch_cnn]}")

# Save the model in HDF5 format for the converter command below.
cnn_model.save('xss_depthwise_cnn.h5')
# Convert: tensorflowjs_converter --input_format keras --quantize_float16 xss_depthwise_cnn.h5 xss_depthwise_cnn_js





In [31]:
def plot_history(history, model_name):
    """Show training vs. validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``.history`` dict holds per-epoch lists under the
            keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        model_name: Prefix used in both subplot titles.
    """
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))
    curves = history.history

    # Left panel: accuracy per epoch.
    acc_ax.plot(curves['accuracy'], label='Training Accuracy')
    acc_ax.plot(curves['val_accuracy'], label='Validation Accuracy')
    acc_ax.set_title(f'{model_name} Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend()

    # Right panel: loss per epoch.
    loss_ax.plot(curves['loss'], label='Training Loss')
    loss_ax.plot(curves['val_loss'], label='Validation Loss')
    loss_ax.set_title(f'{model_name} Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    plt.show()

In [32]:
plot_history(history_cnn, 'CNN')



In [33]:
import tensorflow as tf
import numpy as np

# Smoke-test the CNN on two hand-written strings: plain markup and a script tag.
test_samples = ["<p>Test</p>", "<script>alert(1)</script>"]
test_X = preprocess(test_samples, max_len=100, normalize=False).reshape(-1, 100, 1)
predictions = cnn_model.predict(test_X)

print("CNN Predictions:", predictions)

# Scores above 0.5 are reported as malicious.
for sample_no, (sample, prediction) in enumerate(zip(test_samples, predictions), start=1):
    result = "Malicious" if prediction[0] > 0.5 else "Benign"
    print(f"Sample {sample_no}: '{sample}' - Prediction: {prediction[0]:.4f} - {result}")



In [34]:
# Export the model to 'xss_depthwise_cnn' for later TensorFlow.js conversion.
# NOTE(review): Keras `Model.export` is expected to write a TF SavedModel directory here —
# confirm for the installed Keras version and use a matching converter --input_format.
cnn_model.export('xss_depthwise_cnn')







In [35]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef
import matplotlib.pyplot as plt
from tensorflow.keras import layers, Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Load data: first 775974 benign rows without the 'Unnamed: 0' column, plus all XSS payloads
benign_samples = (
    pd.read_csv("../../Dataset_prep/CNN+VAE/benign_samples.csv")
    .iloc[:775974]
    .drop(columns=['Unnamed: 0'])
)
xss_payloads = pd.read_csv("../../Dataset_prep/CNN+VAE/xss_payloads.csv")

# Combine both sets under a fresh index and shuffle the rows with a fixed seed
cnn_dataset = pd.concat(
    [benign_samples, xss_payloads], axis=0, ignore_index=True
).sample(frac=1, random_state=42)

def preprocess(texts, max_len=100, normalize=False):
    """Preprocesses text for CNN, ensuring consistent input shape.

    Each text is truncated to ``max_len`` characters, every character is mapped
    to ``ord(c) % 256`` (so code points above 255 wrap into the 0-255 range),
    and the row is right-padded with zeros.

    The ``normalize`` flag is optional and off by default; it is accepted so
    that calls written as ``preprocess(..., normalize=...)`` keep working after
    this definition replaces an earlier one in the same kernel.

    Args:
        texts: Iterable of strings (list, pandas Series, ...). Missing entries
            (``None`` / NaN) are encoded as an empty string, i.e. an all-zero
            row; any other non-string value is converted with ``str()``.
        max_len: Number of positions per encoded row.
        normalize: If True, divide every code by 255.0 so values lie in [0, 1].

    Returns:
        A 2-D array of shape ``(len(texts), max_len)``: integer codes by
        default, floats when ``normalize`` is True. Empty input yields shape
        ``(0, max_len)`` rather than a 1-D empty array.
    """
    texts = list(texts)
    # Pre-allocated zeros provide the padding for free and keep the result 2-D
    # even when `texts` is empty.
    encoded = np.zeros((len(texts), max_len), dtype=int)
    for row, text in enumerate(texts):
        if not isinstance(text, str):
            # e.g. the NaN pandas produces for an empty CSV field.
            text = "" if text is None or pd.isna(text) else str(text)
        codes = [ord(c) % 256 for c in text[:max_len]]
        if codes:
            encoded[row, :len(codes)] = codes
    if normalize:
        return encoded / 255.0
    return encoded

# Preprocess data: 100 character codes per payload plus a trailing channel axis
cnn_X = preprocess(cnn_dataset["payload"], max_len=100)
cnn_X = cnn_X.reshape(-1, 100, 1)
# Labels from the "xss" column as a column vector
cnn_y = np.array(cnn_dataset["xss"]).reshape(-1, 1)

# Split data: 20% held out as the test set, seeded
cnn_X_train, cnn_X_test, cnn_y_train, cnn_y_test = train_test_split(
    cnn_X,
    cnn_y,
    random_state=42,
    test_size=0.2,
)

# Define CNN model (explicit input shape)
cnn_model = tf.keras.Sequential([
    layers.Input(shape=(100, 1)), #Explicit Input layer
    layers.DepthwiseConv1D(5, depth_multiplier=8, activation='relu'),
    layers.Conv1D(16, 1, activation='relu'),
    layers.GlobalMaxPooling1D(),
    layers.Dense(1, activation='sigmoid')
])

# Metrics are named explicitly: unnamed metric instances receive auto-numbered
# names ('precision_1', 'recall_1', ...) once more than one has been created in
# a kernel session, and the history is read back later under
# 'val_precision' / 'val_recall'.
cnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ],
)

# Callbacks: stop after 10 epochs without val_loss improvement (restoring the
# best weights) and keep only the lowest-val_loss checkpoint on disk
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
model_checkpoint = ModelCheckpoint(
    filepath="best_CNN.keras",
    save_best_only=True,
    monitor='val_loss',
    mode='min',
)

# Train the model (at most 100 epochs, 20% of the training data used for validation)
history = cnn_model.fit(
    cnn_X_train,
    cnn_y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping, model_checkpoint],
)

# Load best model. If the checkpoint cannot be loaded, the code below keeps
# using the in-memory model — say so explicitly so the fallback is not
# mistaken for a successful reload.
try:
    cnn_model = tf.keras.models.load_model("best_CNN.keras")
except Exception as e:
    print(f"Could not load 'best_CNN.keras' ({e}); keeping the in-memory model.")

# Evaluate the model: sigmoid outputs, then hard 0/1 labels with a 0.5 cut-off
y_pred_probs = cnn_model.predict(cnn_X_test)
y_pred = (y_pred_probs > 0.5).astype(int)

precision = precision_score(cnn_y_test, y_pred)
recall = recall_score(cnn_y_test, y_pred)
f1 = f1_score(cnn_y_test, y_pred)
roc_auc = roc_auc_score(cnn_y_test, y_pred_probs)
mcc = matthews_corrcoef(cnn_y_test, y_pred)

print("CNN Evaluation:")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")
print(f"ROC AUC: {roc_auc}")
print(f"MCC: {mcc}")

# Training-curve figures are read at the epoch with the lowest val_loss — the
# epoch that the checkpoint (save_best_only on val_loss) kept. The last history
# entry can be up to `patience` epochs past that point and would not describe
# the evaluated weights.
best_epoch = int(np.argmin(history.history['val_loss']))
print(f"Best epoch (lowest val_loss): {best_epoch + 1}")
print(f"Training Accuracy: {history.history['accuracy'][best_epoch]}")
print(f"Training Loss: {history.history['loss'][best_epoch]}")
print(f"Validation Accuracy: {history.history['val_accuracy'][best_epoch]}")






In [None]:
# Validation figures are read at the epoch with the lowest val_loss — the epoch
# that the checkpoint (save_best_only on val_loss) kept — rather than at the
# last epoch, which can be up to `patience` epochs later.
best_epoch = int(np.argmin(history.history['val_loss']))
print(f"Validation Precision: {history.history['val_precision'][best_epoch]}")
print(f"Validation Recall: {history.history['val_recall'][best_epoch]}")
print(f"Validation Loss: {history.history['val_loss'][best_epoch]}")
print(f"Validation Accuracy: {history.history['val_accuracy'][best_epoch]}")

# Save for TensorFlow.js
cnn_model.save('xss_depthwise_cnn.h5')

# Plotting function
def plot_history(history, model_name):
    """Plot training vs. validation accuracy (left) and loss (right).

    Args:
        history: Object whose ``.history`` dict holds per-epoch lists under the
            keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        model_name: Prefix used in both subplot titles.
    """
    plt.figure(figsize=(12, 5))
    # (history key, training label, validation label, axis/title name) per panel.
    panels = (
        ('accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy'),
        ('loss', 'Training Loss', 'Validation Loss', 'Loss'),
    )
    for position, (key, train_label, val_label, axis_name) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[key], label=train_label)
        plt.plot(history.history['val_' + key], label=val_label)
        plt.title(f'{model_name} {axis_name}')
        plt.xlabel('Epoch')
        plt.ylabel(axis_name)
        plt.legend()
    plt.show()

plot_history(history, 'CNN')

# Test predictions on two hand-written strings: plain markup and a script tag
test_samples = ["<p>Test</p>", "<script>alert(1)</script>"]
test_X = preprocess(test_samples, max_len=100).reshape(-1, 100, 1)
predictions = cnn_model.predict(test_X)

print("CNN Predictions:", predictions)

# Scores above 0.5 are reported as malicious
for sample_no, (sample, prediction) in enumerate(zip(test_samples, predictions), start=1):
    if prediction[0] > 0.5:
        result = "Malicious"
    else:
        result = "Benign"
    print(f"Sample {sample_no}: '{sample}' - Prediction: {prediction[0]:.4f} - {result}")

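# Convert for TensorFlow.js
# The --input_shape flag below is intended to make the generated model.json carry the (100, 1) input shape.
# NOTE(review): confirm that the installed tensorflowjs_converter accepts --input_shape (see `tensorflowjs_converter --help`).
# !tensorflowjs_converter --input_format keras --input_shape 100,1 xss_depthwise_cnn.h5 xss_depthwise_cnn_js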









In [None]:
# Export the model to 'xss_depthwise_cnn'.
# NOTE(review): Keras `Model.export` is expected to write a TF SavedModel directory here —
# confirm for the installed Keras version and use a matching converter --input_format.
cnn_model.export('xss_depthwise_cnn')





