<a href="https://colab.research.google.com/github/fuchuin19/SafeX---Your-AI-powered-and-real-time-safety-expert/blob/main/SafeX_TFLite_Triage_Training_py312.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SafeX TFLite Triage Training (Python 3.12 Compatible)

This version removes **tflite-model-maker** and uses **pure TensorFlow / Keras**, which works on Python 3.12.

In [1]:
# === 1) Imports (Python 3.12 compatible) ===
import os, random, json
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Keep TensorFlow's logger quiet: only errors are shown.
tf.get_logger().setLevel('ERROR')

# Seed every RNG the notebook relies on. TensorFlow keeps its own global seed,
# separate from Python's `random` and NumPy, so it has to be set explicitly
# for runs to be repeatable after a kernel restart.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)


In [2]:
# === 2) Labels ===
# Triage classes recognised by the model. "BENIGN" is the only non-scam class.
MULTICLASS_LABELS = [
    "BENIGN", "INVESTMENT", "ROMANCE", "DELIVERY",
    "JOB", "IMPERSONATION", "OTHER",
]


def to_binary_label(multiclass_label: str) -> str:
    """Collapse a multiclass label into the two-way BENIGN / SCAM scheme.

    Args:
        multiclass_label: A class name such as those in MULTICLASS_LABELS.

    Returns:
        "BENIGN" when the input is exactly "BENIGN"; "SCAM" for anything else.
    """
    if multiclass_label == "BENIGN":
        return "BENIGN"
    return "SCAM"


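Dataset generation logic (templates, sampling, CSV creation) remains unchanged. The next cell expects `train_multiclass.csv` and `test_multiclass.csv` to already exist in the working directory.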

In [4]:
# === 3) Load CSVs ===
# Each CSV is read for two columns: "text" (the message) and "label"
# (a class name that must appear in MULTICLASS_LABELS).
train_df = pd.read_csv("train_multiclass.csv")
test_df = pd.read_csv("test_multiclass.csv")

# Class name <-> integer id, following the order of MULTICLASS_LABELS.
label_to_id = {l: i for i, l in enumerate(MULTICLASS_LABELS)}
id_to_label = {i: l for l, i in label_to_id.items()}


def _split_to_arrays(df, split_name):
    """Return (texts, label_ids) NumPy arrays for one dataset split.

    Args:
        df: DataFrame with "text" and "label" columns.
        split_name: Name used in error messages to identify the split.

    Returns:
        Tuple of the raw "text" values and the int64 class ids.

    Raises:
        ValueError: If any text is missing, or any label is missing or not
            part of MULTICLASS_LABELS. Series.map would otherwise turn such
            labels into NaN and silently yield a float target array.
    """
    missing_text = df["text"].isna()
    if missing_text.any():
        raise ValueError(
            f"{split_name}: {int(missing_text.sum())} row(s) have no text"
        )

    label_ids = df["label"].map(label_to_id)
    unknown = label_ids.isna()
    if unknown.any():
        offenders = sorted(df.loc[unknown, "label"].astype(str).unique())
        raise ValueError(
            f"{split_name}: {int(unknown.sum())} row(s) have an unknown or "
            f"missing label: {offenders}"
        )

    return df["text"].values, label_ids.astype("int64").values


x_train, y_train = _split_to_arrays(train_df, "train_multiclass.csv")
x_test, y_test = _split_to_arrays(test_df, "test_multiclass.csv")


In [6]:
# === 4) Text Vectorization ===
MAX_TOKENS = 20_000  # upper bound on the vocabulary size
SEQ_LEN = 100        # length of the integer sequence produced per message

# Settings for the layer that turns raw strings into integer token ids.
vectorizer_config = dict(
    output_mode="int",
    output_sequence_length=SEQ_LEN,
    max_tokens=MAX_TOKENS,
)
vectorizer = layers.TextVectorization(**vectorizer_config)

# Build the vocabulary from the training texts.
vectorizer.adapt(x_train)


In [7]:
# === 5) Model ===
# Raw text -> token ids -> 64-d embeddings -> mean over the sequence ->
# hidden dense layer -> one softmax probability per triage class.
num_classes = len(MULTICLASS_LABELS)

triage_layers = [
    vectorizer,
    layers.Embedding(MAX_TOKENS, 64),
    layers.GlobalAveragePooling1D(),
    layers.Dense(64, activation="relu"),
    layers.Dense(num_classes, activation="softmax"),
]
model = tf.keras.Sequential(triage_layers)

# Targets are integer class ids, hence the sparse cross-entropy loss.
compile_options = dict(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.compile(**compile_options)

model.summary()


In [8]:
# === 6) Train ===
# Fit on the training split, with a tenth of it set aside for validation.
fit_options = dict(
    validation_split=0.1,
    epochs=10,
    batch_size=32,
)
history = model.fit(x_train, y_train, **fit_options)

# Score the trained model on the untouched test split.
test_metrics = model.evaluate(x_test, y_test)
loss, acc = test_metrics
print("Test accuracy:", acc)


Epoch 1/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.1678 - loss: 1.9368 - val_accuracy: 0.4060 - val_loss: 1.8291
Epoch 2/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5327 - loss: 1.6582 - val_accuracy: 0.6345 - val_loss: 1.0924
Epoch 3/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.8218 - loss: 0.7944 - val_accuracy: 0.9429 - val_loss: 0.4738
Epoch 4/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9209 - loss: 0.4285 - val_accuracy: 1.0000 - val_loss: 0.2857
Epoch 5/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9814 - loss: 0.2700 - val_accuracy: 0.9869 - val_loss: 0.1960
Epoch 6/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.9958 - loss: 0.1669 - val_accuracy: 0.9869 - val_loss: 0.1272
Epoch 7/10
[1m237/237

In [11]:
# === 7) Export to TFLite ===
# The TextVectorization layer contains string operations that are not directly
# convertible to TFLite operations by default, so the exported model starts at
# the Embedding layer and expects pre-vectorized token ids as input.
# Because the tokenizer is left out of the .tflite file, the vocabulary and the
# label list are written next to it so a client can rebuild the same ids.
EXPORT_DIR = "safex_triage_multiclass"

# Same architecture as `model`, minus the leading vectorizer.
numerical_input_model = tf.keras.Sequential([
    layers.Embedding(MAX_TOKENS, 64),
    layers.GlobalAveragePooling1D(),
    layers.Dense(64, activation="relu"),
    layers.Dense(len(MULTICLASS_LABELS), activation="softmax"),
])

# Build explicitly so every layer has weights before they are overwritten.
# Input shape is (batch_size, sequence_length); None leaves the batch open.
# NOTE(review): the conversion log of the original run reports a float32 input
# of shape (None, SEQ_LEN) — confirm the consuming app feeds ids in that dtype.
numerical_input_model.build(input_shape=(None, SEQ_LEN))

# Copy the trained weights layer by layer. model.layers[0] is the vectorizer,
# so the remaining layers line up one-to-one with the new model's layers.
# Layers without weights (the pooling layer) copy an empty list.
trained_layers = model.layers[1:]
if len(trained_layers) != len(numerical_input_model.layers):
    raise RuntimeError(
        "Trained model and export model have a different number of layers: "
        f"{len(trained_layers)} vs {len(numerical_input_model.layers)}"
    )
for new_layer, trained_layer in zip(numerical_input_model.layers, trained_layers):
    new_layer.set_weights(trained_layer.get_weights())

# Compile this new model (not strictly necessary for conversion).
numerical_input_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

converter = tf.lite.TFLiteConverter.from_keras_model(numerical_input_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

os.makedirs(EXPORT_DIR, exist_ok=True)
tflite_path = f"{EXPORT_DIR}/model.tflite"
vocab_path = f"{EXPORT_DIR}/vocab.json"
labels_path = f"{EXPORT_DIR}/labels.json"

with open(tflite_path, "wb") as f:
    f.write(tflite_model)

# Position in the vocabulary list is the token id the model was trained on.
# A client must also reproduce the vectorizer's text preprocessing and pad or
# truncate each message to SEQ_LEN ids.
with open(vocab_path, "w", encoding="utf-8") as f:
    json.dump(vectorizer.get_vocabulary(), f, ensure_ascii=False)

# Position in the label list is the index of the matching output probability.
with open(labels_path, "w", encoding="utf-8") as f:
    json.dump(MULTICLASS_LABELS, f)

print("TFLite model saved to:", tflite_path)
print("Vocabulary saved to:", vocab_path)
print("Labels saved to:", labels_path)


Saved artifact at '/tmp/tmpsk_0z20x'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 100), dtype=tf.float32, name='keras_tensor_6')
Output Type:
  TensorSpec(shape=(None, 7), dtype=tf.float32, name=None)
Captures:
  135189216545104: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135189216543376: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135189216545872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135189216543568: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135189216549328: TensorSpec(shape=(), dtype=tf.resource, name=None)




TFLite model saved to: safex_triage_multiclass/model.tflite
