In [None]:
import tensorflow as tf
import json
import numpy as np
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 1. Load the trained model
# NOTE(review): both paths are hard-coded under /content; adjust them if the
# artifacts live elsewhere.
model = tf.keras.models.load_model('/content/best_high_accuracy_model.h5')

# 2. Load tokenizer config
# JSON text is read as UTF-8 explicitly so the result does not depend on the
# platform's locale default encoding.
with open('/content/high_accuracy_tokenizer_config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)
# Token -> index lookup used by text_to_sequence.
word_index = config['word_index']
# Length every sequence is padded/truncated to before prediction.
max_len = config['max_len']
# Category labels, indexed by position in the model's output vector.
categories = config['categories']

# 3. Preprocessing
def preprocess_text(text):
    """Normalize raw email text into a single-space-separated token string.

    The steps run in this order:

    1. Lowercase the text.
    2. Replace ``http://`` / ``https://`` links and ``www.`` links with ``URL``.
    3. Replace email addresses with ``EMAIL``.
    4. Replace standalone runs of digits with ``NUMBER``.
    5. Replace every character that is neither a word character nor
       whitespace with a space.
    6. Collapse all whitespace runs to single spaces and strip the ends.

    NOTE(review): the placeholders are upper-case while all other text is
    lower-cased, and text_to_sequence looks tokens up verbatim. The vocabulary
    must therefore contain ``URL``/``EMAIL``/``NUMBER`` in exactly this
    casing -- confirm against the training pipeline.

    Args:
        text: The raw text to normalize.

    Returns:
        The normalized text.
    """
    text = text.lower()
    text = re.sub(r'http[s]?://\S+', 'URL', text)
    text = re.sub(r'www\.\S+', 'URL', text)
    # The top-level-domain class is letters only. The previous class,
    # ``[A-Z|a-z]``, also accepted a literal '|' because '|' is not an
    # alternation operator inside a character class.
    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', 'EMAIL', text)
    text = re.sub(r'\b\d+\b', 'NUMBER', text)
    # Punctuation is removed only after the placeholders are in place, since
    # the URL and email patterns rely on ':', '/', '.', and '@'.
    text = re.sub(r'[^\w\s]', ' ', text)
    text = ' '.join(text.split())
    return text

def text_to_sequence(text):
    """Convert preprocessed text into a padded batch of one index sequence.

    The text is split on whitespace and each token is looked up in the
    module-level ``word_index``. Tokens that are missing map to index 1,
    which this script treats as the out-of-vocabulary token. The sequence is
    then padded or truncated at its end to ``max_len``.

    Args:
        text: Text whose tokens are separated by whitespace.

    Returns:
        The result of ``pad_sequences`` for a batch containing this single
        sequence.
    """
    oov_index = 1  # fallback index for tokens missing from word_index
    token_ids = []
    for token in text.split():
        token_ids.append(word_index.get(token, oov_index))
    return pad_sequences(
        [token_ids],
        maxlen=max_len,
        padding='post',
        truncating='post',
    )

def predict_email_category(text):
    """Classify a piece of email text with the loaded model.

    The text goes through ``preprocess_text`` and ``text_to_sequence``, the
    model is run on the result, and the first row of its output is used as
    the per-category score vector.

    Args:
        text: The raw email text to classify.

    Returns:
        A dict with three keys:
        ``'predicted_category'`` is the entry of ``categories`` at the
        highest-scoring position, ``'confidence'`` is that score as a float,
        and ``'all_probabilities'`` maps every category to its score.
    """
    model_input = text_to_sequence(preprocess_text(text))
    # predict returns one row per input; only a single input was passed.
    scores = model.predict(model_input, verbose=0)[0]
    best = np.argmax(scores)
    per_category = {
        label: float(scores[position])
        for position, label in enumerate(categories)
    }
    return {
        'predicted_category': categories[best],
        'confidence': float(scores[best]),
        'all_probabilities': per_category,
    }

# 4. Example usage
# Classify one sample message and print the winning category, its score, and
# the full per-category score mapping returned by predict_email_category.
email_text = "Your verification code is 123456. Please enter this code."
result = predict_email_category(email_text)
print("Category:", result['predicted_category'])
print("Confidence:", result['confidence'])
print("All probabilities:", result['all_probabilities'])
