In [1]:
import torch

print("CUDA available:", torch.cuda.is_available())
print("GPU count:", torch.cuda.device_count())
print("Current device:", torch.cuda.current_device())
print("GPU name:", torch.cuda.get_device_name(torch.cuda.current_device()) if torch.cuda.is_available() else "No GPU found")

CUDA available: True
GPU count: 1
Current device: 0
GPU name: NVIDIA GeForce RTX 3050 Laptop GPU


Predictions using Logistic regression

In [3]:
import joblib

vectorizer = joblib.load("tfidf_vectorizer.pkl")
lr_model = joblib.load("logistic_regression_email_classifier.pkl")
print("✅ Traditional ML model and vectorizer loaded successfully")

✅ Traditional ML model and vectorizer loaded successfully


In [4]:
def predict_logistic(email_text):
    X = vectorizer.transform([email_text])
    prediction = lr_model.predict(X)[0]
    confidence = lr_model.predict_proba(X).max()
    return prediction, confidence


In [8]:
email = "Please review the attached document for the project updates and let me know your feedback."

label, score = predict_logistic(email)
print("Logistic Regression Prediction:", label)
print("Confidence:", round(score, 3))


Logistic Regression Prediction: request
Confidence: 0.998


In [9]:
email = "Thanks for your prompt response to my query."

label, score = predict_logistic(email)
print("Logistic Regression Prediction:", label)
print("Confidence:", round(score, 3))


Logistic Regression Prediction: feedback
Confidence: 0.999


In [26]:
email = " check for the database error"

label, score = predict_logistic(email)
print("Logistic Regression Prediction:", label)
print("Confidence:", round(score, 3))


Logistic Regression Prediction: complaint
Confidence: 0.999


DistiBERT predictions

In [14]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


In [15]:
tokenizer = AutoTokenizer.from_pretrained("models/distilbert_classifier")
bert_model = AutoModelForSequenceClassification.from_pretrained(
    "models/distilbert_classifier"
)
bert_model.eval()
print("✅ DistilBERT model and tokenizer loaded successfully")

✅ DistilBERT model and tokenizer loaded successfully


In [17]:
label_map = {
    0: "complaint",
    1: "feedback",
    2: "other",
    3: "request",
    4: "spam"
}

def predict_distilbert(email_text):
    inputs = tokenizer(
        email_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256
    )

    with torch.no_grad():
        outputs = bert_model(**inputs)

    probs = torch.softmax(outputs.logits, dim=1)
    confidence, predicted_class = torch.max(probs, dim=1)

    return label_map[predicted_class.item()], confidence.item()



In [27]:
email = "Check for the database error"

label, score = predict_distilbert(email)
print("DistilBERT Prediction:", label)
print("Confidence:", round(score, 3))


DistilBERT Prediction: complaint
Confidence: 0.995


In [19]:
email = "Please review the attached document for the project updates and let me know your feedback."

label, score = predict_logistic(email)
print("Logistic Regression Prediction:", label)
print("Confidence:", round(score, 3))


Logistic Regression Prediction: request
Confidence: 0.998


In [20]:
email = "Thanks for your prompt response to my query."

label, score = predict_logistic(email)
print("Logistic Regression Prediction:", label)
print("Confidence:", round(score, 3))


Logistic Regression Prediction: feedback
Confidence: 0.999


In [30]:
email = "my system has has been down for two days and needs urgent attention."

lr_label, lr_score = predict_logistic(email)
bert_label, bert_score = predict_distilbert(email)

print(f"Logistic Regression → {lr_label} ({lr_score:.2f})")
print(f"DistilBERT          → {bert_label} ({bert_score:.2f})")


Logistic Regression → other (0.74)
DistilBERT          → request (1.00)


Rule based predictons for edge cases

In [31]:
def apply_rule_based_correction(email_text, predicted_label):
    """
    Post-processing rule-based correction for edge cases.
    """

    text = email_text.lower()

    complaint_indicators = [
        "not working",
        "system down",
        "has been down",
        "failed",
        "error",
        "issue",
        "problem",
        "unable",
        "crashed",
        "urgent",
        "outage"
    ]

    # Apply rule only for ambiguous classes
    if predicted_label in ["request", "other"]:
        if any(keyword in text for keyword in complaint_indicators):
            return "complaint"

    return predicted_label


In [32]:
email = "My system has been down for two days and needs urgent attention."

ml_label, ml_score = predict_logistic(email)
final_label = apply_rule_based_correction(email, ml_label)

print("Logistic Regression (raw):", ml_label, "| confidence:", round(ml_score, 3))
print("Final Prediction:", final_label)
print("Confidence:", round(score, 3))

Logistic Regression (raw): other | confidence: 0.744
Final Prediction: complaint
Confidence: 0.995


In [33]:
email = "My system has been down for two days and needs urgent attention."

ml_label, ml_score = predict_distilbert(email)
final_label = apply_rule_based_correction(email, ml_label)

print("DistilBERT (raw):", ml_label, "| confidence:", round(ml_score, 3))
print("Final Prediction:", final_label)
print("Confidence:", round(score, 3))

DistilBERT (raw): request | confidence: 0.999
Final Prediction: complaint
Confidence: 0.995
