<a href="https://colab.research.google.com/github/sarmi2325/AI_Portfolio/blob/main/intent_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.pipeline import Pipeline
import joblib

# Train several TF-IDF + classifier pipelines on the intent dataset, compare
# them on a held-out split, and persist the most accurate one with joblib.

# Load the dataset
df = pd.read_csv("intent_dataset_5000.csv")

# Rows with a missing text or label cannot be vectorized or scored, so drop
# them up front instead of failing midway through training.
df = df.dropna(subset=["text", "intent"])

# Split the data (80% train / 20% test, fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(
    df["text"], df["intent"], test_size=0.2, random_state=42
)

# Define models to compare.
# Estimators that use randomness are seeded so that repeated runs produce the
# same ranking and the same saved model.
models = {
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "LinearSVC": LinearSVC(random_state=42),
    "MultinomialNB": MultinomialNB(),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
}

# Track best model
best_model = None
best_metrics = {"model": "", "accuracy": 0, "precision": 0, "recall": 0}

# Train and evaluate each model
for name, model in models.items():
    print(f"\n🔍 Training {name}...")

    # Create pipeline with TF-IDF + classifier. The vectorizer lives inside
    # the pipeline, so it is fitted on the training texts only.
    clf = Pipeline([
        ('tfidf', TfidfVectorizer(ngram_range=(1, 2), max_features=3000)),
        ('model', model),
    ])

    # Train
    clf.fit(X_train, y_train)

    # Predict
    y_pred = clf.predict(X_test)

    # Evaluate. Macro averaging weights every intent equally; zero_division=0
    # scores a class with no predicted/true samples as 0 without a warning.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_test, y_pred, average='macro', zero_division=0)

    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

    # Save best. The `is None` check guarantees a fitted pipeline is always
    # kept, even if every model scores an accuracy of exactly 0.
    # NOTE: the winner is chosen on the same test split it is scored on, so
    # the metrics reported for it are optimistic estimates.
    if best_model is None or accuracy > best_metrics["accuracy"]:
        best_model = clf
        best_metrics = {
            "model": name,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
        }

# Save the best model (the whole pipeline: vectorizer + classifier)
joblib.dump(best_model, "best_intent_classifier.joblib")

# Final output
print("\nBest Model:", best_metrics["model"])
print(f"Accuracy: {best_metrics['accuracy']:.4f}")
print(f"Precision: {best_metrics['precision']:.4f}")
print(f"Recall: {best_metrics['recall']:.4f}")



🔍 Training LogisticRegression...
Accuracy: 0.9944, Precision: 0.9951, Recall: 0.9942

🔍 Training LinearSVC...
Accuracy: 0.9958, Precision: 0.9958, Recall: 0.9957

🔍 Training MultinomialNB...
Accuracy: 0.9916, Precision: 0.9915, Recall: 0.9919

🔍 Training RandomForest...
Accuracy: 0.9902, Precision: 0.9910, Recall: 0.9898

Best Model: LinearSVC
Accuracy: 0.9958
Precision: 0.9958
Recall: 0.9957
