In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from io import StringIO
from scipy.stats import randint
from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer
from sklearn.feature_selection import chi2
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix, accuracy_score
import joblib

# Load dataset
# Read the raw CSV and keep only the two columns used below: 'cat' (the
# target label) and 'Grievance Category' (the text that gets vectorised).
df = pd.read_csv("complaintsv3.csv", encoding='unicode_escape')
df1 = df[['cat', 'Grievance Category']].copy()
# Drop rows missing either the text or the label. Filtering on the text
# column alone lets a NaN label through: factorize() encodes it as -1 and
# NaN is not a usable class label for training.
df1 = df1.dropna(subset=['cat', 'Grievance Category'])

# Give every distinct label an integer id (in order of first appearance)
# and build lookup dictionaries in both directions.
df1['category_id'] = df1['cat'].factorize()[0]
category_id_df = df1[['cat', 'category_id']].drop_duplicates()
category_to_id = dict(category_id_df.values)
id_to_category = dict(category_id_df[['category_id', 'cat']].values)

# TF-IDF Vectorization
# Unigram + bigram TF-IDF with sublinear term-frequency scaling; English stop
# words are removed and terms below the min_df=5 document threshold are dropped.
tfidf = TfidfVectorizer(
    sublinear_tf=True,
    min_df=5,
    ngram_range=(1, 2),
    stop_words='english',
)
# Dense document-term matrix over every row of df1, plus the matching
# integer label for each row.
features = tfidf.fit_transform(df1['Grievance Category']).toarray()
labels = df1['category_id']

# Train-Test Split
# Hold out 25% of the rows for evaluation; the fixed seed makes the split
# reproducible. Inputs are the raw text column, targets are the 'cat' labels.
X_train, X_test, y_train, y_test = train_test_split(
    df1['Grievance Category'],
    df1['cat'],
    test_size=0.25,
    random_state=0,
)

# Model Training
# Candidate classifiers; each one is fitted on the same training vectors and
# scored on the same held-out split.
models = [
    RandomForestClassifier(n_estimators=100, max_depth=5, random_state=0),
    LinearSVC(),
    MultinomialNB(),
    LogisticRegression(random_state=0),
]

# Vectorise once, outside the loop: the TF-IDF fit depends only on X_train, so
# refitting it for every model repeated identical work. The vocabulary is
# learned from the training split only; the test split is merely transformed.
tfidf_vectorizer_vectors = tfidf.fit_transform(X_train)
X_test_vectors = tfidf.transform(X_test)

for model in models:
    clf = model.fit(tfidf_vectorizer_vectors, y_train)
    y_pred = clf.predict(X_test_vectors)
    print(f"{model.__class__.__name__} Accuracy: {accuracy_score(y_test, y_pred)}")

# Save the best model
# NOTE(review): LinearSVC is hard-coded as the "best" model here rather than
# selected from the accuracies printed by the comparison loop - confirm.
best_model = LinearSVC()
# Refit the vectorizer on the training split so that the classifier and the
# vectorizer persisted below are guaranteed to share the same vocabulary.
best_model.fit(tfidf.fit_transform(X_train), y_train)
joblib.dump(best_model, "student_query_classifier.pkl")
# The classifier only accepts TF-IDF vectors, so the fitted vectorizer must be
# persisted as well; without it the saved model cannot score raw text.
joblib.dump(tfidf, "student_query_tfidf_vectorizer.pkl")

print("Model training complete and saved as student_query_classifier.pkl")
print("TF-IDF vectorizer saved as student_query_tfidf_vectorizer.pkl")
