In [2]:
import os
os.chdir("C://Users//thila//Downloads//AI_TASK")
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
df = pd.read_csv("Synthetic Data.csv")

In [3]:
df['Title'] = df['Title'].fillna('')
df['Description'] = df['Description'].fillna('')
df['Text'] = df['Title'] + " " + df['Description']
X = df['Text']
y = df['Priority']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english', max_features=5000)),
    ('rf', RandomForestClassifier(random_state=42))
])
param_grid = {
    'rf__n_estimators': [100, 200, 300],
    'rf__max_depth': [10, 20, 30, None],
    'rf__min_samples_split': [2, 5, 10],
    'rf__min_samples_leaf': [1, 2, 4],
    'rf__bootstrap': [True, False]
}
grid_search = GridSearchCV(
    pipeline,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
    scoring='accuracy'
)
grid_search.fit(X_train, y_train)
print("Best Parameters Found:")
print(grid_search.best_params_)
print("\nBest Cross-Validation Accuracy:", grid_search.best_score_)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("\nTest Accuracy:", accuracy_score(y_test, y_pred))

Fitting 3 folds for each of 216 candidates, totalling 648 fits
Best Parameters Found:
{'rf__bootstrap': True, 'rf__max_depth': None, 'rf__min_samples_leaf': 2, 'rf__min_samples_split': 2, 'rf__n_estimators': 300}

Best Cross-Validation Accuracy: 0.522537687502338

Test Accuracy: 0.52


In [7]:
import pickle
with open('priority_prediction.pkl', 'wb') as f:
    pickle.dump(best_model, f)