In [1]:
# Import packages
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
import warnings 
warnings.filterwarnings("ignore")

In [2]:
# Load data
df = pd.read_csv("all_tickets.csv")

In [3]:
# Data engineering
df = df.fillna(value = "x")
df["text"] = df["title"] + " " + df["body"]
df = df.drop(columns = ["title", "body"])

In [4]:
df.head()

Unnamed: 0,ticket_type,category,sub_category1,sub_category2,business_service,urgency,impact,text
0,1,4,2,21,71,3,4,x hi since recruiter lead permission approve r...
1,1,6,22,7,26,3,4,connection with icon icon dear please setup ic...
2,1,5,13,7,32,3,4,work experience user work experience user hi w...
3,1,5,13,7,32,3,4,requesting for meeting requesting meeting hi p...
4,1,4,2,76,4,3,4,reset passwords for external accounts re expir...


In [5]:
df.ticket_type.value_counts()

ticket_type
1    34621
0    13928
Name: count, dtype: int64

In [6]:
# Process data using count vectorization
count_vec = CountVectorizer()
bow = count_vec.fit_transform(df["text"])
bow = np.array(bow.todense())

In [7]:
data = bow
target = df["ticket_type"]

In [8]:
# Split test and training data
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size = 0.2, stratify = target)

In [9]:
# Fit a Multinomial Naive Bayes model
model = MultinomialNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [10]:
# Assess model performance
print("Accuracy:", round(accuracy_score(y_test, y_pred), 3))
print("F1 score:", round(f1_score(y_test, y_pred, average = "macro"), 3))

Accuracy: 0.978
F1 score: 0.974


In [11]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.99      0.96      2786
           1       1.00      0.97      0.98      6924

    accuracy                           0.98      9710
   macro avg       0.97      0.98      0.97      9710
weighted avg       0.98      0.98      0.98      9710



#### **Observations:** Our model can classify tickets with high accuracy but only based on the first column "ticket_type". With the creation of other models to classify the other columns, it would be possible to automate completely the process of ticket classification.