In [10]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

In [11]:
# Expanded Dataset with More Descriptions
data = {
    'issue': [
        'printer error', 'printer error', 'printer error',
        'network failure', 'network failure', 'network failure',
        'authentication issue', 'authentication issue', 'authentication issue',
        'software crash', 'software crash', 'software crash',
        'hardware failure', 'hardware failure', 'hardware failure'
    ],
    'description': [
        'Printer is not responding and unable to print',
        'The printer is printing incorrectly and skipping pages',
        'Printer shows error message and stops working',
        
        'Unable to connect to the network, no internet access',
        'Internet connection is very slow and dropping frequently',
        'Cannot connect to Wi-Fi, keeps disconnecting',

        'Cannot log in to the account, shows invalid credentials',
        'Authentication fails and password is incorrect',
        'Login issues with system, authentication error',

        'The software crashes and exits unexpectedly',
        'Application becomes unresponsive and shuts down',
        'Software closes suddenly with error message',

        'The computer hardware is malfunctioning and not responding',
        'The device hardware stops working, unresponsive keys',
        'Hardware failure detected, system is not working'
    ]
}

In [12]:
# Convert data to DataFrame
df = pd.DataFrame(data)

In [13]:
df

Unnamed: 0,issue,description
0,printer error,Printer is not responding and unable to print
1,printer error,The printer is printing incorrectly and skippi...
2,printer error,Printer shows error message and stops working
3,network failure,"Unable to connect to the network, no internet ..."
4,network failure,Internet connection is very slow and dropping ...
5,network failure,"Cannot connect to Wi-Fi, keeps disconnecting"
6,authentication issue,"Cannot log in to the account, shows invalid cr..."
7,authentication issue,Authentication fails and password is incorrect
8,authentication issue,"Login issues with system, authentication error"
9,software crash,The software crashes and exits unexpectedly


In [14]:
# Preprocess the text and labels
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['description'])

In [15]:
# Encode the target labels (issues)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['issue'])

In [16]:
# Train a Random Forest classifier
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

In [18]:
# Evaluate the model
y_pred = rf_classifier.predict(X_test)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, labels=np.unique(y)))

                      precision    recall  f1-score   support

authentication issue       0.00      0.00      0.00       0.0
    hardware failure       0.00      0.00      0.00       0.0
     network failure       0.00      0.00      0.00       0.0
       printer error       0.00      0.00      0.00       1.0
      software crash       0.00      0.00      0.00       2.0

            accuracy                           0.00       3.0
           macro avg       0.00      0.00      0.00       3.0
        weighted avg       0.00      0.00      0.00       3.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
# Function to predict the issue
def predict_issue(input_text):
    input_vec = vectorizer.transform([input_text])
    probs = rf_classifier.predict_proba(input_vec)[0]  # Get probability distribution
    issues = label_encoder.inverse_transform(np.argsort(probs)[::-1])  # Get issue names sorted by probability
    percentages = np.sort(probs)[::-1] * 100  # Sort probabilities in descending order and convert to percentages
    
    # Display top results
    for i, issue in enumerate(issues):
        print(f"{issue}: {percentages[i]:.2f}% match")

In [20]:
# Test with an input
input_issue = "The printer is showing an error and won't print"
predict_issue(input_issue)

printer error: 44.00% match
authentication issue: 39.00% match
hardware failure: 10.00% match
software crash: 4.00% match
network failure: 3.00% match


In [21]:
# Test with an input
input_issue = "Unable to print via network"
predict_issue(input_issue)

network failure: 50.00% match
authentication issue: 37.00% match
software crash: 6.00% match
printer error: 4.00% match
hardware failure: 3.00% match
