<a href="https://colab.research.google.com/github/rickwag/ML/blob/main/FirstAidIntentClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Data Acquisation

In [1]:
data_path = "/content/drive/MyDrive/Datasets/firstAidIntents.json"

In [2]:
import json
import pandas as pd 

In [11]:
with open(data_path, encoding="utf-8") as file:
  data = json.load(file)

In [9]:
data_test = [
        ["What to do if cuts?", "Cuts"],
        ["How to cure cuts?", "Cuts"],
        ["Which medicine to apply to cuts?", "Cuts"],
        ["How do you treat Sting?", "stings"],
        ["What to do if you get a sting?", "stings"],
        ["Which medicine to apply if sting?", "stings"]
]

In [10]:
df = pd.DataFrame(data_test)

df

Unnamed: 0,0,1
0,What to do if cuts?,Cuts
1,How to cure cuts?,Cuts
2,Which medicine to apply to cuts?,Cuts
3,How do you treat Sting?,stings
4,What to do if you get a sting?,stings
5,Which medicine to apply if sting?,stings


In [None]:
new_data = []

for intent in data["intents"]:
  for pattern in intent["patterns"]:
    new_data.append([pattern, intent["tag"]])

In [16]:
#sample data
for n in range(8):
  print(new_data[n])

['What to do if Cuts?', 'Cuts']
['How to cure Cuts?', 'Cuts']
['Which medicine to apply for Cuts?', 'Cuts']
['what to apply on cuts?', 'Cuts']
['Cuts', 'Cuts']
['how do you treat abrasions?', 'Abrasions']
['Do Abrasions cause scars?', 'Abrasions']
['Abrasions', 'Abrasions']


In [17]:
#data size
len(new_data)

188

In [18]:
df = pd.DataFrame(new_data)

df.head(8)

Unnamed: 0,0,1
0,What to do if Cuts?,Cuts
1,How to cure Cuts?,Cuts
2,Which medicine to apply for Cuts?,Cuts
3,what to apply on cuts?,Cuts
4,Cuts,Cuts
5,how do you treat abrasions?,Abrasions
6,Do Abrasions cause scars?,Abrasions
7,Abrasions,Abrasions


## Text Preprocessing

In [19]:
#lowercasing
df[0] = [entry.lower() for entry in df[0]]

df.head(2)

Unnamed: 0,0,1
0,what to do if cuts?,Cuts
1,how to cure cuts?,Cuts


In [20]:
#tokenization
#tokenization
import nltk
from nltk.tokenize import word_tokenize
nltk.download("punkt")

df[0] = [word_tokenize(entry) for entry in df[0]]

df.head(2)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Unnamed: 0,0,1
0,"[what, to, do, if, cuts, ?]",Cuts
1,"[how, to, cure, cuts, ?]",Cuts


In [21]:
#stopwords removal and lemmatization
from nltk.stem.wordnet import WordNetLemmatizer

nltk.download("stopwords")
nltk.download("wordnet")
nltk.download("omw-1.4")

from nltk.corpus import stopwords

lemmatizer = WordNetLemmatizer()

for index, doc in enumerate(df[0]):
  new_doc = []
  for token in doc:
    if token not in stopwords.words("english"):
      token = lemmatizer.lemmatize(token) 
      new_doc.append(token)

  df.loc[index, "final_text"] = str(new_doc)

df.head(5)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


Unnamed: 0,0,1,final_text
0,"[what, to, do, if, cuts, ?]",Cuts,"['cut', '?']"
1,"[how, to, cure, cuts, ?]",Cuts,"['cure', 'cut', '?']"
2,"[which, medicine, to, apply, for, cuts, ?]",Cuts,"['medicine', 'apply', 'cut', '?']"
3,"[what, to, apply, on, cuts, ?]",Cuts,"['apply', 'cut', '?']"
4,[cuts],Cuts,['cut']


In [22]:
#train test split
from sklearn.model_selection import train_test_split 

X_train, X_test, y_train, y_test = train_test_split(df["final_text"], df[1], test_size=.2)

In [23]:
print(X_train.shape)
print(X_test.shape)

(150,)
(38,)


## Feature Engineering

In [24]:
#vectorization
from sklearn.feature_extraction.text import TfidfVectorizer

tfIdfVectorizer = TfidfVectorizer()

tfIdfVectorizer.fit(df["final_text"])

X_train = tfIdfVectorizer.transform(X_train)
X_test = tfIdfVectorizer.transform(X_test)

## Modelling

In [25]:
#naive bayes
from sklearn import naive_bayes

naive_model = naive_bayes.MultinomialNB()

#training
naive_model.fit(X_train, y_train)

MultinomialNB()

In [26]:
#naive bayes evaluation
y_pred_naive = naive_model.predict(X_test)

from sklearn.metrics import classification_report 

print(classification_report(y_test, y_pred_naive))

                  precision    recall  f1-score   support

       Abrasions       1.00      1.00      1.00         2
      Broken Toe       0.00      0.00      0.00         0
             CPR       1.00      1.00      1.00         1
   Chemical Burn       1.00      1.00      1.00         1
         Choking       0.00      0.00      0.00         2
            Cold       1.00      1.00      1.00         1
           Cough       1.00      1.00      1.00         1
            Cuts       1.00      1.00      1.00         1
        Drowning       0.00      0.00      0.00         1
      Eye Injury       1.00      1.00      1.00         1
        Fainting       0.00      0.00      0.00         1
        Fracture       0.00      0.00      0.00         2
 Heat Exhaustion       0.00      0.00      0.00         1
     Heat Stroke       0.00      0.00      0.00         1
Nasal Congestion       0.00      0.00      0.00         2
 Normal Bleeding       0.00      0.00      0.00         2
   Pulled Mus

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
#svm 
from sklearn import svm 

svm_model = svm.SVC(kernel="linear", gamma="auto")

#training
svm_model.fit(X_train, y_train)

SVC(gamma='auto', kernel='linear')

In [28]:
#svm evaluation
y_pred_svm = svm_model.predict(X_test)

print(classification_report(y_test, y_pred_svm))

                  precision    recall  f1-score   support

       Abrasions       1.00      1.00      1.00         2
             CPR       1.00      1.00      1.00         1
   Chemical Burn       1.00      1.00      1.00         1
         Choking       1.00      0.50      0.67         2
            Cold       1.00      1.00      1.00         1
           Cough       1.00      1.00      1.00         1
            Cuts       1.00      1.00      1.00         1
        Drowning       1.00      1.00      1.00         1
      Eye Injury       1.00      1.00      1.00         1
        Fainting       1.00      1.00      1.00         1
        Fracture       1.00      1.00      1.00         2
 Heat Exhaustion       1.00      1.00      1.00         1
     Heat Stroke       1.00      1.00      1.00         1
Nasal Congestion       0.00      0.00      0.00         2
 Normal Bleeding       1.00      1.00      1.00         2
          Poison       0.00      0.00      0.00         0
   Pulled Mus

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
#Decision Tree
from sklearn.tree import DecisionTreeClassifier

decision_tree_model = DecisionTreeClassifier()

#training
decision_tree_model.fit(X_train, y_train)

DecisionTreeClassifier()

In [30]:
#decision tree evaluation
y_pred_tree = decision_tree_model.predict(X_test)

print(classification_report(y_test, y_pred_tree))

                  precision    recall  f1-score   support

       Abrasions       1.00      1.00      1.00         2
             CPR       1.00      1.00      1.00         1
   Chemical Burn       1.00      1.00      1.00         1
         Choking       1.00      1.00      1.00         2
            Cold       1.00      1.00      1.00         1
           Cough       1.00      1.00      1.00         1
            Cuts       1.00      1.00      1.00         1
        Drowning       1.00      1.00      1.00         1
      Eye Injury       1.00      1.00      1.00         1
        Fainting       1.00      1.00      1.00         1
        Fracture       1.00      1.00      1.00         2
 Heat Exhaustion       1.00      1.00      1.00         1
     Heat Stroke       1.00      1.00      1.00         1
Nasal Congestion       0.00      0.00      0.00         2
 Normal Bleeding       1.00      1.00      1.00         2
   Pulled Muscle       1.00      1.00      1.00         1
 Rectal bleed

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
#Multi Layer Perceptron model
from sklearn.neural_network import MLPClassifier

MLP_model = MLPClassifier()

#training
MLP_model.fit(X_train, y_train)



MLPClassifier()

In [32]:
#MLP evaluation
y_pred_MLP = MLP_model.predict(X_test) 

print(classification_report(y_test, y_pred_MLP))

                  precision    recall  f1-score   support

       Abrasions       1.00      1.00      1.00         2
             CPR       1.00      1.00      1.00         1
   Chemical Burn       1.00      1.00      1.00         1
         Choking       1.00      0.50      0.67         2
            Cold       1.00      1.00      1.00         1
           Cough       1.00      1.00      1.00         1
            Cuts       1.00      1.00      1.00         1
        Diarrhea       0.00      0.00      0.00         0
        Drowning       1.00      1.00      1.00         1
      Eye Injury       1.00      1.00      1.00         1
        Fainting       1.00      1.00      1.00         1
        Fracture       1.00      1.00      1.00         2
 Heat Exhaustion       1.00      1.00      1.00         1
     Heat Stroke       1.00      1.00      1.00         1
Nasal Congestion       0.00      0.00      0.00         2
 Normal Bleeding       1.00      1.00      1.00         2
   Pulled Mus

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [34]:
text = "how to do cpr"
text_vec = tfIdfVectorizer.transform([text])
svm_model.predict(text_vec)

array(['CPR'], dtype=object)

## Evaluation

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

model_names = ["Naive Bayes", "Support Vector Machine", "Decision Tree", "Multi Layer Perceptron"]
predictions = [y_pred_naive, y_pred_svm, y_pred_tree, y_pred_MLP]
precisions = [precision_score(y_test, predictions[n], average="macro") for n in range(len(predictions))]
recalls = [recall_score(y_test, predictions[n], average="macro") for n in range(len(predictions))]
f1s = [f1_score(y_test, predictions[n], average="macro") for n in range(len(predictions))]
accuracies = [accuracy_score(y_test, predictions[n]) for n in range(len(predictions))]

In [36]:
data = [[model_names[n], precisions[n], recalls[n], f1s[n], accuracies[n]] for n in range(len(model_names))]
print(data)

[['Naive Bayes', 0.4176245210727969, 0.4482758620689655, 0.42068965517241375, 0.3684210526315789], ['Support Vector Machine', 0.8571428571428571, 0.8273809523809524, 0.838095238095238, 0.8157894736842105], ['Decision Tree', 0.9259259259259259, 0.9259259259259259, 0.9259259259259259, 0.9473684210526315], ['Multi Layer Perceptron', 0.8928571428571429, 0.875, 0.8809523809523808, 0.9210526315789473]]


In [38]:
from tabulate import tabulate

col_names = ["model", "precision", "recall", "f1", "accuracy"]

print(tabulate(data, headers=col_names, tablefmt="grid"))

+------------------------+-------------+----------+----------+------------+
| model                  |   precision |   recall |       f1 |   accuracy |
| Naive Bayes            |    0.417625 | 0.448276 | 0.42069  |   0.368421 |
+------------------------+-------------+----------+----------+------------+
| Support Vector Machine |    0.857143 | 0.827381 | 0.838095 |   0.815789 |
+------------------------+-------------+----------+----------+------------+
| Decision Tree          |    0.925926 | 0.925926 | 0.925926 |   0.947368 |
+------------------------+-------------+----------+----------+------------+
| Multi Layer Perceptron |    0.892857 | 0.875    | 0.880952 |   0.921053 |
+------------------------+-------------+----------+----------+------------+
