In [1]:
import pickle
import tensorflow as tf

import pandas as pd
import numpy as np
import string
import json
import re

import nltk
from nltk.stem.lancaster import LancasterStemmer

stemmer = LancasterStemmer()

from IPython.display import display

In [6]:
class KpiIdentifier:
    def __init__(self, data_path, model_path, json_data_path):
        try:
            with open(data_path, 'rb') as file:
                self.words, self.classes, self.documents = pickle.load(file)
        except Exception as e:
            print(e)
            raise FileNotFoundError(f"{data_path} doesn't exist")
            
        try:
            self.model = tf.keras.models.load_model(model_path)
        except Exception as e:
            print(e)
            raise FileNotFoundError(f"{model_path} doesn't exist")
            
        try:
            with open(json_data_path, "rb") as file:
                self.data = json.load(file)
        except Exception as e:
            print(e)
            raise FileNotFoundError(f"{json_data_path} doesn't exist")
           
    @property
    def ignore_words(self):
        return ["'s"] + [s for s in string.punctuation]
    
    def clean_up_sentence(self, sentence):
        s_words = nltk.word_tokenize(sentence)
        s_words = [stemmer.stem(w.lower()) for w in s_words if w not in self.ignore_words]

        return s_words

    def bow(self, sentence, words, show_details=True):
        s_words = self.clean_up_sentence(sentence)

        bag = [0 for _ in range(len(words))]

        for s in s_words:
            for i, w in enumerate(words):
                if w == s:
                    bag[i] = 1

                    if show_details:
                        print(f"found in bag: {s}")

        return np.array(bag)

    def classify_local(self, sentence):
        ERROR_THRESHOLD = 0.60

        input_data = pd.DataFrame([self.bow(sentence, self.words, show_details=False)], dtype=float, index=['input'])
        input_data = tf.cast(input_data.values.reshape(-1, 1, input_data.shape[1]), tf.float32)
        print(input_data.shape)
        results = self.model.predict(input_data)[0]

        #filter out prediction below a threshold, and provide intent index
        results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD]

        # sort by strength of probability
        results.sort(key=lambda x: x[1], reverse=False)

        return_list = []
        for r in results:
            return_list.append((self.classes[r[0]], str(r[1])))
                
#         print(return_list)
        return return_list

    def run(self, text):
        try:
            results = self.classify_local(text)[0]
            
            for intent in self.data["intents"]:
                if intent["tag"] == results[0]: 
                    for query in text.split():
                        for val in intent["validate"]:
                            if stemmer.stem(nltk.word_tokenize(query)[0]) == stemmer.stem(val): return results[0], text
                            
                    return None
                
        except IndexError:
            return None
                    
        except Exception as e:
            raise(e)
            return None

    
# identifier = KpiIdentifier(data_path="data.pickle", model_path="kpi_identifier.hdf5", json_data_path="kpi_identifier.json")
# identifier.run(text="")

In [7]:
identifier = KpiIdentifier(data_path="data.pickle", model_path="kpi_identifier.hdf5", json_data_path="kpi_identifier.json")
text = input("Enter Text: ")

# while True:
#     if text in ["quit", "exit", "stop", "q", "s", "e"]:
#         break
    
display(identifier.run(text=text))
# text = input("\nEnter Text: ")

Enter Text:  hi


(1, 1, 163)
[('greetings', '0.9994537')]


('greetings', 'hi')


Enter Text:  q


In [8]:
tf.__version__

'2.2.0'