In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
from numpy.testing import assert_allclose
import pandas as pd
import pickle
import random
import json
import string

import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer

import nltk
from nltk.stem.lancaster import LancasterStemmer
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

In [2]:
with open('kpi.json') as file:
    data = json.load(file)

In [3]:
stemmer = LancasterStemmer()

In [4]:
# define the checkpoint
filepath="./models/ramses_kpi/tmp/checkpoint"

my_callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor="loss", patience=5),
    tf.keras.callbacks.TensorBoard(log_dir='./logs'),
    tf.keras.callbacks.ModelCheckpoint(filepath=filepath, monitor='loss', verbose=1, save_best_only=True, save_weights_only=True, mode='min')
]

In [5]:
words = []
classes = []
documents = []
ignore_words = ["'s"]
ignore_words.extend(string.punctuation)

try:
    with open('data.pickle', 'rb') as file:
        words, classes, documents = pickle.load(file)
except:
    # loop through each sentence in our intents pattern
    for intent in data['intents']:
        for pattern in intent['patterns']:
            # tokenize each word in the sentence
            wrds = nltk.word_tokenize(pattern)
            words.extend(wrds) # add to word list
            documents.append((wrds, intent['tag'])) # add to documents in our corpus

        if intent['tag'] not in classes:
            classes.append(intent['tag']) # add to our class list

    # stem and lower each word and remove duplicates
    words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]
    words = sorted(list(set(words)))

    classes = sorted(list(set(classes))) # sort classes

    print(f"{len(documents)} documents\n{len(classes)} classes\n{len(words)} unique stemmed words")

#     with open('data.pickle', 'wb') as file:
#         pickle.dump((words, classes, documents), file)


# create our training data
training = []
output_empty = [0 for _ in range(len(classes))]

for doc in documents: # bag of words
    bag = []
    s_words = [stemmer.stem(w.lower()) for w in doc[0] if w not in ignore_words]
    
    for w in words:
        bag.append(1) if w in s_words else bag.append(0)
        
    output_row = output_empty[:]
    output_row[classes.index(doc[1])] = 1
    
    training.append([bag, output_row])
    
# shuffle our features and turn intp np.array
random.shuffle(training)
training = np.array(training)

# create train and test list.
train_x = list(training[:, 0])
train_y = list(training[:, 1])

print(f"{train_x.shape} {train_y.shape}")

model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# print("\n",model.summary(), "\n")

# Fit the model
model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1, callbacks=my_callbacks)
model.load_weights("./models/ramses_kpi/tmp/checkpoint")
model.save("./models/ramses_kpi/model.h5")

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               20992     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 12)                780       
Total params: 30,028
Trainable params: 30,028
Non-trainable params: 0
_________________________________________________________________

 None 

Epoch 1/200
Epoch 00001: loss improved from inf to 1.88525, saving model to ./models/ramses_kpi/tmp/checkpoin

In [5]:
def clean_up_sentence(sentence):
    s_words = nltk.word_tokenize(sentence)
    s_words = [stemmer.stem(w.lower()) for w in s_words if w not in ignore_words]
    
    return s_words

def bow(sentence, words, show_details=True):
    s_words = clean_up_sentence(sentence)
    
    bag = [0 for _ in range(len(words))]
    
    for s in s_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
                
                if show_details:
                    print(f"found in bag: {s}")
                    
    return np.array(bag)

def classify_local(sentence):
    ERROR_THRESHOLD = 0.25
    
    input_data = pd.DataFrame([bow(sentence, words, show_details=False)], dtype=float, index=['input'])
    results = model.predict([input_data])[0]
    
    #filter out prediction below a threshold, and provide intent index
    results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD]
    
    # sort by strength of probability
    results.sort(key=lambda x: x[1], reverse=False)
    
    return_list = []
    for r in results:
        return_list.append((classes[r[0]], str(r[1])))
    
    print(f"\n{return_list}")
    return return_list
    
while True:
    inp = input("You: ")
    
    if inp == 'quit' or inp == 'stop' or inp == 'q':
        break
        
    results = classify_local(inp)[0]
    
    for intent in data['intents']:
        if intent['tag'] == results[0]:
            responses = intent['responses']
    
    print("\n", random.choice(responses), "\n")
    

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/vegas/anaconda3/envs/deeplearning/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-2a28e402268c>", line 42, in <module>
    inp = input("You: ")
  File "/home/vegas/anaconda3/envs/deeplearning/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 860, in raw_input
    return self._input_request(str(prompt),
  File "/home/vegas/anaconda3/envs/deeplearning/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 904, in _input_request
    raise KeyboardInterrupt("Interrupted by user") from None
KeyboardInterrupt: Interrupted by user

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/vegas/anaconda3/envs/deeplearning/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeE

TypeError: object of type 'NoneType' has no len()

In [None]:
# new_model = load_model(filepath)
# inp = input("You: ")
# input_data = pd.DataFrame([bow(inp, words, show_details=False)], dtype=float, index=['input'])
# assert_allclose(model.predict(input_data), new_model.predict(input_data), 1e-5)

In [None]:
# reloaded_obj = load_model("models/ramses_kpi/model.h5")
# reloaded_obj.load_weights("./models/ramses_kpi/tmp/checkpoint")
# reloaded_obj.summary()

In [None]:
# name = "./models/ramses_kpi/model.h5"

In [None]:
# import os

In [None]:
# os.path.normpath(filename + os.sep + os.pardir) + "/tmp/checkpoint"