# Diabetic Retinopathy Detection ChatBot

In [1]:
import json
import os

#### Load the training dataset

In [2]:
with open("retQA.json") as file:
    data = json.load(file)

In [3]:
print(data)



### Building the word Vocabulary and Tokenizing the Data

In [4]:
from nltk.corpus import stopwords
import numpy as np
import re
import random
import os
import time
import json
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

In [5]:
stop_words = set(stopwords.words('english'))

In [15]:
words = []
labels = []
responses = []
docs_x = []
docs_y = []
for intent in data["intents"]:
    for pattern in intent["patterns"]:
        wrds = nltk.word_tokenize(pattern)
        words.extend([w for w in wrds if not w in stop_words])
        docs_x.append(wrds)
        docs_y.append(intent["tag"])

sentences = []

for i in range(len(docs_x)):
    filtered_sentence = [w for w in docs_x[i] if not w in stop_words]
    sentences.append(filtered_sentence)
        
for intent in data["intents"]:
    for response in intent["responses"]:
        resp = nltk.word_tokenize(response)
        responses.extend([w for w in resp if not w in stop_words])

words = [stemmer.stem(w.lower()) for w in words if w != "?"]
responses = [stemmer.stem(r.lower()) for r in responses if r != "?"]
vocab = sorted(set(words + responses))

In [7]:
print('Vectorizing the word sequences.....')
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
print('Done.')

Vectorizing the word sequences.....
Done.


In [8]:
word_idx

{'!': 1,
 "'m": 2,
 "'s": 3,
 '(': 4,
 ')': 5,
 ',': 6,
 '--': 7,
 '.': 8,
 '1': 9,
 '1500': 10,
 '2': 11,
 '2—are': 12,
 '3': 13,
 '314': 14,
 '4': 15,
 '40': 16,
 '4100': 17,
 '415': 18,
 '45': 19,
 '549': 20,
 '561': 21,
 '703': 22,
 '8500': 23,
 '991': 24,
 ':': 25,
 ';': 26,
 'a': 27,
 'a.k.': 28,
 'abl': 29,
 'abnorm': 30,
 'academy': 31,
 'accord': 32,
 'accum': 33,
 'addit': 34,
 'admin': 35,
 'adv': 36,
 'affect': 37,
 'al': 38,
 'alcohol': 39,
 'along': 40,
 'also': 41,
 'alt': 42,
 'am': 43,
 'amaz': 44,
 'amb': 45,
 'amount': 46,
 'an': 47,
 'angiograph': 48,
 'anoth': 49,
 'antihypertend': 50,
 'anyon': 51,
 'apply': 52,
 'apprecy': 53,
 'are': 54,
 'area': 55,
 'as': 56,
 'ask': 57,
 'assist': 58,
 'assocy': 59,
 'at': 60,
 'attend': 61,
 'back': 62,
 'bal': 63,
 'balloon-like': 64,
 'bas': 65,
 'be': 66,
 'bef': 67,
 'behind': 68,
 'best': 69,
 'bet': 70,
 'between': 71,
 'blee': 72,
 'blind': 73,
 'block': 74,
 'blood': 75,
 'body': 76,
 'both': 77,
 'but': 78,
 'cal': 

### Word representation model using FastText

In [167]:
from keras.preprocessing.text import Tokenizer
from gensim.models.fasttext import FastText
import numpy as np
import matplotlib.pyplot as plt
import nltk
from string import punctuation
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
from nltk import WordPunctTokenizer

import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')
en_stop = set(nltk.corpus.stopwords.words('english'))

%matplotlib inline

[nltk_data] Error loading punkt: <urlopen error [WinError 10060] A
[nltk_data]     connection attempt failed because the connected party
[nltk_data]     did not properly respond after a period of time, or
[nltk_data]     established connection failed because connected host
[nltk_data]     has failed to respond>
[nltk_data] Error loading wordnet: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>
[nltk_data] Error loading stopwords: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>


In [87]:
import re
from nltk.stem import WordNetLemmatizer

stemmer = WordNetLemmatizer()

def preprocess_text(document):
        # Remove all the special characters
        document = re.sub(r'\W', ' ', str(document))

        # remove all single characters
        document = re.sub(r'\s+[a-zA-Z]\s+', ' ', document)

        # Remove single characters from the start
        document = re.sub(r'\^[a-zA-Z]\s+', ' ', document)

        # Substituting multiple spaces with single space
        document = re.sub(r'\s+', ' ', document, flags=re.I)

        # Removing prefixed 'b'
        document = re.sub(r'^b\s+', '', document)

        # Converting to Lowercase
        document = document.lower()

        # Lemmatization
        tokens = document.split()
        tokens = [stemmer.lemmatize(word) for word in tokens]
        tokens = [word for word in tokens if word not in en_stop]
        tokens = [word for word in tokens if len(word) > 3]

        preprocessed_text = ' '.join(tokens)

        return preprocessed_text

In [62]:
preprocess_text('what are the symptoms of diabetic retinopathy')

'symptom diabetic retinopathy'

In [171]:
model10 = FastText(size=15, window=3, min_count=5)
model10.build_vocab(sentences=word_idx)

In [172]:
embedding_size = 15
window_size = 3
min_word = 5
down_sampling = 1e-2

%%time
fast_text_model = FastText('corpus.txt',
                      size=embedding_size,
                      window=window_size,
                      min_count=min_word,
                      sample=down_sampling,
                      sg=1,
                      iter=100)

In [1]:
from fasttext import train_unsupervised

In [2]:
lr = train_unsupervised(input='corpus.txt', epoch=2, lr=1.0, wordNgrams=3, verbose=2, minCount=1)

lr.ws

In [34]:
import fasttext

# Skipgram model :
skip_model = fasttext.train_unsupervised('corpus.txt', model='skipgram')

# or, cbow model :
cbow_model = fasttext.train_unsupervised('corpus.txt', model='cbow')

In [40]:
skip_model

<fasttext.FastText._FastText at 0x1415ec94438>

In [99]:
print(skip_model.words)   # list of words in dictionary
print(cbow_model.words)   # list of words in dictionary
print(skip_model['excellent']) # get the vector of the word 'retinopathy'
print(cbow_model['retinopathy']) # get the vector of the word 'retinopathy'

['the', 'of', 'and', 'is', 'to', 'blood', 'diabetic', 'retinopathy', 'a', 'can', 'your', 'you', '</s>', 'retina', 'with', 'are', 'what', 'vessels', 'have', 'eye', 'in', 'may', 'called', 'be', 'disease', 'I', 'treatment', 'diabetes', 'for', 'about', 'doctor', 'get', 'or', 'their', 'how', 'at', 'that', 'laser', 'will', 'an', 'prevent', 'help', 'as', 'people', 'me', 'this', 'These', 'retinopathy,', 'new', 'Diabetic', 'any', 'Retinopathy.', 'diabetes,', 'eye,', 'more', 'grow', 'If', 'i', 'should', 'condition', 'vessels.', 'has']
['the', 'of', 'and', 'is', 'to', 'blood', 'diabetic', 'retinopathy', 'a', 'can', 'your', 'you', '</s>', 'retina', 'with', 'are', 'what', 'vessels', 'have', 'eye', 'in', 'may', 'called', 'be', 'disease', 'I', 'treatment', 'diabetes', 'for', 'about', 'doctor', 'get', 'or', 'their', 'how', 'at', 'that', 'laser', 'will', 'an', 'prevent', 'help', 'as', 'people', 'me', 'this', 'These', 'retinopathy,', 'new', 'Diabetic', 'any', 'Retinopathy.', 'diabetes,', 'eye,', 'more',

#### Vectorizing the corpus

In [9]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [10]:
def vectorize(data):
    inputs = []
    wrds = nltk.word_tokenize(data)
    wrds = [w for w in wrds if not w in stop_words]
    words = [stemmer.stem(w.lower()) for w in wrds if w != "?"]
    # Remove words not in vocab
    removed_words = [words.pop(i) for i, w in enumerate(words) if w not in vocab]
    
    for w in removed_words:
        inputs.append(word_idx[w])
    return inputs
    #return pad_sequences(inputs, maxlen = 20)


In [18]:
sentences

[['What', 'diabetic', 'retinopathy'],
 ['tell', 'diabetic', 'retinopathy'],
 ['causes', 'disease'],
 ['someone', 'get', 'diabetic', 'retinopathy'],
 ['cause'],
 ['causes', 'infection'],
 ['risk', 'disease'],
 ['risk', 'diabetic', 'retinopathy'],
 ['What', 'stages', 'diabetic', 'retinopathy', '?'],
 ['different', 'types', 'disease'],
 ['different', 'types', 'diabetic', 'retinopathy'],
 ['different', 'classification', 'disease'],
 ['different', 'classification', 'diabetic', 'retinopathy'],
 ['disease', 'classified'],
 ['Does', 'diabetic', 'retinopathy', 'symptoms', '?'],
 ['symptoms', 'diabetic', 'retinopathy'],
 ['tell', 'signs', 'diabetic', 'retinopathy'],
 ['signs', 'disease'],
 ['I', 'get', 'diagnosed', '?'],
 ['get', 'tested'],
 ['tell', 'diagnosis'],
 ['disease', 'diagnosed'],
 ['diagnosis'],
 ['How', 'diabetic', 'retinopathy', 'treated', '?'],
 ['treatment'],
 ['tell', 'treatment'],
 ['solutions'],
 ['Can', 'condition', 'treated', '?'],
 ['What', 'treatment', 'condition', '?'],
 [

In [38]:
corpus = []
for ls in sentences:
    sent = [stemmer.stem(w.lower()) for w in ls if w != "?"]
    inner_list = []
    for ent in sent:
        inner_list.append(word_idx[ent])
    corpus.append(inner_list)
X_train = pad_sequences(corpus, 8)

In [39]:
X_train

array([[  0,   0,   0,   0,   0, 382, 113, 295],
       [  0,   0,   0,   0,   0, 344, 113, 295],
       [  0,   0,   0,   0,   0,   0,  83, 121],
       [  0,   0,   0,   0, 320, 169, 113, 295],
       [  0,   0,   0,   0,   0,   0,   0,  83],
       [  0,   0,   0,   0,   0,   0,  83, 195],
       [  0,   0,   0,   0,   0,   0, 297, 121],
       [  0,   0,   0,   0,   0, 297, 113, 295],
       [  0,   0,   0,   0, 382, 323, 113, 295],
       [  0,   0,   0,   0,   0, 117, 362, 121],
       [  0,   0,   0,   0, 117, 362, 113, 295],
       [  0,   0,   0,   0,   0, 117,  88, 121],
       [  0,   0,   0,   0, 117,  88, 113, 295],
       [  0,   0,   0,   0,   0,   0, 121,  88],
       [  0,   0,   0,   0, 124, 113, 295, 340],
       [  0,   0,   0,   0,   0, 340, 113, 295],
       [  0,   0,   0,   0, 344, 313, 113, 295],
       [  0,   0,   0,   0,   0,   0, 313, 121],
       [  0,   0,   0,   0,   0, 187, 169, 115],
       [  0,   0,   0,   0,   0,   0, 169, 345],
       [  0,   0,   

In [60]:
len(set(docs_y))

12

#### Vectorizing the Tags

In [43]:
# Create Tags dictionary
y = {}
y = dict((c, i) for i, c in enumerate(set(docs_y))) 

# Convert tags to interger representation
y_train = []
for entry in docs_y:
    y_train.append(y[entry])

# Convert to categorical using keras module
import keras

y_train = keras.utils.to_categorical(y_train)

In [44]:
y_train

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.,

In [45]:
from sklearn.model_selection import train_test_split

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, 
                                                    test_size=.3,
                                                   random_state=1,
                                                   stratify=y_train)

In [47]:
print(X_train.shape)
print(y_train.shape)

(35, 8)
(35, 12)


### Defining the Model

In [48]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense

In [49]:
maxlen = 8

model_1 = Sequential()
model_1.add(Embedding(len(vocab)+1, 8, input_length=maxlen))
model_1.add(Flatten())
model_1.add(Dense(12, activation='softmax'))

In [50]:
model_1.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])
model_1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 8, 8)              3176      
_________________________________________________________________
flatten_1 (Flatten)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 12)                780       
Total params: 3,956
Trainable params: 3,956
Non-trainable params: 0
_________________________________________________________________


In [52]:
history = model_1.fit(X_train, y_train, epochs=500, batch_size=8)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 



Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 

Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


In [125]:
import pickle
save_path ='/Users/adara/CAPSTONEPROJECT/eyeNetBot'
pickle.dump(vocab, open(os.path.join(save_path, "vocab.pkl"), "wb"))

Anvil websocket closed (code 1006, reason=Going away)


Exception in thread Thread-10:
Traceback (most recent call last):
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", line 279, in call
    return _do_call(args, kwargs, fn_name=fn_name)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", line 271, in _do_call
    return _threaded_server.do_call(args, kwargs, fn_name=fn_name, live_object=live_object)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\_threaded_server.py", line 309, in do_call
    raise _server._deserialise_exception(r["error"])
anvil._server.AnvilWrappedError: 'Connection to Anvil Uplink server lost'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\adara\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\adara\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", 

Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnec

Exception in thread Thread-13:
Traceback (most recent call last):
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", line 279, in call
    return _do_call(args, kwargs, fn_name=fn_name)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", line 271, in _do_call
    return _threaded_server.do_call(args, kwargs, fn_name=fn_name, live_object=live_object)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\_threaded_server.py", line 309, in do_call
    raise _server._deserialise_exception(r["error"])
anvil._server.AnvilWrappedError: 'Connection to Anvil Uplink server lost'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\adara\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\adara\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", 

Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnec

Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnec

Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnec

Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnec

In [None]:
# Load the model, if it exits, load vocab too
save_path = '/Users/adara/flatiron/CAPSTONE_PROJECT/ChatBot_Home_depot'
model = load_model(os.path.join(save_path, "chatbot.h5"))
vocab = pickle.load(open(os.path.join(save_path, "vocab.pkl"), "rb"))

In [10]:
# Load the text file
file = open("text.txt") 
line = file.read()# Use this to read file content as a stream:

#### Saving and loading a model object

In [None]:
model.save_model("skip_model.bin")
model.save_model("cbow_model.bin")

In [None]:
skip_model = fasttext.load_model("skip_model.bin")
cbow_model = fasttext.load_model("cbow_model.bin")

### Building the ChatBot

In [81]:
def vectorize(data):
    inputs = []
    wrds = nltk.word_tokenize(data)
    words = [stemmer.stem(w.lower()) for w in wrds if w != "?"]
    filtered_sentence = [w for w in words if not w in stop_words]
    [filtered_sentence.pop(i) for i, w in enumerate(filtered_sentence) if w not in vocab]
    
    inner_list = []
    
    for ent in filtered_sentence:
        inner_list.append(word_idx[ent])
    inputs.append(inner_list)
    
    return pad_sequences(inputs, maxlen = 8)

In [106]:
y

{'describe': 0,
 'class': 1,
 'symptoms': 2,
 'function': 3,
 'diagnosis': 4,
 'research': 5,
 'prevention': 6,
 'treatment': 7,
 'causes': 8,
 'name': 9,
 'stages': 10,
 'informmation': 11}

In [107]:
# Create Tags dictionary
labels = {}
labels = dict((i, c) for i, c in enumerate(set(docs_y)))

In [113]:
labels[0]

'describe'

In [98]:
n = vectorize('what causes the disease')

In [114]:
results = model_1.predict(n, 12)[0]
results_index = np.argmax(results)
print(labels[results_index])

causes


In [147]:
import sqlite3
 
from sqlite3 import Error
 
def sql_connection():
 
    try:
 
        con = sqlite3.connect('retQA.db')
 
        return con
 
    except Error:
 
        print(Error)

In [169]:
def sql_table(con):
 
    cursorObj = con.cursor()
 
    cursorObj.execute("CREATE TABLE QUESTIONS(Question text)")
 
    con.commit()

con = sql_connection()
 
sql_table(con)

In [146]:
import os
import tempfile
import time
import playsound
import speech_recognition as sr
from gtts import gTTS
from pathlib import Path
import sqlite3

In [167]:
def chat():
    print("start talking with the bot (type quit to stop)!")
    num = 100
    while True:
                
        inp = input("You: ")
        num = num + 1 
        
        if inp.lower() == "quit":
            tts = gTTS(text = 'bye', lang = "en", slow=False)
            filename = 'chat{}.mp3'.format(num)
            tts.save(filename)
            playsound.playsound(filename)
            break
        inp = vectorize(inp)
        results = model_1.predict(inp, 12)[0]
        results_index = np.argmax(results)
        output = labels[results_index]
        
        if results[results_index] > 0.7:
            for tg in data["intents"]:
                if tg['tag'] == output:
                    responses = tg['responses']
            tts = gTTS(text = random.choice(responses), lang = "en", slow=False)
            filename = 'chat{}.mp3'.format(num)
            tts.save(filename)
            playsound.playsound(filename)
        else:
            # Create a Database to store questions with low probability for update
            con = sqlite3.connect('retQA.db')
            cursor = con.cursor()
            cursor.execute("""INSERT INTO QUESTIONS (question) VALUES ('what')""")
            con.commit()
            tts = gTTS(text = "I didn't understand that, try again.", lang = "en", slow=False)
            filename = 'chat{}.mp3'.format(num)
            tts.save(filename)
            playsound.playsound(filename)

In [183]:
inp = 'omolewa'
con = sqlite3.connect('retQA.db')
cursor = con.cursor()
cursor.execute("""INSERT INTO QUESTIONS (question) VALUES (?)""", [(inp)])

OperationalError: database is locked

In [178]:
cursor.execute("""SELECT * FROM QUESTIONS""")
cursor.fetchall()

[('what',)]

In [168]:
chat()

start talking with the bot (type quit to stop)!
You: what is diabetic retinopathy


OperationalError: near "(": syntax error

In [122]:
import anvil.server
anvil.server.connect("GD4ZMM2NDWRVG6RJBEJTTZYQ-7NMJD52P2O5SK57D")

Connecting to wss://anvil.works/uplink
Anvil websocket open
Authenticated OK


In [123]:
@anvil.server.callable


Anvil websocket closed (code 1006, reason=Going away)


Exception in thread Thread-7:
Traceback (most recent call last):
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", line 279, in call
    return _do_call(args, kwargs, fn_name=fn_name)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", line 271, in _do_call
    return _threaded_server.do_call(args, kwargs, fn_name=fn_name, live_object=live_object)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\_threaded_server.py", line 309, in do_call
    raise _server._deserialise_exception(r["error"])
anvil._server.AnvilWrappedError: 'Connection to Anvil Uplink server lost'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\adara\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\adara\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\adara\Anaconda3\lib\site-packages\anvil\server.py", l

Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnecting Anvil Uplink...
Connecting to wss://anvil.works/uplink
Reconnection failed. Waiting 10 seconds, then retrying.
Reconnec