In [1]:
!pip install newspaper3k



In [2]:
from newspaper import Article
import random
import string

from sklearn.feature_extraction.text import *
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import numpy as np
import warnings

In [3]:
#Ignore any warning messages
warnings.filterwarnings('ignore')

In [4]:
#Download the required resources from nltk (quiet=True suppresses the download log)
#'punkt' is fetched for the nltk tokenizers called in later cells
nltk.download('punkt',quiet=True)

#'wordnet' is fetched as well; no code visible in this notebook excerpt reads it.
#This call is the cell's last expression, so its return value is what the cell displays.
nltk.download('wordnet',quiet=True)

True

In [5]:
#Build an Article for the Mayo Clinic chronic-kidney-disease page, then fetch and parse it
article = Article('https://www.mayoclinic.org/diseases-conditions/chronic-kidney-disease/symptoms-causes/syc-20354521')
article.download()
article.parse()
#NOTE(review): nothing produced by nlp() is read in the cells visible here - only article.text is used
article.nlp()
#The extracted article text is the corpus used by the rest of the notebook
corpus = article.text

#print the corpus/text
print(corpus)

Overview

Chronic kidney disease, also called chronic kidney failure, describes the gradual loss of kidney function. Your kidneys filter wastes and excess fluids from your blood, which are then excreted in your urine. When chronic kidney disease reaches an advanced stage, dangerous levels of fluid, electrolytes and wastes can build up in your body.

In the early stages of chronic kidney disease, you may have few signs or symptoms. Chronic kidney disease may not become apparent until your kidney function is significantly impaired.

Treatment for chronic kidney disease focuses on slowing the progression of the kidney damage, usually by controlling the underlying cause. Chronic kidney disease can progress to end-stage kidney failure, which is fatal without artificial filtering (dialysis) or a kidney transplant.

Chronic kidney disease care at Mayo Clinic

How kidneys work

Symptoms

Signs and symptoms of chronic kidney disease develop over time if kidney damage progresses slowly. Signs an

In [6]:
#Tokenization
#'text' is another name for the same corpus string; later cells read both 'text' and 'sent_tokens'
text = corpus
sent_tokens = nltk.sent_tokenize(text) #split the text into a list of sentences

#Print the list of sentences
print(sent_tokens)

['Overview\n\nChronic kidney disease, also called chronic kidney failure, describes the gradual loss of kidney function.', 'Your kidneys filter wastes and excess fluids from your blood, which are then excreted in your urine.', 'When chronic kidney disease reaches an advanced stage, dangerous levels of fluid, electrolytes and wastes can build up in your body.', 'In the early stages of chronic kidney disease, you may have few signs or symptoms.', 'Chronic kidney disease may not become apparent until your kidney function is significantly impaired.', 'Treatment for chronic kidney disease focuses on slowing the progression of the kidney damage, usually by controlling the underlying cause.', 'Chronic kidney disease can progress to end-stage kidney failure, which is fatal without artificial filtering (dialysis) or a kidney transplant.', 'Chronic kidney disease care at Mayo Clinic\n\nHow kidneys work\n\nSymptoms\n\nSigns and symptoms of chronic kidney disease develop over time if kidney damage

In [7]:
#Translation table for str.translate: every punctuation code point maps to None,
#which makes translate() delete that character
remove_punct_dict = dict.fromkeys(map(ord, string.punctuation))

#Show the punctuation characters that will be stripped
print(string.punctuation)

#Show the resulting {code point: None} table
print(remove_punct_dict)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
{33: None, 34: None, 35: None, 36: None, 37: None, 38: None, 39: None, 40: None, 41: None, 42: None, 43: None, 44: None, 45: None, 46: None, 47: None, 58: None, 59: None, 60: None, 61: None, 62: None, 63: None, 64: None, 91: None, 92: None, 93: None, 94: None, 95: None, 96: None, 123: None, 124: None, 125: None, 126: None}


In [8]:
def LemNormalize(text):
    """Lower-case `text`, delete its punctuation and split it into word tokens.

    Despite the name, no lemmatization is applied here: the result is exactly
    what nltk.word_tokenize returns for the cleaned string.

    text: the string to normalize.
    Returns the tokens produced by nltk.word_tokenize.
    """
    #remove_punct_dict maps punctuation code points to None, so translate() drops them
    cleaned = text.lower().translate(remove_punct_dict)
    return nltk.word_tokenize(cleaned)


#Print the tokenized text (the whole corpus run through LemNormalize)
print(LemNormalize(text))

['overview', 'chronic', 'kidney', 'disease', 'also', 'called', 'chronic', 'kidney', 'failure', 'describes', 'the', 'gradual', 'loss', 'of', 'kidney', 'function', 'your', 'kidneys', 'filter', 'wastes', 'and', 'excess', 'fluids', 'from', 'your', 'blood', 'which', 'are', 'then', 'excreted', 'in', 'your', 'urine', 'when', 'chronic', 'kidney', 'disease', 'reaches', 'an', 'advanced', 'stage', 'dangerous', 'levels', 'of', 'fluid', 'electrolytes', 'and', 'wastes', 'can', 'build', 'up', 'in', 'your', 'body', 'in', 'the', 'early', 'stages', 'of', 'chronic', 'kidney', 'disease', 'you', 'may', 'have', 'few', 'signs', 'or', 'symptoms', 'chronic', 'kidney', 'disease', 'may', 'not', 'become', 'apparent', 'until', 'your', 'kidney', 'function', 'is', 'significantly', 'impaired', 'treatment', 'for', 'chronic', 'kidney', 'disease', 'focuses', 'on', 'slowing', 'the', 'progression', 'of', 'the', 'kidney', 'damage', 'usually', 'by', 'controlling', 'the', 'underlying', 'cause', 'chronic', 'kidney', 'disease'

In [9]:
#Keyword Matching

#Greeting words recognised in the user's input (matched in lower case)
GREETING_INPUTS = ["hi","hello","hola","greetings","wassup","hey"]

#Greeting responses sent back to the user
GREETING_RESPONSE=["howdy","hi","hey","what's good","hello","hey there"]

def greeting(sentence):
    """Return a random greeting if `sentence` contains a greeting word.

    sentence: the user's input string.
    Returns a randomly chosen entry of GREETING_RESPONSE when any
    whitespace-separated word of `sentence` (ignoring case and surrounding
    punctuation) is in GREETING_INPUTS; otherwise returns None.
    """
    for word in sentence.split():
        #Strip surrounding punctuation so inputs such as "Hello!" or "hey," still match
        if word.strip(string.punctuation).lower() in GREETING_INPUTS:
            #The constant is GREETING_RESPONSE; the previously referenced
            #GREETINGS_RESPONSES was never defined and raised NameError
            return random.choice(GREETING_RESPONSE)
    return None

In [10]:
#The user's response / query
user_response = 'what is chronic kidney disease'

user_response = user_response.lower() #make the response lower case

#Print the user's query/response
print(user_response)

#Set the chatbot response to an empty string
robo_response = ''

#Build the list to vectorize: every article sentence followed by the user's query.
#A new list is used instead of appending to sent_tokens and removing afterwards, so
#sent_tokens is never mutated: re-running the cell (or failing part-way through it)
#cannot leave a stray query in the sentence list, and no article sentence can be
#removed by mistake when it happens to equal the query text.
query_corpus = sent_tokens + [user_response]

#Print the sentence list with the user's response as its last element
print(query_corpus)

#Create a TfidfVectorizer object that tokenizes with LemNormalize
TfidfVec = TfidfVectorizer(tokenizer = LemNormalize,stop_words='english')

#Convert the sentences (and the query) to a matrix of TF-IDF features
tfidf = TfidfVec.fit_transform(query_corpus)

#Similarity scores between the query (last row) and every row, the query itself included
vals = cosine_similarity(tfidf[-1], tfidf)

#Print the similarity scores
print(vals)

#Index of the second-highest score. The highest is normally the query compared
#with itself, so the runner-up is the most similar article sentence.
idx = vals.argsort()[0][-2]

#Flatten the scores to 1-D (flatten returns a copy, so vals keeps its order)
flat = vals.flatten()

#Sort the copy in ascending order
flat.sort()

#Second-highest similarity score, i.e. the score of the sentence at idx
score = flat[-2]

#Print the similarity score
print(score)

#A score of 0 means no sentence shares any weighted term with the user's response
if(score == 0):
    robo_response = robo_response + "I apologize, I don't understand."
else:
    #Index into the combined list: idx refers to rows of tfidf, which were built from it
    robo_response = robo_response+query_corpus[idx]

#Print the chat bot response
print(robo_response)

what is chronic kidney disease
['Overview\n\nChronic kidney disease, also called chronic kidney failure, describes the gradual loss of kidney function.', 'Your kidneys filter wastes and excess fluids from your blood, which are then excreted in your urine.', 'When chronic kidney disease reaches an advanced stage, dangerous levels of fluid, electrolytes and wastes can build up in your body.', 'In the early stages of chronic kidney disease, you may have few signs or symptoms.', 'Chronic kidney disease may not become apparent until your kidney function is significantly impaired.', 'Treatment for chronic kidney disease focuses on slowing the progression of the kidney damage, usually by controlling the underlying cause.', 'Chronic kidney disease can progress to end-stage kidney failure, which is fatal without artificial filtering (dialysis) or a kidney transplant.', 'Chronic kidney disease care at Mayo Clinic\n\nHow kidneys work\n\nSymptoms\n\nSigns and symptoms of chronic kidney disease dev