In [0]:
#description:this is a 'self learning' chatbot program

In [0]:
#install the package NLTk
pip install nltk

In [0]:
#install the package newspaper3k
pip install newspaper3k

In [0]:
#import some libraries
from newspaper import Article
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import numpy as np
import warnings

In [0]:
#ignore any warning messages
warnings.filterwarnings('ignore')

In [0]:
#download the NLTK data used by this notebook
nltk.download('punkt',quiet=True)
#newer NLTK releases load the sentence/word tokenizer data from 'punkt_tab' instead of 'punkt';
#fetching both keeps sent_tokenize/word_tokenize working regardless of the installed NLTK version
nltk.download('punkt_tab',quiet=True)
nltk.download('wordnet',quiet=True)

In [0]:
#address of the article whose text is used as the chatbot's knowledge base
ARTICLE_URL='https://www.mayoclinic.org/diseases-conditions/chronic-kidney-disease/symptoms-causes/syc-20354521'

#fetch the page and extract its plain text
article=Article(ARTICLE_URL)
article.download()
article.parse()
#article.nlp() is deliberately not called: this notebook only reads article.text,
#which is populated by parse(), so the extra processing step was unused work that could fail
corpus=article.text

#print the corpus/text
print(corpus)

In [0]:
#sentence tokenization
text=corpus
#split the article text into a list of sentences
sent_tokens=nltk.sent_tokenize(text,language='english')

#show the resulting sentences
print(sent_tokens)

In [0]:
#translation table for str.translate: every punctuation code point maps to None, i.e. it gets deleted
remove_punct_dict=dict.fromkeys(map(ord,string.punctuation))

#show the punctuation characters covered by the table
print(string.punctuation)

#show the table itself
print(remove_punct_dict)

In [0]:
#normalize raw text into a list of lower-case word tokens without punctuation
def LemNormalize(text):
  """Lower-case `text`, delete punctuation and split it into word tokens.

  Punctuation is deleted with str.translate using the module-level
  remove_punct_dict table; the remaining text is split with
  nltk.word_tokenize, whose result is returned.

  Note: despite the function's name, no lemmatization step is applied here.
  """
  lowered=text.lower()
  without_punct=lowered.translate(remove_punct_dict)
  return nltk.word_tokenize(without_punct)

#show the normalized tokens of the whole article text
print(LemNormalize(text))

In [0]:
#keyword matching

#words that count as a greeting from the user (compared in lower case)
GREETING_INPUTS=["hi","hello","hola","greetings","wassup","hey"]

#replies the bot chooses from after it has been greeted
GREETING_RESPONSES=["howdy","hi","hey","what's good","hello","hey there"]

#function to return a random greeting response to a user's greeting
def greeting(sentence):
  """Return a random greeting reply if `sentence` contains a greeting word.

  Each whitespace-separated word is lower-cased and stripped of leading and
  trailing punctuation before it is compared against GREETING_INPUTS, so
  inputs such as "Hello!" or "hey," are recognised as greetings.

  Returns one entry of GREETING_RESPONSES picked with random.choice, or
  None when no word of `sentence` is a greeting.
  """
  for word in sentence.split():
    #strip surrounding punctuation so "hi!" matches "hi"
    if word.lower().strip(string.punctuation) in GREETING_INPUTS:
      return random.choice(GREETING_RESPONSES)
  return None

In [0]:
#generate the response
def response(user_response):
  """Return the corpus sentence that is most similar to the user's query.

  The query is lower-cased and vectorized together with the sentences in
  the module-level list sent_tokens using TF-IDF (tokenizer LemNormalize,
  English stop words). The query vector is then compared with every corpus
  sentence by cosine similarity.

  Returns the best-matching sentence from sent_tokens, or the apology
  string when the best similarity is 0 or sent_tokens is empty.

  sent_tokens is not modified: the query is added to a temporary copy, so a
  query that equals a corpus sentence, or an exception raised while
  vectorizing, cannot alter the corpus.
  """
  #make the query lower case
  user_response=user_response.lower()

  #without any corpus sentence there is no candidate answer
  if not sent_tokens:
    return "I apologize ,I don't understand."

  #corpus sentences followed by the query as the last document
  documents=sent_tokens+[user_response]

  #convert the documents to a matrix of TF-IDF features
  Tfidfvec=TfidfVectorizer(tokenizer=LemNormalize,stop_words='english')
  tfidf=Tfidfvec.fit_transform(documents)

  #similarity of the query (last row) to the corpus sentences only, so the
  #query can never be selected as its own best match
  scores=cosine_similarity(tfidf[-1],tfidf[:-1]).flatten()

  #index of the most similar corpus sentence
  idx=scores.argmax()

  #a best score of 0 means no sentence shares any term with the query
  if scores[idx]==0:
    return "I apologize ,I don't understand."
  return sent_tokens[idx]

In [0]:
#interactive chat loop: runs until the user says bye or thanks
flag=True
print("DOCbot:I am doctor bot or DOCbot for short.I will answer your queries about Chronic Kidney Disease.If you want to exit,type Bye!")
while flag:
  user_response=input().strip().lower()
  #form used only for recognising commands: surrounding punctuation/whitespace
  #is dropped so that "Bye!" (as written in the banner) or "thanks." also match
  command=user_response.strip(string.punctuation+string.whitespace)
  if command=='bye':
    flag=False
    print("DOCbot:chat with you later!")
  elif command=='thanks' or command=='thank you':
    flag=False
    print("DOCbot:you are welcome")
  else:
    #call greeting() once: it picks a random reply, so the value that is
    #tested must be the same value that is printed
    greeting_reply=greeting(user_response)
    if greeting_reply is not None:
      print("DOCbot:"+greeting_reply)
    else:
      print("DOCbot: "+response(user_response))