
# **CHATBOT USING STATIC DATASET**

**Importing Useful Libraries**

In [None]:
#for importing and manipulating data
import numpy as np 
import pandas as pd 
import string
import csv

#for fitting model
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
from sklearn.pipeline import Pipeline

#connecting with Google, requesting data and applying NLP
import requests
from lxml import html
from googlesearch import search
from bs4 import BeautifulSoup
import string
import urllib.request 
from urllib.request import urlopen
import re
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

**Importing Dataset From Drive**

In [None]:
#the original data lives in a Google Sheet;
#swapping the '/edit#gid=' part of its URL for '/export?format=csv&gid='
#produces the link that is handed to pandas below as a CSV source

sheet_url = 'https://docs.google.com/spreadsheets/d/1KqvA5O9hH62UQWYpn2DVA5Pf4Kofp4gk51hZLVSuJpw/edit#gid=0'

csv_file = sheet_url.replace('/edit#gid=', '/export?format=csv&gid=')

#reading dataset into variable 'df' (fetched from the URL each time this cell runs)
df = pd.read_csv(csv_file)

In [None]:
#reviewing data: `df` as the last expression of the cell makes the notebook render the frame

df

Unnamed: 0,User,Chatbot
0,Drug Reaction,"stop irritation,consult nearest hospital,stop ..."
1,Malaria,"Consult nearest hospital,avoid oily food,avoid..."
2,Allergy,"apply calamine,cover area with bandage,use ice..."
3,Hypothyroidism,"reduce stress,exercise,exercise,get proper sleep"
4,Psoriasis,"wash hands with warm soapy water,stop bleeding..."
...,...,...
139,book an appointment for me,"Sorry, I can't help you with this"
140,name a hospital near me,"Sorry, I can't help you with this"
141,do you know my location?,"Sorry, I can't help you with this"
142,can you help me?,"Yes, I think I can. Please enter a disease and..."


**Working on Healthcare Dataset**

In [None]:
#counting the missing (NaN) values in the 'Chatbot' column

df['Chatbot'].isnull().sum()

32

In [None]:
#removing every row that has a missing value in any column
#(df is rebound here, so the unfiltered frame is no longer reachable under this name)

df = df.dropna()


### Dataset is now ready for use

In [None]:
#connecting to user inputs dataset: mounts Google Drive at /content/drive
#(google.colab is only importable inside a Colab runtime)

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
#input function

def user_input(filename='/content/drive/MyDrive/Healthcare Chatbot 1/User_dataset.csv'):
  """Greet the user, prompt for their details and append them to a CSV file.

  The user is asked, in order, for name, email, age and current location;
  the answers are stored as one CSV row. When the target file does not exist
  yet (or is empty) a header row holding the field names is written first, so
  that reading the file back with pd.read_csv() does not consume the first
  user's record as the header.

  Args:
    filename: path of the CSV file to append to. Defaults to the
      User_dataset.csv on the mounted Google Drive.

  Returns:
    None. The answers are only persisted to `filename`.
  """
  import os  # local import: only needed for the empty-file check below

  greeting = "Hello there! I am Reyana, your healthcare assistant. Please enter your details to continue."
  prompt_prefix = "Enter your "
  fields = ['name', 'email', 'age', 'current location']                       #fields of input

  print(greeting)
  result = []
  for field in fields:
    print(prompt_prefix + field)
    result.append(input())

  # decide before opening: opening in append mode creates the file
  needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0

  # newline='' is what the csv module asks for on files it writes to;
  # the with-block closes the file, so no explicit close() is needed
  with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    if needs_header:
      writer.writerow(fields)
    writer.writerow(result)


In [None]:
#calling input function: prompts interactively and appends the answers to the users CSV

user_input()

Hello there! I am Reyana, your healthcare assistant. Please enter your details to continue.
Enter your name
Aditi Sharma
Enter your email
aditisharma100201@gmail.com
Enter your age
20
Enter your current location
Noida


In [None]:
#display csv file head: reloads the users CSV and shows its first rows
#NOTE(review): the rendered rows contain names and e-mail addresses - consider clearing this output before sharing

inputs = pd.read_csv('/content/drive/MyDrive/Healthcare Chatbot 1/User_dataset.csv')

inputs.head()

Unnamed: 0,name,email,age,current location
0,Aditi,aditisharma100201@gmail.com,20,Noida
1,User1,abc@gmail.com,21,Delhi
2,Anushka,mokshapathak@rediffmail.com,20,Delhi
3,Aditi Sharma,aditisharma100201@gmail.com,20,Noida


***Decision Tree Classifier***

In [None]:
#function to remove punctuation
#converts the input to lowercase

def cleaner(x):
  """Tokenise `x`: drop punctuation, lowercase, split on whitespace.

  Every character found in string.punctuation is removed first; the remaining
  text is lowercased and split on runs of whitespace.

  Returns:
    A list of lowercase tokens (empty when nothing is left after cleaning).
  """
  punctuation = string.punctuation
  stripped = ''.join(ch for ch in x if ch not in punctuation)
  return stripped.lower().split()

In [None]:
#Creating a pipeline to facilitate conversation
#Using Decision Tree for fitting data

# Three stages: bag-of-words counts (tokenised by `cleaner`) -> TF-IDF
# weighting -> decision tree classifier.
# random_state is pinned so that refitting on the same data yields the same
# tree (and therefore the same replies) on every run.
Pipe = Pipeline([
    ('bow',CountVectorizer(analyzer=cleaner)),
    ('tfidf',TfidfTransformer()),
    ('classifier',DecisionTreeClassifier(random_state=42))
])

In [None]:
#Fitting our dataset in the pipeline:
#the 'User' column is passed as the input texts, the 'Chatbot' column as the labels to predict

Pipe.fit(df['User'],df['Chatbot'])

Pipeline(memory=None,
         steps=[('bow',
                 CountVectorizer(analyzer=<function cleaner at 0x7f2a8122c170>,
                                 binary=False, decode_error='strict',
                                 dtype=<class 'numpy.int64'>, encoding='utf-8',
                                 input='content', lowercase=True, max_df=1.0,
                                 max_features=None, min_df=1,
                                 ngram_range=(1, 1), preprocessor=None,
                                 stop_words=None, strip_accents=None,
                                 token_pattern='(?u)\\b\\w\\w+\\b',
                                 tok...
                                  sublinear_tf=False, use_idf=True)),
                ('classifier',
                 DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None,
                                        criterion='gini', max_depth=None,
                                        max_features=None, max_leaf_nodes=None,
     

***Talking to chatbot***

In [None]:
input = ""
input = 'MALARIA'

In [None]:
output = Pipe.predict([input])[0]

In [None]:
#showing the reply predicted for the query
output

'Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out'

Collecting Data from Google

In [None]:
#loading spaCy's small English pipeline ("en_core_web_sm"), trained on written web text
#NOTE(review): `nlp` is not referenced again in the cells shown here - confirm it is still needed

nlp = spacy.load("en_core_web_sm")

In [None]:
query = 'diabetes precautions'

#getting top 10 links from google and storing it in a list
#NOTE(review): presumably a live search on every run, so the links may differ between runs - confirm
search_result_list = list(search(query, tld="co.in", num=10, stop=10, pause=1))

In [None]:
#printing the collected links, one per line
print(*search_result_list, sep='\n')

https://www.mayoclinic.org/diseases-conditions/diabetes/in-depth/diabetes-management/art-20045803
https://www.mayoclinic.org/diseases-conditions/type-2-diabetes/in-depth/diabetes-prevention/art-20047639
https://www.healthline.com/nutrition/prevent-diabetes
https://www.healthline.com/nutrition/does-sugar-cause-diabetes
https://www.healthline.com/health/type-2-diabetes/myths-and-misconceptions
https://www.webmd.com/diabetes/lifestyle-tips-to-avoid-diabetes-complications
https://www.webmd.com/diabetes/guide/diabetes-general-overview-facts
https://www.webmd.com/diabetes/guide/diabetes-general-living-with
https://www.webmd.com/diabetes/the-link-between-diabetes-and-gout
https://www.webmd.com/diabetes/diabetes-types-insulin


Extracting Results

In [None]:
#extracting html format of website

# Only the first search hit is downloaded.
url = search_result_list[0]

# timeout keeps a stalled server from hanging the kernel; raise_for_status()
# stops here on an HTTP error instead of letting an error page be parsed later.
r = requests.get(url, timeout=10)
r.raise_for_status()
# NOTE: this rebinds `html`, which the import cell bound to lxml's html module.
html = r.text

In [None]:
#converting to text: the downloaded markup is parsed with the "html5lib" parser,
#then get_text() is used to pull the text out of the parsed tree

soup = BeautifulSoup(html, "html5lib")

text = soup.get_text()

In [None]:
#working on text data

# remove <script> and <style> elements from the parsed tree
for script in soup(["script", "style"]):
    script.extract()   

# Re-extract the text AFTER the removal above. The `text` computed before this
# cell was taken while the script/style elements were still in the tree, so
# without this line the removal would have no effect on the cleaned text.
text = soup.get_text()

# break into lines and remove leading and trailing space on each
lines = (line.strip() for line in text.splitlines())
# break multi-headlines into a line each
chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
# drop blank lines
text = '\n'.join(chunk for chunk in chunks if chunk)

#print(text)

In [None]:
#printing results

# First ten <h3> headings of the page. find_all is the current Beautiful Soup
# name; findAll is the deprecated BS3-era alias for the same method.
resultant = soup.find_all('h3')[:10]
print(*resultant, sep='\n')

<h3>1. Make a commitment to managing your diabetes</h3>
<h3>2. Don't smoke</h3>
<h3>3. Keep your blood pressure and cholesterol under control</h3>
<h3>4. Schedule regular physicals and eye exams</h3>
<h3>5. Keep your vaccines up to date</h3>
<h3>6. Take care of your teeth</h3>
<h3>7. Pay attention to your feet</h3>
<h3>8. Consider a daily aspirin</h3>
<h3>9. If you drink alcohol, do so responsibly</h3>
<h3>10. Take stress seriously</h3>


In [None]:
#removing tags, printing final result

# keep only the text content of each heading (the surrounding <h3> tags are dropped)
sol = [heading.text for heading in resultant]

print(*sol, sep='\n')

1. Make a commitment to managing your diabetes
2. Don't smoke
3. Keep your blood pressure and cholesterol under control
4. Schedule regular physicals and eye exams
5. Keep your vaccines up to date
6. Take care of your teeth
7. Pay attention to your feet
8. Consider a daily aspirin
9. If you drink alcohol, do so responsibly
10. Take stress seriously
