In [2]:
from newspaper import Article
import random
import string
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import warnings
import re
warnings.filterwarnings('ignore')



In [3]:
# Later cells call nltk.sent_tokenize (Punkt sentence tokenizer) and
# stopwords.words('english'); without these resources a fresh environment
# raises LookupError. 'punkt_tab' is the name newer NLTK releases look for.
# quiet=True keeps the progress log limited to the two packages below.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource, quiet=True)

# WordNet data used by WordNetLemmatizer
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\jacob\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\jacob\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [4]:
# Fetch the drugs.com paracetamol page, run the download / parse / nlp steps,
# and keep the extracted body text as the corpus used by the rest of the notebook.
paracetamol_url = 'https://www.drugs.com/paracetamol.html'
paracetemol = Article(paracetamol_url)
paracetemol.download()
paracetemol.parse()
paracetemol.nlp()
corpus = paracetemol.text

In [5]:
# Show the raw article text that was extracted above
print(corpus)

Paracetamol

Generic name: paracetamol

Brand names: Panadol, Calpol, Tylenol, Alvedon

Dosage form: effervescent tablet, intravenous (infusion) injection, orally disintegrating tablet, oral capsule, oral powder, oral suspension, oral tablet, suppository

Drug class: Miscellaneous analgesics

Medically reviewed by Nicole France, BPharm. Last updated on Aug 29, 2022.

What is paracetamol?

Paracetamol (Panadol, Calpol, Alvedon) is an analgesic and antipyretic drug that is used to temporarily relieve mild-to-moderate pain and fever. It is commonly included as an ingredient in cold and flu medications and is also used on its own.

Paracetamol is exactly the same drug as acetaminophen (Tylenol). Paracetamol is the drug's name assigned using the International Nonproprietary Name (INN) generic name system. Paracetamol is the name used for the drug in places such as Europe, Australia, New Zealand and India. Acetaminophen is the generic name assigned using the United States Adopted Names (USAN

In [6]:
# Tokenization: split the article text into a list of sentences.
# NOTE(review): header lines that are separated only by newlines (no full stop)
# end up merged into a single "sentence" - see the first element shown below.
text = corpus
sentence_list = nltk.sent_tokenize(text, language='english')


In [7]:
sentence_list[0:2]

['Paracetamol\n\nGeneric name: paracetamol\n\nBrand names: Panadol, Calpol, Tylenol, Alvedon\n\nDosage form: effervescent tablet, intravenous (infusion) injection, orally disintegrating tablet, oral capsule, oral powder, oral suspension, oral tablet, suppository\n\nDrug class: Miscellaneous analgesics\n\nMedically reviewed by Nicole France, BPharm.',
 'Last updated on Aug 29, 2022.']

In [8]:
from nltk.stem import WordNetLemmatizer

# Will hold one cleaned, lemmatized string per entry of sentence_list
medication = list()
# Shared lemmatizer instance used when cleaning the sentences
lemmatizer = WordNetLemmatizer()


In [9]:
from nltk.corpus import stopwords

In [10]:
# Clean every sentence: keep letters only, lowercase, drop English stop words,
# lemmatize what is left and join the words back into one string.

# Build the stop-word set once; the previous version rebuilt it for every word.
english_stopwords = set(stopwords.words('english'))
non_letters = re.compile('[^a-zA-Z]')

# Slice assignment refills the existing list in place, so re-running this cell
# rebuilds it instead of appending duplicates (medication must stay aligned
# index-for-index with sentence_list).
medication[:] = [
    ' '.join(
        lemmatizer.lemmatize(word)
        for word in non_letters.sub(' ', sentence).lower().split()
        if word not in english_stopwords
    )
    for sentence in sentence_list
]

In [11]:
# Peek at the first two cleaned sentences
medication[:2]

['paracetamol generic name paracetamol brand name panadol calpol tylenol alvedon dosage form effervescent tablet intravenous infusion injection orally disintegrating tablet oral capsule oral powder oral suspension oral tablet suppository drug class miscellaneous analgesic medically reviewed nicole france bpharm',
 'last updated aug']

In [12]:
def greeting_response(greeting):
    """Return a random bot greeting when the message contains a greeting word.

    Parameters
    ----------
    greeting : str
        The raw user message.

    Returns
    -------
    str or None
        One of the bot's greetings if any whitespace-separated word of the
        message (ignoring case and surrounding punctuation) is a known user
        greeting; otherwise None.
    """
    # Bot's greeting responses
    bot_greetings = ['hi','hey','hello']

    # Greetings recognised from the user
    user_greetings = {'hi','hey','hello','greetings'}

    for word in greeting.lower().split():
        # Strip surrounding punctuation so "hello!" or "hi," still count as greetings
        if word.strip(string.punctuation) in user_greetings:
            return random.choice(bot_greetings)

    # No greeting word found
    return None
            

In [13]:
def index_sort(list_var):
    """Return the positions of ``list_var`` ordered from largest to smallest value.

    Parameters
    ----------
    list_var : sequence
        Any indexable sequence with a length whose items can be compared with
        each other (for example a list or a 1-D numpy array of scores).

    Returns
    -------
    list of int
        Indices into ``list_var``; the first entry points at the largest
        value. Equal values keep their original relative order.
    """
    # The built-in sort replaces the earlier hand-written O(n^2) swap loop.
    return sorted(
        range(len(list_var)),
        key=lambda position: list_var[position],
        reverse=True,
    )

In [14]:
#Create bot response
def bot_response(user_input, max_sentences=2):
    """Answer a user query with the article sentences most similar to it.

    The query is lowercased and vectorized with TF-IDF together with the
    cleaned sentences in the module-level ``medication`` list. Sentences are
    ranked by cosine similarity to the query, and the matching original
    sentences are taken from the module-level ``sentence_list`` (same indices
    as ``medication``).

    Parameters
    ----------
    user_input : str
        The user's message.
    max_sentences : int, optional
        Maximum number of sentences to return (default 2).

    Returns
    -------
    str
        The selected sentences, each preceded by a single space, or
        " I'm sorry, I do not understand" when no sentence has a similarity
        above zero.
    """
    user_input = user_input.lower()

    # Work on a copy with the query appended as the last document. The shared
    # medication list is never mutated, so there is nothing to undo afterwards
    # (list.remove could delete a real corpus entry equal to the query).
    documents = medication + [user_input]
    query_position = len(documents) - 1

    tfidf_matrix = TfidfVectorizer().fit_transform(documents)
    # Similarity of the query (last row) to every document, flattened to 1-D
    similarity_scores_list = cosine_similarity(tfidf_matrix[-1], tfidf_matrix).flatten()

    # Rank by descending similarity and drop the query itself by position;
    # it is not guaranteed to rank first when another sentence ties with it.
    ranked = [
        position
        for position in index_sort(similarity_scores_list)
        if position != query_position
    ]

    matches = []
    for position in ranked:
        # Stop at the requested number of sentences, or once the remaining
        # (lower-ranked) sentences have no similarity at all.
        if len(matches) >= max_sentences or not similarity_scores_list[position] > 0.0:
            break
        matches.append(sentence_list[position])

    if not matches: #if there aren't any similarities found
        return ' '+"I'm sorry, I do not understand"

    return ''.join(' '+sentence for sentence in matches)


In [None]:
#Start chat with bot
print('Pharma Bot: I am Pharma Bot. I am here to assist you today with any queries you may have regarding medications that you are looking for. If you want to exit, type bye')

exit_list = ['exit','see you later','bye','quit']
while True:
    # Normalise once: case-insensitive, and stray surrounding spaces must not
    # stop an exit word from being recognised
    user_input = input().strip().lower()
    if user_input in exit_list:
        print("Pharma Bot: Chat with you later...stay dependant on drugs;)")
        break

    # Run the greeting check a single time and reuse its result
    greeting = greeting_response(user_input)
    if greeting is not None:
        print("Pharma Bot: "+greeting)
    else:
        print("Pharma Bot: "+bot_response(user_input))