<a href="https://colab.research.google.com/github/massawegodii/Machine-Learning-Tutorial/blob/main/chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Install Required Libraries


In [None]:
!pip install nltk newspaper3k
!pip install --upgrade lxml lxml_html_clean newspaper3k




2. Import Libraries


In [None]:
import nltk
import random
import string
import warnings
import numpy as np
from newspaper import Article
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Suppress every warning for the rest of the session (applies process-wide,
# not only to the libraries imported above).
warnings.filterwarnings('ignore')

# Download necessary NLTK data
# quiet=True keeps the download log out of the cell output.
# 'punkt' / 'punkt_tab': presumably the tokenizer data behind the
# nltk.sent_tokenize / nltk.word_tokenize calls made later - confirm against
# the installed NLTK version.
nltk.download('punkt', quiet=True)
# 'wordnet': presumably the data used by nltk.WordNetLemmatizer - confirm.
nltk.download('wordnet', quiet=True)
nltk.download('punkt_tab', quiet=True)


True

3. Fetch and Process Article


In [None]:
# Function to fetch and process article text
def get_article_text(url):
    """Download the article at *url* and return ``(text, summary)``.

    Args:
        url: Address of the page to download and parse.

    Returns:
        A ``(text, summary)`` tuple of strings. Both are ``""`` when the
        article cannot be downloaded or parsed. When only the summarisation
        step fails, the parsed text is still returned and the summary is
        ``""``.

    Errors are reported with ``print`` rather than raised, so callers always
    receive a two-string tuple.
    """
    try:
        article = Article(url)
        article.download()
        article.parse()
    except Exception as e:
        print("Error fetching article:", e)
        return "", ""

    # Summarisation is an optional extra: a failure here must not throw away
    # text that was already downloaded and parsed successfully.
    try:
        article.nlp()  # Extract key points and summary
        summary = article.summary
    except Exception as e:
        print("Error summarizing article:", e)
        summary = ""

    return article.text, summary

# URL for medical information
url = 'https://www.mayoclinic.org/diseases-conditions/chronic-kidney-disease/symptoms-causes/syc-20354521'
# corpus is the article text; summary is its generated summary. Both are ""
# if the fetch failed. summary is not used again in the visible cells.
corpus, summary = get_article_text(url)

# Tokenization
# Split the article into sentences; bot_response returns entries of this list
# as its answers.
sent_tokens = nltk.sent_tokenize(corpus)

# Use a pre-trained BERT model for similarity scoring
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# Embed every article sentence once, up front, so each query only has to embed
# the user's input.
# NOTE(review): sent_tokens is empty when the fetch failed - confirm that the
# code consuming sentence_embeddings copes with an empty result.
sentence_embeddings = bert_model.encode(sent_tokens, convert_to_tensor=True)


4. Process and Clean Input

In [None]:
# Function to process and clean input
def lemmatize_sentence(text):
    """Return *text* as lower-cased, lemmatized words without punctuation tokens.

    Args:
        text: Raw sentence typed by the user.

    Returns:
        The remaining tokens, lemmatized and joined by single spaces.
    """
    lemmatizer = nltk.WordNetLemmatizer()
    punctuation = set(string.punctuation)
    # A token is dropped when every character in it is punctuation. Testing
    # `token in string.punctuation` would be a substring check against the
    # punctuation string, which lets multi-character tokens such as "..." or
    # "--" slip through.
    return ' '.join(
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(text)
        if not all(char in punctuation for char in token)
    )


5. Handle Greetings

In [None]:
# Greeting responses
def greeting_response(text):
    """Return a random greeting if *text* contains a known greeting, else ``None``.

    Matching is case-insensitive, ignores punctuation attached to a word
    ("Hello!", "hi,") and works on whole words only, so "this" does not match
    "hi". Multi-word greetings such as "whats up" are matched as phrases.

    Args:
        text: Raw sentence typed by the user.

    Returns:
        One of the bot greetings chosen at random, or ``None`` when no
        greeting is found.
    """
    bot_greetings = ['Howdy!', 'Hi there!', 'Hello!', 'Hey!', 'Hola!']
    user_greetings = ['hi', 'hey', 'hello', 'hola', 'greetings', 'wassup', 'whats up']

    # Strip surrounding punctuation from each word and drop words that were
    # punctuation only.
    stripped = (word.strip(string.punctuation) for word in text.lower().split())
    words = [word for word in stripped if word]

    # Pad both sides with a space so a phrase only matches on word boundaries;
    # this is what allows the two-word entry "whats up" to be found.
    padded = f" {' '.join(words)} "
    if any(f" {greeting} " in padded for greeting in user_greetings):
        return random.choice(bot_greetings)
    return None


6. Sort Similarity Scores

In [None]:
# Sorting function
def index_sort(similarity_scores):
    """Return the indexes of *similarity_scores* ordered from best to worst.

    Indexes whose scores compare equal keep their original relative order.
    """
    def score_at(position):
        return similarity_scores[position]

    positions = list(range(len(similarity_scores)))
    positions.sort(key=score_at, reverse=True)
    return positions


7. Generate Chatbot Response

In [None]:
# Chatbot response: rank the article sentences by cosine similarity between
# their BERT embeddings and the embedding of the user's input
def bot_response(user_input, top_k=3, threshold=0.2):
    """Answer *user_input* with the most similar sentences from the article.

    Args:
        user_input: Raw question typed by the user.
        top_k: Maximum number of article sentences to include in the answer.
        threshold: A sentence is only used when its cosine similarity to the
            input is strictly greater than this value.

    Returns:
        The selected sentences, best match first, joined by single spaces, or
        a fixed apology when the article is empty or nothing scores above
        *threshold*.
    """
    fallback = "I'm sorry, but I couldn't understand your query. Could you rephrase?"

    # With no sentences (e.g. the article fetch failed) there is nothing to
    # rank, so answer with the fallback instead of computing similarities
    # against an empty embedding set.
    if not sent_tokens:
        return fallback

    user_input = lemmatize_sentence(user_input)

    # Encode user input using BERT
    user_embedding = bert_model.encode([user_input], convert_to_tensor=True)

    # Compute similarity scores: one row for the single query, one column per
    # article sentence
    similarity_scores = cosine_similarity(user_embedding.cpu().numpy(), sentence_embeddings.cpu().numpy())[0]

    # Keep the best-ranked sentences that clear the relevance threshold
    best_matches = [
        sent_tokens[i]
        for i in index_sort(similarity_scores)[:top_k]
        if similarity_scores[i] > threshold
    ]
    if not best_matches:
        return fallback

    return ' '.join(best_matches)


8. Run the Chatbot

In [None]:
# Memory storage for context-aware conversations
# chatbot() appends every non-greeting user query here. Nothing in the visible
# cells reads the list back.
chat_memory = []
# chatbot() counts offences under the single fixed key "user"; the count lives
# at module level, so it is not reset when chatbot() is called again.
user_warnings = {}  # Dictionary to track abusive language warnings

# Sample knowledge base
# Maps a question phrase to a canned answer.
# NOTE(review): no function in the visible cells looks anything up in this
# dict - confirm whether it is still needed or should be wired into
# bot_response.
knowledge_base = {
    "signs and symptoms of chronic kidney disease": "The common symptoms include swelling in the legs, fatigue, nausea, shortness of breath, and changes in urination patterns.",
    "symptoms of chronic kidney disease": "Chronic kidney disease symptoms include swelling, fatigue, high blood pressure, and changes in urine output.",
    "sign of kidney disease": "Signs of kidney disease can include swelling, back pain, changes in urination, and persistent itching."
}

# Function to check for abusive language
def contains_abuse(text):
    """Return ``True`` when *text* contains a word from the abusive-word list.

    Matching is case-insensitive and whole-word: "fool" is flagged but
    "foolish" is not. Punctuation attached to a word ("idiot!", "(stupid)")
    does not hide it.

    Args:
        text: Raw sentence typed by the user.

    Returns:
        ``True`` if any word is abusive, otherwise ``False``.
    """
    abusive_words = {"badword1", "badword2", "badword3", "stupid", "idiot", "fool"}
    return any(
        word.strip(string.punctuation) in abusive_words
        for word in text.lower().split()
    )

# Chat interface
def chatbot():
    """Run the interactive DocBot loop until the user leaves or is blocked.

    Each turn reads one line from the user and then, in order:

    * ends the chat on an exit command, or when input is closed or
      interrupted;
    * ignores blank lines;
    * warns on abusive language and ends the chat on the third offence
      (offences are counted in the module-level ``user_warnings``);
    * replies with a greeting if the line contains one;
    * otherwise records the query in ``chat_memory`` and prints the answer
      from ``bot_response``.
    """
    print("DocBot: I am an advanced AI medical chatbot. Please be respectful. Type 'bye' to exit.")
    exit_list = ['bye', 'quit', 'exit', 'see you later']

    while True:
        try:
            # Strip so that stray whitespace ("bye ") does not stop an exit
            # command from matching.
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the chat cleanly
            # instead of surfacing a traceback.
            print("DocBot: Goodbye! Stay healthy.")
            break

        if not user_input:
            continue  # Nothing typed; prompt again rather than query the model

        if user_input.lower() in exit_list:
            print("DocBot: Goodbye! Stay healthy.")
            break

        if contains_abuse(user_input):
            user_warnings["user"] = user_warnings.get("user", 0) + 1
            if user_warnings["user"] > 2:  # 3rd offense = block
                print("DocBot: You have been blocked for using abusive language repeatedly.")
                break
            else:
                print(f"DocBot: Warning {user_warnings['user']}/3 - Please avoid using offensive language.")
                continue  # Skip processing abusive input

        greeting = greeting_response(user_input)
        if greeting:
            print("DocBot:", greeting)
        else:
            # Store user queries for memory-based improvements
            chat_memory.append(user_input)
            print("DocBot:", bot_response(user_input))

# Run chatbot
# Starts the interactive loop as soon as this cell executes; the call only
# returns once chatbot() breaks out of its input loop.
chatbot()



DocBot: I am an advanced AI medical chatbot. Please be respectful. Type 'bye' to exit.
DocBot: Hi there!
DocBot: We wish you well. 
DocBot: I'm sorry, but I couldn't understand your query. Could you rephrase?
DocBot: And as kidney disease progresses, you may notice the following symptoms. Chronic kidney disease is a disease characterized by progressive damage and loss of function in the kidneys. Urine tests can reveal abnormalities and give clues to the underlying cause of the chronic kidney disease. 
DocBot: Chronic kidney disease is a disease characterized by progressive damage and loss of function in the kidneys. It's estimated that chronic kidney disease affects about one in seven American adults. In this video, we'll cover the basics of chronic kidney disease. 
DocBot: We wish you well. Whether you are looking for answers for yourself or for someone you love, we're here to give you the best information available. It can be helpful to talk to someone you trust, a friend or relative