In [1]:
# Standard library
import re
import string
from string import punctuation

# Third-party
import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker

# Fetch every NLTK resource this notebook relies on, so that a fresh
# environment survives "Restart Kernel -> Run All":
#   stopwords         -> stopwords.words("english")
#   wordnet           -> wordnet.synsets(...) and WordNetLemmatizer
#   punkt / punkt_tab -> tokenizer models required by word_tokenize; which of
#                        the two is looked up depends on the installed NLTK
#                        version, so both are requested. An NLTK release that
#                        does not know one of them only reports it and
#                        continues with the next resource.
for resource in ('stopwords', 'wordnet', 'punkt', 'punkt_tab'):
    nltk.download(resource)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Pre-Running

In [2]:
# Keywords the chatbot reacts to; each one is expanded with its WordNet synonyms.
list_words = ['hello', 'analyze', 'quit']

# Characters that are NOT letters, digits, spaces, newlines or dots get replaced
# by a space (WordNet lemma names use '_' between words, e.g. 'how_do_you_do').
# Written as a raw string: the previous non-raw literal contained the invalid
# escape sequence '\.', which newer Python versions warn about.
_special_chars = re.compile(r'[^a-zA-Z0-9 \n\.]')

# Maps every keyword to the set of cleaned lemma names found across all of its
# WordNet synsets. A keyword WordNet does not know maps to an empty set.
list_syn = {
    word: {
        _special_chars.sub(' ', lem.name())
        for syn in wordnet.synsets(word)
        for lem in syn.lemmas()
    }
    for word in list_words
}


print(list_syn['hello'])

{'how do you do', 'hullo', 'hi', 'hello', 'howdy'}


In [3]:
import joblib

# Load the saved classifier and the saved vectorizer from disk.
# Both paths are relative, so the two .joblib files must sit in the kernel's
# working directory.
# NOTE(review): joblib files are pickle-based, and unpickling can run arbitrary
# code -- only load model files that come from a trusted source.
# NOTE(review): presumably both objects were produced by the same training
# run; confirm they belong together before trusting the predictions.
loaded_clf = joblib.load('classifier_model.joblib')
loaded_vectorizer = joblib.load('vectorizer_model.joblib')

# Chatbot

In [4]:
# English stopwords for the clean_user_input function (kept as a set so the
# per-word membership test is cheap)
stop_words = set(stopwords.words("english"))
# Spell checker used to correct each token of the user input
spell = SpellChecker()
# Lemmatizer used to reduce each remaining word to its base form
lemmatizer = WordNetLemmatizer()


def clean_user_input(user_input):
    """Normalise raw chat input into a space-separated string of base words.

    Pipeline, in order: strip HTML tags, tokenize, lowercase, drop
    punctuation-only tokens, drop purely numeric tokens, spell-correct,
    remove English stopwords, lemmatize.

    Relies on the module-level ``spell``, ``stop_words`` and ``lemmatizer``
    objects.

    Args:
        user_input: The raw text typed by the user.

    Returns:
        The cleaned words joined by single spaces; an empty string when
        nothing survives the cleaning.
    """
    # Remove HTML tags from user input
    text_no_html = re.sub('<.*?>', '', user_input)

    # Word tokenization
    tokens = word_tokenize(text_no_html)

    # Lowercase and remove punctuation. A token is dropped when *every*
    # character is punctuation; a plain `token in punctuation` substring test
    # would let multi-character tokens such as '...', '--' or the tokenizer's
    # quote tokens through.
    words = [
        token.lower()
        for token in tokens
        if not all(char in punctuation for char in token)
    ]

    # Remove numbers
    words = [word for word in words if not word.isdigit()]

    # Spell checking. correction() may return None when it has no candidate
    # for a word; keep the original word in that case so the following steps
    # never receive None.
    words = [spell.correction(word) or word for word in words]

    # Remove stopwords (after correction, so misspelt stopwords are caught too)
    words = [word for word in words if word not in stop_words]

    # Lemmatization
    words = [lemmatizer.lemmatize(word) for word in words]

    # Join words into sentence
    return ' '.join(words)



def generate_response(user_input):
    """Return the chatbot's reply for a single user message.

    The message is matched (case-insensitively, anchored at its start) against
    the rules below, in order; the first rule that matches produces the reply:

    1. a greeting synonym from ``list_syn['hello']`` -> fixed greeting reply
    2. ``analyze...``  -> the cleaned version of the message
    3. ``logistic...`` -> the loaded classifier's prediction for the cleaned
       message, rendered as text
    4. ``quit...``     -> fixed farewell reply

    Replies are built lazily, so the cleaning pipeline and the classifier only
    run for the rule that actually matched instead of on every message.

    Args:
        user_input: The raw text typed by the user.

    Returns:
        The reply string, or a fallback sentence when no rule matches.
    """
    # Escape each synonym so regex metacharacters in a synonym (the synonym
    # cleaning keeps '.') are matched literally; sort for a reproducible
    # pattern, since set iteration order is not fixed. The negative lookahead
    # requires the synonym to end at a word boundary, so that e.g. 'history'
    # is not treated as the greeting 'hi'.
    greeting_alternatives = '|'.join(
        re.escape(synonym) for synonym in sorted(list_syn['hello'])
    )
    greeting_pattern = r'(?i)(?:{})(?!\w)'.format(greeting_alternatives)

    def predict_label():
        # Vectorize the cleaned message and format the predicted label array.
        features = loaded_vectorizer.transform([clean_user_input(user_input)])
        return "{}".format(loaded_clf.predict(features))

    # (pattern, reply factory) pairs, checked in this order.
    rules = (
        (greeting_pattern, lambda: "How can I help you?"),
        (r'(?i)analyze.*', lambda: "{}".format(clean_user_input(user_input))),
        (r'(?i)logistic.*', predict_label),
        (r'(?i)quit.*', lambda: "See You!"),
    )

    for pattern, build_reply in rules:
        if re.match(pattern, user_input):
            return build_reply()
    return "I'm sorry, but I'm not sure I understand."



def chat():
    """Run the interactive chat loop on the console.

    Prints a greeting, then repeatedly reads a line from the user and prints
    the reply produced by ``generate_response``. The loop ends when the user
    types ``quit`` (any casing, surrounding whitespace ignored) or when input
    is no longer available (end of input / interrupt).

    Returns:
        None
    """
    print("Hi")
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel: leave the loop cleanly
            # instead of surfacing a traceback.
            print("Goodbye!")
            break

        if user_input.strip().lower() == 'quit':
            print("Goodbye!")
            break

        # generate_response already handles the 'analyze' and 'logistic'
        # commands (cleaning / classifier prediction); handling them here as
        # well would compute and print the same result twice.
        response = generate_response(user_input)
        print("Mr. C-Bot:", response)

# Example usage: start the interactive session. This call blocks, reading from
# the input prompt in a loop, until the user types 'quit'.
chat()


Hi


Mr. C-Bot: How can I help you?
Mr. C-Bot: analyze hello ned info
Mr. C-Bot: analyze hello ned info finger
Mr. C-Bot: analyze hello ned info finger
Mr. C-Bot: [1]
Mr. C-Bot: [0]
Mr. C-Bot: I'm sorry, but I'm not sure I understand.
