In [8]:
# CSC820 Homework 2 Improved ELIZA CHATBOT
# Andrew Dahlstrom
# 2/14/2024
# 
# This code is implimented and referenced from
# https://www.nltk.org/_modules/nltk/chat/eliza.html
# Source code for nltk.chat.eliza
# Natural Language Toolkit: Eliza
#
# Copyright (C) 2001-2023 NLTK Project
# Authors: Steven Bird <stevenbird1@gmail.com>
#          Edward Loper <edloper@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

# Based on an Eliza implementation by Joe Strout <joe@strout.net>,
# Jeff Epler <jepler@inetnebr.com> and Jez Higgins <mailto:jez@jezuk.co.uk>.

# a translation table used to convert things you say into things the
# computer says back, e.g. "I am" --> "you are"

from nltk.chat.util import Chat, reflections
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import re

# a table of response pairs, where each pair consists of a
# regular expression, and a list of possible responses,
# with group-macros labelled as %1, %2.

pairs = (
    # The following response pairs I've added to adapt ELIZA to be a
    # good listener for college students regarding psychological issues 
    # they may face. I moved these pairs to the top so they would have 
    # matching precedence 
    (
        # This pair provides some helpful tips given the topic of finals week    
        r"final week (.*)",
        (
            "Finals week can be a very difficult time! Remember to take care of your health",
            "I know what you mean. All you can do is your best!",
            "I see but don't forget to eat properly. Good nutrition can improve brain function.",
        ),
    ),
    (
        # This pair matches for "sleepy" in the user input and provides some tips
        # about getting more sleep.
        r"(.*) sleep (.*)",
        (
            "That's horrible! Remember to try to get 8 hours of sleep on average each night.",
            "Oh no, that's not good! Adequate sleep can improve brain function",
            "I see. If you're too sleepy while you study you may experience a diminishing return on your study time.",
        ),
    ),
        (
        # This pair matches for "hungry" in the user input and provides some reminders
        # to eat.
        r"(.*) hungry (.*)",
        (
            "You poor thing! When was the last time you ate a proper meal?",
            "Hmmm I've noticed that snacks do not replace a well rounded meal",
            "I wish I could taste food.. what kind of food are you in the mood for?",
        ),
    ),
    (
        # This pair matches for "homework" in the user input and provides some 
        # reminders tips for doing well at it. 
        r"(.*) homework (.*)",
        (
            "Oh bummer! Remember to start on it early so you have time to complete it.",
            "I feel for you.. Homework can be tedious but it's the best way to learn",
            "That sounds rough. What would happen if you started a little earlier next time?",
        ),
    ),
    (
        # This pair matches for the user explaining My ... broke up with me
        # and uses the reference to the first group-macro in the responses  
        # to provide some encouragement and support.
        r"My (.*) broke up with me (.*)",
        (
            "I'm so sorry to hear that your %1 broke up with you. Please tell me how you're feeling.",
            "That does sound like a tough day.. why do you think your %1 would do that?",
            "Okay I'm here for you! Tell me what happened with your %1.",
        ),
    ),
    
    #### Comments for original pairs below ####
    
    (
        # The r before the "" indicates the following string should be interpreted 
        # as a regular expression. If the user's input contains the string "I need" 
        # it will match to this input/output pair. The remaining portion of the user's  
        # input which comes after "I need" will be captured by the expression (.*) 
        # and stored for use in the output.
        r"I need (.*)",
        (
            # If the user input matches with "I need", one of the responses
            # below will be randomly selected. The portion of the user 
            # input captured by (.*) will replace the placeholder %1 in 
            # the selected response and will then be output to the console. 
            "Why do you need %1?",
            "Would it really help you to get %1?",
            "Are you sure you need %1?",
        ),
    ),
    (
        # In the regular expression below, the \' escapes the ' so
        # it will be captured in the pattern of characters to match. The remaining 
        # portion of the user's input after the matched characters will be captured 
        # by the expression (.*) and stored for use in the output.
        r"Why don\'t you (.*)",
        (
            # If matched, one of the responses below will be randomly selected. 
            # The portion of the user input captured by (.*) will replace the 
            # placeholder %1 in the selected response and will then be output to 
            # the console. 
            "Do you really think I don't %1?",
            "Perhaps eventually I will %1.",
            "Do you really want me to %1?",
        ),
    ),
    (
        # Differs from the previous input/output pair because the user is asking a 
        # question about themself as oppose to ELIZA in the previous pair.
        r"Why can\'t I (.*)",
        (
            "Do you think you should be able to %1?",
            # The response below shows that the captured portion of the input can 
            # replace a placeholder in the middle of a response statement.
            "If you could %1, what would you do?",
            "I don't know -- why can't you %1?",
            # The response below shows that the captured portion of the input 
            # doesn't need to be used in the response.
            "Have you really tried?",
        ),
    ),
    (
        # Similar structure as previous pair but matches with user making a 
        # statement about themself rather than asking a question.
        r"I can\'t (.*)",
        (
            # Similar structure to previous responses. ELIZA will create a
            # question from a user statement to provoke a thoughtful response 
            # from the user.
            "How do you know you can't %1?",
            "Perhaps you could %1 if you tried.",
            "What would it take for you to %1?",
        ),
    ),
    (
        # This pair matches a user statement about themself but the responses 
        # in this case have a greater likelihood of not making sense.
        r"I am (.*)",
        (
            # Similar structure as previous responses but if the user
            # inputs their name for example, "I am Andrew" the first two
            # responses are nonsensical.
            "Did you come to me because you are %1?",
            "How long have you been %1?",
            "How do you feel about being %1?",
        ),
    ),
    (
        # This input/output pair is very similar to the previous but the
        # responses are more flexible for a variety of input including the
        # users name or a statement about going somewhere.
        r"I\'m (.*)",
        (
            # The below responses are much more versatile for
            # different contexts in the input
            "How does being %1 make you feel?",
            "Do you enjoy being %1?",
            "Why do you tell me you're %1?",
            "Why do you think you're %1?",
        ),
    ),
    (
        # This pair matches a with user asking a question about ELIZA
        r"Are you (.*)",
        (
            # The responses below are flexible for a variety of 
            # user input contexts of the captured portion.
            "Why does it matter whether I am %1?",
            "Would you prefer it if I were not %1?",
            "Perhaps you believe I am %1.",
            "I may be %1 -- what do you think?",
        ),
    ),
    (
        # This pair matches any user input beginning with "What".
        # If the user is making a statement instead of a question for example
        # "What a day!" the responses would not make sense.
        r"What (.*)",
        (
            # The responses below are vague enough to respond to a 
            # wide variety of contexts if the input is a question
            # beginning with what. Notice no text is replaced in the responses.
            "Why do you ask?",
            "How would an answer to that help you?",
            "What do you think?",
        ),
    ),
    (
        # This pair matches any user input beginning with "How".
        # If the user is making a statement instead of a question like
        # "How rude!" the responses would not make sense.
        r"How (.*)",
        (
            # The responses below are vague enough to respond to a 
            # wide variety of contexts if the input is a question
            # beginning with how. Notice no text is replaced in the responses.
            "How do you suppose?",
            "Perhaps you can answer your own question.",
            "What is it you're really asking?",
        ),
    ),
    (
        # This pair matches a user's explanation beginning with "Because".
        r"Because (.*)",
        (
            # The responses are flexible to cover a wide variety of contexts.
            # Only the last one uses a replacement with a portion of the user input.
            "Is that the real reason?",
            "What other reasons come to mind?",
            "Does that reason apply to anything else?",
            "If %1, what else must be true?",
        ),
    ),
    (
        # This pair matches with a user statement or question containing the
        # word "sorry". It captures the portion of the input before sorry and 
        # references it in the first group-macro and the remaining portion after 
        # sorry and references it in the second group-macro.
        r"(.*) sorry (.*)",
        (
            # The responses don't reference either of the group-macros and
            # no text is replaced. The responses are not very flexible. Some user
            # statements or questions will result in a nonsensicle response, especially
            # to the second response.
            "There are many times when no apology is needed.",
            "What feelings do you have when you apologize?",
        ),
    ),
    (
        # This pair matches for a user input beginning with "Hello" even though
        # the remaining characters after hello are referenced in a group-macro
        # they are never used in the responses.
        r"Hello(.*)",
        (
            # The structure of the responses are a general response to
            # the greeting hello, no replacement is used.
            "Hello... I'm glad you could drop by today.",
            "Hi there... how are you today?",
            "Hello, how are you feeling today?",
        ),
    ),
    (
        # This pair matches a user input beginning with "I think"
        # and captures the remaining portion in a group-macro
        r"I think (.*)",
        (
            # The first and last response use the group macro in the 
            # reply question.
            "Do you doubt %1?", 
            "Do you really think so?", 
            "But you're not sure %1?"
        ),
    ),
    (
        # This pair matches for the word "friend" in the input stream similar
        # in structure to the pair matching for "sorry" previously.
        r"(.*) friend (.*)",
        (
            # In the responses no text is replaced and they a vague
            # questions regarding the topic friend
            "Tell me more about your friends.",
            "When you think of a friend, what comes to mind?",
            "Why don't you tell me about a childhood friend?",
        ),
    ),
    (
        # This pair matches for only an input of "Yes"
        r"Yes", 
        (
            # The first response does not provoke a further
            # input from the user as strongly as the second
            # response which directly asks a question.
            "You seem quite sure.", 
            "OK, but can you elaborate a bit?")
    ),
    (
        # This pair matches for the word "computer" in the input stream similar
        # in structure to the pair matching for "sorry" previously.
        r"(.*) computer(.*)",
        (
            # The responses to not replace any text using a 
            # group-macro. The first two responses assume the 
            # user is talking about ELIZA and the second two
            # are questions about computers in general.
            "Are you really talking about me?",
            "Does it seem strange to talk to a computer?",
            "How do computers make you feel?",
            "Do you feel threatened by computers?",
        ),
    ),
    (
        # The structure in this pair typically implies the user is asking a 
        # question.
        r"Is it (.*)",
        (
            # The responses user the portion captured by the group-macro in
            # each of the responses. The last response does not necessarily 
            # provoke a futher response from the user.
            "Do you think it is %1?",
            "Perhaps it's %1 -- what do you think?",
            "If it were %1, what would you do?",
            "It could well be that %1.",
        ),
    ),
    (
        # This pair matches a statement from the user input and captures
        # the portion at the end in a group-macro.
        r"It is (.*)",
        (
            # The group-macro is not used in the first response which is
            # very general but is used in the provoking second response.
            "You seem very certain.",
            "If I told you that it probably isn't %1, what would you feel?",
        ),
    ),
    (
        # This pair matches the pattern beginning with "Can you" which is
        # typically a question and captures the remainder in a group-macro.
        r"Can you (.*)",
        (
            # The responses all use the group-macro and ask the user a 
            # question provoking more conversation.
            "What makes you think I can't %1?",
            "If I could %1, then what?",
            "Why do you ask if I can %1?",
        ),
    ),
    (
        # This pair is very similar in structure to the previous pair but in
        # this case the user is referring to themself.
        r"Can I (.*)",
        (
            # The responses all use the group-macro and ask the user a 
            # question provoking more conversation.
            "Perhaps you don't want to %1.",
            "Do you want to be able to %1?",
            "If you could %1, would you?",
        ),
    ),
    (
        # This pair matches for a user making a statement about ELIZA
        r"You are (.*)",
        (
            # The first three responses user the group-macro from the
            # user input and the first two ask a question. The third response
            # does not explicitly ask a question but is implied with the word 
            # perhaps.
            "Why do you think I am %1?",
            "Does it please you to think that I'm %1?",
            "Perhaps you would like me to be %1.",
            "Perhaps you're really talking about yourself?",
        ),
    ),
    (
        # This pair is similar in structure to the previous pair but matches
        # the contraction form rather than the longer form
        r"You\'re (.*)",
        (
            # The first two responses use the group-macro but the third
            # is vague enough to not need it.
            "Why do you say I am %1?",
            "Why do you think I am %1?",
            "Are we talking about you, or me?",
        ),
    ),
    (
        # This pair matches for a user statement or question that begins with
        # "I don't"
        r"I don\'t (.*)",
        (
            # These reponses use the group-macro and all turn the input
            # into a question.
            "Don't you really %1?", 
            "Why don't you %1?", 
            "Do you want to %1?"
        ),
    ),
    (
        # This pair matches for a user statement beginning with "I feel"
        r"I feel (.*)",
        (
            # The first response does not use a group-macro because it implicitly
            # refers to the text in the group-macro by using the word "these". The
            # other responses use the group-macro
            "Good, tell me more about these feelings.",
            "Do you often feel %1?",
            "When do you usually feel %1?",
            "When you feel %1, what do you do?",
        ),
    ),
    (
        # This pair matches with an input beginning with "I have" which
        # could represent a statement or question.
        r"I have (.*)",
        (
            # The third response doesn't work very well if the input is a question.
            "Why do you tell me that you've %1?",
            "Have you really %1?",
            "Now that you have %1, what will you do next?",
        ),
    ),
    (
        # This pair matches for an input beginning with "I would" which typically 
        # is a statement.
        r"I would (.*)",
        (
            # The response each appropriately use the group-macro to
            # responed to a statement input.
            "Could you explain why you would %1?",
            "Why would you %1?",
            "Who else knows that you would %1?",
        ),
    ),
    (
        # This pair matching for an input beginning with "Is there" which
        # is typically a question.
        r"Is there (.*)",
        (
            # The response each appropriately use the group-macro to
            # responed to a question input.
            "Do you think there is %1?",
            "It's likely that there is %1.",
            "Would you like there to be %1?",
        ),
    ),
    (
        # This pair matches for an input beginning with "My" which
        # is typically a statement.
        r"My (.*)",
        (
            # The response each appropriately use the group-macro to
            # responed to a statement input.
            "I see, your %1.",
            "Why do you say that your %1?",
            "When your %1, how do you feel?",
        ),
    ),
    (
        # This pair matches for an input beginning with "You" which
        # is referring to ELIZA and is typically a statement.
        r"You (.*)",
        (
            # These responses all work for a variety of statement about
            # ELIZA.
            "We should be discussing you, not me.",
            "Why do you say that about me?",
            "Why do you care whether I %1?",
        ),
    ),
    (
        # This pair matches for an input beginning with "Why" which
        # is typically a question.
        r"Why (.*)", 
        (
            # Both of the responses use the group-macro to
            # ask the user the question they asked.
            "Why don't you tell me the reason why %1?", 
            "Why do you think %1?"
        )
    ),
    (
        # This pair matches for an input beginning with "I want" which
        # could be a statement or question.
        r"I want (.*)",
        (
            # Only the second response works well if the user is asking a 
            # question but all the responses work okay if a statement.
            "What would it mean to you if you got %1?",
            "Why do you want %1?",
            "What would you do if you got %1?",
            "If you got %1, then what would you do?",
        ),
    ),
    (
        # This pair matches for the word "mother" in the input stream similar
        # in structure to the pair matching for "friend" previously.
        r"(.*) mother(.*)",
        (
            # Rather than using the group-macro, the responses all
            # ask general questions about the user's mother.
            "Tell me more about your mother.",
            "What was your relationship with your mother like?",
            "How do you feel about your mother?",
            "How does this relate to your feelings today?",
            "Good family relations are important.",
        ),
    ),
    (
        # This pair matches for the word "father" in the input stream similar
        # in structure to the pair matching for "friend" previously.
        r"(.*) father(.*)",
        (
            # Rather than using the group-macro, the responses all
            # ask general questions about the user's father.
            "Tell me more about your father.",
            "How did your father make you feel?",
            "How do you feel about your father?",
            "Does your relationship with your father relate to your feelings today?",
            "Do you have trouble showing affection with your family?",
        ),
    ),
    (
        # This pair matches for the word "child" in the input stream similar
        # in structure to the pair matching for "friend" previously.
        r"(.*) child(.*)",
        (
            # The responses assume that the user is reflecting on when they were a 
            # child and not the user's child.
            "Did you have close friends as a child?",
            "What is your favorite childhood memory?",
            "Do you remember any dreams or nightmares from childhood?",
            "Did the other children sometimes tease you?",
            "How do you think your childhood experiences relate to your feelings today?",
        ),
    ),
    (
        # This pair is sort of a catch all matching additional user input that 
        # ends with a "?".
        r"(.*)\?",
        (
            # Rather than using the group-macro the responses ask very 
            # general questions.
            "Why do you ask that?",
            "Please consider whether you can answer your own question.",
            "Perhaps the answer lies within yourself?",
            "Why don't you tell me?",
        ),
    ),
    (
        # This pair matches for the single word "quit" and assumes the user is
        # intending to end the conversation.
        r"quit",
        (
            # The last response is funny.
            "Thank you for talking with me.",
            "Good-bye.",
            "Thank you, that will be $150.  Have a good day!",
        ),
    ),
    (
        # This pair is a catch all. If the user input is not matched by previous
        # pairs, it will be matched with this and stored in a single group-macro.
        r"(.*)",
        (
            # The responses are very general. Some of them provoke a user response but 
            # responses 5-7 might not provoke a user response.
            "Please tell me more.",
            "Let's change focus a bit... Tell me about your family.",
            "Can you elaborate on that?",
            "Why do you say that %1?",
            "I see.",
            "Very interesting.",
            "%1.",
            "I see.  And what does that tell you?",
            "How does that make you feel?",
            "How do you feel when you say that?",
        ),
    ),
)

# Initialize the stemmer
stemmer = PorterStemmer()

# Simple tokenize using regular expressions and splitting input 
# into tokens on space boundaries using the \S expression which matches
# any non space or tab character. Returns a list of tokens.
def simple_tokenizer(str):
    pattern = re.compile(r'\S+')
    return re.findall(pattern, str)

class StemmingEliza(Chat):
    # Initialize the improved chatbot using stemming.  
    # Pairs is a list of patterns and responses.
    # Reflections is a dictionary mapping between 
    # first and second person expressions.
    def __init__(self, pairs, reflections={}):
        super().__init__(pairs, reflections)
  
    # Override the respond method to use the PortStemmer.
    def respond(self, str):
        # Part 1
        # Generate a response to the user input using stemming.
        # Break user input into word tokens then
        # convert to stem version.
        #words = word_tokenize(str)
        #word_stems = [stemmer.stem(word) for word in words]
        
        # Create sequence of word stems from user input
        # then use the stemmed sequence in Chat.respond() method for 
        # improved performance.
        #stem_sequence = ' '.join(word_stems)
        #print(f"PorterStemmer Version: {stem_sequence}")
        #return super().respond(stem_sequence)
        
        # Part 2
        # In the second part we are only concerned with printing the 
        # number of tokens and types parsed from the user input using regex.
        
        # Using the simple tokenizer to split the user input into tokens 
        # based on spaces then finding word stems on these tokens
        words = simple_tokenizer(str)
        word_stems = [stemmer.stem(word) for word in words]
        num_tokens = len(word_stems)
        # set() creates a list while filtering out repeat values
        types = set(word_stems)
        num_types = len(types)
        print(f"Total types: {num_types}")
        print(f"Total tokens: {num_tokens}")
        print(f"Word stems: {types}")
        

# Initializes the chatbot with the predefined pairs in the program and 
# reflections imported from the class which change the structure of the 
# responses to address the user.
improved_eliza_chatbot = StemmingEliza(pairs, reflections)

# ELIZA chatbot method which introduces the chatbot and begins the conversation.
def eliza_chat():
    print("Therapist\n---------")
    print("Talk to the program by typing in plain English, using normal upper-")
    print('and lower-case letters and punctuation.  Enter "quit" when done.')
    print("=" * 72)
    print("Hello.  How are you feeling today?")
    
    # The converse method initiates the conversation with the user by using regular expressions
    # to match the users input with a predefined pair (tuple of inputs and responses). The ending 
    # punctuation is truncated from a captured portion of the input and a reponse is generated 
    # from the matching pair which may use one or more caputred portions of the user's input stored 
    # as group-macros. When a user input pattern is matched with a pair then the response is selected
    # at random from the available responses in that pair. The response is then output to the console.
    # The process continues until the user types "quit".
    improved_eliza_chatbot.converse()


# Method to start the ELIZA chatbot 
def demo():
    eliza_chat()



if __name__ == "__main__":
    demo()


Therapist
---------
Talk to the program by typing in plain English, using normal upper-
and lower-case letters and punctuation.  Enter "quit" when done.
Hello.  How are you feeling today?


> quit


Total types: 1
Total tokens: 1
Word stems: {'quit'}
None
