### Import libraries

In [2]:
import wikipediaapi
import re
import logging
import nltk
from nltk.tokenize import sent_tokenize
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\leaha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

### Retrieve information from Wikipedia

In [4]:
def fetch_wikipedia_summary(topic):
    user_agent = 'PA2/1.0 (lantler@gmu.edu)'
    wiki_api = wikipediaapi.Wikipedia('en', headers={'User-Agent': user_agent})
    page = wiki_api.page(topic)
    if page.exists():
        return page.summary
    else:
        return ""

### Generate response

In [6]:
def generate_response(question, summary):
    question_words = question.lower().split()
    subject = " ".join(question_words[2:]).replace('?', '')

    sentences = sent_tokenize(summary)
    
    for sentence in sentences:
        if subject.lower() in sentence.lower():
            return sentence.strip()

    return "I'm sorry, I don't know the answer."

### Main function

In [None]:
def main():
    print("Welcome to the Question Answering System by AIT 526 Group 2. Please ask me a question about a person, place, or thing.")
    print("Type 'Exit' to close the program.")
    while True:
        user_question = input("=? ")
        if user_question.lower() == "exit":
            print("Thank you! Goodbye.")
            break
        logging.info(f"Question: {user_question}")

        #a slightly more flexible way to get the search term? Try it out first
        #unimportant_words = r"\b(([Ww]here|[Ww]hat|[Ww]ho|[Ww]hen) (is|was|did))( (a|the))? \b"
        #noun= re.sub(unimportant_words, "", user_question)
        #search_term = "".join(noun)

        search_term = " ".join(user_question.split()[2:])
        if not search_term:
            search_term = user_question.split()[1]
        search_term = search_term.replace('?', '')

        content_summary = fetch_wikipedia_summary(search_term)
        logging.info(f"Wikipedia Content: {content_summary[:500]}")

        if not content_summary:
            print("I'm sorry, I don't know the answer.")
            logging.info("Answer: Sorry, I don't know the answer.")
            continue

        answer = generate_response(user_question, content_summary)
        logging.info(f"Answer: {answer}")

        print(f"=> {answer}")

if __name__ == "__main__":
    logging.basicConfig(filename='question_log.txt', level=logging.INFO)
    main()


Welcome to the Question Answering System by AIT 526 Group 2. Please ask me a question about a person, place, or thing.
Type 'Exit' to close the program.


=?  When was George Washington born?


I'm sorry, I don't know the answer.


=?  What did George Washington do?


I'm sorry, I don't know the answer.
