In [None]:
import numpy as np
import pandas as pd
import nltk
import string
import re
import random
from datetime import datetime
import glob

In [None]:
# Download the NLTK resources used by the tokenizers and the lemmatizer.
# NLTK >= 3.8.2 loads word/sentence tokenizer data from 'punkt_tab' instead of
# the pickled 'punkt' package, so both are fetched to work across versions.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Load and preprocess data from multiple .txt files
# glob matches every .txt file in the current directory but returns them in an
# arbitrary, filesystem-dependent order; sorting keeps the combined corpus (and
# therefore which sentence is found first) reproducible between runs.
file_paths = sorted(glob.glob('*.txt'))
raw_docs = []

In [None]:
# Start from an empty list so that re-running this cell does not append the
# same documents a second time.
raw_docs = []
for file_path in file_paths:
    # Explicit UTF-8 keeps decoding independent of the platform default;
    # errors='ignore' still drops any bytes that cannot be decoded.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        # Lowercase on load so later matching is case-insensitive
        raw_docs.append(f.read().lower())

In [None]:
# Merge the per-file texts into one corpus string, separated by single spaces
combined_raw_doc = " ".join(raw_docs)

In [None]:
# Module-level NLP helpers:
#   punctuation_removal - translate table mapping each punctuation code point to None
#   lemmatizer          - WordNet lemmatizer instance
#   tokens              - word tokens of the whole combined corpus
punctuation_removal = {ord(symbol): None for symbol in string.punctuation}
lemmatizer = nltk.stem.WordNetLemmatizer()
tokens = nltk.word_tokenize(combined_raw_doc)

In [None]:
def LemNormalize(text):
    """Normalize text into lowercase, punctuation-free, lemmatized word tokens.

    Args:
        text: Raw input string (a user query or a corpus sentence).

    Returns:
        list[str]: The word tokens of ``text`` after lowercasing, removing every
        character that is neither a word character nor whitespace, and reducing
        each token to its WordNet lemma.
    """
    # Drop punctuation before tokenizing so it never becomes a token of its own
    stripped = re.sub(r'[^\w\s]', '', text.lower())
    # Lemmatize with the module-level `lemmatizer` so inflected forms such as
    # "trees" and "tree" compare equal when token overlap is computed.
    return [lemmatizer.lemmatize(token) for token in nltk.word_tokenize(stripped)]

In [None]:
# Split the combined corpus into sentences; these are the candidate answers
sent_tokens = nltk.tokenize.sent_tokenize(combined_raw_doc)

In [None]:
# Greeting keywords recognised in user input (all lowercase)
greetings = [
    "hello",
    "hi",
    "greetings",
    "sup",
    "what's up",
    "hey",
    "howdy",
    "good day",
    "yo",
    "hi there",
    "hiya",
]

# Replies picked at random when a greeting is recognised.
# "Hi there!" is listed twice, which makes it twice as likely to be chosen.
greeting_responses = [
    "Hi there!",
    "Hey!",
    "*nods*",
    "Hello!",
    "Greetings!",
    "Hi!",
    "Howdy!",
    "Good day to you!",
    "Yo!",
    "Hi there!",
    "Hello, how can I help?",
]

In [None]:
# Lowercase phrases treated as a "How are you?" style question
how_are_you_phrases = [
    "how are you",
    "how's it going",
    "how do you do",
    "how are things",
]

# Replies picked at random when one of the phrases above is recognised
how_are_you_responses = [
    "I'm just a program, but I'm functioning as expected! How about you?",
    "I'm here to assist you! How can I help today?",
    "Doing well, thank you! What can I do for you today?",
    "All systems operational! How are you?",
]

In [None]:
def greet(sentence):
    """Return a canned reply to a greeting or a "how are you" question.

    Matching is case-insensitive and ignores punctuation at the edges of words,
    so "Hello!" and "hey," are recognised. Multi-word greetings such as
    "what's up" or "good day" are matched as whole phrases.

    Precedence follows the original word-by-word scan: a greeting that opens
    the message wins, then a "how are you" phrase anywhere in the message,
    then a greeting anywhere else in the message.

    Args:
        sentence: The user's message.

    Returns:
        str | None: A random entry from ``greeting_responses`` or
        ``how_are_you_responses``, or None when nothing matches.
    """
    # Lowercase, split on whitespace and trim punctuation from each word's edges
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]
    words = [word for word in words if word]
    if not words:
        return None

    normalized = " ".join(words)
    # Space padding lets whole-word / whole-phrase containment be tested with `in`
    padded = f" {normalized} "

    if any(padded.startswith(f" {greeting} ") for greeting in greetings):
        return random.choice(greeting_responses)
    if any(phrase in normalized for phrase in how_are_you_phrases):
        return random.choice(how_are_you_responses)
    if any(f" {greeting} " in padded for greeting in greetings):
        return random.choice(greeting_responses)
    return None

In [None]:
# Keyword-overlap answer lookup
def find_answer(user_response):
    """Return the corpus sentence that shares the most distinct tokens with the query.

    Args:
        user_response: The user's message.

    Returns:
        str: The first sentence in ``sent_tokens`` with the highest token
        overlap, or an apology message when no sentence shares any token.
    """
    query_terms = set(LemNormalize(user_response))
    top_sentence, top_score = "", 0
    for candidate in sent_tokens:
        score = len(query_terms & set(LemNormalize(candidate)))
        # Strict comparison: on a tie the earliest sentence is kept
        if score > top_score:
            top_sentence, top_score = candidate, score
    if not top_sentence:
        return "I am sorry, I don't have information on that."
    return top_sentence

In [None]:
# Recognizing and responding to date queries
def handle_special_queries(user_response):
    """Answer questions about today's date.

    Matching is case-insensitive, so the function does not depend on the
    caller having lowercased the input first.

    Args:
        user_response: The user's message.

    Returns:
        str | None: A sentence containing the current local date formatted as
        "<Month> <day>, <year>" when the message contains one of the known
        date phrases, otherwise None.
    """
    date_phrases = ("what is the date", "what's the date", "today's date", "current date", "what date is it")
    query = user_response.lower()
    if any(phrase in query for phrase in date_phrases):
        return f"Today's date is {datetime.now().strftime('%B %d, %Y')}."
    return None

In [None]:
# Per-user conversation context, held in a module-level dict keyed by user
context = {}


def set_context(user, context_data):
    """Store ``context_data`` as the context for ``user``, replacing any earlier value."""
    context.update({user: context_data})


def get_context(user):
    """Return the context stored for ``user``, or a new empty dict if none is stored."""
    try:
        return context[user]
    except KeyError:
        return {}

In [None]:
# Canned replies for input the bot cannot interpret
def fallback():
    """Return the message asking the user to rephrase."""
    message = "I'm not sure I understand. Could you please rephrase?"
    return message


def clarify():
    """Return the opening of a clarification question."""
    prompt = "Do you mean...?"
    return prompt

In [None]:
# Main chatbot loop
def chatbot():
    """Run the interactive console conversation with Ruby.

    Reads one line per turn from ``input()`` and prints a reply. The
    conversation ends when the user says "bye", "thanks" or "thank you"
    (case-insensitive, surrounding whitespace and punctuation ignored, so the
    advertised "Bye!" works) or when the input stream is closed.

    For any other message the reply is the first non-empty result of
    ``greet``, ``handle_special_queries`` and ``find_answer``, in that order.

    Returns:
        None
    """
    print("Ruby: My name is Ruby. Let's have a conversation! Also, if you want to exit any time, just type Bye!")

    while True:
        try:
            user_response = input().lower().strip()
        except EOFError:
            # Input stream closed (e.g. piped input exhausted): end the session
            # politely instead of surfacing a traceback.
            print("Ruby: Bye! Take care...")
            break

        # Exit keywords are compared without surrounding punctuation/whitespace
        command = user_response.strip(string.punctuation + string.whitespace)
        if command == 'bye':
            print("Ruby: Bye! Take care...")
            break
        if command in ('thanks', 'thank you'):
            print("Ruby: You're welcome!")
            break

        # Handlers return None (or "") when they do not apply; find_answer
        # always returns a non-empty string, so a reply is always produced.
        reply = (
            greet(user_response)
            or handle_special_queries(user_response)
            or find_answer(user_response)
        )
        print("Ruby:", reply)

In [None]:
# Start the interactive session; this cell keeps reading input() until the user exits
chatbot()

Ruby: My name is Ruby. Let's have a conversation! Also, if you want to exit any time, just type Bye!
hello
Ruby: Hello!
how are you
Ruby: All systems operational! How are you?
well
Ruby: for example, towers of hanoi is well understood using recursive implementation.
what is random forest
Ruby: for classification tasks, the output of the random forest is the class selected by most trees.
what is decision tree
Ruby: the left tree is the decision tree we obtain from using information gain to split the nodes and the right tree is what we obtain from using the phi function to split the nodes.
what is the date
Ruby: Today's date is July 25, 2024.
bye
Ruby: Bye! Take care...
