<a href="https://colab.research.google.com/github/EdenKantor/Cloud-Computing-Course-Tut/blob/main/Cloud_Tut8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk
from nltk.chat.util import Chat, reflections
import re
from collections import defaultdict

class SearchChatbot:
    """Rule-based assistant that answers questions about a search index.

    The bot matches an incoming message against an ordered list of
    ``(regex, responses)`` pairs and answers with either a fixed string or
    the result of a callable that queries ``search_logic.index_data``.

    ``search_logic.index_data`` is read as an iterable of dict entries with
    a ``'Term'`` key and a ``'DocIDs'`` list whose items are dicts read via
    the ``'DocID'``, ``'Title'`` and ``'Frequency'`` keys.

    Attributes:
        search_logic: Object exposing the ``index_data`` iterable.
        patterns: Ordered ``(regex, [response])`` pairs; first match wins.
        chatbot: ``nltk.chat.util.Chat`` built from ``patterns``.
    """

    # Reply used when the index holds no usable frequency information.
    _NO_FREQUENCY_DATA = "I couldn't find any term frequency data in the index."

    def __init__(self, search_logic):
        """Store the search backend, fetch NLTK data and build the patterns.

        Args:
            search_logic: Object with an ``index_data`` attribute (see the
                class docstring for the fields that are read from it).
        """
        self.search_logic = search_logic

        # Download necessary NLTK data
        nltk.download('punkt')
        nltk.download('wordnet')

        # Define patterns and responses using the search_logic data
        self.patterns = self._build_patterns()

        # Initialize chatbot
        # NOTE(review): several responses are callables, which respond()
        # below handles itself; Chat.respond presumably expects plain
        # strings -- confirm before calling self.chatbot.respond directly.
        self.chatbot = Chat(self.patterns, reflections={})

    def _build_patterns(self):
        """Return the ordered ``(regex, [response])`` pairs used by respond().

        Responses are either plain strings or callables taking the regex
        match object. The two "most searched/frequent" answers are computed
        once here, from the index as it is when this method runs.
        """
        return [
            # Small-talk keywords end with \b so that only whole words match:
            # respond() anchors at the start of the message, and without the
            # boundary "history ..." would be treated as the greeting "hi".
            (r'(?:hi|hello|hey)\b', ['Hello! I\'m your Oracle Search Assistant. How can I help you today?']),
            (r'(?:bye|goodbye)\b', ['Goodbye! Feel free to return if you have more questions!']),

            # Search-related patterns with precomputed responses
            (r'.*what(?:\'s| is) the most searched term.*', [self.get_most_frequent_term(None)]),
            (r'.*how many (documents?|pages?|urls?) contain.*?(?:the )?(?:word |term )?([a-zA-Z]+).*',
             [lambda match: self.get_term_document_count(match.group(2))]),
            (r'.*which pages? (?:mention|contain|have).*?(?:the )?(?:word |term )?([a-zA-Z]+).*',
             [lambda match: self.get_term_pages(match.group(1))]),
            (r'.*(?:what|show) are the most frequent terms.*', [self.get_top_terms(None)]),
            (r'.*can you suggest related terms for ([a-zA-Z]+).*',
             [lambda match: self.suggest_search_term(match.group(1))]),

            # General patterns
            (r'what can you do\??', ['''I can help you with:
        1. Finding information about specific terms in the Oracle documentation
        2. Showing which pages contain specific terms
        3. Providing statistics about term frequencies
        4. Suggesting related search terms
        Just ask me what you'd like to know!''']),
            (r'(?:thank you|thanks)\b', ['You\'re welcome! Let me know if you need anything else.']),
            # Catch-all; must stay last because it matches every message.
            (r'.*', ['I\'m not sure about that. Try asking about specific terms or how I can help you search.'])
        ]

    def _find_entry(self, term):
        """Return the first index entry whose 'Term' equals *term*, or None."""
        return next(
            (entry for entry in self.search_logic.index_data if entry.get('Term') == term),
            None,
        )

    @staticmethod
    def _total_frequency(entry):
        """Sum the 'Frequency' values over all documents of an index entry."""
        return sum(doc.get('Frequency', 0) for doc in entry.get('DocIDs', []))

    def get_most_frequent_term(self, groups):
        """Find the term with the highest total frequency.

        Args:
            groups: Unused; kept for interface compatibility.

        Returns:
            A sentence naming the term and its total occurrence count, or a
            "no data" message when the index is empty or every total is 0.
        """
        totals = [
            (entry.get('Term'), self._total_frequency(entry))
            for entry in self.search_logic.index_data
        ]
        if not totals:
            return self._NO_FREQUENCY_DATA

        # max() keeps the first entry among ties.
        max_term, max_freq = max(totals, key=lambda pair: pair[1])
        if max_freq <= 0:
            return self._NO_FREQUENCY_DATA

        return f"The most frequently appearing term is '{max_term}' with {max_freq} total occurrences."

    def get_term_document_count(self, term):
        """Count documents containing a specific term.

        Args:
            term: Term to look up; lower-cased before comparison.

        Returns:
            A sentence with the number of documents listed for the term, or
            a "not found" message when the term has no index entry.
        """
        term = term.lower()
        matching_entry = self._find_entry(term)

        if matching_entry:
            doc_count = len(matching_entry.get('DocIDs', []))
            return f"The term '{term}' appears in {doc_count} different documents."
        return f"I couldn't find any documents containing the term '{term}'."

    def get_term_pages(self, term):
        """List up to three pages containing a specific term.

        Args:
            term: Term to look up; lower-cased before comparison.

        Returns:
            A multi-line string with one bullet per page (title and
            frequency) plus a count of the remaining pages, or a
            "not found" message when the term has no pages.
        """
        term = term.lower()
        matching_entry = self._find_entry(term)
        docs = matching_entry.get('DocIDs', []) if matching_entry else []
        if not docs:
            return f"I couldn't find any pages containing '{term}'."

        parts = [f"Here are up to 3 pages containing '{term}':\n"]
        parts.extend(
            f"- {doc.get('Title', 'Untitled')} (appears {doc.get('Frequency', 0)} times)\n"
            for doc in docs[:3]
        )
        if len(docs) > 3:
            parts.append(f"...and {len(docs) - 3} more pages")
        return "".join(parts)

    def get_top_terms(self, groups):
        """List the five terms with the highest total frequency.

        Args:
            groups: Unused; kept for interface compatibility.

        Returns:
            A multi-line string with one bullet per term, or a "no data"
            message when the index is empty.
        """
        terms = [
            (entry.get('Term'), self._total_frequency(entry))
            for entry in self.search_logic.index_data
        ]
        if not terms:
            return self._NO_FREQUENCY_DATA

        # Stable sort: terms with equal totals keep their index order.
        terms.sort(key=lambda pair: pair[1], reverse=True)
        parts = ["Here are the 5 most frequent terms:\n"]
        parts.extend(f"- {term}: {freq} occurrences\n" for term, freq in terms[:5])
        return "".join(parts)

    def suggest_search_term(self, term):
        """Suggest up to three related terms based on document co-occurrence.

        Two terms are considered related when at least one 'DocID' is listed
        for both; candidates are ranked by the number of shared documents.

        Args:
            term: Term to find neighbours for; lower-cased before comparison.

        Returns:
            A multi-line string with one bullet per related term, or a
            "not found" message when nothing co-occurs with the term.
        """
        term = term.lower()
        not_found = f"I couldn't find any related terms for '{term}'."

        # Find the pages that contain the search term
        matching_entry = self._find_entry(term)
        if not matching_entry:
            return not_found
        term_docs = {doc.get('DocID') for doc in matching_entry.get('DocIDs', [])}
        if not term_docs:
            # Nothing can overlap with an empty document set; skip the scan.
            return not_found

        # Find terms that appear in the same pages
        related_terms = {}
        for entry in self.search_logic.index_data:
            current_term = entry.get('Term')
            if current_term == term:
                continue
            current_docs = {doc.get('DocID') for doc in entry.get('DocIDs', [])}
            common_docs = term_docs & current_docs
            if common_docs:
                related_terms[current_term] = len(common_docs)

        if not related_terms:
            return not_found

        related = sorted(related_terms.items(), key=lambda item: item[1], reverse=True)[:3]
        parts = [f"Based on your search for '{term}', here are some related terms you might want to try:\n"]
        parts.extend(
            f"- {related_term} (appears together in {count} documents)\n"
            for related_term, count in related
        )
        return "".join(parts)

    def respond(self, message):
        """Get a response from the chatbot based on the input message pattern.

        Patterns are tried in order with ``re.match`` (anchored at the start
        of the message, case-insensitive); the first match decides the reply.

        Args:
            message: The user's message.

        Returns:
            The reply string. Callable responses are invoked with the match
            object; a non-string result is reduced to its first element.
        """
        for pattern, responses in self.patterns:
            match = re.match(pattern, message, re.IGNORECASE)
            if not match:
                continue

            response = responses[0]
            if not callable(response):
                return response

            result = response(match)
            return result if isinstance(result, str) else result[0]

        # Only reachable if self.patterns no longer ends with a catch-all.
        return "I'm not sure I understand. Could you rephrase your question?"

In [1]:
def show_chatbot_screen(self):
        """
        Displays the chat interface for interaction with the search assistant.
        Includes message history, input field, and navigation.

        Clears the current cell output, renders the chat styling and the
        initial bot greeting, and wires a text input whose submitted value is
        sent to ``self.chatbot.respond``; both the user message and the reply
        are appended to the message list through injected JavaScript.

        NOTE(review): takes ``self`` although it is defined at top level in
        this cell -- presumably attached to a UI class elsewhere; confirm.
        ``clear_output``, ``widgets``, ``display``, ``Javascript`` and
        ``back_button`` are not defined here and must be available in the
        enclosing scope.
        """
        import json  # local import: used to embed message text safely in JS

        clear_output()
        chat_style = '''
        <style>
            body {
                background-color: #1a202c !important;
                min-height: 100vh;
                margin: 0;
                padding: 20px;
                box-sizing: border-box;
            }
            .chat-container {
                max-width: 1000px;
                margin: 20px auto;
                background-color: #2c2f33;
                border-radius: 12px;
                overflow: hidden;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            }
            .chat-header {
                background-color: rgb(244, 121, 0);
                color: white;
                padding: 15px 20px;
                font-size: 22px;
            }
            .chat-messages {
                height: 500px;
                overflow-y: auto;
                padding: 20px;
                background-color: #2d3748;
                display: flex;
                flex-direction: column;
                font-size: 16px;
            }
            .message {
                margin-bottom: 10px;
                padding: 10px 15px;
                border-radius: 12px;
                max-width: 80%;
            }
            .bot-message {
                background-color: #243447;
                color: white;
                align-self: flex-start;
            }
            .user-message {
                background-color: rgb(244, 121, 0);
                color: white;
                align-self: flex-end;
            }
            .chat-input-container {
                padding: 15px;
                background-color: #2c2f33;
                border-top: 1px solid #4a5568;
            }
            .widget-text input {
                width: 50% !important;
                padding: 12px 20px !important;
                border: 6px solid #4a5568 !important;
                border-radius: 30px !important;
                background-color: #2d3748 !important;
                color: white !important;
            }
            .widget-button button {
                background: linear-gradient(90deg, #ef4444 0%, #f59e0b 100%) !important;
                border: none !important;
                color: white !important;
                border-radius: 20px !important;
                padding: 8px 25px !important;
                font-weight: bold !important;
            }
        </style>
        '''

        # HTML for the chatbot
        chat_html = '''
        <div class="chat-container">
            <div class="chat-header">
                Oracle Search Assistant
            </div>
            <div class="chat-messages" id="chat-messages">
                <div class="message bot-message">
                    Hello! I'm your Oracle Search Assistant. How can I help you today?
                    You can ask me things like:
                    <ul>
                        <li>"What can you do?" for an overview of my capabilities</li>
                        <li>"Can you suggest related terms for [term]?" to find related terms in the Oracle documentation</li>
                        <li>"Which pages mention [term]?" to get a list of pages that contain a certain term</li>
                        <li>"How many documents contain [term]?" for term frequency statistics</li>
                        <li>"What is the most searched term?" to find out which term is the most searched</li>
                        <li>"What are the most frequent terms?" to get a list of terms that appear most often in the documentation</li>
                    </ul>
                    Just ask away, and I'll assist you with your search!
                </div>
            </div>
        </div>
        '''

        # Input for messages
        chat_input = widgets.Text(
            placeholder='Ask me anything about the search...',
            layout=widgets.Layout(width='50%')
        )

        # Function to add messages to the chat container
        def add_message(text, message_type):
            # Embed the text as a JSON-encoded string literal rather than
            # pasting it raw into a template literal: a backtick, backslash
            # or "${...}" in a message would otherwise break the script or
            # execute as code. "</" is escaped in case the snippet is
            # rendered inside a <script> element.
            js_text = json.dumps(str(text)).replace("</", "<\\/")
            js_code = f'''
                const chatMessages = document.getElementById('chat-messages');
                const newMessage = document.createElement('div');
                newMessage.classList.add('message', '{message_type}');
                newMessage.innerText = {js_text};
                chatMessages.appendChild(newMessage);
                chatMessages.scrollTop = chatMessages.scrollHeight;
            '''
            display(Javascript(js_code))

        # Handling messages
        def handle_message(sender):
            message = sender.value.strip()
            if message:
                # Add user message
                add_message(message, "user-message")
                # Chatbot response
                response = self.chatbot.respond(message)
                add_message(response, "bot-message")
                sender.value = ""

        # NOTE(review): Text.on_submit is reported as deprecated in recent
        # ipywidgets releases -- confirm against the installed version.
        chat_input.on_submit(handle_message)

        # Display the design and components
        display(widgets.HTML(chat_style))
        display(widgets.HTML(chat_html))
        display(widgets.HBox([chat_input], layout=widgets.Layout(justify_content='center')))
        # NOTE(review): back_button is not created in this function;
        # presumably defined by the surrounding notebook/class -- confirm.
        display(widgets.HBox([back_button], layout=widgets.Layout(justify_content='center')))