#**LIBRARIES AND INSTALLATIONS**

In [None]:
!pip install firebase
import time
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
from ipywidgets import HTML, VBox, HBox, Button, Layout, widgets
from io import BytesIO
import base64
import matplotlib.pyplot as plt
from matplotlib import colormaps
from matplotlib.colors import LinearSegmentedColormap
from firebase import firebase
import nltk
import re
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
from collections import deque, Counter
import random
import google.generativeai as genai
import numpy as np
nltk.download('wordnet')



[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

#**CRAWLER MICROSERVICE**

In [None]:
# Service to crawl in a domain
class CrawlerService:
    """Configuration holder for crawling pages of a single domain.

    Attributes:
        domain: URL the crawl is configured for.
        max_urls: Maximum number of URLs to fetch.
        non_english_language_codes: Path fragments (such as "/de/") that mark
            a URL as non-English.
        non_relevant_keywords: Substrings that mark a URL as non-relevant.
    """

    def __init__(self, domain = "https://aws.amazon.com/", max_urls = 200, non_relevant_language_codes = None, non_relevant_keywords = None):
        """Store the crawl settings.

        Args:
            domain: Domain to fetch URLs from.
            max_urls: Maximum URLs to fetch.
            non_relevant_language_codes: Optional collection of language-code
                fragments; ``None`` selects the built-in defaults.
            non_relevant_keywords: Optional collection of keywords;
                ``None`` selects the built-in defaults.
        """
        self.domain = domain # Domain to fetch URLs from
        self.max_urls = max_urls # Maximum URLs to fetch

        # Test against None instead of truthiness so that an explicitly passed
        # empty collection disables the filter rather than restoring defaults.
        if non_relevant_language_codes is None:
            # A set of language codes that appear in URLs and indicate they are not in English
            non_relevant_language_codes = {"/ar/", "/id/", "/de/", "/es/", "/fr/", "/it/",
                                           "/pt/", "/vi/", "/tr/", "/ru/", "/th/", "/ja/",
                                           "/ko/", "/zh/", "/zh-tw/", "/tw/", "/jp/", "/cn/"}
        self.non_english_language_codes = non_relevant_language_codes

        if non_relevant_keywords is None:
            # A set of keywords that appear in URLs and indicate non-relevant content
            non_relevant_keywords = {"signup", "signin", "contact", "agreement", "partners",
                                     "events", "customer", "privacy", "legal", "marketplace",
                                     "terms", "careers", "blogs"}
        self.non_relevant_keywords = non_relevant_keywords

In [None]:
# Private method to check if a URL is a page within the given domain to crawl in
def __is_within_domain(self, url):
    """Return True when *url* has the same network location as ``self.domain``.

    Only the ``netloc`` components are compared, so a URL without a host
    (for example a relative path) is never considered inside the domain.
    """
    candidate_host = urlparse(url).netloc
    allowed_host = urlparse(self.domain).netloc
    return candidate_host == allowed_host

# Attach the method to the service
CrawlerService.__is_within_domain = __is_within_domain
del __is_within_domain # Removes the standalone function

In [None]:
# Private method to check if a page is non-english
def __contains_non_english_language_codes(self, url):
    """Return True if the lower-cased *url* contains any configured language code."""
    lowered = url.lower()
    for code in self.non_english_language_codes:
        if code in lowered:
            return True  # One matching code is enough
    return False

# Attach the method to the service
CrawlerService.__contains_non_english_language_codes = __contains_non_english_language_codes
del __contains_non_english_language_codes # Removes the standalone function

In [None]:
# Check if the URL contains non-relevant keywords
def __contains_non_relevant_keywords(self, url):
    """Return True if the lower-cased *url* contains any non-relevant keyword."""
    lowered = url.lower()
    for keyword in self.non_relevant_keywords:
        if keyword in lowered:
            return True  # One matching keyword is enough
    return False

# Attach the method to the service
CrawlerService.__contains_non_relevant_keywords = __contains_non_relevant_keywords
del __contains_non_relevant_keywords # Removes the standalone function

In [None]:
# Private method to filter non-relevant URL
def __not_relevant_url(self, url):
    """Return a truthy value when *url* should be skipped by the crawler.

    A URL is skipped when it is outside the domain, contains a non-relevant
    keyword, or contains a non-English language code. The checks run in that
    order and stop at the first hit.
    """
    if not self.__is_within_domain(url):
        return True

    keyword_hit = self.__contains_non_relevant_keywords(url)
    if keyword_hit:
        return keyword_hit

    return self.__contains_non_english_language_codes(url)

# Attach the method to the service
CrawlerService.__not_relevant_url = __not_relevant_url
del __not_relevant_url # Removes the standalone function

In [None]:
# Private method to normalize a URL by removing query parameters and fragments
def __normalize_url(self, url):
    """Return *url* without query string or fragment and with a trailing slash."""
    stripped = urlparse(url)._replace(query='', fragment='').geturl()
    return stripped if stripped.endswith('/') else stripped + '/'

# Attach the method to the service
CrawlerService.__normalize_url = __normalize_url
del __normalize_url # Removes the standalone function

In [None]:
# Method to crawl and fetch URLs from the given domain using the BFS algorithm
def crawl(self):
    """Crawl breadth-first from ``self.domain`` and collect parsed pages.

    URLs rejected by ``__not_relevant_url`` are skipped. Pages answering with
    a status other than 200 are ignored, and request errors are printed and
    skipped.

    Returns:
        dict mapping each fetched URL to its BeautifulSoup object; it holds at
        most ``self.max_urls`` entries.
    """
    # Local import: urljoin is not part of the file-level urllib.parse import.
    from urllib.parse import urljoin

    urls_soups = {} # URLs and their soups object
    queue = deque([self.domain])  # Queue for BFS traversal
    seen = {self.domain}  # Every URL ever enqueued, so no page is fetched twice

    while queue and len(urls_soups) < self.max_urls:
        current_url = queue.popleft()  # Dequeue the first URL element

        if self.__not_relevant_url(current_url):
            continue

        try:
            # The timeout keeps a single unresponsive server from stalling the crawl
            response = requests.get(current_url, timeout=10)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching URL '{current_url}': {e}")
            continue

        if response.status_code != 200:
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        urls_soups[current_url] = soup

        for anchor in soup.find_all('a', href = True):
            # Resolve relative hrefs against the page they were found on;
            # otherwise they carry no host and fail the domain check.
            absolute_url = urljoin(current_url, anchor.get('href'))
            normalized_url = self.__normalize_url(absolute_url)
            if normalized_url not in seen:
                seen.add(normalized_url)
                queue.append(normalized_url)  # Enqueue the URL

    return urls_soups

# Attach the method to the service
CrawlerService.crawl = crawl
del crawl # Removes the standalone function

#**INDEX CREATOR MICROSERVICE**

In [None]:
# Service to create an index
class IndexCreatorService:
    """Configuration holder for building a term index from crawled pages.

    Attributes:
        stop_words: Terms that are removed from every page histogram.
        freq_threshold: Frequency threshold stored for the index builder.
        crawler_service: Crawler that supplies the pages to index.
    """

    def __init__(self, crawler_service, stop_words = None, freq_threshold = 7):
        """Store the crawler, the stop-word collection and the frequency threshold.

        Args:
            crawler_service: Object providing the pages to index.
            stop_words: Optional collection of terms to drop; ``None`` selects
                the built-in defaults.
            freq_threshold: Frequency threshold applied to page histograms.
        """
        # Test against None instead of truthiness so that an explicitly passed
        # empty collection means "no stop words" rather than "use defaults".
        if stop_words is None:
            stop_words = { 'a', 'an', 'are', 'the', 'and', 'or', 'in', 'on', 'at', 'to', 'through', 'content', 'contact',
                           'across', 'after', 'all', 'allow', 'also', 'as', 'ask', 'be', 'been', 'best', 'better', 'between',
                           'beyond', 'both', 'by', 'can', 'co', 'come', 'contain', 'down', 'e', 'each', 'even', 'few', 'first',
                           'for', 'from', 'further', 'get', 'go', 'got', 'have', 'help', 'here', 'how', 'i', 'if', 'inc',
                           'into', 'is', 'it', 'just', 'keep', 'less', 'let', 'like', 'look', 'may', 'mean', 'might', 'more',
                           'most', 'my', 'name', 'near', 'need', 'new', 'no', 'not', 'now', 'o', 'of', 'one', 'other', 'our',
                           'out', 'over', 'own', 'per', 'r', 're', 'right', 'same', 'second', 'see', 'self', 'sent', 'so',
                           'announce', 'soon', 'such', 'take', 'than', 'that', 'their', 'them', 'then', 'there', 'these', 'they',
                           'third', 'those', 'three', 'two', 'up', 'us', 'use', 'via', 'vs', 'want', 'we', 'well', 'were',
                           'what', 'when', 'where', 'whether', 'which', 'while', 'who', 'will', 'with', 'within', 'without',
                           'you', 'your', 'fast', 'effect', 'innovate', 'speed', 'visit', 'world', 'include', 'provide', 'enable',
                           'make', 'build', 'use', 'start', 'stop', 'allow', 'accelerate', 'common', 'high', 'low', 'large', 'small',
                           'simple', 'example', 'number', 'write', 'add', 'read', 'china', 'pro', 'understand', 's', 'efficiency',
                           'instance', 'explore', 'support', 'keynote'}
        self.stop_words = stop_words
        self.freq_threshold = freq_threshold
        self.crawler_service = crawler_service

In [None]:
# Private method to create histogram of terms frequencies for a given soup object
def __create_terms_histogram(self, soup):
    """Count the alphanumeric terms found in the text of *soup*.

    Terms are lower-cased before counting and purely numeric terms are left
    out.

    Returns:
        dict mapping each term to its number of occurrences.
    """
    words = re.findall(r'\b[a-zA-Z0-9]+\b', soup.get_text())
    lowered = (word.lower() for word in words)
    counts = Counter(term for term in lowered if not term.isdigit())
    return dict(counts)  # Hand back a plain dict, not a Counter

# Attach the method to the service
IndexCreatorService.__create_terms_histogram = __create_terms_histogram
del __create_terms_histogram  # Removes the standalone function

In [None]:
# Private method to normalize a term
def __apply_lemmatization(self, term, lemmatizer):
    """Return the base form of *term*, trying noun, verb and adjective in turn.

    Args:
        term: Word to normalize.
        lemmatizer: Object exposing ``lemmatize(term, pos)``. When ``None`` a
            new ``nltk.stem.WordNetLemmatizer`` is created.

    Returns:
        The first lemmatized form that differs from *term*, or *term* itself
        when no part of speech changes it.
    """
    # Use the lemmatizer supplied by the caller instead of building a new one
    # for every single term.
    if lemmatizer is None:
        lemmatizer = nltk.stem.WordNetLemmatizer()
    for pos in ('n', 'v', 'a'):  # Check noun, verb, adjective
        lemmatized = lemmatizer.lemmatize(term, pos)
        if lemmatized != term:  # If lemmatized form differs, return it
            return lemmatized
    return term  # No change after checking all POS

# Attach the method to the service
IndexCreatorService.__apply_lemmatization = __apply_lemmatization
del __apply_lemmatization  # Removes the standalone function

In [None]:
# Private method to delete the stop words from the histogram
def __remove_stop_words(self, histogram):
    """Delete every stop word from *histogram* in place and return it."""
    for word in self.stop_words:
        histogram.pop(word, None)  # Words not in the histogram are ignored
    return histogram

# Attach the method to the service
IndexCreatorService.__remove_stop_words = __remove_stop_words
del __remove_stop_words  # Removes the standalone function

In [None]:
# Private method to normalize histogram terms to their base form
def __normalize_terms(self, histogram):
    """Return a new histogram keyed by the base form of each term.

    Terms that share a base form have their counts added together.
    """
    wordnet = nltk.stem.WordNetLemmatizer()
    merged = {}
    for raw_term, occurrences in histogram.items():
        base = self.__apply_lemmatization(raw_term, wordnet)
        merged[base] = merged.get(base, 0) + occurrences
    return merged

# Attach the method to the service
IndexCreatorService.__normalize_terms = __normalize_terms
del __normalize_terms  # Removes the standalone function

In [None]:
# Private method to remove terms with low frequencies from the histogram
def __remove_low_freqs(self, histogram, threshold):
    """Return a new dict holding only the terms counted at least *threshold* times."""
    kept = {}
    for term, count in histogram.items():
        if count >= threshold:
            kept[term] = count
    return kept

# Attach the method to the service
IndexCreatorService.__remove_low_freqs = __remove_low_freqs
del __remove_low_freqs  # Removes the standalone function

In [None]:
# Private method to add a term entry directly into the index if it doesn't exist
def __add_term(self, index, term):
    """Return the entry for *term* in *index*, creating an empty one if missing."""
    return index.setdefault(term, {"DocIDs": {}})

# Attach the method to the service
IndexCreatorService.__add_term = __add_term
del __add_term  # Removes the standalone function

In [None]:
# Private method to add document data (url, title, count) under a specific term and DocID
def __add_doc(self, term_entry, doc_id, url, title, count):
    """Store the url, title and count of one document under ``doc_<doc_id>``.

    The record is written into ``term_entry["DocIDs"]`` and replaces any
    record already stored under the same key.
    """
    doc_key = f"doc_{doc_id}"
    record = {"url": url, "title": title, "count": count}
    term_entry["DocIDs"][doc_key] = record

# Attach the method to the service
IndexCreatorService.__add_doc = __add_doc
del __add_doc  # Removes the standalone function

In [None]:
# Private method to fetch title from a URL
def __fetch_doc_title(self, url, soup):
    """Return a display title for the page parsed into *soup*.

    The ``<title>`` tag is preferred and the first ``<h1>`` is the fallback.
    A tag whose text is empty after stripping is treated as missing.

    Args:
        url: URL of the page (currently unused; kept for the call signature).
        soup: Parsed page exposing ``find(tag_name)``.

    Returns:
        The stripped title text, or "No Title Available" when neither tag
        yields any text.
    """
    for tag_name in ('title', 'h1'):
        tag = soup.find(tag_name)
        if tag:
            text = tag.text.strip()
            if text:  # Skip blank tags so the next candidate gets a chance
                return text
    return "No Title Available"

# Attach the method to the service
IndexCreatorService.__fetch_doc_title = __fetch_doc_title
del __fetch_doc_title  # Removes the standalone function

In [None]:
# Method to create the index
def create_index(self):
    """Crawl with ``self.crawler_service`` and build the term index.

    Every crawled page gets a sequential document number starting at 1. Its
    term histogram is normalized, stripped of stop words and filtered by
    ``self.freq_threshold`` before the remaining terms are added to the index.

    Returns:
        dict of the form ``{"index": {term: {"DocIDs": {...}}}}``.
    """
    pages = self.crawler_service.crawl()
    index = {}

    for doc_id, (url, soup) in enumerate(pages.items(), start=1):
        title = self.__fetch_doc_title(url, soup)

        # Histogram pipeline: count -> lemmatize -> drop stop words -> drop rare terms
        raw_counts = self.__create_terms_histogram(soup)
        lemma_counts = self.__normalize_terms(raw_counts)
        content_counts = self.__remove_stop_words(lemma_counts)
        frequent_counts = self.__remove_low_freqs(content_counts, self.freq_threshold)

        for term, count in frequent_counts.items():
            entry = self.__add_term(index, term)
            self.__add_doc(entry, doc_id, url, title, count)

    return {"index": index}

# Attach the method to the service
IndexCreatorService.create_index = create_index
del create_index  # Removes the standalone function

#**DATA FETCHER MICROSERVICE**

In [None]:
# Service to fetch index data
class DataFetcherService:
    """Read-side service that holds the database connection used for lookups."""

    def __init__(self, FBconn):
        """Keep *FBconn*, the connection object used to query the database."""
        self.FBconn = FBconn

In [None]:
# Method to fetch the entire index from the database
def fetch_index(self):
    """Return everything stored under ``/index/``.

    Returns:
        The stored index, or an empty dict when nothing is stored or the
        lookup raises (the error is printed).
    """
    try:
        stored = self.FBconn.get("/index/", None)
        return stored or {}
    except Exception as e:
        print(f"Error fetching index: {e}")
    return {}

# Attach the method to the service
DataFetcherService.fetch_index = fetch_index
del fetch_index  # Removes the standalone function

In [None]:
def fetch_term_docs(self, term):
    """Return the documents stored for *term* under ``/index/<term>/DocIDs``.

    Args:
        term: Index term to look up.

    Returns:
        The stored documents, or an empty dict when the term is unknown or
        the lookup raises (the error is printed).
    """
    try:
        # Query through this service's own connection rather than a global
        # name, which may be missing or point at a different database.
        docs = self.FBconn.get(f"/index/{term}/DocIDs", None)
        if docs: # Term found
            return docs
    except Exception as e:
        print(f"Error fetching term '{term}' from Firebase: {e} ")
    return {}

# Attach the method to the service
DataFetcherService.fetch_term_docs = fetch_term_docs
del fetch_term_docs  # Removes the standalone function

#**ADMINISTRATION MICROSERVICE**

In [None]:
# Service for admin
class AdministrationService:
    """Admin-side service holding the connection, the index creator and a local index copy."""

    def __init__(self, FBconn, index_creator_service, data_fetcher_service):
        """Store the collaborators and load the current index once.

        The index is read through ``data_fetcher_service.fetch_index()`` at
        construction time and kept in ``self.index``.
        """
        current_index = data_fetcher_service.fetch_index()
        self.FBconn = FBconn
        self.index_creator_service = index_creator_service
        self.index = current_index

In [None]:
# Method to delete the entire index from the database
def delete_index(self):
    """Delete ``/index/`` from the database and clear the local copy.

    The local copy is cleared only when the database call succeeds; a failure
    is printed and leaves ``self.index`` untouched.
    """
    try:
        self.FBconn.delete("/index/", None)
    except Exception as e:
        print(f"Error deleting index: {e}")
    else:
        self.index = {}

# Attach the method to the service
AdministrationService.delete_index = delete_index
del delete_index  # Removes the standalone function

In [None]:
# Method to re-create an entire index using the "Index Creator" and upload it to the root of the database
def recreate_index(self):
    """Delete the stored index, build a new one and upload it.

    Returns:
        A status message: "Index uploaded successfully!" on success, otherwise
        a message starting with "Error uploading index".
    """
    try:
        self.delete_index() # Remove existing index
        index = self.index_creator_service.create_index() # Create the index
        if self.FBconn.put('/', 'index', index["index"]):  # Upload the index
            self.index = index["index"]
            return "Index uploaded successfully!"
        # A falsy result from put() would otherwise fall through and return
        # None; give the caller an explicit message instead.
        return "Error uploading index: the database did not confirm the upload"
    except Exception as e:
        return f"Error uploading index: {e}"

# Attach the method to the service
AdministrationService.recreate_index = recreate_index
del recreate_index  # Removes the standalone function

In [None]:
# Method to fetch all the terms in the index
def fetch_terms(self):
    """Return a new list with every term of the locally held index."""
    return list(self.index.keys())

# Attach the method to the service
AdministrationService.fetch_terms = fetch_terms
del fetch_terms  # Removes the standalone function

In [None]:
# Method to fetch the documents (URL, title and term count) indexed under a term
def fetch_urls(self, term):
    """Return the ``DocIDs`` mapping stored for *term* in the local index.

    The mapping is returned as-is, not copied. A term that is not in the
    index raises ``KeyError``.
    """
    term_entry = self.index[term]
    return term_entry["DocIDs"]

# Attach the method to the service
AdministrationService.fetch_urls = fetch_urls
del fetch_urls  # Removes the standalone function

In [None]:
# Method to delete URLs for a given term from the database
def delete_docs(self, term, docs):
    """Delete the given documents of *term* from the database and the local index.

    When the last document of the term is removed, the term itself is deleted
    through ``delete_term``. Errors are printed, not raised.

    Args:
        term: Index term whose documents are deleted.
        docs: Iterable of document keys (for example "doc_3").
    """
    try:
        # Iterate over a snapshot: callers may pass the live DocIDs mapping,
        # which must not change size while it is being iterated.
        for doc in list(docs):
            self.FBconn.delete(f"/index/{term}/DocIDs/{doc}", None)
            remaining = self.index[term]["DocIDs"]
            del remaining[doc]
            if not remaining:
                self.delete_term(term)
                break  # The term is gone, nothing is left to delete under it
    except Exception as e:
        print(f"Error deleting docs: {e}")

# Attach the method to the service
AdministrationService.delete_docs = delete_docs
del delete_docs  # Removes the standalone function

In [None]:
def delete_term(self, term):
    """Delete *term* from the database and then from the local index.

    Any error (including a term missing from the local index) is printed
    instead of raised.
    """
    try:
        term_path = f"/index/{term}"
        self.FBconn.delete(term_path, None)
        del self.index[term]
    except Exception as e:
        print(f"Error deleting '{term}': {e}")

# Attach the method to the service
AdministrationService.delete_term = delete_term
del delete_term  # Removes the standalone function

#**QUERY MICROSERVICE**

In [None]:
class QueryService:
    """Query-side service that looks terms up through a data fetcher."""

    def __init__(self, data_fetcher_service):
        """Keep the fetcher used to look up the documents of each query term."""
        self.data_fetcher_service = data_fetcher_service

In [None]:
def __apply_lemmatization(self, term, lemmatizer):
    """Return the base form of *term*, trying noun, verb and adjective in turn.

    Args:
        term: Query word to normalize.
        lemmatizer: Object exposing ``lemmatize(term, pos)``. When ``None`` a
            new ``nltk.stem.WordNetLemmatizer`` is created.

    Returns:
        The first lemmatized form that differs from *term*, or *term* itself
        when no part of speech changes it.
    """
    # Use the lemmatizer supplied by the caller instead of building a new one
    # for every single term.
    if lemmatizer is None:
        lemmatizer = nltk.stem.WordNetLemmatizer()
    for pos in ('n', 'v', 'a'):  # Check noun, verb, adjective
        lemmatized = lemmatizer.lemmatize(term, pos)
        if lemmatized != term:  # If lemmatized form differs, return it
            return lemmatized
    return term  # No change after checking all POS

# Attach the method to the service
QueryService.__apply_lemmatization = __apply_lemmatization
del __apply_lemmatization  # Removes the standalone function

In [None]:
# Private method to normalize query terms
def __normalize_query(self, query):
    """Split *query* into lower-cased words and return their base forms as a list."""
    wordnet = nltk.stem.WordNetLemmatizer()
    return [
        self.__apply_lemmatization(word, wordnet)
        for word in re.findall(r'\w+', query.lower())  # List of query words
    ]

# Attach the method to the service
QueryService.__normalize_query = __normalize_query
del __normalize_query  # Removes the standalone function

In [None]:
# Method to check if the URL contains any term of the query terms, returns matches if there are
def __url_contains_terms(self, url, query_terms):
    """Return how many entries of *query_terms* occur as substrings of *url*."""
    return sum(1 for term in query_terms if term in url)

# Attach the method to the service
QueryService.__url_contains_terms = __url_contains_terms
del __url_contains_terms  # Removes the standalone function

In [None]:
# Private method to update document-related information in the maps for query processing
def __update_maps(self, doc_id, doc_info, docs_urls, docs_titles, docs_ranks, docs_matches):
    """Fold one (term, document) hit into the per-document maps, in place.

    The first hit for a document records its url and title, sets its rank to
    the term count and its match counter to 1. Later hits add the count to
    the rank and increment the match counter.
    """
    url, title, count = doc_info["url"], doc_info["title"], doc_info["count"]

    if doc_id in docs_urls:
        # Document already seen for an earlier query term
        docs_ranks[doc_id] += count
        docs_matches[doc_id] += 1
        return

    docs_urls[doc_id] = url
    docs_titles[doc_id] = title
    docs_ranks[doc_id] = count
    docs_matches[doc_id] = 1

# Attach the method to the service
QueryService.__update_maps = __update_maps
del __update_maps  # Removes the standalone function

In [None]:
def __calculate_ranks(self, docs_ranks, docs_urls, docs_matches, normalized_query):
    """Scale every rank in *docs_ranks* in place.

    Each rank is multiplied by ``(boost + matches) / number of query terms``,
    where ``boost`` is twice the number of query terms found in the
    document's URL and ``matches`` is the document's entry in *docs_matches*.
    """
    term_total = len(normalized_query)

    # Phase 1: URL-based boost for every document
    url_boost = {
        doc_id: 2 * self.__url_contains_terms(url, normalized_query)
        for doc_id, url in docs_urls.items()
    }

    # Phase 2: fold boost and term matches into the final rank
    for doc_id, boost in url_boost.items():
        docs_ranks[doc_id] *= (boost + docs_matches[doc_id]) / term_total

# Attach the method to the service
QueryService.__calculate_ranks = __calculate_ranks
del __calculate_ranks  # Removes the standalone function

In [None]:
def __fetch_results(self, docs_ranks, docs_urls, docs_titles):
    """Return the documents as result dicts, highest rank first.

    Returns:
        list of ``{"title", "url", "rank"}`` dicts sorted by rank in
        descending order.
    """
    ordered_ids = sorted(docs_ranks, key=docs_ranks.get, reverse=True)
    return [
        {"title": docs_titles[doc_id], "url": docs_urls[doc_id], "rank": docs_ranks[doc_id]}
        for doc_id in ordered_ids
    ]

# Attach the method to the service
QueryService.__fetch_results = __fetch_results
del __fetch_results  # Removes the standalone function

In [None]:
def process_query(self, query):
    """Run *query* against the index and return the ranked results.

    The query is normalized, the documents of each term are fetched through
    ``self.data_fetcher_service`` and accumulated per document, and the ranks
    are adjusted before the result list is built.

    Returns:
        Whatever ``__fetch_results`` builds from the accumulated maps.
    """
    terms = self.__normalize_query(query)
    docs_urls, docs_titles = {}, {}    # doc id -> URL / title
    docs_ranks, docs_matches = {}, {}  # doc id -> rank / number of matching query terms

    for term in terms:
        hits = self.data_fetcher_service.fetch_term_docs(term)
        if hits:  # Terms without documents contribute nothing
            for doc_id, doc_info in hits.items():
                self.__update_maps(doc_id, doc_info, docs_urls, docs_titles, docs_ranks, docs_matches)

    self.__calculate_ranks(docs_ranks, docs_urls, docs_matches, terms)

    return self.__fetch_results(docs_ranks, docs_urls, docs_titles)

# Attach the method to the service
QueryService.process_query = process_query
del process_query  # Removes the standalone function

#**STATISTICS MICROSERVICE**

In [None]:
class StatisticService:
    """Statistics over a copy of the index loaded at construction time."""

    def __init__(self, data_fetcher_service):
        """Load the index once through ``data_fetcher_service.fetch_index()``."""
        loaded_index = data_fetcher_service.fetch_index()
        self.index = loaded_index

In [None]:
def get_most_common_words(self, num_words):
    """Return the *num_words* terms with the highest total count.

    A term's total is the sum of its ``count`` over all of its documents.

    Returns:
        ``False`` when *num_words* is outside 3..10, an empty list when the
        index is empty, otherwise a list of ``(word, total)`` tuples in
        descending order of total.
    """
    if num_words < 3 or num_words > 10:
        return False
    if not self.index:
        print("No data found in index.")
        return []

    totals = Counter({
        word: sum(doc["count"] for doc in data["DocIDs"].values())
        for word, data in self.index.items()
    })

    # Get the top N most common words
    return totals.most_common(num_words)

# Attach the method to the service
StatisticService.get_most_common_words = get_most_common_words
del get_most_common_words  # Removes the standalone function

In [None]:
def get_least_common_words(self, num_words):
    """Return up to *num_words* terms with the lowest total counts.

    Terms are grouped by total count and one randomly chosen term is taken
    from each group, smallest count first. The result can therefore be
    shorter than *num_words* when there are fewer distinct counts.

    Returns:
        ``False`` when *num_words* is outside 3..10, an empty list when the
        index is empty, otherwise a list of ``(word, total)`` tuples in
        ascending order of total.
    """
    if num_words < 3 or num_words > 10:
        return False
    if not self.index:
        print("No data found in index.")
        return []

    # total count -> words having exactly that total
    words_by_total = {}
    for word, data in self.index.items():
        total = sum(doc["count"] for doc in data["DocIDs"].values())
        words_by_total.setdefault(total, []).append(word)

    picks = []
    for total in sorted(words_by_total):  # Ascending counts
        picks.append((random.choice(words_by_total[total]), total))
        if len(picks) == num_words:
            break

    return picks

# Attach the method to the service
StatisticService.get_least_common_words = get_least_common_words
del get_least_common_words  # Removes the standalone function

In [None]:
def get_random_words(self, num_words):
    """Return *num_words* randomly sampled terms with their total counts.

    Returns:
        ``False`` when *num_words* is outside 3..10, an empty list when the
        index is empty, otherwise a list of ``(word, total)`` tuples. When the
        index holds fewer than *num_words* terms, all of them are returned.
    """
    if num_words < 3 or num_words > 10:
        return False
    if not self.index:
        print("No data found in Firebase index.")
        return []

    totals = Counter({
        word: sum(doc["count"] for doc in data["DocIDs"].values())
        for word, data in self.index.items()
    })
    candidates = list(totals.items())

    if len(totals) < num_words:
        print("Not enough words in the index to get random words.")
        return candidates

    # Get random words
    return random.sample(candidates, num_words)

# Attach the method to the service
StatisticService.get_random_words = get_random_words
del get_random_words  # Removes the standalone function

In [None]:
def get_common_docs(self, num_docs):
    """Return the *num_docs* documents that appear under the most index terms.

    Returns:
        list of ``{'doc_id', 'title', 'count'}`` dicts, where ``count`` is the
        number of terms listing the document, in descending order of count.
    """
    tally = {}

    for data in self.index.values():
        if 'DocIDs' not in data:
            continue  # Term entry without documents
        for doc_id, doc_data in data['DocIDs'].items():
            seen = tally.get(doc_id)
            if seen is None:
                tally[doc_id] = {'title': doc_data['title'], 'count': 1}
            else:
                seen['count'] += 1

    # Highest term count first
    ranked = sorted(tally.items(), key=lambda x: x[1]['count'], reverse=True)

    return [
        {'doc_id': doc_id, 'title': info['title'], 'count': info['count']}
        for doc_id, info in ranked[:num_docs]
    ]

# Attach the method to the service
StatisticService.get_common_docs = get_common_docs
del get_common_docs  # Removes the standalone function

In [None]:
def _convert_fig_to_html(self, fig):
    """Render *fig* as PNG and return it as an ``<img>`` tag with a base64 data URI.

    The figure is closed through ``plt.close`` once it has been rendered.
    """
    with BytesIO() as png_buffer:
        fig.savefig(png_buffer, format='png')
        encoded = base64.b64encode(png_buffer.getvalue()).decode('utf-8')
    plt.close(fig)
    return f"<img src='data:image/png;base64,{encoded}' style='max-width:100%;'>"

# Attach the method to the service
StatisticService._convert_fig_to_html = _convert_fig_to_html
del _convert_fig_to_html  # Removes the standalone function

In [None]:
def generate_pie_chart(self, data, labels, legend_title, legend_labels=None, title="Pie Chart"):
    """Draw a pie chart of *data* and return it as an HTML ``<img>`` string.

    Args:
        data: Numeric values, one per slice.
        labels: Slice labels.
        legend_title: Title of the legend (used only when a legend is drawn).
        legend_labels: Optional legend entries; no legend is drawn when falsy.
        title: Chart title.
    """
    fig, ax = plt.subplots(figsize=(8, 8))

    # Shade each slice with the "Blues" colormap according to its share of the largest value
    blues = colormaps.get_cmap('Blues')
    slice_colors = [blues(value / max(data)) for value in data]

    def slice_value(pct):
        # Turn the percentage matplotlib passes in back into the absolute value
        return f"{int(round(pct * sum(data) / 100.0))}"

    wedges, texts, autotexts = ax.pie(
        data,
        labels=labels,
        autopct=slice_value,
        startangle=90,
        radius=0.8,
        colors=slice_colors,
    )

    # Style the value text drawn inside the slices
    for value_text in autotexts:
        value_text.set_color('white')
        value_text.set_fontsize(10)

    ax.set_title(title, pad=5)

    if legend_labels:
        legend_options = {
            "title": legend_title,
            "loc": "lower center",
            "bbox_to_anchor": (0.5, -0.15),  # Just below the pie chart
            "fontsize": 'small',
            "ncol": 1,
        }
        ax.legend(wedges, legend_labels, **legend_options)

    # Adjust top and bottom spacings
    plt.subplots_adjust(top=0.9, bottom=0.3)

    return self._convert_fig_to_html(fig)

# Attach the method to the service
StatisticService.generate_pie_chart = generate_pie_chart
del generate_pie_chart  # Removes the standalone function

In [None]:
def generate_bar_chart(self, data, labels, title="Bar Chart", stacked=False):
    """Draw grouped bars for every series in *data* and return an HTML ``<img>`` string.

    Args:
        data: Sequence of series; each series holds one value per label.
        labels: Tick labels for the x axis.
        title: Chart title.
        stacked: Accepted but not used by this implementation.
    """
    fig, ax = plt.subplots()
    series_count = len(data)
    width = 0.8 / series_count
    positions = range(len(labels))

    # Light-to-dark blue gradient scaled over the full value range of all series
    gradient = LinearSegmentedColormap.from_list("blue_gradient", ["#cce7ff", "#003366"])
    lowest = min(map(min, data))
    highest = max(map(max, data))
    scale = plt.Normalize(vmin=lowest, vmax=highest)

    for series_index, series in enumerate(data):
        # Centre the group of bars around each tick position
        shift = (series_index - (series_count - 1) / 2) * width
        bar_colors = [gradient(scale(value)) for value in series]
        bar_positions = [x + shift for x in positions]
        ax.bar(bar_positions, series, width=width, color=bar_colors)

    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels)
    ax.set_title(title)
    return self._convert_fig_to_html(fig)

# Attach the method to the service
StatisticService.generate_bar_chart = generate_bar_chart
del generate_bar_chart  # Removes the standalone function

In [None]:
def generate_line_chart(self, y_series, x, title="Line Chart"):
    """Plot every series of *y_series* against *x* and return an HTML ``<img>`` string.

    Series are labelled "Metric 1", "Metric 2", ... in the legend.
    """
    fig, ax = plt.subplots()

    for i, series in enumerate(y_series):
        series_label = f"Metric {i + 1}"
        ax.plot(x, series, marker='o', label=series_label)

    ax.set_title(title)
    ax.legend()  # Legend listing all series
    return self._convert_fig_to_html(fig)

# Attach the method to the service
StatisticService.generate_line_chart = generate_line_chart
del generate_line_chart  # Removes the standalone function

#**HTML & CSS**

In [None]:
# ---------------------------
# CSS Injection
# ---------------------------
def inject_css():
    """Display the shared ``<style>`` block (font import, gradient, photo container, buttons, labels)."""
    stylesheet = """
    <style>
      @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap'); /* Import Font */

      .my-gradient {
        background: linear-gradient(360deg, white 0%, #A1AFE5 100%) !important;
      }

      /* Floats the photo container to the right with margin */
      .photo-container {
        float: right;
        width: 350px;       /* approximate width */
        margin: 15px;       /* space around it */
        text-align: center;
      }

      .photo-container img {
        width: 350px;       /* ensure the image is 350px wide */
        height: auto;       /* keep aspect ratio */
      }

      /* Default Button Style */
      .default-button {
        background-color: #213C73; /* Blue background */
        color: white;              /* White text */
        border: none;              /* Remove border */
        border-radius: 5px;        /* Rounded corners */
        font-size: 16px;           /* Text size */
        font-weight: bold;         /* Make the font bold */
        font-family: "Roboto";     /* Font */
        cursor: pointer;           /* Pointer cursor on hover */
        transition: background-color 0.3s ease; /* Smooth hover transition */
      }

      /* Warning Button Style */
      .warning-button {
        background-color: #FC7F03; /* Orange background */
        color: white;
        border: none;
        border-radius: 5px;
        font-size: 16px;
        font-weight: bold;
        font-family: "Roboto";
        cursor: pointer;
      }

      /* Danger Button Style */
      .danger-button {
        background-color: #AD0000; /* Red background */
        color: white;
        border: none;
        border-radius: 5px;
        font-size: 16px;
        font-weight: bold;
        font-family: "Roboto";
        cursor: pointer;
      }

      .custom-dropdown label {
            font-size: 16px !important;
            font-weight: bold;
        }

      .custom-label {
            font-size: 16px !important;  /* Change size */
        }

      /* Hover effect for buttons */
      .default-button:hover {
        background-color: #298FCE; /* Brighter blue on hover */
      }

    </style>
    """
    style_element = HTML(stylesheet)
    display(style_element)

In [None]:
# ---------------------------
# Header Component
# ---------------------------
def create_header():
    """Return an HTML widget with the page header: a logo on the left and the centred title."""
    markup = """
        <div style="
            background-color: #FFBE5F;
            padding: 20px;
            position: relative;   /* for absolute positioning of the image */
            text-align: center;   /* centers the title text */
        ">
          <!-- Position the image on the left -->
          <img src="https://www.svgrepo.com/show/353443/aws.svg"
               alt="Logo"
               style="
                 position: absolute;
                 left: 20px;
                 top: 20px;
                 height: 40px;
               " />
          <h1 style="margin: 0; color: black;">Kakadoo Engine</h1>
        </div>
        """
    return widgets.HTML(value=markup)

In [None]:
def create_ResultView_page():
    """Return the stylesheet widget used by the query-results table.

    The CSS defines the table wrapper, its fixed-height scroll area, the sticky
    header and the flex rows with a left cell and a right cell.

    Returns:
        widgets.HTML: widget containing only a ``<style>`` element.
    """
    results_table_css = """
        <style>
            .table-wrapper {
                width: 100%;
                margin: 0 auto;
                border: 1px solid #ccc;
                background: white;
                max-width: 1300px;
                min-width: 500px;
            }
            .table-scroll {
                height: 400px;
                overflow-y: auto;
                overflow-x: hidden;
            }
            .table-header {
                background-color: #f8f9fa;
                border-bottom: 2px solid #dee2e6;
                padding: 10px 20px;
                position: sticky;
                top: 0;
                z-index: 1;
                font-weight: bold;
                text-align: center;
                font-size: 16px;
            }
            /* Each row is a flex container so we can pin items left & right */
            .table-row {
                display: flex;
                justify-content: space-between;  /* Left text pinned left, right text pinned right */
                align-items: center;
                padding: 8px 20px;
                border-bottom: 1px solid #eee;
                white-space: nowrap;
                font-size: 16px;
            }
            .table-row:hover {
                background-color: #f5f5f5;
            }
            /* Left cell pinned to the left, right cell pinned to the right */
            .left-cell {
                flex: 0 0 auto;  /* do not expand */
            }
            .right-cell {
                flex: 0 0 auto;  /* do not expand */
                margin-left: 40px; /* push to the far right */
                text-align: right; /* optional, if you want the text itself right-aligned */
            }
        </style>
    """
    return widgets.HTML(results_table_css)


In [None]:
def create_admin_page():
    """Return the stylesheet widget used by the admin page table.

    The CSS covers the table wrapper and scroll area, the checkbox rows and
    their sticky header, the row text, the URL links and the checkbox width.

    NOTE(review): ``.checkbox-row`` declares ``align-items: center`` twice and
    ``.row-content`` uses ``align-items: left``, which is not a valid value for
    that property. Both are left untouched here to keep the rendering as is.

    Returns:
        widgets.HTML: widget containing only a ``<style>`` element.
    """
    admin_table_css = """
        <style>
            .table-wrapper {
                width: 100%;
                margin: 0 auto;
                border: 1px solid #ccc;
                background: white;
                max-width: 1300px;
            }
            .table-scroll {
                height: 400px;
                overflow-y: auto;
                overflow-x: hidden;
            }
            .checkbox-row {
                display: flex;
                justify-content: space-between;
                align-items: center;
                padding: 0px 20px;
                border-bottom: 1px solid #eee;
                align-items: center;
                gap: 60px;
                margin: 0px;
                width: auto;
            }
            .checkbox-row:hover {
                background-color: #f5f5f5;
            }
            .checkbox-header {
                background-color: #f8f9fa;
                border-bottom: 2px solid #dee2e6;
                padding: 5px 20px;
                position: sticky;
                top: 0;
                z-index: 1;
                font-weight: bold;
                display: flex;
                align-items: center;
                gap: 10px;
                justify-content: flex-end;
            }
            .row-content {
                display: flex;
                flex-direction: column;
                width: 100%;
                align-items: left;
                margin-left: 4px;
            }
            .row-content > .left-cell,
            .row-content > .right-cell,
            .term-count {
                color: #000;
            }
            .url-link {
                color: #0366d6;
                text-decoration: none;
            }
            .ipywidget-checkbox {
                width: 15px;
            }
        </style>
    """
    return widgets.HTML(admin_table_css)

In [None]:
# ---------------------------
# Blur Divider Component
# ---------------------------
def create_blur_divider():
    """Build a thin, blurred grey line used as a visual separator.

    Returns:
        widgets.HTML: widget holding a single styled ``<div>``.
    """
    divider_markup = """
        <div style="
            height: 2px;
            background: linear-gradient(to top, #9c9c9c, #9c9c9c);
            filter: blur(2px);
        "></div>
        """
    return widgets.HTML(value=divider_markup)

In [None]:
def create_statistics_chart_page():
    """Return the stylesheet widget for the statistics tabs.

    The rules let tabs grow to fit their labels, center the tab labels and
    turn off scrollbars on the tab bar, the tab widget and its panels.

    Returns:
        widgets.HTML: widget containing only a ``<style>`` element.
    """
    tab_css = """
        <style>
                /* Let each tab expand as needed, but remain flexible */
                .jupyter-widgets.widget-tab > .p-TabBar .p-TabBar-tab {
                    flex: 1;
                    min-width: fit-content;
                    padding: 0 15px;
                }
                /* Center the label text of each tab */
                .jupyter-widgets.widget-tab > .p-TabBar .p-TabBar-tabLabel {
                    text-align: center;
                    justify-content: center;
                }
                /* Remove all scrollbars from tab bar */
                .jupyter-widgets.widget-tab > .p-TabBar {
                    overflow: hidden !important;
                }
                /* Remove bottom scrollbar from the entire widget */
                .jupyter-widgets.widget-tab {
                    overflow: hidden !important;
                }
                /* Remove scrollbars from tab panels */
                .jupyter-widgets.widget-tab > .widget-tab-contents {
                    overflow: hidden !important;
                }
                /* Ensure tab content is visible without scrollbars */
                .widget-vbox {
                    overflow: visible !important;
                }
        </style>
    """
    return widgets.HTML(tab_css)


#**MAIN VIEW GUI**

In [None]:
# ---------------------------
# Buttons Component
# ---------------------------
def create_navigation_buttons():
    """Create the landing-page buttons and the two rows that hold them.

    Returns:
        tuple: ``(admin_button_hbox, buttons_hbox)`` - a right-aligned row with
        the "Admin Page" button, and a centered row with the "Run Query",
        "Statistics" and "AWS Chatbot" buttons, in that order.
    """
    def _nav_button(caption, width):
        # Every navigation button carries the shared 'default-button' CSS class.
        button = widgets.Button(description=caption, layout=widgets.Layout(width=width))
        button.add_class('default-button')
        return button

    admin_row = widgets.HBox(
        [_nav_button('Admin Page', '120px')],
        layout=widgets.Layout(
            justify_content='flex-end',
            width='95%',
            padding='0 5px 10px 0',
            overflow='hidden',
        ),
    )

    main_captions = ('Run Query', 'Statistics', 'AWS Chatbot')
    main_row = widgets.HBox(
        [_nav_button(caption, '180px') for caption in main_captions],
        layout=widgets.Layout(justify_content='center', padding='20px 0 0 0'),
    )

    return admin_row, main_row

In [None]:
# ---------------------------
# Photo Container Component
# ---------------------------
def create_photo_container():
    """Build the decorative cloud image shown on every view.

    Returns:
        widgets.HTML: widget wrapping the image inside a ``photo-container`` div.
    """
    photo_markup = """
        <div class="photo-container">
          <img src="https://www.svgrepo.com/show/429687/cloud-moving-cloud-computing.svg"
               alt="Moving Cloud"
               style="width: 350px; height: auto;" />
        </div>
        """
    return widgets.HTML(value=photo_markup)

In [None]:
# ---------------------------
# View Components
# ---------------------------

# Base class for all views
class BaseView(widgets.VBox):
    """Vertical container shared by all page views.

    The given child widgets are stacked in order and the photo widget is
    appended after them.
    """

    def __init__(self, main_dashboard, photo_html, *children, **kwargs):
        """Lay out ``children`` followed by ``photo_html``.

        Args:
            main_dashboard: dashboard object, stored on the instance for the view's handlers.
            photo_html: widget appended as the last child and stored on the instance.
            *children: widgets shown above the photo, in the given order.
            **kwargs: forwarded unchanged to ``widgets.VBox``.
        """
        view_layout = widgets.Layout(
            align_items='center',
            background='transparent',
            width='100%',
            max_width='800px',
            overflow='visible',
        )
        super().__init__(children=[*children, photo_html], layout=view_layout, **kwargs)
        self.main_dashboard = main_dashboard
        self.photo_html = photo_html

In [None]:
# ---------------------------
# Main Dashboard Area Component
# ---------------------------
class MainDashboardView:
    """Owns the central dashboard area and controls what is displayed in it."""

    def __init__(self, photo_html):
        """Create the empty main area and the dashboard logo.

        Args:
            photo_html: widget stored on the instance as ``photo_html``.
        """
        area_layout = widgets.Layout(
            width='100%',
            min_height='300px',
            padding='20px',
            align_items='center',
        )
        self.photo_html = photo_html
        self.main_area = widgets.VBox(layout=area_layout)
        self.main_area.add_class("my-gradient")
        self.dashboard_logo = self.create_dashboard_logo()
        self.current_view = None

    def create_dashboard_logo(self):
        """Return an HTML widget showing the project logo image."""
        logo_markup = """
              <img src="https://github.com/tomerot/cloud-computing-kakadoo/blob/main/Logo/Logo.png?raw=true"
                   alt="Logo"
                   style="
                     position: center;
                     height: 150px;
                   " />
            """
        return widgets.HTML(value=logo_markup)

    def show_main_buttons(self, admin_button_hbox, buttons_hbox):
        """Show the landing layout: admin row on top, then the logo, then the navigation row."""
        landing_children = [admin_button_hbox, self.dashboard_logo, buttons_hbox]
        self.main_area.children = landing_children

    def show_view(self, view):
        """Replace the main area's content with the single widget ``view``."""
        self.main_area.children = [view]

    def get_widget(self):
        """Return the VBox that hosts the dashboard content."""
        return self.main_area

#**ADMIN PAGE GUI**

In [None]:
class AdminPageView(BaseView):
    def __init__(self, main_dashboard, photo_html, term_list):
        """Assemble the admin page widgets and hand them to BaseView.

        Args:
            main_dashboard: dashboard object the return handler uses to go back to the main page.
            photo_html: shared photo widget, appended as the last child by BaseView.
            term_list: terms offered in the dropdown; the first entry is preselected,
                or nothing when the list is empty.
        """
        self.main_dashboard = main_dashboard
        self.selected_urls = []
        self.data = {}  # documents currently shown in the table

        def _action_button(caption, css_class, handler):
            # The four action buttons share one width; only caption, class and handler differ.
            button = widgets.Button(description=caption, layout=widgets.Layout(width='200px'))
            button.add_class(css_class)
            button.on_click(handler)
            return button

        # -------------------------------
        # Top row: term dropdown and the button that loads its documents
        # -------------------------------
        preselected_term = term_list[0] if term_list else None
        self.term_input = widgets.Dropdown(
            options=term_list,
            value=preselected_term,
            description='Term:',
            layout=widgets.Layout(width='220px'),
        )
        self.term_input.add_class("custom-dropdown")

        self.display_button = widgets.Button(
            description="Display Term Information",
            layout=widgets.Layout(width='230px'),
        )
        self.display_button.add_class('default-button')
        self.display_button.on_click(self.on_search)

        top_row = widgets.HBox(
            [self.term_input, self.display_button],
            layout=widgets.Layout(
                width='500px',
                align_items='center',
                justify_content='center',
                margin='5px auto',
            ),
        )

        # -------------------------------
        # Loading indicator: spinner image plus a status message, hidden until needed
        # -------------------------------
        self.spinner = widgets.HTML(
            value="""
            <img src="https://media.tenor.com/On7kvXhzml4AAAAi/loading-gif.gif" alt="Loading..." style="width:50px;height:50px;">
        """,
            layout=widgets.Layout(
                width='50px',
                height='50px',
                margin='0 auto',
                align_items='center',
                justify_content='center',
            ),
        )
        self.message = widgets.HTML(
            value="",
            layout=widgets.Layout(width='300px', margin='5px auto', text_align='center'),
        )
        self.loading_box = widgets.VBox(
            [self.spinner, self.message],
            layout=widgets.Layout(
                align_items='center',
                justify_content='center',
                width='300px',
                margin='0 auto',
                display='none',
            ),
        )

        # -------------------------------
        # Error message, hidden until a handler fills it in
        # -------------------------------
        self.error_message = widgets.HTML(
            value="",
            layout=widgets.Layout(
                width='200px',
                margin='5px auto',
                align_items='center',
                justify_content='center',
                display='none',
            ),
        )
        self.error_message.add_class('error-message')

        # -------------------------------
        # Table stylesheet and the (initially empty) checkbox table
        # -------------------------------
        style_html = create_admin_page()
        self.checkboxes = []
        self.checkbox_box = None
        self.initialize_checkbox_container()

        # -------------------------------
        # Bottom rows: destructive actions, then the return button
        # -------------------------------
        self.delete_term_button = _action_button("Delete Current Term", 'warning-button', self.on_delete_term)
        self.delete_url_button = _action_button("Delete Selected URLs", 'warning-button', self.on_delete_url)
        self.recreate_index = _action_button("Recreate Index", 'danger-button', self.on_recreate_index)
        return_button = _action_button("Return to Main Page", 'default-button', self.on_return)

        button_row = widgets.HBox(
            [self.delete_term_button, self.delete_url_button, self.recreate_index],
            layout=widgets.Layout(
                width='90%',
                margin='20px auto',
                justify_content='center',
                display='flex',
            ),
        )
        return_row = widgets.HBox([return_button], layout=widgets.Layout(margin='20px'))

        # Caption above the table; only shown once a term's documents are displayed
        self.outer_label = widgets.Label("Term Count and Relative URL link")
        self.outer_label.add_class("custom-label")
        self.outer_label.layout.display = 'none'

        # BaseView stacks these in order and appends photo_html at the end
        page_children = (
            top_row,
            self.error_message,
            self.outer_label,
            style_html,
            self.checkbox_box,
            self.loading_box,
            button_row,
            return_row,
        )
        super().__init__(main_dashboard, photo_html, *page_children)

    def initialize_checkbox_container(self):
        """Create the hidden, scrollable table box containing only the "Select All" header row."""
        # Header checkbox; its value changes are forwarded to on_select_all_change
        toggle_all = widgets.Checkbox(
            value=False,
            description='',
            indent=False,
            layout=widgets.Layout(margin='0'),
        )
        toggle_all.add_class('ipywidget-checkbox')
        toggle_all.observe(self.on_select_all_change, names='value')
        self.select_all_checkbox = toggle_all

        # Label and checkbox sit side by side in the header row
        header_label = widgets.Label("Select All")
        header_label.add_class("custom-label")
        header_row = widgets.HBox([header_label, toggle_all])
        header_row.add_class('checkbox-header')

        # Data rows are added below the header later; the box starts out hidden
        table_box = widgets.VBox(
            [header_row],
            layout=widgets.Layout(
                border='1px solid #ccc',
                height='400px',
                overflow_y='scroll',
                width='auto',
                display='none',
            ),
        )
        table_box.add_class('table-wrapper')
        self.checkbox_box = table_box

    def update_table_data(self):
        """Rebuild the table rows from ``self.data``.

        ``self.data`` is iterated as ``doc_id -> doc_info``, where each ``doc_info``
        provides ``'url'`` and ``'count'``. One row (text + checkbox) is created per
        document, and ``self.checkboxes`` is refilled with ``(checkbox, url)`` pairs.
        The "Select All" header row is always kept as the first child. With empty
        data the table is reduced to that header row.
        """
        import html  # local import: only needed here to escape values placed into markup

        header_row = self.checkbox_box.children[0]  # Keep the select all container
        self.checkboxes = []
        # New rows always start unticked, so a "Select All" tick left over from the
        # previously displayed term would be out of sync with them.
        self.select_all_checkbox.value = False

        if not self.data:
            self.checkbox_box.children = [header_row]
            return

        rows = []
        for doc_id, doc_info in self.data.items():
            cb = widgets.Checkbox(
                value=False,
                description='',
                indent=False,
            )
            cb.add_class('ipywidget-checkbox')
            cb.doc_id = doc_id  # read back by on_delete_url to know which documents to delete
            self.checkboxes.append((cb, doc_info['url']))

            # Escape before interpolating: a URL containing quotes, '<' or '&' would
            # otherwise break out of the href attribute or corrupt the row markup.
            safe_url = html.escape(str(doc_info['url']), quote=True)
            safe_count = html.escape(str(doc_info['count']))
            row_content = widgets.HTML(f"""
                <div class="row-content" style = "font-size: 16px">
                    <div class="left-cell">
                        <span class="term-count">Term count: {safe_count}</span>
                    </div>
                    <div class="right-cell">
                        URL: <a href="{safe_url}" target="_blank" class="url-link">{safe_url}</a>
                    </div>
                </div>
            """)

            row = widgets.HBox([row_content, cb])
            row.add_class('checkbox-row')
            rows.append(row)

        self.checkbox_box.children = [header_row] + rows

    def on_select_all_change(self, change):
        """Copy the new "Select All" state onto every row checkbox.

        Args:
            change: observer payload; ignored unless its ``'name'`` is ``'value'``,
                otherwise ``change['new']`` is applied to each row checkbox.
        """
        if change['name'] != 'value':
            return
        for row_checkbox, _url in self.checkboxes:
            row_checkbox.value = change['new']

    def on_search(self, b):
        """Show the documents of the selected term, or an error when no term is selected.

        Args:
            b: the clicked button (unused).
        """
        # Start clean: hide the loading box and drop any previous error
        self.loading_box.layout.display = 'none'
        self.error_message.value = ""
        self.error_message.layout.display = 'none'

        term = self.term_input.value
        if term is None:
            # No term to look up: hide the table and its caption, show the error instead
            self.outer_label.layout.display = 'none'
            self.error_message.value = "<span style='color: red;'>Index is empty.</span>"
            self.error_message.layout.display = 'block'
            self.checkbox_box.layout.display = 'none'
            return

        self.outer_label.layout.display = 'inline'
        # Load the term's documents and rebuild the rows before revealing the table
        self.data = administration_service.fetch_urls(term)
        self.update_table_data()
        self.checkbox_box.layout.display = 'block'

    def on_delete_url(self, b):
        """Delete the ticked documents of the current term and refresh the table.

        Shows an error instead when no row is ticked. In both cases every
        checkbox, including "Select All", ends up unticked.

        Args:
            b: the clicked button (unused).
        """
        self.error_message.value = ""
        self.error_message.layout.display = 'none'

        ticked_doc_ids = [checkbox.doc_id for checkbox, _url in self.checkboxes if checkbox.value]
        if not ticked_doc_ids:
            self.error_message.value = "<span style='color: red;'>Please select URLs to delete.</span>"
            self.error_message.layout.display = 'block'
        else:
            administration_service.delete_docs(self.term_input.value, ticked_doc_ids)
            # Remove the selected doc ids from the local copy of the data
            remaining = {}
            for doc_id, doc_info in self.data.items():
                if doc_id not in ticked_doc_ids:
                    remaining[doc_id] = doc_info
            self.data = remaining
            if not self.data:
                # Nothing left to show: hide the table and reload the term list
                self.checkbox_box.layout.display = 'none'
                refreshed_terms = administration_service.fetch_terms()
                self.term_input.options = refreshed_terms
                self.term_input.value = refreshed_terms[0] if refreshed_terms else None
            self.update_table_data()

        # Untick everything, whichever branch ran
        self.select_all_checkbox.value = False
        for checkbox, _url in self.checkboxes:
            checkbox.value = False

    def on_delete_term(self, b):
        """Delete the selected term, reload the term list and clear the table.

        When the dropdown has no selection, nothing is deleted and the
        "Index is empty." error is shown instead.

        Args:
            b: the clicked button (unused).
        """
        self.error_message.value = ""
        self.error_message.layout.display = 'none'

        term = self.term_input.value
        if term is None:
            # Never forward None to the service: there is no term to delete.
            self.error_message.value = "<span style='color: red;'>Index is empty.</span>"
            self.error_message.layout.display = 'block'
            return

        administration_service.delete_term(term)
        current_term_list = administration_service.fetch_terms()
        self.term_input.options = current_term_list
        self.term_input.value = current_term_list[0] if current_term_list else None
        self.data = {}
        # The table is hidden below, so its caption must not stay behind on its own
        self.outer_label.layout.display = 'none'
        self.checkbox_box.layout.display = 'none'
        self.update_table_data()

    def on_recreate_index(self, b):
        """Recreate the index through the administration service and reset the page.

        A spinner and a progress message are shown while the service call runs.
        Afterwards the spinner is hidden, the service's response is displayed,
        the term list is reloaded and the table is cleared.

        Args:
            b: the clicked button (unused).
        """
        self.error_message.value = ""
        self.error_message.layout.display = 'none'
        # The spinner is hidden at the end of every run; reset it so a second
        # "Recreate Index" click shows it again instead of a bare message.
        self.spinner.layout.display = None
        self.message.value = """
            <div style="width: 100%; text-align: center;">
                <span style='color: blue;'>Recreating Index. This will take a while...</span>
            </div>
        """
        self.loading_box.layout.display = 'flex'  # Make the box visible
        response = administration_service.recreate_index()
        self.spinner.layout.display = 'none'
        self.message.value = f"""
            <div style="width: 100%; text-align: center;">
                <span style='color: blue;'>{response}</span>
            </div>
        """
        current_term_list = administration_service.fetch_terms()
        self.term_input.options = current_term_list
        self.term_input.value = current_term_list[0] if current_term_list else None
        self.data = {}
        # The table is hidden below, so its caption must not stay behind on its own
        self.outer_label.layout.display = 'none'
        self.checkbox_box.layout.display = 'none'
        self.update_table_data()

    def on_return(self, b):
        """Go back to the main page by restoring the controller's two button rows.

        Args:
            b: the clicked button (unused).
        """
        controller = self.main_dashboard.controller
        self.main_dashboard.show_main_buttons(controller.admin_button_hbox, controller.buttons_hbox)

#**ENTER QUERY GUI**

In [None]:
class EnterQueryView(BaseView):
    """Page with a query text box, a search button and an "AWS Fun Fact" button."""

    def __init__(self, main_dashboard, photo_html):
        """Create the widgets, wire their click handlers and lay the page out.

        Args:
            main_dashboard: dashboard used to switch to other views.
            photo_html: shared photo widget, appended as the last child by BaseView.
        """
        def _centered_row(*items):
            # Each row of this page is a centered, full-width HBox
            return widgets.HBox(
                list(items),
                layout=widgets.Layout(justify_content="center", align_items="center", overflow="hidden", width="100%")
            )

        self.page_label = widgets.HTML(
            value='<b style = "font-size: 20px;">Run Query</b>',
            layout=Layout(margin="10px", align_self="center"),
        )

        # A random example query is shown as the placeholder text
        example_queries = ["Example: Relational Database Service", "Example: Auto Scaling", "Example: EC2 Documentation"]
        self.text_input = widgets.Text(
            placeholder=random.choice(example_queries),
            layout=widgets.Layout(width='300px'),
        )

        self.search_button = widgets.Button(description="Search")
        self.search_button.add_class('default-button')

        self.return_button_main = widgets.Button(
            description="Return to Main Page",
            layout=widgets.Layout(width='200px'),
        )
        self.return_button_main.add_class('default-button')

        self.fun_fact_button = widgets.Button(description="AWS Fun Fact")
        self.fun_fact_button.add_class('default-button')

        # Output areas: the fun fact text and the validation error
        self.fun_fact_label = widgets.HTML(value="", layout=widgets.Layout(margin="30px", width="100%"))
        self.error_label = widgets.HTML(value="", layout=widgets.Layout(margin="10px 0 0 0"))

        query_row = _centered_row(self.text_input)
        action_row = _centered_row(self.search_button, self.fun_fact_button)
        return_row = _centered_row(self.return_button_main)

        # Click handlers
        self.return_button_main.on_click(self.on_return_main_clicked)
        self.search_button.on_click(self.on_search_clicked)
        self.fun_fact_button.on_click(self.on_fun_fact_clicked)

        super().__init__(
            main_dashboard,
            photo_html,
            self.page_label,
            query_row,
            action_row,
            self.fun_fact_label,
            return_row,
            self.error_label,
        )

    def on_return_main_clicked(self, b):
        """Go back to the main page by restoring the controller's two button rows."""
        controller = self.main_dashboard.controller
        self.main_dashboard.show_main_buttons(controller.admin_button_hbox, controller.buttons_hbox)

    def on_search_clicked(self, b):
        """Open the results view for the typed query, or ask for input when the box is blank."""
        query_text = self.text_input.value.strip()

        if query_text:
            self.error_label.value = ""
            results_page = ResultView(self.main_dashboard, self.photo_html, query_text)
            self.main_dashboard.show_view(results_page)
            return

        # Blank query: hide the fun fact so only the error is visible
        self.fun_fact_label.layout.display = 'none'
        self.error_label.value = "<p style='color:red; font-weight:bold;'>Please enter a query first!</p>"
        self.error_label.layout.display = 'inline'

    def on_fun_fact_clicked(self, b):
        """Show one randomly chosen AWS fun fact and hide any validation error."""
        # Pool of facts to draw from
        facts = [
            "Amazon Web Services (AWS) was launched in 2006 with just three services: S3, EC2, and SQS.",
            "AWS is the world’s largest cloud provider, hosting over 30% of all cloud workloads.",
            "Netflix uses AWS to stream content to over 200 million subscribers worldwide.",
            "AWS S3 (Simple Storage Service) stores trillions of objects and processes millions of requests per second.",
            "The name of the Amazon Elastic Compute Cloud service, EC2, reflects its ability to provide 'elastic' scaling of compute capacity.",
            "AWS operates in over 30 geographic regions and has more than 100 Availability Zones worldwide.",
            "AWS Lambda allows you to run code without provisioning or managing servers, which popularized the term 'serverless computing.'",
            "AWS Snowmobile is a literal 18-wheeler truck designed to transfer up to 100 petabytes of data to AWS.",
            "Amazon DynamoDB is used for low-latency databases by companies like Lyft, Airbnb, and Samsung.",
            "AWS Ground Station enables users to connect and communicate with satellites."
        ]
        chosen_fact = random.choice(facts)
        self.fun_fact_label.value = f"<div style='text-align: center; font-weight: bold; font-size: 16px;'>{chosen_fact}</div>"
        self.error_label.layout.display = 'none'
        self.fun_fact_label.layout.display = 'inline'


#**QUERY RESULTS GUI**

In [None]:
class ResultView(BaseView):
    """Page that runs a query and shows its results in a scrollable table."""

    def __init__(self, main_dashboard, photo_html, query):
        """Run ``query`` through the query service and build the results page.

        Each result is read as a mapping with ``'url'``, ``'title'`` and a
        numeric ``'rank'``. An empty result set renders a single
        "No Results Found" row.

        Args:
            main_dashboard: dashboard used to switch views.
            photo_html: shared photo widget, appended as the last child by BaseView.
            query: the text typed by the user.
        """
        import html  # local import: used to escape text before it is placed into markup

        # The query is free text typed by the user; escape it so characters such
        # as '<' or '&' are displayed literally instead of being parsed as HTML.
        safe_query = html.escape(str(query))
        requested_label = widgets.HTML(
            value=f"<span style='font-weight: bold; font-size: 16px;'>Requested Query: {safe_query}</span>",
            layout=widgets.Layout(margin="10px", width="100%")
        )

        centered_label = widgets.HBox(
            [requested_label],
            layout=widgets.Layout(justify_content="center", align_items="center")
        )

        # Create table styling
        style_html = create_ResultView_page()

        # Process query to fetch results
        self.query_results = query_service.process_query(query)

        # Table header markup (plain string; it is only ever embedded in the table)
        header_markup = """
            <div class='table-header'>
                Query Results
            </div>
        """

        if not self.query_results:
            # Display "No Results Found" when no results exist
            rows_html = """
                      <div class='table-row' style="display: flex; justify-content: center; align-items: center; height: 100%;">
                        <div>No Results Found</div>
                      </div>
        """
        else:
            # One row per result; titles and URLs are escaped because they come
            # from outside this view and may contain markup characters.
            rows_html = "".join([
                f"""
                <div class='table-row'>
                    <div class='left-cell'><a href="{html.escape(str(result['url']), quote=True)}" target="_blank">{html.escape(str(result['title']))}</a></div>
                    <div class='right-cell'>Rank: [{result['rank']:.1f}]</div>
                </div>
                """
                for result in self.query_results
            ])

        # Combine all rows into a scrollable table
        table_html = widgets.HTML(f"""
            <div class='table-wrapper'>
                {header_markup}
                <div class='table-scroll'>
                    {rows_html}
                </div>
            </div>
        """)

        # Create a return button
        return_button = widgets.Button(
            description="Return Back",
            layout=widgets.Layout(width='200px', margin='20px 0')
        )
        return_button.add_class('default-button')
        return_button.on_click(self.on_return_to_query)

        # Initialize the view with all components
        super().__init__(
            main_dashboard,
            photo_html,
            centered_label,
            style_html,
            table_html,
            return_button
        )

    def on_return_to_query(self, b):
        """Go back to a fresh query-entry page.

        Args:
            b: the clicked button (unused).
        """
        enter_query_view = EnterQueryView(self.main_dashboard, self.photo_html)
        self.main_dashboard.show_view(enter_query_view)


#**AWS Chatbot GUI**

In [None]:
# AWS Chatbot GUI
import os

# SECURITY(review): an API key committed to the notebook is readable by everyone
# who can see this file. Supply the key through the GOOGLE_API_KEY environment
# variable instead; the literal below is kept only as a fallback so existing runs
# keep working, and it should be rotated and removed.
genai.configure(
    api_key=os.environ.get("GOOGLE_API_KEY") or "AIzaSyBHc-RLeXW4AY4c1e2XUtOg27-1D6Oj5SY"
)

# Shared model instance used by ask_aws_chatbot
model = genai.GenerativeModel("gemini-1.5-flash")

AWS_KEYWORDS = [
    "aws", "ec2", "s3", "eks", "lambda", "rds", "dynamodb", "cloudfront",
    "cloudformation", "elastic beanstalk", "vpc", "iam", "route 53",
    "cloudwatch", "autoscaling", "sns", "sqs", "glacier", "athena",
    "redshift", "elasticache", "kinesis", "api gateway", "waf",
    "kms", "secrets manager", "efs", "rekognition", "translate", "sagemaker"
]

# Keywords must appear as whole words (an optional plural "s" is tolerated).
# Plain substring tests let unrelated questions through, because everyday words
# contain keywords: "words"/"cards" contain "rds", "laws" contains "aws".
_AWS_KEYWORD_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in AWS_KEYWORDS) + r")s?\b"
)

def ask_aws_chatbot(question):
    """Answer an AWS-related question with the generative model.

    Args:
        question: the user's question as text.

    Returns:
        str: the model's answer; a fixed refusal message when the question
        mentions none of ``AWS_KEYWORDS``; or ``"An error occurred: ..."``
        when generating the answer raises.
    """
    if not _AWS_KEYWORD_PATTERN.search(question.lower()):
        return "This chatbot only answers questions about AWS and its services. Please ask an AWS-related question."

    try:
        prompt = f"Answer this AWS question in a simple and summarized way that any new aws user could use: {question}"
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Deliberately broad: the caller shows this text to the user, so any
        # failure is reported in the page rather than raised from a click handler.
        return f"An error occurred: {e}"

class AWSChatbotView(BaseView):
    """Page with a question box whose answers come from ``ask_aws_chatbot``."""

    def __init__(self, main_dashboard, photo_html):
        """Create the input row, the answer area and the return button.

        Args:
            main_dashboard: dashboard used to go back to the main page.
            photo_html: shared photo widget, appended as the last child by BaseView.
        """
        self.chat_input = widgets.Text(
            placeholder="Ask me about AWS!",
            layout=widgets.Layout(width='400px')
        )
        self.chat_button = widgets.Button(description="Ask")
        self.chat_button.add_class('default-button')
        self.response_label = widgets.HTML(value="", layout=widgets.Layout(width="100%"))
        self.return_button = widgets.Button(description="Return to Main Page", layout=widgets.Layout(width='200px'))
        self.return_button.add_class('default-button')

        input_hbox = widgets.HBox([self.chat_input, self.chat_button], layout=widgets.Layout(justify_content="center"))
        return_hbox = widgets.HBox([self.return_button], layout=widgets.Layout(justify_content="center"))

        self.chat_button.on_click(self.on_chat_button_clicked)
        self.return_button.on_click(self.on_return_clicked)

        super().__init__(
            main_dashboard,
            photo_html,
            widgets.HTML("<span style='font-size: 20px; font-weight: bold;'>AWS Chatbot</span>", layout=widgets.Layout(margin="10px")),
            input_hbox,
            self.response_label,
            return_hbox
        )

    def on_chat_button_clicked(self, b):
        """Send the typed question to the chatbot and display its reply.

        A blank question shows a validation message instead.

        Args:
            b: the clicked button (unused).
        """
        import html  # local import: used to escape the reply before it is placed into markup

        user_input = self.chat_input.value.strip()
        if not user_input:
            self.response_label.value = "<p style='color: red;'>Please enter a question.</p>"
            return
        response = ask_aws_chatbot(user_input)
        # The reply is free text (model output or an error message). Escape it so
        # fragments such as "<bucket-name>" are shown literally rather than being
        # swallowed as unknown HTML tags.
        safe_response = html.escape(str(response))
        self.response_label.value = f"<div style='padding: 10px; background: #f8f9fa;'>{safe_response}</div>"

    def on_return_clicked(self, b):
        """Go back to the main page by restoring the controller's two button rows.

        Args:
            b: the clicked button (unused).
        """
        self.main_dashboard.show_main_buttons(
            self.main_dashboard.controller.admin_button_hbox,
            self.main_dashboard.controller.buttons_hbox
        )


#**STATISTICS GUI**

In [None]:
class StatisticsView(BaseView):
    """Index-statistics page: four chart tabs and a button back to the main page.

    All chart markup is produced by the module-level ``statistics_service``;
    this class only arranges the resulting HTML inside widgets.
    """

    def __init__(self, main_dashboard, photo_html):
        """Assemble the heading, the tab widget and the return button.

        Args:
            main_dashboard: Dashboard object; stored and forwarded to ``BaseView``.
            photo_html: Widget stored and forwarded unchanged to ``BaseView``.
        """
        self.main_dashboard = main_dashboard
        self.photo_html = photo_html

        # Page-specific CSS widget (centers tab text, hides the horizontal scrollbar).
        stats_css_widget = create_statistics_chart_page()

        # Page heading.
        heading = widgets.HTML(
            value = '<b style = "font-size: 20px;">Index Statistics</b>',
            layout=Layout(margin="10px", align_self="center")
        )

        # Content of every tab (built before the tab widget itself).
        self.most_common_box = self.create_most_common_tab()
        self.least_common_box = self.create_least_common_tab()
        self.random_box = self.create_random_tab()
        self.most_common_docs_box = self.create_most_common_docs_tab()

        tab_layout = Layout(
            width='100%',
            display='flex',
            flex_flow='column',
            overflow='visible'
        )
        self.tab = widgets.Tab(layout=tab_layout)

        # (title, content) pairs in display order, so titles and pages cannot drift apart.
        pages = (
            ('Random Words', self.random_box),
            ('Most Common Words', self.most_common_box),
            ('Least Common Words', self.least_common_box),
            ('Most Common Documents', self.most_common_docs_box),
        )
        self.tab.children = [content for _, content in pages]
        for position, (caption, _) in enumerate(pages):
            self.tab.set_title(position, caption)

        # One return button for the whole page.
        back_button = widgets.Button(description="Return to Main Page", layout=Layout(width="170px"))
        back_button.add_class('default-button')
        back_button.on_click(self.on_return)

        # Heading, tabs and return button stacked vertically.
        page_box_layout = Layout(padding="10px", align_items="center", width="100%")
        self.main_layout = VBox([heading, self.tab, back_button], layout=page_box_layout)

        super().__init__(main_dashboard, photo_html, stats_css_widget, self.main_layout)

    def create_most_common_tab(self):
        """Return the box for the "Most Common Words" tab (chart only)."""
        self.most_common_chart_widget = HTML(self.generate_most_common_chart())
        box_layout = Layout(width="100%", height="auto", align_items='center')
        return VBox([self.most_common_chart_widget], layout=box_layout)

    def create_least_common_tab(self):
        """Return the box for the "Least Common Words" tab (chart above an explanatory note)."""
        note_markup = """
            <div style='text-align: center; margin: 10px 0; color: #666; font-size: 13px;'>
                *The chart displays five of the least common words.<br>
                Each bar shows a unique minimal count, and each word is randomly chosen from those with the same minimal count.
            </div>
            """
        note = widgets.HTML(value=note_markup, layout=Layout(width='100%'))

        self.least_common_chart_widget = HTML(self.generate_least_common_chart())

        box_layout = Layout(width="100%", height="auto", align_items='center')
        return VBox([self.least_common_chart_widget, note], layout=box_layout)

    def create_random_tab(self):
        """Return the box for the "Random Words" tab (chart plus a "Random Again" button)."""
        self.random_chart_widget = HTML(self.generate_random_chart())

        again_button = widgets.Button(
            description="Random Again",
            layout=widgets.Layout(width="150px", margin="10px 0", border="1px solid #ccc")
        )
        again_button.add_class('default-button')
        again_button.on_click(self.refresh_random_chart)

        box_layout = Layout(width="100%",align_items="center", height="auto")
        return VBox([self.random_chart_widget, again_button], layout=box_layout)

    def create_most_common_docs_tab(self):
        """Return the box for the "Most Common Documents" tab (pie chart only)."""
        self.doc_stats_chart_widget = widgets.HTML(self.generate_common_docs_chart())
        box_layout = Layout(width="100%", align_items="center", height="auto")
        return VBox([self.doc_stats_chart_widget], layout=box_layout)

    def _bar_chart_or_message(self, word_counts, title, empty_message):
        """Render (word, count) pairs as a bar chart, or return ``empty_message`` when there are none."""
        if not word_counts:
            return empty_message
        counts = [count for _, count in word_counts]
        words = [word for word, _ in word_counts]
        # The service is given the counts wrapped in an outer list, followed by the words.
        return statistics_service.generate_bar_chart([counts], words, title=title)

    def generate_most_common_chart(self):
        """Return chart HTML for the 5 most common words, or a "no data" paragraph."""
        return self._bar_chart_or_message(
            statistics_service.get_most_common_words(5),
            "Frequency of Top 5 Words in Different Links",
            "<p>No data available for Most Common Words.</p>",
        )

    def generate_least_common_chart(self):
        """Return chart HTML for 5 of the least common words, or a "no data" paragraph."""
        return self._bar_chart_or_message(
            statistics_service.get_least_common_words(5),
            "Frequency of Least Common Words in Different Links",
            "<p>No data available for Least Common Words.</p>",
        )

    def generate_random_chart(self):
        """Return chart HTML for 5 random words, or a "no data" paragraph."""
        return self._bar_chart_or_message(
            statistics_service.get_random_words(5),
            "Frequency of 5 Random Words in Different Links",
            "<p>No data available for Random Words.</p>",
        )

    def generate_common_docs_chart(self):
        """Return pie-chart HTML for the 5 most common documents, or a "no data" paragraph."""
        top_docs = statistics_service.get_common_docs(5) or []
        if not top_docs:
            return "<p>No data available for Document Statistics.</p>"

        # Slice sizes come from the counts; slices are labelled with the doc id,
        # while the legend shows "title (doc id)".
        slice_sizes = [entry['count'] for entry in top_docs]
        slice_labels = [entry['doc_id'] for entry in top_docs]
        legend_entries = [f"{entry['title']} ({entry['doc_id']})" for entry in top_docs]

        return statistics_service.generate_pie_chart(
            data=slice_sizes,
            labels=slice_labels,
            legend_title="Documents Titles",
            legend_labels=legend_entries,
            title="Most Referenced Documents in Index Terms"
        )

    def refresh_random_chart(self, b):
        """Click handler of the "Random Again" button: draw a fresh random-words chart."""
        self.random_chart_widget.value = self.generate_random_chart()

    def on_return(self, b):
        """Click handler of the return button: show the main page buttons again."""
        controller = self.main_dashboard.controller
        self.main_dashboard.show_main_buttons(controller.admin_button_hbox, controller.buttons_hbox)

#**CONTROLLERS**

In [None]:
# ---------------------------
# Dashboard Controller
# ---------------------------
class DashboardController:
    """Connects the main-page buttons to the views they open.

    The controller registers itself on ``main_dashboard.controller`` so that
    views can reach ``admin_button_hbox`` and ``buttons_hbox`` when returning
    to the main page.
    """

    # Password that unlocks the admin page.  Replace with your actual password.
    ADMIN_PASSWORD = '123456'

    def __init__(self, main_dashboard, admin_button_hbox, buttons_hbox, photo_html):
        """Store the page components and bind the button callbacks.

        Args:
            main_dashboard: Main dashboard view; views are shown through it.
            admin_button_hbox: Container whose first child is the admin button.
            buttons_hbox: Container whose first three children are the query,
                statistics and chatbot buttons, in that order.
            photo_html: Widget handed to every view that gets created.
        """
        self.main_dashboard = main_dashboard
        self.admin_button_hbox = admin_button_hbox
        self.buttons_hbox = buttons_hbox
        self.photo_html = photo_html
        # Currently displayed password prompt, or None when no prompt is open.
        self.admin_box = None
        # Assign callbacks
        self.setup_callbacks()

    def setup_callbacks(self):
        """Bind every navigation button to its handler method."""
        self.main_dashboard.controller = self  # To access buttons_hbox from views
        self.admin_button_hbox.children[0].on_click(self.handle_admin_page)
        self.buttons_hbox.children[0].on_click(self.handle_enter_query)
        self.buttons_hbox.children[1].on_click(self.handle_statistics)
        self.buttons_hbox.children[2].on_click(self.handle_aws_chatbot)

    def _close_admin_box(self):
        """Close the password prompt if one is open and drop the reference to it."""
        if self.admin_box is not None:
            self.admin_box.close()
            self.admin_box = None

    def handle_admin_page(self, b):
        """Show a password prompt; on the correct password open the admin page.

        Any prompt that is still open is closed first, so at most one prompt
        is displayed at a time.

        Args:
            b: The clicked button (unused).
        """
        self._close_admin_box()

        # Password widget (instead of a plain Text box) so the typed
        # characters are masked on screen.
        password_input = widgets.Password(placeholder='Enter Password', layout=widgets.Layout(width='180px'))
        enter_button = widgets.Button(description='Enter')
        close_button = widgets.Button(description='Close')
        output = widgets.Output()

        # Create a layout for the password box
        # NOTE(review): right='400' has no CSS unit and background_color is not
        # a documented Layout attribute; both are kept as they were - confirm
        # the intended styling.
        box = widgets.VBox([output, password_input, enter_button, close_button], layout=widgets.Layout(
            top='400px',
            right='400',
            display='flex',
            justify_content='center',
            align_items='center',
            width='auto',
            height='115px',
            background_color='transparent'
        ))
        self.admin_box = box

        def on_enter_button_clicked(b):
            # Everything displayed here goes into the prompt's own output area.
            with output:
                clear_output()
                time.sleep(0.2)
                if password_input.value == self.ADMIN_PASSWORD:
                    # Close the password box
                    box.close()
                    self.admin_box = None
                    # Proceed to admin page
                    term_list = administration_service.fetch_terms()
                    admin_page_view = AdminPageView(self.main_dashboard, self.photo_html, term_list)
                    self.main_dashboard.show_view(admin_page_view)
                else:
                    # Incorrect password
                    display(HTML("<p style='color:red;'>Incorrect Password. Please try again.</p>"))

        def on_close_button_clicked(b):
            # Close the password box
            box.close()
            self.admin_box = None

        enter_button.on_click(on_enter_button_clicked)
        close_button.on_click(on_close_button_clicked)

        # Display the password box
        display(box)

    def handle_enter_query(self, b):
        """Close any open password prompt and show the query page."""
        self._close_admin_box()
        enter_query_view = EnterQueryView(self.main_dashboard, self.photo_html)
        self.main_dashboard.show_view(enter_query_view)

    def handle_statistics(self, b):
        """Close any open password prompt and show the statistics page."""
        self._close_admin_box()
        statistics_view = StatisticsView(self.main_dashboard, self.photo_html)
        self.main_dashboard.show_view(statistics_view)

    def handle_aws_chatbot(self, b):
        """Close any open password prompt and show the chatbot page."""
        self._close_admin_box()
        aws_chatbot_view = AWSChatbotView(self.main_dashboard, self.photo_html)
        self.main_dashboard.show_view(aws_chatbot_view)

In [None]:
# ---------------------------
# Dashboard Logic
# ---------------------------
def display_main_dashboard():
    """Clear the cell output, then build and display the full dashboard page."""
    clear_output()

    # Page components
    page_header = create_header()
    divider = create_blur_divider()
    admin_row, nav_row = create_navigation_buttons()
    photo = create_photo_container()
    dashboard = MainDashboardView(photo)

    # Start on the main page, showing the navigation buttons
    dashboard.show_main_buttons(admin_row, nav_row)

    # Create the controller (this binds the button callbacks) and expose it
    # on the dashboard so views can reach it
    dashboard.controller = DashboardController(dashboard, admin_row, nav_row, photo)

    # Header on top, then the blur line, then the dashboard itself
    display(widgets.VBox([page_header, divider, dashboard.get_widget()]))

    # Inject CSS
    inject_css()

#**MAIN**

In [None]:
# ---------------------------------------------------------
# Execute: Database Connectivity, Microservices, Dashboard
# ---------------------------------------------------------
# Handle to the Firebase Realtime Database used by the services below.
# NOTE(review): the second argument is None - presumably "no authentication";
# confirm against the firebase package documentation.
FBconn = firebase.FirebaseApplication("https://kakadoo-db-default-rtdb.europe-west1.firebasedatabase.app/", None)
# Services are created in dependency order: each one receives the objects it
# needs from the lines above it.  They are bound to module-level names because
# the GUI classes look some of them up as globals (e.g. statistics_service,
# administration_service).
crawler_service = CrawlerService()  # default constructor arguments
index_creator_service = IndexCreatorService(crawler_service)
data_fetcher_service = DataFetcherService(FBconn)
administration_service = AdministrationService(FBconn, index_creator_service, data_fetcher_service)
query_service = QueryService(data_fetcher_service)
statistics_service = StatisticService(data_fetcher_service)
# Render the dashboard only after every service exists.
display_main_dashboard()

##################
# ADMIN PASSWORD:#
#    123456      #
##################

VBox(children=(HTML(value='\n        <div style="\n            background-color: #FFBE5F;\n            padding…

HTML(value='\n    <style>\n      @import url(\'https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&di…