<a href="https://colab.research.google.com/github/muffindonor/cloud_computing_project/blob/main/UpdateHereStatSelector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
# Install the Firebase client library (the `firebase` package, not `firebase-admin`)
!pip install firebase

# Install the required packages if needed
!pip install firebase beautifulsoup4



In [20]:
import requests
from bs4 import BeautifulSoup
import re
from collections import defaultdict
from urllib.parse import urljoin
from firebase import firebase
import nltk

nltk.download('stopwords')
from nltk.corpus import stopwords


class SearchEngine:
    """Crawl pages under a base URL, index their words and sync to Firebase.

    Attributes:
        pages: list of dicts with keys 'id', 'title', 'url' and 'content',
            one per successfully fetched page.
        word_locations: maps a term to a list of
            {'DocID', 'Title', 'Frequency'} dicts built by ``build_index``.
        stop_words: English stop words (NLTK) that are never indexed.
        firebase_conn: connection used to read and write the 'word_index' node.
    """

    # Seconds to wait for an HTTP response; without a timeout a single
    # unresponsive server would block the crawl forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Initialize the search engine"""
        self.pages = []
        self.word_locations = defaultdict(list)  # word -> [{'DocID', 'Title', 'Frequency'}, ...]
        self.stop_words = set(stopwords.words('english'))
        self.firebase_conn = firebase.FirebaseApplication('https://cloudproject-27420-default-rtdb.firebaseio.com/', None)

    def is_valid_term(self, term):
        """
        Validate if a term should be included in the index based on specified rules.

        Rules:
        1. Term must be longer than 2 characters
        2. Term must not contain numbers
        3. Term must only contain English letters
        4. Term must not contain underscores or special characters

        Returns True when the term passes every rule, False otherwise.
        """
        # Check if term is too short (rule 1)
        if len(term) <= 2:
            return False

        # A single letters-only pattern covers rules 2, 3 and 4.
        return re.match(r'^[a-zA-Z]+$', term) is not None

    def fetch_pages(self, base_url, num_pages=5):
        """Fetch up to ``num_pages`` pages linked from ``base_url``.

        Only links whose absolute URL starts with ``base_url`` are followed.
        Each fetched page is appended to ``self.pages``. A failure on one
        link is reported and skipped.

        Returns True when the base page could be processed, False when
        fetching or parsing the base page failed.
        """
        try:
            response = requests.get(base_url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            candidates = (
                urljoin(base_url, a_tag['href'])
                for a_tag in soup.find_all('a', href=True)
            )
            # dict.fromkeys removes duplicates while keeping document order,
            # so the same pages are selected on every run (set() order is not
            # stable across runs).
            unique_links = list(dict.fromkeys(
                url for url in candidates if url.startswith(base_url)
            ))
            links = unique_links[:num_pages]
            print(f"Found {len(unique_links)} links. Processing the first {num_pages}.")

            for link in links:
                try:
                    page_response = requests.get(link, timeout=self.REQUEST_TIMEOUT)
                    # Do not index HTTP error pages as if they were content.
                    page_response.raise_for_status()
                    page_soup = BeautifulSoup(page_response.text, 'html.parser')

                    # Drop elements whose text is not visible to a reader
                    for hidden in page_soup(["script", "style", "meta", "link"]):
                        hidden.decompose()

                    page_text = page_soup.get_text(" ", strip=True)

                    # title.string is None for an empty <title> or one with
                    # nested tags; fall back to the same placeholder.
                    title_tag = page_soup.title
                    title = (title_tag.string if title_tag else None) or 'No Title'

                    self.pages.append({
                        'id': link,
                        'title': title,
                        'url': link,
                        'content': page_text
                    })
                    print(f"Retrieved: {link}")
                except Exception as e:
                    print(f"Error processing {link}: {str(e)}")
                    continue

            return True

        except Exception as e:
            print(f"Error fetching pages: {str(e)}")
            return False

    def merge_index_data(self, existing_data, new_data):
        """Merge index entries already stored remotely with freshly built ones.

        Parameters:
        - existing_data: list of {'Term', 'DocIDs'} dicts as read from
          Firebase (may be None, may contain None holes or malformed
          entries, or may be a dict keyed by position); invalid items are
          skipped instead of raising.
        - new_data: dict mapping term -> list of location dicts, each with
          a 'DocID' key.

        Existing locations win: a new location is only added when its DocID
        is not already present for the term. Terms failing
        ``is_valid_term`` are dropped.

        Returns a list of {'Term': term, 'DocIDs': locations} dicts.
        """
        merged = {}

        # Tolerate a dict in place of a list: its values are the entries.
        if isinstance(existing_data, dict):
            existing_data = existing_data.values()

        for entry in existing_data or []:
            if not isinstance(entry, dict):
                continue  # None hole or malformed item
            term = entry.get('Term')
            doc_ids = entry.get('DocIDs')
            if not isinstance(term, str) or not isinstance(doc_ids, list):
                continue
            if not self.is_valid_term(term):
                continue
            locations = [
                loc for loc in doc_ids
                if isinstance(loc, dict) and 'DocID' in loc
            ]
            if locations:
                merged[term] = locations

        for term, locations in new_data.items():
            if not self.is_valid_term(term):
                continue
            known_locations = merged.setdefault(term, [])
            known_doc_ids = {loc['DocID'] for loc in known_locations}

            # Add only locations for documents not indexed yet
            for new_loc in locations:
                if new_loc['DocID'] not in known_doc_ids:
                    known_locations.append(new_loc)
                    known_doc_ids.add(new_loc['DocID'])

        return [{'Term': term, 'DocIDs': locations} for term, locations in merged.items()]

    def build_index(self):
        """Rebuild the local index from ``self.pages`` and push it to Firebase.

        The local ``word_locations`` map is cleared and rebuilt, then merged
        with the 'word_index' node read from Firebase and written back.
        """
        # Clear local index before building new one
        self.word_locations.clear()

        for page in self.pages:
            content = page['content']
            word_counts = defaultdict(int)

            # Count every plain word that is not a stop word and is valid
            for word in re.findall(r'\b\w+\b', content):
                word_lower = word.lower()
                if word_lower not in self.stop_words and self.is_valid_term(word_lower):
                    word_counts[word_lower] += 1

            # Words with a second capital letter (e.g. CamelCase) are split
            # at capitals and each part is counted in addition to the whole
            # word counted above.
            for compound in re.finditer(r'\b[A-Z][a-zA-Z]*[A-Z][a-zA-Z]*\b', content):
                word = compound.group()
                parts = re.findall(r'[A-Z][a-z]*', word)
                for part in parts:
                    part_lower = part.lower()
                    if part_lower not in self.stop_words and self.is_valid_term(part_lower):
                        word_counts[part_lower] += 1

            # Add counts to index
            for word, count in word_counts.items():
                self.word_locations[word].append({
                    'DocID': page['id'],
                    'Title': page['title'],
                    'Frequency': count
                })

        print("Local index built successfully!")

        existing_index = self.firebase_conn.get('/', 'word_index')
        new_index_data = dict(self.word_locations)
        merged_index = self.merge_index_data(existing_index, new_index_data)
        self.firebase_conn.put('/', 'word_index', merged_index)
        print("Index updated in Firebase successfully!")

    def search(self, query, num_results=5, search_type='AND'):
        """
        Search for a query in the locally indexed data.

        Parameters:
        - query: free-text query; split on word characters, lower-cased,
          stop words and invalid terms removed, duplicates collapsed.
        - num_results: maximum number of results to return.
        - search_type: 'AND' keeps only pages matching every query word;
          any other value keeps pages matching at least one word.

        Returns a list of dicts with 'title', 'url', 'matching_words' and
        'total_frequency', ordered by matched words, then total frequency.
        """
        tokens = (word.lower() for word in re.findall(r'\w+', query))
        # De-duplicate: a repeated word would otherwise make the AND filter
        # below compare a set size against a longer list and never match.
        query_words = list(dict.fromkeys(
            token for token in tokens
            if token not in self.stop_words and self.is_valid_term(token)
        ))
        if not query_words:
            return []

        # Per-page accumulator for the query words it matches
        page_scores = defaultdict(lambda: {'matches': 0, 'total_freq': 0, 'words_matched': set()})

        for query_word in query_words:
            for entry in self.word_locations.get(query_word, []):
                page_id = entry['DocID']
                page_scores[page_id]['matches'] += 1
                page_scores[page_id]['total_freq'] += entry['Frequency']
                page_scores[page_id]['words_matched'].add(query_word)

        if search_type == 'AND':
            # Only include documents that match all query words
            matching_pages = {
                page_id: scores
                for page_id, scores in page_scores.items()
                if len(scores['words_matched']) == len(query_words)
            }
        else:  # OR search
            matching_pages = page_scores

        ranked_results = sorted(
            [(page_id, scores['matches'], scores['total_freq'])
             for page_id, scores in matching_pages.items()],
            key=lambda x: (x[1], x[2]),
            reverse=True
        )

        results = []
        for page_id, matches, total_freq in ranked_results[:num_results]:
            page = next((p for p in self.pages if p['id'] == page_id), None)
            if page:
                results.append({
                    'title': page['title'],
                    'url': page['url'],
                    'matching_words': matches,
                    'total_frequency': total_freq
                })
        return results


# ``search`` used to be defined at module level (taking the engine as its
# first argument); keep that name working for any existing caller.
search = SearchEngine.search

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [21]:
import math
from collections import Counter

class RelevanceScorer:
    """Score documents against a query with a TF-IDF style formula.

    Attributes:
        total_documents: number of distinct DocIDs seen in the index.
        term_document_frequency: maps a term to the number of distinct
            documents that contain it.
    """

    def __init__(self):
        self.total_documents = 0
        self.term_document_frequency = {}  # How many documents contain each term

    def update_statistics(self, index_data):
        """Recompute document statistics from the index data.

        Parameters:
        - index_data: list of {'Term', 'DocIDs'} dicts. Anything that is
          empty or not a list resets the statistics; entries that are not
          dicts or lack a list under 'DocIDs' are ignored.
        """
        if not index_data or not isinstance(index_data, list):
            self.total_documents = 0
            self.term_document_frequency = {}
            return

        all_docs = set()
        term_docs = {}

        for entry in index_data:
            if not isinstance(entry, dict) or not isinstance(entry.get('DocIDs'), list):
                continue
            term = entry.get('Term', 'Unknown')
            # Distinct documents in which this term appears
            docs_with_term = {
                doc.get('DocID')
                for doc in entry['DocIDs']
                if isinstance(doc, dict) and 'DocID' in doc
            }
            term_docs[term] = len(docs_with_term)
            all_docs.update(docs_with_term)

        self.total_documents = len(all_docs)
        self.term_document_frequency = term_docs

    def calculate_idf(self, term):
        """Calculate Inverse Document Frequency for a term.

        Returns 0 for a term that appears in no document, otherwise
        log(1 + total_documents / (1 + documents_with_term)).
        """
        docs_with_term = self.term_document_frequency.get(term, 0)
        if docs_with_term == 0:
            return 0
        return math.log(1 + self.total_documents / (1 + docs_with_term))

    def calculate_tf(self, term_freq, max_freq):
        """Calculate normalized Term Frequency: 0.5 + 0.5 * term_freq / max_freq.

        When ``max_freq`` is zero or negative the ratio is undefined, so the
        0.5 baseline is returned instead of raising ZeroDivisionError.
        """
        if max_freq <= 0:
            return 0.5
        return 0.5 + 0.5 * (term_freq / max_freq)

    def score_document(self, doc_frequencies, query_terms):
        """
        Calculate the relevance score for a document
        Parameters:
        - doc_frequencies: Dict mapping terms to their frequencies in this document
        - query_terms: List of terms in the query

        Returns the sum of TF * IDF over the query terms present in the
        document, divided by the number of query terms. Returns 0 when
        either argument is empty.
        """
        if not doc_frequencies or not query_terms:
            return 0

        # Find maximum frequency in this document for normalization
        max_freq = max(doc_frequencies.values())

        score = 0
        for term in query_terms:
            if term in doc_frequencies:
                tf = self.calculate_tf(doc_frequencies[term], max_freq)
                idf = self.calculate_idf(term)
                score += tf * idf

        # Normalize by number of terms
        return score / len(query_terms)

    def format_score(self, score):
        """Format the score for display with two decimal places."""
        return f"{score:.2f}"

In [22]:
# Logic for search and result screen
from firebase import firebase
import json

class SearchLogic:
    """Query, rank and administer the word index stored in Firebase.

    Attributes:
        firebase_conn: connection used for reading/deleting under '/word_index'.
        index_data: list of entry dicts, each with at least 'Term' and 'DocIDs'.
        scorer: RelevanceScorer fed with ``index_data``.
    """

    def __init__(self, firebase_url):
        self.firebase_conn = firebase.FirebaseApplication(firebase_url, None)
        self.index_data = []  # Initialize as empty list
        # Firebase child key of each entry in index_data (same positions).
        self._index_keys = []
        self.refresh_index()  # Load initial data
        self.scorer = RelevanceScorer()
        self.scorer.update_statistics(self.index_data)

    def refresh_index(self):
        """Reload ``index_data`` from Firebase and normalize its structure.

        Accepts either a list or a dict under '/word_index'. Items that are
        not dicts (for example None holes) are skipped. Every kept entry is
        guaranteed to have 'DocIDs' (default []) and 'Term' (default: the
        dict key, or 'Unknown' for list input). On any error the index is
        reset to empty and the error is printed.
        """
        try:
            index = self.firebase_conn.get('/word_index', None)
            if isinstance(index, dict):
                keyed = [(str(key), value, key) for key, value in index.items()]
            elif isinstance(index, list):
                keyed = [(str(pos), value, 'Unknown') for pos, value in enumerate(index)]
            else:
                keyed = []

            entries = []
            keys = []
            for firebase_key, value, default_term in keyed:
                if not isinstance(value, dict):
                    continue
                entry = dict(value)
                entry.setdefault('Term', default_term)
                entry.setdefault('DocIDs', [])
                entries.append(entry)
                # Remember the real child key: once holes are skipped, the
                # position in ``entries`` no longer equals the Firebase key.
                keys.append(firebase_key)

            self.index_data = entries
            self._index_keys = keys

        except Exception as e:
            print(f"Error fetching index from Firebase: {e}")
            self.index_data = []
            self._index_keys = []

    def fetch_index(self):
        """Return a copy of the current index data"""
        return self.index_data.copy() if self.index_data else []

    def delete_index(self, term):
        """Delete the first index entry whose 'Term' equals ``term``.

        The entry is removed from Firebase using the child key recorded by
        ``refresh_index``; the local index and scorer statistics are then
        refreshed.

        Returns True when an entry was deleted, False when the term was not
        found or an error occurred (the error is printed).
        """
        try:
            keys = getattr(self, '_index_keys', [])
            for position, entry in enumerate(self.index_data):
                if entry.get('Term') != term:
                    continue

                # Fall back to the position only when no key was recorded
                # (e.g. index_data was assigned directly).
                firebase_key = keys[position] if position < len(keys) else str(position)
                self.firebase_conn.delete('/word_index', firebase_key)

                # Refresh local data and dependent statistics
                self.refresh_index()
                self.scorer.update_statistics(self.index_data)
                return True

            print(f"Index '{term}' not found.")
            return False

        except Exception as e:
            print(f"Error deleting index '{term}': {e}")
            return False

    def search_query(self, query, search_type='AND'):
        """Search the index for the whitespace-separated words of ``query``.

        Parameters:
        - query: query string; words are lower-cased and de-duplicated.
        - search_type: 'AND' keeps only documents containing every query
          word; any other value keeps documents containing at least one.

        Returns a list of dicts with 'page_id', 'title', 'frequency' (sum of
        the matched terms' frequencies in that document) and 'ranking'
        (formatted score), ordered by descending score.
        """
        if not self.index_data:
            print("Index data is empty. No results to search.")
            return []

        # De-duplicate so a repeated word cannot make the AND check below
        # compare a set size against a longer list.
        query_words = list(dict.fromkeys(word.lower() for word in query.split()))
        if not query_words:
            return []

        # doc_id -> title, per-term frequencies and the set of matched words
        doc_frequencies = {}

        for term in query_words:
            for entry in self.index_data:
                if entry.get('Term') != term or not isinstance(entry.get('DocIDs'), list):
                    continue
                for occurrence in entry['DocIDs']:
                    if not isinstance(occurrence, dict):
                        continue

                    doc_id = occurrence.get('DocID')
                    if not doc_id:
                        continue

                    if doc_id not in doc_frequencies:
                        doc_frequencies[doc_id] = {
                            'title': occurrence.get('Title', 'No Title'),
                            'term_frequencies': {},
                            'matched_words': set()
                        }

                    doc_frequencies[doc_id]['term_frequencies'][term] = occurrence.get('Frequency', 0)
                    doc_frequencies[doc_id]['matched_words'].add(term)

        scored_results = []
        for doc_id, data in doc_frequencies.items():
            if search_type == 'AND' and len(data['matched_words']) != len(query_words):
                continue

            score = self.scorer.score_document(data['term_frequencies'], query_words)
            scored_results.append((score, {
                'page_id': doc_id,
                'title': data['title'],
                'frequency': sum(data['term_frequencies'].values()),
                'ranking': self.scorer.format_score(score)
            }))

        # Sort on the raw score; the formatted string is rounded and would
        # tie documents whose scores differ beyond two decimals.
        scored_results.sort(key=lambda pair: pair[0], reverse=True)
        return [result for _, result in scored_results]

    def _top_terms_json(self, measure, limit=10):
        """Return JSON for the ``limit`` terms with the largest ``measure(entry)``."""
        try:
            if not self.index_data:
                return json.dumps({"indexes": [], "frequency": []})

            measured = [
                (entry['Term'], measure(entry))
                for entry in self.index_data
                if isinstance(entry, dict) and 'DocIDs' in entry and 'Term' in entry
            ]
            top_terms = sorted(measured, key=lambda pair: pair[1], reverse=True)[:limit]

            # Two parallel lists, as expected by the chart
            indexes = [term for term, _ in top_terms]
            frequencies = [value for _, value in top_terms]
            return json.dumps({"indexes": indexes, "frequency": frequencies})

        except Exception as e:
            print(f"Error fetching top indexes: {e}")
            return json.dumps({"indexes": [], "frequency": []})

    def fetch_top_indexes(self):
        """Return JSON with the top 10 terms by total frequency.

        The JSON object has two parallel lists: 'indexes' (terms) and
        'frequency' (sum of 'Frequency' over the term's DocIDs). Both lists
        are empty when the index is empty or an error occurs.
        """
        return self._top_terms_json(
            lambda entry: sum(
                doc.get('Frequency', 0)
                for doc in entry['DocIDs']
                if isinstance(doc, dict)
            )
        )

    def fetch_top_index_URL(self):
        """Return JSON with the top 10 terms by number of DocIDs (URLs).

        Same shape as ``fetch_top_indexes``; 'frequency' holds the length of
        each term's 'DocIDs' list.
        """
        return self._top_terms_json(lambda entry: len(entry['DocIDs']))

In [23]:
# Style for search and result screen
import ipywidgets as widgets
from IPython.display import display, clear_output
import json
import os
from PIL import Image
import io
import base64

class SearchUI:
    def __init__(self, logic):
        """Store the search backend, reset UI state and load the logo.

        Parameters:
        - logic: backend object; the search screen calls its
          ``search_query`` method.
        """
        # Search backend used by the screens
        self.logic = logic

        # Last chosen search mode; the search screen uses it as the
        # dropdown's initial value.
        self.last_search_type = 'AND'

        # Screen state placeholders
        self.current_index = None
        self.initial_docid = []

        # Logo state; encoded_image is filled in by load_logo_from_github()
        self.image_path, self.encoded_image = None, None
        self.load_logo_from_github()



    def load_logo_from_github(self):
        """Download the logo from GitHub and cache it as a base64 PNG string.

        On success ``self.encoded_image`` holds the base64 text of the image
        re-encoded as an RGBA PNG. On a non-200 response or any error it is
        set to None and a message is printed; no exception propagates.
        """
        # URL to raw content of the image in GitHub
        github_raw_url = "https://raw.githubusercontent.com/muffindonor/cloud_computing_project/main/badgerSearch.png"

        try:
            # Without a timeout an unresponsive server would block the
            # constructor (and therefore the whole UI) indefinitely.
            response = requests.get(github_raw_url, timeout=10)
            if response.status_code != 200:
                print(f"Failed to fetch image from GitHub. Status code: {response.status_code}")
                self.encoded_image = None
                return

            # Open the image from binary content
            img = Image.open(io.BytesIO(response.content))

            # Convert to RGBA if needed
            if img.mode != 'RGBA':
                img = img.convert('RGBA')

            # Re-encode as PNG and convert to base64
            buffer = io.BytesIO()
            img.save(buffer, format='PNG')
            self.encoded_image = base64.b64encode(buffer.getvalue()).decode()
            print("Logo loaded successfully from GitHub")

        except Exception as e:
            print(f"Error loading logo from GitHub: {str(e)}")
            self.encoded_image = None

    def show_search_screen(self):
        """Clear the output area and render the main search screen.

        Displays, in order: the page CSS, the "Admin Page" and "Statistics
        Page" buttons, the logo (the downloaded image when
        ``self.encoded_image`` is set, otherwise a placeholder circle), and
        a container with the search bar, the AND/OR dropdown and the search
        button. Button handlers are defined inline and navigate to the
        password prompt, the statistics page, or the results screen.
        """
        clear_output()

        # CSS injected once per render; it styles the notebook body and the
        # ipywidgets button, text and dropdown elements.
        background_style = '''
        <style>
            body {
                background-color: #1a202c !important;
                min-height: 100vh;
                margin: 0;
                padding: 20px;
                box-sizing: border-box;
            }
            .widget-button button {
                background: linear-gradient(90deg, #ef4444 0%, #f59e0b 100%) !important;
                border: none !important;
                color: white !important;
                border-radius: 20px !important;
                padding: 8px 25px !important;
                font-weight: bold !important;
                transition: transform 0.2s !important;
                box-shadow: none !important;
                margin-top: 30px !important;
            }
            .widget-button button:hover {
                transform: scale(1.05) !important;
            }
            .widget-text input {
                background-color: #2d3748 !important;
                border: none !important;
                color: white !important;
                border-radius: 9999px !important;
                padding: 15px 25px !important;
                width: 100% !important;
                margin-bottom: 10px !important;
            }
            .widget-text input::placeholder {
                color: #718096 !important;
            }
            .admin-button {
                background-color: #3b82f6 !important;
                color: white !important;
                padding: 8px 16px !important;
                border-radius: 6px !important;
                border: none !important;
                cursor: pointer !important;
                position: absolute !important;
                right: 20px !important;
                top: 20px !important;
            }
            .widget-dropdown {
                margin-bottom: 50px !important;
            }
            .widget-dropdown select {
                background: #2d3748 !important;
                border: none !important;
                color: white !important;
                border-radius: 20px !important;
                padding: 8px 15px !important;
                margin: 10px 0 !important;
                background-image: linear-gradient(90deg, #ef4444 0%, #f59e0b 100%) !important;
                background-size: 2px 100% !important;
                background-repeat: no-repeat !important;
                background-position: right !important;
                 z-index: 1000 !important;  /* Ensure dropdown appears above other elements */
            }
            .search-container {
                display: flex !important;
                flex-direction: column !important;
                align-items: center !important;
                gap: 15px !important;
            }
        </style>
        '''



        # Logo markup: embed the downloaded image as a data URI when it is
        # available, otherwise fall back to a placeholder circle.
        if self.encoded_image:
            logo_html = f'''
            <div style="text-align: center; margin: 40px auto;">
                <div style="width: 300px; height: 300px; margin: 0 auto; position: relative; background: transparent;">
                    <img src="data:image/png;base64,{self.encoded_image}"
                         style="width: 300px; height: 300px; border-radius: 0%; object-fit: cover; position: relative; z-index: 1;">
                </div>
            </div>
            '''
        else:
            logo_html = '''
            <div style="text-align: center; margin: 40px auto;">
                <div style="width: 200px; height: 200px; margin: 0 auto; position: relative;">
                    <div style="width: 200px; height: 200px; border-radius: 50%; background: white;
                              display: flex; align-items: center; justify-content: center; position: relative; z-index: 1;">
                        <span style="color: #718096; font-size: 16px;">Logo</span>
                    </div>
                    <div style="position: absolute; top: -4px; left: -4px; right: -4px; bottom: -4px;
                              background: linear-gradient(90deg, #ef4444 0%, #f59e0b 50%, #ef4444 100%);
                              border-radius: 50%; z-index: 0;"></div>
                </div>
            </div>
            '''

        # Admin button (opens the password prompt)
        admin_button = widgets.Button(
            description="Admin Page",
            button_style="warning",
            layout=widgets.Layout(width="auto")
        )

        # Stats button (opens the statistics page)
        stats_button = widgets.Button(
            description="Statistics Page",
            button_style="warning",
            layout=widgets.Layout(width="auto")
        )

        # Search bar
        search_bar = widgets.Text(
            placeholder="Search...",
            layout=widgets.Layout(
                width="400px",
                margin="20px 0 10px 0"
            )
        )

        # Search type dropdown, pre-selected with the mode used last time
        search_type = widgets.Dropdown(
            options=[('AND', 'AND'), ('OR', 'OR')],
            value=self.last_search_type,  # Use the stored value
            description='Search Type:',
            layout=widgets.Layout(
                width="200px",
                margin="0 0 50px 0"
            )
        )

        # Search button
        search_button = widgets.Button(
            description="Badger Search",
            button_style="warning",
            layout=widgets.Layout(
                width="200px",
                margin="30px 0 0 0"
            )
        )

        # Define handlers (each receives the clicked button, which is unused)
        def on_admin_clicked(b):
            self.show_password_prompt()

        def on_stats_clicked(b):
            self.show_stats_page()

        def on_search_clicked(b):
            query = search_bar.value.strip()
            if query:
                # Store the current search type before performing search
                self.last_search_type = search_type.value
                results = self.logic.search_query(query, search_type.value)
                self.show_results_screen(query, results)
            else:
                # Empty query: show an inline error instead of searching
                output = widgets.HTML("<div style='color: #ef4444; text-align: center; margin-top: 10px;'>Please enter a valid query!</div>")
                display(output)

        # Attach handlers
        admin_button.on_click(on_admin_clicked)
        stats_button.on_click(on_stats_clicked)
        search_button.on_click(on_search_clicked)

        # Display elements; the order of these calls is the on-screen order
        display(widgets.HTML(background_style))
        display(admin_button)
        display(stats_button)
        display(widgets.HTML(logo_html))

        # Centered column: search bar on top, then dropdown, spacer, button
        search_container = widgets.VBox([
            search_bar,
            widgets.VBox([
                search_type,
                widgets.Box(layout=widgets.Layout(height="10px")),  # Spacer
                search_button
            ], layout=widgets.Layout(
                display='flex',
                flex_flow='column',
                align_items='center',
                width='100%',
                max_width='700px',
                margin='0 auto'
            ))
        ], layout=widgets.Layout(
            display='flex',
            flex_flow='column',
            align_items='center',
            width='100%',
            max_width='700px',
            margin='0 auto',
            padding='10px 0'
        ))

        display(search_container)


    def show_password_prompt(self):
        """Display a password form that gates access to the admin page.

        The form is appended below the current output. Submitting the
        expected password opens the admin page; anything else shows an
        inline error message.
        """
        password_field = widgets.Password(
            placeholder="Enter Admin's password",
            layout=widgets.Layout(width="250px")
        )
        confirm_button = widgets.Button(
            description="Submit",
            button_style="warning",
            layout=widgets.Layout(width="auto")
        )
        # Starts empty; filled with an error message on a wrong password
        feedback = widgets.HTML('')

        def handle_submit(_button):
            entered = password_field.value.strip()
            # NOTE(review): the password is hard-coded in the notebook;
            # replace with real authentication before sharing it.
            if entered != "123456":
                feedback.value = "<div style='color: red;'>Incorrect password!</div>"
                return
            self.show_admin_page()

        confirm_button.on_click(handle_submit)

        # Form container: heading, input, button and feedback stacked vertically
        heading = widgets.HTML('<h3 style="color: white; text-align: center;">Password Required</h3>')
        panel_layout = widgets.Layout(
            background_color="#2d3748",
            padding="20px",
            width="300px",
            margin="auto",
            border_radius="10px",
            box_shadow="0 0 10px rgba(0, 0, 0, 0.3)"
        )
        panel = widgets.VBox(
            [heading, password_field, confirm_button, feedback],
            layout=panel_layout
        )

        display(panel)


############## CHANGES IN THIS METHOD - COPY FROM HERE TO /SCRIPT #############
    def show_stats_page(self):
        clear_output()
        # Get both JSON strings from the logic instance
        frequency_json = self.logic.fetch_top_indexes()
        url_json = self.logic.fetch_top_index_URL()

        # Parse both JSON strings into Python dictionaries
        frequency_data = json.loads(frequency_json)
        url_data = json.loads(url_json)

        # Extract both sets of lists
        frequency_indexes = frequency_data['indexes']
        frequency_values = frequency_data['frequency']

        url_indexes = url_data['indexes']
        url_values = url_data['frequency']

        background_style ='''
        <style>
            body {
                background-color: #1a202c !important;
                min-height: 100vh;
                margin: 0;
                padding: 20px;
                box-sizing: border-box;
            }
            .widget-button button {
                background: linear-gradient(90deg, #ef4444 0%, #f59e0b 100%) !important;
                border: none !important;
                color: white !important;
                border-radius: 20px !important;
                padding: 8px 25px !important;
                font-weight: bold !important;
            }
            .dropdown select {
                background-color: #2d3748 !important;
                color: white !important;
                padding: 10px !important;
                border-radius: 10px !important;
                width: 100% !important;
                margin-bottom: 20px !important;
            }
        </style>
        '''
        chart_html = f'''
         <div class="chart" style="max-width: 600px; margin: 0 auto;">
          <div style="text-align: center; margin-bottom: 20px;">
            <select id="dataSelector" style="padding: 5px 20px; margin-bottom: 20px;" onchange="switchData(this.value)">
              <option value="frequency">Top 10 Terms Frequencies</option>
              <option value="urls">Top 10 Terms with most URLs</option>
            </select>
            <div class="chart_types">
              <button style="padding: 5px 20px;" onclick="setChartType('bar')">Bars</button>
              <button style="padding: 5px 20px;" onclick="setChartType('line')">Line</button>
              <button style="padding: 5px 20px;" onclick="setChartType('doughnut')">Doughnut</button>
            </div>
          </div>
            <canvas id="myChart" width="400" height="300"></canvas>
        </div>
        <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
        <script>
            let chartInstance = null;
            let currentType = 'bar';

            const chartData = {{
                frequency: {{
                    labels: {json.dumps(frequency_indexes)},
                    datasets: [{{
                        label: 'Frequency of Terms',
                        data: {json.dumps(frequency_values)},
                        backgroundColor: [
                            'rgba(255, 99, 132, 0.2)',
                            'rgba(54, 162, 235, 0.2)',
                            'rgba(255, 206, 86, 0.2)',
                            'rgba(75, 192, 192, 0.2)',
                            'rgba(153, 102, 255, 0.2)',
                            'rgba(255, 159, 64, 0.2)',
                            'rgba(201, 203, 207, 0.2)',
                            'rgba(100, 150, 200, 0.2)',
                            'rgba(80, 200, 120, 0.2)',
                            'rgba(200, 80, 80, 0.2)'
                        ],
                        borderColor: [
                            'rgba(255, 99, 132, 1)',
                            'rgba(54, 162, 235, 1)',
                            'rgba(255, 206, 86, 1)',
                            'rgba(75, 192, 192, 1)',
                            'rgba(153, 102, 255, 1)',
                            'rgba(255, 159, 64, 1)',
                            'rgba(201, 203, 207, 1)',
                            'rgba(100, 150, 200, 1)',
                            'rgba(80, 200, 120, 1)',
                            'rgba(200, 80, 80, 1)'
                        ],
                        borderWidth: 1
                    }}]
                }},
                urls: {{
                    labels: {json.dumps(url_indexes)},
                    datasets: [{{
                        label: 'Top Terms by URL Count',
                        data: {json.dumps(url_values)},
                        backgroundColor: [
                            'rgba(255, 99, 132, 0.2)',
                            'rgba(54, 162, 235, 0.2)',
                            'rgba(255, 206, 86, 0.2)',
                            'rgba(75, 192, 192, 0.2)',
                            'rgba(153, 102, 255, 0.2)',
                            'rgba(255, 159, 64, 0.2)',
                            'rgba(201, 203, 207, 0.2)',
                            'rgba(100, 150, 200, 0.2)',
                            'rgba(80, 200, 120, 0.2)',
                            'rgba(200, 80, 80, 0.2)'
                        ],
                        borderColor: [
                            'rgba(255, 99, 132, 1)',
                            'rgba(54, 162, 235, 1)',
                            'rgba(255, 206, 86, 1)',
                            'rgba(75, 192, 192, 1)',
                            'rgba(153, 102, 255, 1)',
                            'rgba(255, 159, 64, 1)',
                            'rgba(201, 203, 207, 1)',
                            'rgba(100, 150, 200, 1)',
                            'rgba(80, 200, 120, 1)',
                            'rgba(200, 80, 80, 1)'
                        ],
                        borderWidth: 1
                    }}]
                }}
            }};

            function createChart(type, dataType) {{
                const ctx = document.getElementById('myChart');
                const data = chartData[dataType];

                // If a chart exists, destroy it
                if (chartInstance) {{
                    chartInstance.destroy();
                }}

                chartInstance = new Chart(ctx, {{
                    type: type,
                    data: data,
                    options: {{
                        responsive: true,
                        plugins: {{
                            legend: {{
                                display: false,
                                position: 'top',
                            }},
                            title: {{
                                display: true,
                                text: data.datasets[0].label
                            }}
                        }}
                    }}
                }});
            }}

            function setChartType(type) {{
                currentType = type;
                const currentData = document.getElementById('dataSelector').value;
                createChart(type, currentData);
            }}

            function switchData(dataType) {{
                createChart(currentType, dataType);
            }}

            // Initial chart creation with frequency data
            createChart('bar', 'frequency');
        </script>
        '''

        header = widgets.HTML('<h2 style="color: white; text-align: center;">Statistics Dashboard</h2>')

        back_button = widgets.Button(
            description="Back to Search",
            button_style="warning",
            layout=widgets.Layout(width="150px")
        )

        def on_back_clicked(b):
            self.show_search_screen()

        back_button.on_click(on_back_clicked)

        # Display elements
        display(widgets.HTML(background_style))
        display(header)
        display(widgets.HTML(chart_html))
        display(widgets.VBox([
            widgets.HBox([back_button], layout=widgets.Layout(justify_content='center'))
        ], layout=widgets.Layout(align_items='center')))

####################################
    def show_admin_page(self):
        """Render the admin dashboard for editing or deleting index entries.

        Clears the current output and displays a dropdown of index terms
        (built from ``self.logic.index_data``), a text area listing the
        selected entry's DocIDs one URL per line, Update / Delete / Back
        buttons and a status line.

        Each entry of ``self.logic.index_data`` is read as a mapping with a
        ``'Term'`` key and an optional ``'DocIDs'`` list whose items carry a
        ``'DocID'`` key.

        Side effects:
            * Selecting a term sets ``self.current_index``,
              ``self.saved_docids`` and ``self.initial_docid``.
            * "Update DocIDs" calls ``self.logic.update_index`` with the URLs
              added to / removed from the text area relative to
              ``self.initial_docid``.
            * "Delete Index" calls ``self.logic.delete_index`` and drops the
              term from ``self.logic.index_data``.
            * "Back to Search" calls ``self.show_search_screen``.
        """
        # Local import: only the status-line messages below need escaping.
        import html

        clear_output()

        background_style = '''
        <style>
            body {
                background-color: #1a202c !important;
                min-height: 100vh;
                margin: 0;
                padding: 20px;
                box-sizing: border-box;
            }
            .widget-button button {
                background: linear-gradient(90deg, #ef4444 0%, #f59e0b 100%) !important;
                border: none !important;
                color: white !important;
                border-radius: 20px !important;
                padding: 8px 25px !important;
                font-weight: bold !important;
            }
            .dropdown select {
                background-color: #2d3748 !important;
                color: white !important;
                padding: 10px !important;
                border-radius: 10px !important;
                width: 100% !important;
                margin-bottom: 20px !important;
            }
        </style>
        '''

        header = widgets.HTML('<h2 style="color: white; text-align: center;">Admin Dashboard</h2>')

        def index_options():
            # (label, value) pairs: the term is displayed, the whole index
            # entry is the dropdown value handed to the change handler.
            return [(str(idx['Term']), idx) for idx in self.logic.index_data]

        # Dropdown for index selection
        dropdown = widgets.Dropdown(
            options=index_options(),
            # Start with nothing selected. Without this the first term is
            # pre-selected before the observer is attached, so its DocIDs are
            # never loaded and the action buttons stay disabled until the
            # user picks a different term and comes back.
            value=None,
            description='Select Index:',
            layout=widgets.Layout(width='400px')
        )

        # Text area for DocIDs
        text_area = widgets.Textarea(
            placeholder='DocIDs will appear here when an index is selected...',
            layout=widgets.Layout(width='600px', height='300px')
        )

        # Update / delete buttons stay disabled until a term is selected.
        update_button = widgets.Button(
            description="Update DocIDs",
            button_style="warning",
            layout=widgets.Layout(width="150px"),
            disabled=True
        )

        delete_button = widgets.Button(
            description="Delete Index",
            button_style="danger",
            layout=widgets.Layout(width="150px"),
            disabled=True
        )

        back_button = widgets.Button(
            description="Back to Search",
            button_style="warning",
            layout=widgets.Layout(width="150px")
        )

        # Status message (rendered as HTML, so dynamic text must be escaped)
        status = widgets.HTML('')

        def format_docids(docids):
            # Format DocIDs as simple URLs, one per line
            return '\n'.join([doc['DocID'] for doc in docids])

        def parse_docids(text):
            # Parse the text area content into a list of non-empty URLs
            return [url.strip() for url in text.split('\n') if url.strip()]

        def on_dropdown_change(change):
            # A falsy new value means the selection was cleared (e.g. after
            # a delete); keep the previous state in that case.
            if change['new']:
                self.current_index = change['new']
                self.saved_docids = self.current_index.get('DocIDs', []).copy()
                text_area.value = format_docids(self.saved_docids)
                # Baseline against which edits in the text area are diffed.
                self.initial_docid = [doc['DocID'] for doc in self.saved_docids]
                update_button.disabled = False
                delete_button.disabled = False

        def on_update_clicked(b):
            # Parse the text area content into a list of URLs
            current_docids = parse_docids(text_area.value)

            # Diff against the baseline; sets give O(1) membership while the
            # comprehensions keep the original ordering of each list.
            known = set(self.initial_docid)
            wanted = set(current_docids)
            added_urls = [url for url in current_docids if url not in known]
            removed_urls = [url for url in self.initial_docid if url not in wanted]

            # Summarise the changes in the status bar
            parts = []
            if added_urls:
                parts.append(f"Added: {', '.join(added_urls)}")
            if removed_urls:
                parts.append(f"Removed: {', '.join(removed_urls)}")
            changes_message = ", ".join(parts) or "No changes made."

            # The URLs are free text typed by the user: escape before
            # handing them to the HTML widget.
            status.value = html.escape(changes_message)

            self.logic.update_index(
                term=self.current_index['Term'],
                added_urls=added_urls,
                removed_urls=removed_urls,
                current_docids=self.current_index.get('DocIDs', []).copy()
            )

        def on_delete_clicked(b):
            term = self.current_index['Term']
            self.logic.delete_index(term)
            status.value = f"Index '{html.escape(str(term))}' has been deleted."
            self.logic.index_data = [idx for idx in self.logic.index_data if idx['Term'] != term]
            dropdown.options = index_options()
            # Reset the form after the options were rebuilt so no entry
            # appears selected and the action buttons are disabled again.
            update_button.disabled = True
            delete_button.disabled = True
            text_area.value = ""
            dropdown.value = None

        def on_back_clicked(b):
            self.show_search_screen()

        # Attach handlers
        dropdown.observe(on_dropdown_change, names='value')
        back_button.on_click(on_back_clicked)
        update_button.on_click(on_update_clicked)
        delete_button.on_click(on_delete_clicked)

        # Display elements
        display(widgets.HTML(background_style))
        display(widgets.VBox([
            header,
            dropdown,
            text_area,
            widgets.HBox([update_button, delete_button], layout=widgets.Layout(justify_content='center')),
            back_button,
            status
        ], layout=widgets.Layout(align_items='center')))


  #########################################################################
    def show_results_screen(self, query, results):
        """Render the results page for a search query.

        Clears the current output and displays a summary header (total term
        frequency, number of results, average ranking), one link row per
        result and a "Back to Search" button that calls
        ``self.show_search_screen``.

        Args:
            query: The search text, echoed back in the header.
            results: Iterable-with-length of mappings, each read for the
                keys ``'page_id'`` (used as the link target), ``'title'``,
                ``'frequency'`` (summed) and ``'ranking'`` (converted with
                ``float`` for the average).
        """
        # Local import: needed to escape values interpolated into HTML.
        import html

        def esc(value):
            # Query text and crawled titles/URLs are untrusted; escape them
            # (quotes included) so they cannot inject markup or break out of
            # the href attribute.
            return html.escape(str(value), quote=True)

        clear_output()

        display(widgets.HTML('''
      <style>
          body {
              background-color: #1a202c !important;
              color: white !important;
          }
          .widget-button button {
              background: linear-gradient(90deg, #ef4444 0%, #f59e0b 100%) !important;
              border: none !important;
              color: white !important;
              border-radius: 20px !important;
              padding: 8px 25px !important;
              font-weight: bold !important;
          }
      </style>
      '''))

        term_count = sum(r['frequency'] for r in results)
        doc_count = len(results)

        # Average ranking over all results; 0 when there is nothing to average.
        if results:
            avg_ranking = sum(float(r['ranking']) for r in results) / len(results)
        else:
            avg_ranking = 0

        header = widgets.HTML(
            f'''
          <div style="margin: 20px; color: white;">
              <h3>Results for: <i>{esc(query)}</i></h3>
              <p>Your search appeared <b>{esc(term_count)}</b> times in the following links<br>
              About <b>{doc_count}</b> results</p>
              <p>Ranking: <b>{avg_ranking:.2f}</b></p>
          </div>
          '''
        )

        # One flex row per result: link on the left, stats on the right.
        results_html = [
            f'''
              <div style="display: flex; justify-content: space-between;
              align-items: center; margin: 15px 0; padding: 10px; color: white;">
                  <a href="{esc(result['page_id'])}"
                  style="color: #3b82f6; font-size: 16px; text-decoration: none;">
                      {esc(result['title'])}
                  </a>
                  <span style="color: #718096;">
                      Frequency: {esc(result['frequency'])} | Ranking: {esc(result['ranking'])}
                  </span>
              </div>
          '''
            for result in results
        ]

        results_widget = widgets.HTML('\n'.join(results_html))

        back_button = widgets.Button(
            description="Back to Search",
            button_style="warning",
            layout=widgets.Layout(width="150px")
        )

        def on_back_clicked(b):
            self.show_search_screen()

        back_button.on_click(on_back_clicked)
        display(widgets.VBox([header, results_widget, back_button]))



In [24]:
# Backend init: create the search logic object, passing it the project's
# Firebase database URL.
firebase_url = "https://cloudproject-27420-default-rtdb.firebaseio.com/"
logic = SearchLogic(firebase_url)

# UI init: build the widget-based interface on top of the logic object and
# render the search screen as the entry point.
ui = SearchUI(logic)
ui.show_search_screen()

HTML(value='\n        <style>\n            body {\n                background-color: #1a202c !important;\n    …

HTML(value='<h2 style="color: white; text-align: center;">Statistics Dashboard</h2>')

HTML(value='\n         <div class="chart" style="max-width: 600px; margin: 0 auto;">\n          <div style="te…

