# Working Doc Header Search

In [11]:
import os
import glob
import json
import re
import sys


def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    matching_headers = {}
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        markdown_content = ''.join(cell.get('source', []))
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers
#

def generate_html_with_links(base_dir, search_phrase, base_url="http://localhost:8888", case_sensitive=True):
    results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)
    
    html_content = """<html>
    <head>
        <title>Search Results</title>
    </head>
    <body>
        <h1>Search Results</h1>
    """
    
    processed_notebooks = set()  # To avoid duplicates
    
    for notebook, headers in results.items():
        if notebook in processed_notebooks:
            continue
        processed_notebooks.add(notebook)
        
        relative_path = os.path.relpath(notebook, os.path.dirname(os.path.dirname(base_dir)))
        sharable_link = f"{base_url}/lab/tree/{relative_path}"
        html_content += f"<h2><a href=\"{sharable_link}\">{relative_path}</a></h2>\n"
        html_content += "<ul>\n"
        for header in headers:
            html_content += f"    <li>{header}</li>\n"
        html_content += "</ul>\n"

    html_content += """</body>
    </html>"""

    # Append an integer to the filename to avoid overwriting
    counter = 1
    output_file = os.path.join(base_dir, f"search_results_{counter}.html")
    while os.path.exists(output_file):
        counter += 1
        output_file = os.path.join(base_dir, f"search_results_{counter}.html")

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    print(f"HTML file generated: {output_file}")
    return output_file


def main(base_directory, search_term, case_sensitive=True):
    html_file = generate_html_with_links(base_directory, search_term, case_sensitive=case_sensitive)
    print(f"Search results saved to: {html_file}")

# In your notebook, you can then run:
base_directory = "/home/rebelford/sabbat/"
search_term = "heap"
main(base_directory, search_term)


if __name__ == "__main__":
    main(base_directory, search_term)

HTML file generated: /home/rebelford/sabbat/search_results_9.html
Search results saved to: /home/rebelford/sabbat/search_results_9.html
HTML file generated: /home/rebelford/sabbat/search_results_10.html
Search results saved to: /home/rebelford/sabbat/search_results_10.html


In [15]:
import os
import glob
import json
import re

def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    matching_headers = {}
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        markdown_content = ''.join(cell.get('source', []))
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers


def search_with_toggle():
    # Get user input for the directory to be searched
    directory_searched = input("Enter the directory to be searched (default is ~/sabbat): ") or "~/sabbat"
    
    # Expand the user directory
    base_directory = os.path.expanduser(directory_searched)

    # If the input was just 'sabbat', convert it to an absolute path
    if os.path.basename(directory_searched) == 'sabbat' and not os.path.isabs(directory_searched):
        base_directory = os.path.join(os.path.expanduser("~"), 'sabbat')

    # Get user input for search phrase and case sensitivity
    search_phrase = input("Enter the header you would like to search for: ")
    case_sensitive_input = input("Should the search be case sensitive? (y/n): ").strip().lower()
    
    case_sensitive = case_sensitive_input == 'y'

    # Call the search function with user inputs
    results = search_notebooks_for_headers(base_directory, search_phrase, case_sensitive)

    # Print results
    if results:
        for notebook, headers in results.items():
            print(f"Notebook: {notebook}")
            for header in headers:
                print(f"  Header: {header}")
    else:
        print("No matching headers found.")


# Example usage
search_with_toggle()


Enter the directory to be searched (default is ~/sabbat):  ~/sabbat/Weiss
Enter the header you would like to search for:  nmr
Should the search be case sensitive? (y/n):  n


Notebook: /home/rebelford/sabbat/Weiss/gdocMarkdown.ipynb
  Header: ### 9.1.1 NMR splitting
  Header: #  11\. NMR
  Header: ## 11.1 nmrglue
Notebook: /home/rebelford/sabbat/Weiss/SciCompChemNotebooks/chapter_11/chap_11_notebook.ipynb
  Header: # Chapter 11: Nuclear Magnetic Resonance with nmrglue and nmrsim
  Header: ## 11.1 NMR Processing with nmrglue
  Header: ### 11.1.1 Importing Data with nmrglue
  Header: ## 11.2 Simulating NMR with nmrsim
  Header: ### 11.2.4 Dynamic NMR Simulations
Notebook: /home/rebelford/sabbat/Weiss/SciCompChemNotebooks/appendix_00/appendix_00_notebook.ipynb
  Header: ## Simulating NMR Splitting Patterns


In [4]:
import os
import glob
import json
import re

def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    matching_headers = {}
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        markdown_content = ''.join(cell.get('source', []))
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers


def search_with_toggle():
    while True:
        # Get user input for the directory to be searched
        directory_searched = input("Enter the directory to be searched (default is ~/sabbat): ") or "~/sabbat"
        
        # Expand the user directory
        base_directory = os.path.expanduser(directory_searched)
    
        # If the input was just 'sabbat', convert it to an absolute path
        if os.path.basename(directory_searched) == 'sabbat' and not os.path.isabs(directory_searched):
            base_directory = os.path.join(os.path.expanduser("~"), 'sabbat')
    
        # Get user input for search phrase and case sensitivity
        search_phrase = input("Enter the header you would like to search for: ")
        case_sensitive_input = input("Should the search be case sensitive? (y/n): ").strip().lower()
        
        case_sensitive = case_sensitive_input == 'y'
    
        # Call the search function with user inputs
        results = search_notebooks_for_headers(base_directory, search_phrase, case_sensitive)
    
        # Print results
        if results:
            for notebook, headers in results.items():
                print(f"Notebook: {notebook}")
                for header in headers:
                    print(f"  Header: {header}")
        else:
            print("No matching headers found.")
            
        another_search = input("Do you want to perform another search? (yes/no): ").strip().lower()
        if another_search != "yes":
            print("Exiting the search tool.")
            break
        

# Example usage
search_with_toggle()


Enter the directory to be searched (default is ~/sabbat):  sabbat
Enter the header you would like to search for:  heap
Should the search be case sensitive? (y/n):  n


Notebook: /home/rebelford/sabbat/ClassNoteBooks/pp01_gPCHardSoftware.ipynb
  Header: # memory heap
  Header: # HEAP memory block
  Header: ### **1. Variable Assignment and Heap Memory**
  Header: # Chips HEAP integers


Do you want to perform another search? (yes/no):  no


Exiting the search tool.


In [None]:
#adding toggle for second search
import os
import glob
import json
import re

def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    """
    Search all Jupyter notebooks in the given base directory and its subdirectories
    for a specific phrase in markdown headers.

    Args:
        base_dir (str): The base directory to search.
        search_phrase (str): The phrase to search for in markdown headers.
        case_sensitive (bool): Whether the search should be case-sensitive.

    Returns:
        dict: A dictionary where keys are notebook file paths and values are lists of matching headers.
    """
    matching_headers = {}

    # Use glob to find all Jupyter notebooks recursively
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        # Combine the source lines into a single string
                        markdown_content = ''.join(cell.get('source', []))
                        # Use regex to find headers (lines starting with #)
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        # Check if the search phrase is in any of the headers
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers


###
def search_with_toggle(base_dir):
    #directory_searched = input("Enter the directory to be searched with path starting from home directory")
    search_phrase = input("Enter the header you would like to search for: ")
    case_sensitive = input("Should the search be case sensitive? (y/n): ").strip().lower() == 'y'

    results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)

    # Print results
    for notebook, headers in results.items():
        print(f"Notebook: {notebook}")
        for header in headers:
            print(f"  Header: {header}")

    another_search = input("Do you want to perform another search? (yes/no): ").strip().lower()
    if another_search != "yes":
        print("Exiting the search tool.")
        break
    
###
# Example usage
#base_directory = "sabbat"  # Replace with the path to your base directory
base_directory = os.path.expanduser("~/sabbat")
#search_term = "lists"  # Replace with the phrase you want to search for
#search_term = input("Enter the header you would like to search for")

search_with_toggle(base_directory)

#results = search_notebooks_for_headers(base_directory, search_term)


In [None]:
import os
import glob
import json
import re

def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    """
    Search all Jupyter notebooks in the given base directory and its subdirectories
    for a specific phrase in markdown headers.

    Args:
        base_dir (str): The base directory to search.
        search_phrase (str): The phrase to search for in markdown headers.
        case_sensitive (bool): Whether the search should be case-sensitive.

    Returns:
        dict: A dictionary where keys are notebook file paths and values are lists of matching headers.
    """
    matching_headers = {}

    # Use glob to find all Jupyter notebooks recursively
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        # Combine the source lines into a single string
                        markdown_content = ''.join(cell.get('source', []))
                        # Use regex to find headers (lines starting with #)
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        # Check if the search phrase is in any of the headers
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers


###
def search_with_toggle(base_dir):
    #directory_searched = input("Enter the directory to be searched with path starting from home directory")
    search_phrase = input("Enter the header you would like to search for: ")
    case_sensitive = input("Should the search be case sensitive? (y/n): ").strip().lower() == 'y'

    results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)

    # Print results
    for notebook, headers in results.items():
        print(f"Notebook: {notebook}")
        for header in headers:
            print(f"  Header: {header}")
###
# Example usage
#base_directory = "sabbat"  # Replace with the path to your base directory
base_directory = os.path.expanduser("~/sabbat")
#search_term = "lists"  # Replace with the phrase you want to search for
#search_term = input("Enter the header you would like to search for")

search_with_toggle(base_directory)

#results = search_notebooks_for_headers(base_directory, search_term)


import os
import glob
import json
import re

def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    """
    Search all Jupyter notebooks in the given base directory and its subdirectories
    for a specific phrase in markdown headers.

    Args:
        base_dir (str): The base directory to search.
        search_phrase (str): The phrase to search for in markdown headers.
        case_sensitive (bool): Whether the search should be case-sensitive.

    Returns:
        dict: A dictionary where keys are notebook file paths and values are lists of matching headers.
    """
    matching_headers = {}

    # Use glob to find all Jupyter notebooks recursively
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        # Combine the source lines into a single string
                        markdown_content = ''.join(cell.get('source', []))
                        # Use regex to find headers (lines starting with #)
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        # Check if the search phrase is in any of the headers
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers


###
def search_with_toggle(base_dir):
    #directory_searched = input("Enter the directory to be searched with path starting from home directory")
    search_phrase = input("Enter the header you would like to search for: ")
    case_sensitive = input("Should the search be case sensitive? (y/n): ").strip().lower() == 'y'

    results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)

    # Print results
    for notebook, headers in results.items():
        print(f"Notebook: {notebook}")
        for header in headers:
            print(f"  Header: {header}")
###
# Example usage
#base_directory = "sabbat"  # Replace with the path to your base directory
base_directory = os.path.expanduser("~/sabbat")
#search_term = "lists"  # Replace with the phrase you want to search for
#search_term = input("Enter the header you would like to search for")

search_with_toggle(base_directory)

#results = search_notebooks_for_headers(base_directory, search_term)


In [None]:
import sys
import os

sabbat_path = os.path.join(os.path.expanduser('~'), 'sabbat')
sys.path.append(sabbat_path)
import my_fun
my_fun.search_headers()

In [None]:
import os
import glob
import json
import re

def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    """
    Search all Jupyter notebooks in the given base directory and its subdirectories
    for a specific phrase in markdown headers.

    Args:
        base_dir (str): The base directory to search.
        search_phrase (str): The phrase to search for in markdown headers.
        case_sensitive (bool): Whether the search should be case-sensitive.

    Returns:
        dict: A dictionary where keys are notebook file paths and values are lists of matching headers.
    """
    matching_headers = {}

    # Use glob to find all Jupyter notebooks recursively
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        # Combine the source lines into a single string
                        markdown_content = ''.join(cell.get('source', []))
                        # Use regex to find headers (lines starting with #)
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        # Check if the search phrase is in any of the headers
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers

def search_with_toggle(base_dir):
    while True:
        directory_searched= input("Directory to be searched: ")
        search_phrase = input("Enter the header you would like to search for: ")
        case_sensitive = input("Should the search be case sensitive? (y/n): ").strip().lower() == 'y'

        results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)

        # Print results
        for notebook, headers in results.items():
            print(f"Notebook: {notebook}")
            for header in headers:
                print(f"  Header: {header}")

        another_search = input("Do you want to perform another search? (yes/no): ").strip().lower()
        if another_search != "yes":
            print("Exiting the search tool.")
            break
        

# Example usage
#base_directory = os.path.expanduser("~/sabbat")
base_directory = os.path.expanduser("~/{directory searched}")
search_with_toggle(base_directory)


In [None]:
import os
import nbformat

def search_notebook_headers_with_prompt(directory="."):
    while True:
        search_term = input("Enter the search term: ")
        for root, _, files in os.walk(directory):
            for file in files:
                if file.endswith(".ipynb"):
                    notebook_path = os.path.join(root, file)
                    with open(notebook_path, "r", encoding="utf-8") as f:
                        try:
                            notebook = nbformat.read(f, as_version=4)
                            for cell in notebook.cells:
                                if cell.cell_type == "markdown":
                                    for line in cell.source.splitlines():
                                        if line.startswith("#") and search_term in line:
                                            print(f"Notebook: {notebook_path}")
                                            print(f"Header: {line}")
                        except Exception as e:
                            print(f"Error reading {notebook_path}: {e}")
        
        another_search = input("Do you want to perform another search? (yes/no): ").strip().lower()
        if another_search != "yes":
            print("Exiting the search tool.")
            break
search_notebook_headers_with_prompt

In [None]:
import os
import glob
import json
import re

def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    """
    Search all Jupyter notebooks in the given base directory and its subdirectories
    for a specific phrase in markdown headers.

    Args:
        base_dir (str): The base directory to search.
        search_phrase (str): The phrase to search for in markdown headers.
        case_sensitive (bool): Whether the search should be case-sensitive.

    Returns:
        dict: A dictionary where keys are notebook file paths and values are lists of matching headers.
    """
    matching_headers = {}

    # Use glob to find all Jupyter notebooks recursively
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        # Combine the source lines into a single string
                        markdown_content = ''.join(cell.get('source', []))
                        # Use regex to find headers (lines starting with #)
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        # Check if the search phrase is in any of the headers
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers

def search_with_toggle(base_dir):
    while True:
        search_phrase = input("Enter the header you would like to search for: ")
        case_sensitive = input("Should the search be case sensitive? (y/n): ").strip().lower() == 'y'

        results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)

        # Print results
        for notebook, headers in results.items():
            print(f"Notebook: {notebook}")
            for header in headers:
                print(f"  Header: {header}")

        another_search = input("Do you want to perform another search? (yes/no): ").strip().lower()
        if another_search != "yes":
            print("Exiting the search tool.")
            break

# Example usage
base_directory = os.path.expanduser("~/sabbat")
search_with_toggle(base_directory)


In [6]:
import os
import glob
import json
import re

def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    matching_headers = {}
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        markdown_content = ''.join(cell.get('source', []))
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers

'''def generate_html_with_links(base_dir, search_phrase, case_sensitive=True):
    results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)

    html_content = """<html>
<head>
    <title>Search Results</title>
</head>
<body>
    <h1>Search Results</h1>
"""

    for notebook, headers in results.items():
        relative_path = os.path.relpath(notebook, base_dir)
        sharable_link = f"./{relative_path}"
        html_content += f"<h2><a href=\"{sharable_link}\">{relative_path}</a></h2>\n"
        html_content += "<ul>\n"
        for header in headers:
            html_content += f"    <li>{header}</li>\n"
        html_content += "</ul>\n"

    html_content += """</body>
</html>"""

    output_file = os.path.join(base_dir, "search_results.html")
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    print(f"HTML file generated: {output_file}")
    return output_file'''

def generate_html_with_links(base_dir, search_phrase, case_sensitive=True):
    results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)

    html_content = """<html>
<head>
    <title>Search Results</title>
</head>
<body>
    <h1>Search Results</h1>
"""

    for notebook, headers in results.items():
        relative_path = os.path.relpath(notebook, base_dir)
        sharable_link = f"./{relative_path}"
        # Replace 'files' with 'lab/tree' in the URL
        sharable_link = sharable_link.replace('files', 'lab/tree')
        html_content += f"<h2><a href=\"{sharable_link}\">{relative_path}</a></h2>\n"
        html_content += "<ul>\n"
        for header in headers:
            html_content += f"    <li>{header}</li>\n"
        html_content += "</ul>\n"

    html_content += """</body>
</html>"""

    output_file = os.path.join(base_dir, "search_results.html")
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    print(f"HTML file generated: {output_file}")
    return output_file



# Example usage
base_directory = os.path.expanduser("~/sabbat")
search_term = "lists"
generate_html_with_links(base_directory, search_term, case_sensitive=True)


HTML file generated: /home/rebelford/sabbat/search_results.html


'/home/rebelford/sabbat/search_results.html'

In [7]:
import os
import glob
import json
import re
import sys


def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    matching_headers = {}
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        markdown_content = ''.join(cell.get('source', []))
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers

def generate_html_with_links(base_dir, search_phrase, base_url="http://localhost:8888", case_sensitive=True):
    results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)
    
    html_content = """<html>
    <head>
        <title>Search Results</title>
    </head>
    <body>
        <h1>Search Results</h1>
    """
    
    processed_notebooks = set()  # To avoid duplicates

    for notebook, headers in results.items():
        if notebook in processed_notebooks:
            continue
        processed_notebooks.add(notebook)
    
        relative_path = os.path.relpath(notebook, base_dir)
        sharable_link = f"{base_url}/lab/tree/{relative_path}"
        
        html_content += f"<h2>Notebook: {notebook}</h2>\n"
        html_content += f"<p>Link: <a href=\"{sharable_link}\">{sharable_link}</a></p>\n"
        html_content += "<ul>\n"
        for header in headers:
            html_content += f"    <li>{header}</li>\n"
        html_content += "</ul>\n"
    
    html_content += """</body>
    </html>"""
    
    # Append an integer to the filename to avoid overwriting
    counter = 1
    output_file = os.path.join(base_dir, f"search_results_{counter}.html")


    while os.path.exists(output_file):
        counter += 1
        output_file = os.path.join(base_dir, f"search_results_{counter}.html")

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    print(f"HTML file generated: {output_file}")
    return output_file


def main(base_directory, search_term, case_sensitive=True):
    html_file = generate_html_with_links(base_directory, search_term, case_sensitive=case_sensitive)
    print(f"Search results saved to: {html_file}")

# In your notebook, you can then run:
base_directory = "/home/rebelford/sabbat/"
search_term = "heap"
main(base_directory, search_term)


if __name__ == "__main__":
    main(base_directory, search_term)



HTML file generated: /home/rebelford/sabbat/search_results_3.html
Search results saved to: /home/rebelford/sabbat/search_results_3.html
HTML file generated: /home/rebelford/sabbat/search_results_4.html
Search results saved to: /home/rebelford/sabbat/search_results_4.html


In [9]:
import os
import glob
import json
import re
import sys


def search_notebooks_for_headers(base_dir, search_phrase, case_sensitive=True):
    matching_headers = {}
    notebook_files = glob.glob(os.path.join(base_dir, '**', '*.ipynb'), recursive=True)

    for notebook in notebook_files:
        with open(notebook, 'r', encoding='utf-8') as f:
            try:
                notebook_content = json.load(f)
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'markdown':
                        markdown_content = ''.join(cell.get('source', []))
                        headers = re.findall(r'^(#+\s.*)', markdown_content, re.MULTILINE)
                        if case_sensitive:
                            matching = [header for header in headers if search_phrase in header]
                        else:
                            matching = [header for header in headers if search_phrase.lower() in header.lower()]
                        if matching:
                            if notebook not in matching_headers:
                                matching_headers[notebook] = []
                            matching_headers[notebook].extend(matching)
            except json.JSONDecodeError:
                print(f"Error reading {notebook}. Skipping.")

    return matching_headers
#

def generate_html_with_links(base_dir, search_phrase, base_url="http://localhost:8888", case_sensitive=True):
    results = search_notebooks_for_headers(base_dir, search_phrase, case_sensitive)
    
    html_content = """<html>
    <head>
        <title>Search Results</title>
    </head>
    <body>
        <h1>Search Results</h1>
    """
    
    processed_notebooks = set()  # To avoid duplicates
    
    for notebook, headers in results.items():
        if notebook in processed_notebooks:
            continue
        processed_notebooks.add(notebook)
        
        relative_path = os.path.relpath(notebook, os.path.dirname(os.path.dirname(base_dir)))
        sharable_link = f"{base_url}/lab/tree/{relative_path}"
        html_content += f"<h2><a href=\"{sharable_link}\">{relative_path}</a></h2>\n"
        html_content += "<ul>\n"
        for header in headers:
            html_content += f"    <li>{header}</li>\n"
        html_content += "</ul>\n"

    html_content += """</body>
    </html>"""

    # Append an integer to the filename to avoid overwriting
    counter = 1
    output_file = os.path.join(base_dir, f"search_results_{counter}.html")
    while os.path.exists(output_file):
        counter += 1
        output_file = os.path.join(base_dir, f"search_results_{counter}.html")

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    print(f"HTML file generated: {output_file}")
    return output_file


def main(base_directory, search_term, case_sensitive=True):
    html_file = generate_html_with_links(base_directory, search_term, case_sensitive=case_sensitive)
    print(f"Search results saved to: {html_file}")

# In your notebook, you can then run:
base_directory = "/home/rebelford/sabbat/"
search_term = "heap"
main(base_directory, search_term)


if __name__ == "__main__":
    main(base_directory, search_term)



HTML file generated: /home/rebelford/sabbat/search_results_5.html
Search results saved to: /home/rebelford/sabbat/search_results_5.html
HTML file generated: /home/rebelford/sabbat/search_results_6.html
Search results saved to: /home/rebelford/sabbat/search_results_6.html
