In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
import tkinter as tk
from threading import Thread
from tkinter import scrolledtext
import json
import re
import tkinter.messagebox
from itertools import product
from tkinter import filedialog, messagebox
import string

# Set up the webdriver
# These statements run as soon as the cell/module executes: the result of
# ChromeDriverManager().install() is handed to Service, and a Chrome session is
# opened immediately. The resulting module-level `driver` is read as a global
# by the extraction functions defined below.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

def extract_decay_data():
    """Extract the value assigned to 'array_results_1' by a script in the right frame.

    The function locates the <script> element in 'RightFrame' whose text
    mentions 'array_results_1', pulls out the JavaScript expression assigned to
    that element's value, and strips the JavaScript helper calls and string
    concatenation syntax from it.

    Returns:
        The cleaned data string, or the literal string "Data not found" when
        the assignment cannot be located in the script text.

    Raises:
        Any exception Selenium raises when the frame or the script element
        cannot be located is propagated unchanged.
    """
    # Start from the top-level document so the frame lookup by name works even
    # if an earlier operation left the driver inside a frame.
    driver.switch_to.default_content()
    driver.switch_to.frame('RightFrame')
    try:
        script = driver.find_element(By.XPATH, "//script[contains(text(), 'array_results_1')]")
        script_content = script.get_attribute('innerHTML') or ""
    finally:
        # Always leave the frame; previously this call sat after the return
        # statements and was never executed.
        driver.switch_to.default_content()

    match = re.search(r"getElementById\('array_results_1'\)\.value = (.*?);", script_content, re.DOTALL)
    if not match:
        return "Data not found"

    data_string = match.group(1)
    # Unwrap helper calls whose argument should be kept ...
    data_string = re.sub(r'Modif_chaine\d+\((.*?)\)', r'\1', data_string)
    data_string = re.sub(r'SepDec\((.*?)\)', r'\1', data_string)
    # ... and drop helper calls entirely where the original code discarded them.
    data_string = re.sub(r'HL_Unit\((.*?)\)', '', data_string)
    data_string = re.sub(r'Recalc_emi_JS\((.*?)\)', '', data_string)
    # Remove JS quoting/concatenation and turn escaped "\n" into real newlines.
    return data_string.replace("'", "").replace(" + ", "").replace("\\n", "\n").strip()

def extract_page_info():
    """Show URL, title, main HTML, frame HTML and left-frame <select> options in the text area.

    The previous contents of the text area are replaced by the assembled report.
    """
    def frame_html(frame_id):
        """Return the outerHTML of the frame with this element id, or a placeholder string."""
        return driver.execute_script(f"""
            var frame = document.getElementById('{frame_id}');
            if (frame && frame.contentDocument) {{
                return frame.contentDocument.documentElement.outerHTML;
            }}
            return 'Unable to access frame content';
        """)

    # HTML of the current document followed by the three frames, in that order
    page_html = driver.execute_script("return document.documentElement.outerHTML")
    left_html, middle_html, right_html = (
        frame_html(frame_id) for frame_id in ('LeftFrame', 'MiddleFrame', 'RightFrame')
    )

    # Describe every <select> found in the left frame
    driver.switch_to.frame('LeftFrame')
    try:
        select_lines = []
        for dropdown in driver.find_elements(By.TAG_NAME, 'select'):
            labels = [choice.text for choice in dropdown.find_elements(By.TAG_NAME, 'option')]
            select_lines.append(f"Select '{dropdown.get_attribute('name')}': {labels}")
    except Exception as e:
        select_lines = [f"Error interacting with left frame: {str(e)}"]
    driver.switch_to.default_content()

    # Assemble the report piece by piece
    report_parts = [
        f"Current URL: {driver.current_url}\n\n",
        f"Page Title: {driver.title}\n\n",
        "Main HTML Content:\n\n" + page_html + "\n\n",
        "Left Frame Content:\n\n" + left_html + "\n\n",
        "Middle Frame Content:\n\n" + middle_html + "\n\n",
        "Right Frame Content:\n\n" + right_html + "\n\n",
        "Select Elements in Left Frame:\n" + '\n'.join(select_lines) + "\n\n",
    ]

    text_area.delete('1.0', tk.END)
    text_area.insert(tk.END, "".join(report_parts))

def extract_page_info_sectioned():
    """Write the page and its three frames into the text area as numbered, colored sections.

    The main document and each of 'LeftFrame', 'MiddleFrame' and 'RightFrame'
    are split into HTML / HEAD / BODY sections. Sections are numbered with one
    running counter across all documents, and each section's content is colored
    from a fixed palette. The <select> elements of the left frame are listed at
    the end. The previous contents of the text area are discarded.
    """
    # Get the main page source
    main_html = driver.execute_script("return document.documentElement.outerHTML")

    def get_frame_content(frame_id):
        """Return the outerHTML of the frame with this element id, or a placeholder string."""
        return driver.execute_script(f"""
            var frame = document.getElementById('{frame_id}');
            if (frame && frame.contentDocument) {{
                return frame.contentDocument.documentElement.outerHTML;
            }}
            return 'Unable to access frame content';
        """)

    # Get content from each frame
    left_frame = get_frame_content('LeftFrame')
    middle_frame = get_frame_content('MiddleFrame')
    right_frame = get_frame_content('RightFrame')

    def split_html(html):
        """Split HTML into a list of (section_name, [(letter, markup), ...]) tuples."""
        soup = BeautifulSoup(html, 'html.parser')
        sections = []

        # NOTE(review): attribute access on a soup appears to look up a tag of
        # that name, so this branch likely never fires for a real DOCTYPE
        # declaration - confirm before relying on it.
        if soup.doctype:
            sections.append(('DOCTYPE', [('a', str(soup.doctype))]))

        html_tag = soup.find('html')
        if html_tag is None:
            return sections

        # Serialize without extract(): extract() detaches <html> from the soup,
        # which made the head/body lookups below return None every time.
        sections.append(('HTML', [('a', str(html_tag))]))

        # Only direct child tags are listed, so letters run a, b, c, ... without
        # gaps caused by whitespace/text nodes between the tags.
        head_tag = html_tag.find('head')
        if head_tag is not None:
            head_subsections = [
                (chr(97 + i % 26), str(child))
                for i, child in enumerate(head_tag.find_all(recursive=False))
            ]
            sections.append(('HEAD', head_subsections))

        body_tag = html_tag.find('body')
        if body_tag is not None:
            body_subsections = [
                (chr(97 + i % 26), str(child))
                for i, child in enumerate(body_tag.find_all(recursive=False))
            ]
            sections.append(('BODY', body_subsections))

        return sections

    # Clear previous content
    text_area.delete('1.0', tk.END)

    # Define colors for different sections
    colors = ['#FF0000', '#00FF00', '#0000FF', '#FF00FF', '#00FFFF', '#FFFF00', '#800000', '#008000', '#000080']

    def insert_colored_text(text, color):
        """Append text tagged with `color`; the tag name doubles as the foreground color."""
        text_area.insert(tk.END, text, color)
        text_area.tag_configure(color, foreground=color)

    # Insert general information
    insert_colored_text(f"Current URL: {driver.current_url}\n\n", 'black')
    insert_colored_text(f"Page Title: {driver.title}\n\n", 'black')

    def insert_section_content(section_name, subsections, section_number, frame_name=""):
        """Append one section header followed by its lettered subsections."""
        color = colors[section_number % len(colors)]
        prefix = f"{frame_name} Frame " if frame_name else ""
        insert_colored_text(f"Section {section_number} - {prefix}{section_name}:\n", 'black')
        for subsection_letter, subsection_content in subsections:
            insert_colored_text(f"  {subsection_letter}. ", 'black')
            insert_colored_text(f"{subsection_content}\n", color)
        insert_colored_text("\n", 'black')

    # Insert main HTML content
    insert_colored_text("Main HTML Content:\n\n", 'black')
    section_counter = 0
    for section_name, subsections in split_html(main_html):
        section_counter += 1
        insert_section_content(section_name, subsections, section_counter)

    # Insert frame contents (the section counter keeps running across frames)
    for frame_name, frame_content in [("Left", left_frame), ("Middle", middle_frame), ("Right", right_frame)]:
        insert_colored_text(f"{frame_name} Frame Content:\n\n", 'black')
        for section_name, subsections in split_html(frame_content):
            section_counter += 1
            insert_section_content(section_name, subsections, section_counter, frame_name)

    # Insert select elements info
    insert_colored_text("Select Elements in Left Frame:\n", 'black')
    try:
        driver.switch_to.frame('LeftFrame')
        for select in driver.find_elements(By.TAG_NAME, 'select'):
            options = [option.text for option in select.find_elements(By.TAG_NAME, 'option')]
            insert_colored_text(f"Select '{select.get_attribute('name')}': {options}\n", 'black')
    except Exception as e:
        insert_colored_text(f"Error interacting with left frame: {str(e)}\n", 'black')
    finally:
        # Return to the top-level document whether or not the frame was usable.
        driver.switch_to.default_content()


def display_specific_section():
    """Open a dialog asking for a section number and show only that section.

    The section is looked up in the current text-area content by its
    "Section N - " header. On success the text area is replaced by the matched
    section and the dialog closes; otherwise a message box explains the
    problem and the dialog stays open.
    """
    def show_section():
        try:
            section_number = int(section_entry.get())
        except ValueError:
            tk.messagebox.showerror("Invalid Input", "Please enter a valid section number.")
            return

        content = text_area.get('1.0', tk.END)
        # The lookahead requires the full "Section N+1 - " header form. A bare
        # "Section N+1" would also stop at that phrase inside page content, or
        # at a longer number such as "Section 20" when N+1 is 2.
        pattern = f"Section {section_number} - .*?(?=Section {section_number + 1} - |$)"
        match = re.search(pattern, content, re.DOTALL)
        if match is None:
            tk.messagebox.showinfo("Section Not Found", f"Section {section_number} not found.")
            return

        text_area.delete('1.0', tk.END)  # Clear the main text area
        text_area.insert(tk.END, match.group())  # Insert the selected section
        section_window.destroy()  # Close the input window

    section_window = tk.Toplevel(root)
    section_window.title("Display Specific Section")
    section_window.geometry("300x100")

    tk.Label(section_window, text="Enter section number:").pack(pady=5)
    section_entry = tk.Entry(section_window)
    section_entry.pack(pady=5)
    tk.Button(section_window, text="Display Section", command=show_section).pack(pady=5)

def display_html_code():
    """Replace the text area's contents with the outerHTML of the driver's current document."""
    page_source = driver.execute_script("return document.documentElement.outerHTML")

    # Wipe whatever was shown before, then show the raw markup
    text_area.delete('1.0', tk.END)
    text_area.insert(tk.END, page_source)

def add_line_numbers():
    """Prefix every line currently in the text area with "N. ", counting from 1."""
    rows = text_area.get('1.0', tk.END).splitlines()
    numbered_text = "\n".join(
        f"{row_number}. {row}" for row_number, row in enumerate(rows, start=1)
    )

    # Swap the old content for the numbered version
    text_area.delete('1.0', tk.END)
    text_area.insert(tk.END, numbered_text)

def display_html_range():
    """Open a dialog asking for a start/end line and show that slice of the page HTML.

    Line numbers are 1-based and inclusive. The displayed lines keep their
    original numbers as an "N. " prefix.
    """
    def show_range():
        try:
            first = int(start_entry.get())
            last = int(end_entry.get())
            html_lines = driver.execute_script("return document.documentElement.outerHTML").split('\n')

            # Valid only when 1 <= first <= last <= number of lines
            if not (1 <= first <= last <= len(html_lines)):
                tk.messagebox.showerror("Invalid Range", f"Please enter a valid range between 1 and {len(html_lines)}.")
                return

            numbered = '\n'.join(
                f"{line_number}. {line}"
                for line_number, line in enumerate(html_lines[first - 1:last], start=first)
            )

            text_area.delete('1.0', tk.END)
            text_area.insert(tk.END, numbered)
            range_window.destroy()
        except ValueError:
            tk.messagebox.showerror("Invalid Input", "Please enter valid line numbers.")

    # Build the small input dialog
    range_window = tk.Toplevel(root)
    range_window.title("Display HTML Range")
    range_window.geometry("300x150")

    tk.Label(range_window, text="Enter start line number:").pack(pady=5)
    start_entry = tk.Entry(range_window)
    start_entry.pack(pady=5)

    tk.Label(range_window, text="Enter end line number:").pack(pady=5)
    end_entry = tk.Entry(range_window)
    end_entry.pack(pady=5)

    tk.Button(range_window, text="Display Range", command=show_range).pack(pady=5)

def extract_visible_text():
    """Show the text of the top-level page, without <script>/<style> content, in the text area.

    Each output line is a non-empty fragment obtained by stripping every text
    line and splitting it on double spaces.
    """
    # Work on the top-level document, not on a frame
    driver.switch_to.default_content()

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Script and style bodies are not visible text
    for hidden_node in soup(["script", "style"]):
        hidden_node.decompose()

    fragments = []
    for raw_line in soup.get_text().splitlines():
        for piece in raw_line.strip().split("  "):
            piece = piece.strip()
            if piece:
                fragments.append(piece)

    text_area.delete('1.0', tk.END)
    text_area.insert(tk.END, '\n'.join(fragments))

def extract_visible_text_from_frames():
    """Show the text of the left, middle and right frames in the text area.

    For each frame the page source is parsed, <script>/<style> elements are
    dropped, and the remaining text is reduced to its non-empty fragments.
    A frame that cannot be processed contributes an "Unable to access ..."
    line instead. The driver is returned to the top-level document afterwards.
    """
    def extract_from_frame(frame_id):
        """Return the text of one frame, or a placeholder message on failure."""
        driver.switch_to.default_content()
        try:
            driver.switch_to.frame(frame_id)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            for script in soup(["script", "style"]):
                script.decompose()
            lines = (line.strip() for line in soup.get_text().splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            return '\n'.join(chunk for chunk in chunks if chunk)
        except Exception:
            # Best effort per frame. Catching Exception rather than using a
            # bare except lets KeyboardInterrupt/SystemExit propagate.
            return f"Unable to access {frame_id} frame"

    try:
        left_text = extract_from_frame('LeftFrame')
        middle_text = extract_from_frame('MiddleFrame')
        right_text = extract_from_frame('RightFrame')
    finally:
        # Do not leave the driver inside the last frame visited.
        driver.switch_to.default_content()

    # Combine and display the extracted text
    combined_text = f"Left Frame:\n{left_text}\n\nMiddle Frame:\n{middle_text}\n\nRight Frame:\n{right_text}"
    text_area.delete('1.0', tk.END)  # Clear previous content
    text_area.insert(tk.END, combined_text)

def extract_visible_content_range():
    """Open a dialog asking for a line range and show the text of those lines.

    The range refers to lines of the current text-area content (1-based,
    inclusive). If every selected line starts with a numeric "N." prefix, the
    prefix is removed first. Each line is then parsed as HTML, and lines with
    no text are dropped. The survivors are numbered consecutively from the
    start line.
    """
    def show_visible_range():
        try:
            first = int(start_entry.get())
            last = int(end_entry.get())

            all_lines = text_area.get('1.0', tk.END).splitlines()

            # Valid only when 1 <= first <= last <= number of lines
            if not (1 <= first <= last <= len(all_lines)):
                tk.messagebox.showerror("Invalid Range", f"Please enter a valid range between 1 and {len(all_lines)}.")
                return

            chosen = all_lines[first - 1:last]

            # Strip the "N." prefixes only when all chosen lines carry one
            if all(row.partition('.')[0].isdigit() for row in chosen):
                chosen = [row.partition('.')[2].strip() for row in chosen]

            visible_rows = []
            for row in chosen:
                fragment = BeautifulSoup(row, 'html.parser')
                for hidden_node in fragment(["script", "style"]):
                    hidden_node.decompose()
                row_text = fragment.get_text().strip()
                if row_text:
                    visible_rows.append(row_text)

            numbered = '\n'.join(
                f"{number}. {row_text}" for number, row_text in enumerate(visible_rows, start=first)
            )

            text_area.delete('1.0', tk.END)
            text_area.insert(tk.END, numbered)
            range_window.destroy()
        except ValueError:
            tk.messagebox.showerror("Invalid Input", "Please enter valid line numbers.")

    # Build the small input dialog
    range_window = tk.Toplevel(root)
    range_window.title("Display Visible Content Range")
    range_window.geometry("300x150")

    tk.Label(range_window, text="Enter start line number:").pack(pady=5)
    start_entry = tk.Entry(range_window)
    start_entry.pack(pady=5)

    tk.Label(range_window, text="Enter end line number:").pack(pady=5)
    end_entry = tk.Entry(range_window)
    end_entry.pack(pady=5)

    tk.Button(range_window, text="Display Visible Range", command=show_visible_range).pack(pady=5)

def extract_energy_intensity():
    """Find "number, number" pairs in the text area and display them space-separated."""
    text = text_area.get("1.0", "end-1c")
    pairs = re.findall(r'(\d+(?:\.\d+)?),\s*(\d+(?:\.\d+)?)', text)

    flattened = []
    for energy, intensity in pairs:
        flattened.append(f"{energy} {intensity}")

    display_result(' '.join(flattened))

def transform_input():
    """Append a spreadsheet-style column label to every word after the first.

    The text area is split on whitespace. The word at position 0 is kept as
    is, and the word at position i (i >= 1) is followed by the label for index
    i: B, C, ..., Z, AA, AB, ... The result is displayed space-separated.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def label_for(index):
        """Return the letter label for a zero-based index (0 -> A, 25 -> Z, 26 -> AA)."""
        label = ''
        while index >= 0:
            index, remainder = divmod(index, 26)
            label = alphabet[remainder] + label
            index -= 1
        return label

    tokens = text_area.get("1.0", "end-1c").split()
    pieces = [
        token if position == 0 else f"{token} {label_for(position)}"
        for position, token in enumerate(tokens)
    ]

    display_result(' '.join(pieces))

def select_text_between_letters():
    """Display the text from the first start token through the next end token.

    Both tokens are read from the start/end entry widgets and matched
    literally. The end token is searched for only after the start token, so an
    end token that equals or overlaps the start token does not end the
    selection immediately. If the end token is empty or does not occur after
    the start token, everything from the start token onward is kept. If the
    start token does not occur at all, an info box is shown and the text area
    is left unchanged.
    """
    content = text_area.get("1.0", "end-1c")
    start_token = start_letter_entry.get()
    end_token = end_letter_entry.get()

    start_index = content.find(start_token)
    if start_index == -1:
        messagebox.showinfo("No Match", f"'{start_token}' not found in the text")
        return

    # Begin looking for the end token just past the start token.
    search_from = start_index + len(start_token)
    end_index = content.find(end_token, search_from) if end_token else -1

    if end_index == -1:
        # No usable end token: keep all text from the start token
        final_result = content[start_index:]
    else:
        # Keep text up to and including the end token
        final_result = content[start_index:end_index + len(end_token)]

    display_result(final_result)

def display_result(result):
    """Replace the whole content of the text area with `result`."""
    first_index, last_index = "1.0", "end"
    text_area.delete(first_index, last_index)
    text_area.insert(first_index, result)

def remove_letters():
    """Strip every ASCII letter from the text area and confirm with an info box."""
    letter_pattern = re.compile(r'[a-zA-Z]')

    # Read the current text and drop upper- and lowercase letters
    stripped_text = letter_pattern.sub('', text_area.get("1.0", "end-1c"))

    # Put the letter-free text back in place of the old content
    text_area.delete("1.0", "end")
    text_area.insert("1.0", stripped_text)

    messagebox.showinfo("Letters Removed", "All letters have been removed from the text.")

def save_text_to_file():
    """Write the content of the `text_entry` widget to a fixed file path.

    The target directory is created when missing. Success and failure are both
    reported through message boxes.

    NOTE(review): `text_entry` is read as a module-level global, so the UI
    builder has to expose it as one - confirm.
    """
    # Imported here because the file's import block does not bring in `os`.
    import os

    input_text = text_entry.get()

    try:
        # Define the file path
        output_path = r"C:\Users\admin\Documents\jupyter_lab\application_cea\user_input.txt"

        # Ensure the directory exists
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # Save the input text to the file
        with open(output_path, 'w') as file:
            file.write(input_text)

        messagebox.showinfo(" Saved", f"Text has been saved to {output_path}")
    except Exception as e:
        # UI callback boundary: report the problem instead of letting it vanish
        messagebox.showerror("Error", f"An error occurred: {str(e)}")

def save_numbers_to_file(input_numbers, filepath):
    """Save a list of numbers as two tab-separated rows, 'SI1' and 'SP1'.

    Values at even positions (0, 2, 4, ...) go to the SI1 row and values at
    odd positions (1, 3, 5, ...) go to the SP1 row.

    Args:
        input_numbers: A list of numbers (floats).
        filepath: The path to the file where the numbers will be saved. Any
            missing parent directory is created; a bare file name is written
            to the current working directory.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # Imported here because the file's import block does not bring in `os`.
    import os

    # os.makedirs('') raises, so only create a directory when the path has one.
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Split by position using slicing: even indices -> SI1, odd indices -> SP1
    si1 = input_numbers[::2]
    sp1 = input_numbers[1::2]

    with open(filepath, 'w') as file:
        file.write('SI1\t' + '\t'.join(map(str, si1)) + '\n')
        file.write('SP1\t' + '\t'.join(map(str, sp1)) + '\n')

def on_save_button_click():
    """Parse the numbers typed into the `entry` widget and save them to a fixed file.

    The entry text is split on whitespace and each token converted to float.
    Parse errors and file-system errors are reported through error boxes;
    success is confirmed with an info box.

    NOTE(review): `entry` is read as a module-level global, so the UI builder
    has to expose it as one - confirm.
    """
    input_numbers = entry.get()
    try:
        numbers = [float(num) for num in input_numbers.split()]
    except ValueError:
        messagebox.showerror("Error", "Please enter a valid list of numbers separated by spaces.")
        return

    output_path = r"C:\Users\admin\Documents\jupyter_lab\application_cea\numbers.txt"
    try:
        save_numbers_to_file(numbers, output_path)
    except OSError as e:
        # Without this, an unwritable path surfaced only as a console traceback.
        messagebox.showerror("Error", f"An error occurred: {str(e)}")
        return

    messagebox.showinfo("Success", f"Numbers have been saved to {output_path}")

def split_numbers_and_save():
    """Split the numbers in the text area into SI1/SP1 rows and save them to a chosen file.

    Numbers at even positions form the SI1 row and numbers at odd positions
    the SP1 row. If the user picks a file, the two rows are written to it and
    also shown in the text area. Cancelling the dialog changes nothing.
    """
    raw_text = text_area.get("1.0", "end-1c")

    # Every run of digits, optionally with a decimal part, counts as a number
    tokens = re.findall(r'\d+\.?\d*', raw_text)

    first_row = ' '.join(tokens[::2])
    second_row = ' '.join(tokens[1::2])
    output = f"SI1\t{first_row}\nSP1\t{second_row}"

    try:
        file_path = filedialog.asksaveasfilename(defaultextension=".txt", filetypes=[("Text files", "*.txt")])
        if not file_path:
            return

        with open(file_path, 'w') as file:
            file.write(output)

        # Mirror the saved content in the text area
        text_area.delete("1.0", "end")
        text_area.insert("1.0", output)

        messagebox.showinfo("Success", f"Numbers have been split into SI1 and SP1 lines and saved to {file_path}")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred: {str(e)}")

def create_tkinter_app():
    """Build the main window, wire every button to its handler, and run the Tk main loop.

    The widgets that handlers read directly (`root`, `text_area`,
    `start_letter_entry`, `end_letter_entry`, `text_entry`, `entry`) are
    published as module-level globals. This call blocks in `root.mainloop()`
    until the loop is quit.
    """
    # text_entry and entry must be global too: save_text_to_file() and
    # on_save_button_click() look them up by those names at click time.
    global root, text_area, start_letter_entry, end_letter_entry, text_entry, entry
    root = tk.Tk()
    root.title("Page Info Extractor")
    root.geometry("1000x800")

    text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=100, height=40)
    text_area.pack(padx=10, pady=10)

    # Main button row; the list order is the left-to-right packing order.
    button_frame = tk.Frame(root)
    button_frame.pack(pady=5)

    toolbar_buttons = [
        ("Extract Page Info", extract_page_info),
        ("Extract Sectioned Page Info", extract_page_info_sectioned),
        ("Display Specific Section", display_specific_section),
        ("Extract Decay Data", lambda: text_area.insert(tk.END, extract_decay_data() + "\n\n")),
        ("Save Decay Data", save_decay_data),
        ("Display HTML Code", display_html_code),
        ("Add Line Numbers", add_line_numbers),
        ("Display HTML Range", display_html_range),
        ("Extract Energy and Intensity", extract_energy_intensity),
        ("Extract Visible Content Range", extract_visible_content_range),
        ("Transform Input", transform_input),
        ("Remove Letters", remove_letters),
        ("Split Numbers and Save", split_numbers_and_save),
    ]
    for label, handler in toolbar_buttons:
        tk.Button(button_frame, text=label, command=handler).pack(side=tk.LEFT, padx=5)

    # Row for selecting text between a start and an end token
    letter_frame = tk.Frame(root)
    letter_frame.pack(pady=5)

    tk.Label(letter_frame, text="Start Letter:").pack(side=tk.LEFT)
    start_letter_entry = tk.Entry(letter_frame, width=5)
    start_letter_entry.pack(side=tk.LEFT, padx=5)

    tk.Label(letter_frame, text="End Letter:").pack(side=tk.LEFT)
    end_letter_entry = tk.Entry(letter_frame, width=5)
    end_letter_entry.pack(side=tk.LEFT, padx=5)

    select_letters_button = tk.Button(letter_frame, text="Select Text Between Letters", command=select_text_between_letters)
    select_letters_button.pack(side=tk.LEFT, padx=5)

    # Row for free-text input and saving it to a file
    text_frame = tk.Frame(root)
    text_frame.pack(pady=5)

    tk.Label(text_frame, text="Enter text to save:").pack(side=tk.LEFT)
    text_entry = tk.Entry(text_frame, width=50)
    text_entry.pack(side=tk.LEFT, padx=5)

    save_text_button = tk.Button(text_frame, text="Save Text to File", command=save_text_to_file)
    save_text_button.pack(side=tk.LEFT, padx=5)

    # Entry for a whitespace-separated list of numbers and its save button
    entry_label = tk.Label(root, text="Enter a list of numbers separated by spaces:")
    entry_label.pack(pady=10)
    entry = tk.Entry(root, width=50)
    entry.pack(pady=5)

    save_button = tk.Button(root, text="Save to File", command=on_save_button_click)
    save_button.pack(pady=20)

    quit_button = tk.Button(root, text="Quit", command=root.quit)
    quit_button.pack(pady=10)

    root.mainloop()

def save_decay_data():
    """Extract the decay data, save it to 'decay_data.txt' and show it in the text area.

    When extraction reports "Data not found", nothing is written and a warning
    is shown. This avoids overwriting an existing file with the failure
    message and then announcing success.
    """
    data = extract_decay_data()
    if data == "Data not found":
        tk.messagebox.showwarning("Data Not Found", "No decay data was found on the current page; nothing was saved.")
        return

    with open('decay_data.txt', 'w') as f:
        f.write(data)
    text_area.delete('1.0', tk.END)  # Clear previous content
    text_area.insert(tk.END, data)  # Display the data in the text area
    tk.messagebox.showinfo("Data Saved", "Decay data has been saved to 'decay_data.txt' and displayed in the text area.")

# Start the Tkinter app in a separate thread
# create_tkinter_app() blocks in root.mainloop(), so calling it directly here
# would keep the statements below from running until the loop is quit.
# NOTE(review): Tk is normally driven from the main thread - confirm that
# running the whole UI on a worker thread is reliable on the target platform.
thread = Thread(target=create_tkinter_app)
thread.start()

print("Tkinter application started.")

# Navigate to the webpage
driver.get('http://www.lnhb.fr/Laraweb/')

# Usage hints for the person running the notebook cell.
# NOTE(review): "waindow" in the last message looks like a typo for "window".
print("Webpage opened and Tkinter application started.")
print("You can now interact with the webpage and use the 'Extract Page Info' or 'Extract Sectioned Page Info' buttons.")
print("When you're done, click 'Quit' in the Tkinter waindow and run the next cell to close the browser.")

Tkinter application started.
Webpage opened and Tkinter application started.
You can now interact with the webpage and use the 'Extract Page Info' or 'Extract Sectioned Page Info' buttons.
When you're done, click 'Quit' in the Tkinter waindow and run the next cell to close the browser.
