# In [None]:  (Jupyter cell prompt left over from the notebook export)
import os
import tkinter as tk
from tkinter import ttk, scrolledtext, simpledialog, messagebox
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import string
import re  # Add this import for the re.sub function

class JANISWebApp:
    """Tkinter front end for viewing and slicing text scraped from JANIS Web.

    Constructing the app starts a Selenium-driven Chrome session on
    ``START_URL`` and builds a two-tab window:

    * "Summary Content" holds the cleaned text of the page body.
    * "Numbered Content" holds the same text with a line-number gutter and a
      header row of column letters.

    "Select Data" lets the user pick a spreadsheet-like range such as
    ``A2:C3``. Rows are the line numbers of the numbered view; columns are the
    whitespace-separated words of each line (A = first word after the line
    number), not character positions.
    """

    # Page opened in the browser as soon as the driver is created.
    START_URL = "https://www.oecd-nea.org/janisweb/tree/RDD"

    # One column letter + row number on each side of a colon, e.g. "A2:C3".
    _RANGE_PATTERN = re.compile(r'^([A-Za-z])(\d+):([A-Za-z])(\d+)$')

    def __init__(self, master):
        """Start the browser session and build the widgets inside *master*.

        Args:
            master: The Tk root (or toplevel) window that hosts the app.
        """
        self.master = master
        master.title("JANIS Web Interaction")
        master.geometry("1200x700")

        self.driver = None
        self.setup_driver()

        self.notebook = ttk.Notebook(master)
        self.notebook.pack(fill=tk.BOTH, expand=True)

        self.content_frame = ttk.Frame(self.notebook)
        self.numbered_content_frame = ttk.Frame(self.notebook)

        self.notebook.add(self.content_frame, text="Summary Content")
        self.notebook.add(self.numbered_content_frame, text="Numbered Content")

        self.content_text = scrolledtext.ScrolledText(self.content_frame, wrap=tk.WORD)
        self.content_text.pack(fill=tk.BOTH, expand=True)

        # No wrapping here so that every source line stays on one display line.
        self.numbered_content_text = scrolledtext.ScrolledText(self.numbered_content_frame, wrap=tk.NONE)
        self.numbered_content_text.pack(fill=tk.BOTH, expand=True)

        self.button_frame = ttk.Frame(master)
        self.button_frame.pack(fill=tk.X)

        self.load_button = ttk.Button(self.button_frame, text="Load Page", command=self.load_page)
        self.load_button.pack(side=tk.LEFT, padx=5, pady=5)

        self.extract_button = ttk.Button(self.button_frame, text="Extract Data", command=self.extract_data)
        self.extract_button.pack(side=tk.LEFT, padx=5, pady=5)

        self.select_data_button = ttk.Button(self.button_frame, text="Select Data", command=self.select_data)
        self.select_data_button.pack(side=tk.LEFT, padx=5, pady=5)

        self.quit_button = ttk.Button(self.button_frame, text="Quit", command=self.quit)
        self.quit_button.pack(side=tk.RIGHT, padx=5, pady=5)

        # Closing the window via the title bar must also shut the browser
        # down; otherwise the Chrome/driver processes are left running.
        master.protocol("WM_DELETE_WINDOW", self.quit)

    def setup_driver(self):
        """Create the Chrome WebDriver and open ``START_URL`` in it."""
        service = Service(ChromeDriverManager().install())
        self.driver = webdriver.Chrome(service=service)
        self.driver.get(self.START_URL)

    def clean_text(self, text):
        """Return *text* with every '%' and every 'eV' substring removed."""
        return re.sub(r'%|eV', '', text)

    def update_numbered_content(self, content):
        """Replace the "Numbered Content" tab with a numbered copy of *content*.

        The first line written is a header of the letters A-Z; every following
        line is a right-aligned 1-based line number, one space, and the
        cleaned source line.

        Args:
            content: Text to display; it is split on newline characters.
        """
        lines = content.split('\n')
        line_number_width = len(str(len(lines)))

        # Header of column letters; select_data skips this first line.
        letter_header = '    ' + ''.join(f'{letter:<3}' for letter in string.ascii_uppercase)
        numbered_lines = [
            f'{number:>{line_number_width}} {self.clean_text(line)}'
            for number, line in enumerate(lines, start=1)
        ]

        # A single insert is much cheaper than one widget call per line.
        self.numbered_content_text.delete("1.0", tk.END)
        self.numbered_content_text.insert(
            tk.END, '\n'.join([letter_header] + numbered_lines) + '\n'
        )

    def load_page(self):
        """Copy the visible text of the current browser page into both tabs.

        Waits up to 10 seconds for the ``<body>`` element. Any failure is
        printed and shown to the user in an error dialog.
        """
        if self.driver is None:
            messagebox.showerror("Error", "The browser session is not available.")
            return

        try:
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            page_content = self.driver.find_element(By.TAG_NAME, "body").text
        except Exception as e:  # GUI boundary: report instead of crashing
            print(f"An error occurred: {e}")
            messagebox.showerror("Error", f"An error occurred: {e}")
            return

        cleaned_content = self.clean_text(page_content)
        self.content_text.delete("1.0", tk.END)
        self.content_text.insert(tk.END, cleaned_content)

        self.update_numbered_content(cleaned_content)

    def extract_data(self):
        """Rebuild the numbered view from the text in the summary tab."""
        # "end-1c" leaves out the newline Tk always keeps at the end of a text
        # widget, which would otherwise show up as an extra numbered line.
        content = self.content_text.get("1.0", "end-1c")
        self.update_numbered_content(self.clean_text(content))

    @staticmethod
    def _parse_range(range_input):
        """Parse a range such as ``"A2:C3"``.

        Whitespace is ignored, column letters are case-insensitive, and the
        two corners may be given in either order.

        Returns:
            ``(start_col, end_col, start_row, end_row)`` with 0-based column
            indices (A = 0) and 1-based, inclusive row numbers.

        Raises:
            ValueError: If the text is not of the form letter+number, a colon,
                letter+number, or if a row number is smaller than 1.
        """
        compact = ''.join(range_input.split())
        match = JANISWebApp._RANGE_PATTERN.match(compact)
        if match is None:
            raise ValueError(f"expected something like A2:C3, got {range_input!r}")

        first_col, first_row, second_col, second_row = match.groups()
        start_col, end_col = sorted(
            string.ascii_uppercase.index(col.upper()) for col in (first_col, second_col)
        )
        start_row, end_row = sorted((int(first_row), int(second_row)))
        if start_row < 1:
            # Row 0 would otherwise index from the end of the line list.
            raise ValueError("row numbers start at 1")
        return start_col, end_col, start_row, end_row

    @staticmethod
    def _select_cells(lines, start_col, end_col, start_row, end_row):
        """Return the requested words from the numbered *lines*.

        Args:
            lines: Numbered lines without the letter header; the first token
                of each line is its line number and is dropped.
            start_col, end_col: 0-based, inclusive word indices.
            start_row, end_row: 1-based, inclusive line numbers.

        Returns:
            One list of words per selected line; lines with too few words
            yield shorter (possibly empty) lists.

        Raises:
            ValueError: If *end_row* lies beyond the last line.
        """
        if end_row > len(lines):
            raise ValueError(
                f"row {end_row} is beyond the last line ({len(lines)})"
            )
        return [
            line.split()[1:][start_col:end_col + 1]
            for line in lines[start_row - 1:end_row]
        ]

    def select_data(self):
        """Ask for a cell range, save the selection to a file and display it.

        Range errors and file-saving errors are reported in separate dialogs.
        The selection is displayed even when saving is cancelled or fails.
        """
        range_input = simpledialog.askstring("Select Data", "Enter range (e.g., A2:C3):")
        if not range_input:
            return

        # Drop Tk's trailing newline ("end-1c") and the letter header ([1:]).
        lines = self.numbered_content_text.get("1.0", "end-1c").splitlines()[1:]
        try:
            start_col, end_col, start_row, end_row = self._parse_range(range_input)
            selected_data = self._select_cells(lines, start_col, end_col, start_row, end_row)
        except ValueError as e:
            messagebox.showerror("Error", f"Invalid range format: {e}")
            return

        df = pd.DataFrame(selected_data)

        try:
            file_path = self.save_to_txt(df)
        except OSError as e:
            messagebox.showerror("Error", f"Could not save the selected data: {e}")
        else:
            if file_path:
                messagebox.showinfo("Data Saved", f"Selected data has been saved to:\n{file_path}")

        self.show_selected_data(df)

    def save_to_txt(self, df):
        """Ask for a file name and write *df* to it as tab-separated text.

        The file is created next to this script, or in the current working
        directory when ``__file__`` is not defined (e.g. in a notebook).

        Args:
            df: The pandas DataFrame to write (written without the index).

        Returns:
            The path of the written file, or ``None`` if the user cancelled
            or entered an empty name.
        """
        file_name = simpledialog.askstring("Save Data", "Enter file name (without extension):")
        if not file_name or not file_name.strip():
            return None
        file_name = file_name.strip()

        if not file_name.lower().endswith('.txt'):
            file_name += '.txt'

        try:
            current_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            current_dir = os.getcwd()

        file_path = os.path.join(current_dir, file_name)
        df.to_csv(file_path, sep='\t', index=False)
        return file_path

    def show_selected_data(self, df):
        """Open a new window showing *df* in a scrollable table.

        Args:
            df: The pandas DataFrame to display; missing cells are shown
                blank.
        """
        data_window = tk.Toplevel(self.master)
        data_window.title("Selected Data")

        columns = [str(column) for column in df.columns]
        table = ttk.Treeview(data_window, columns=columns, show='headings')
        for column in columns:
            table.heading(column, text=column)
            table.column(column, width=100)

        for _, row in df.iterrows():
            # Rows with fewer words are padded with None by pandas.
            table.insert("", 'end', values=["" if pd.isna(cell) else cell for cell in row])

        scrollbar = ttk.Scrollbar(data_window, orient="vertical", command=table.yview)
        table.configure(yscrollcommand=scrollbar.set)

        # The scrollbar must be packed first: a table packed with expand/fill
        # would otherwise claim the whole window and leave it no room.
        scrollbar.pack(side='right', fill='y')
        table.pack(side='left', expand=True, fill='both')

    def quit(self):
        """Shut the browser session down and close the application window."""
        try:
            if self.driver:
                self.driver.quit()
        except Exception as e:  # never let a browser error keep the GUI open
            print(f"An error occurred while closing the browser: {e}")
        finally:
            self.driver = None
            # destroy() both ends the main loop and removes the window.
            self.master.destroy()

# Launch the GUI only when this file is executed directly (as a script or a
# notebook cell), not when it is imported: creating the app opens a browser.
# `root` and `app` stay module-level names so later cells can still use them.
if __name__ == "__main__":
    root = tk.Tk()
    app = JANISWebApp(root)
    root.mainloop()