In [None]:
import os

import pandas as pd
from dotenv import dotenv_values
from dotenv import load_dotenv

# Load environment variables via python-dotenv so that later cells can read
# their path settings with os.getenv().
load_dotenv()

In [None]:
# Get paths from the .env file / environment variables.
#
# The base directory is stored under the key 'PATH', which collides with the
# operating system's executable search path. load_dotenv() does not override
# variables that already exist in the process environment by default, so
# os.getenv('PATH') would return the system search path instead of the value
# written in .env. Read that key straight from the .env file instead.
# NOTE(review): consider renaming the key to something that cannot shadow an
# OS variable; that requires updating the .env file as well.
base_path = dotenv_values().get('PATH')
download_path = os.getenv('DOWNLOAD_PATH')

# Fail fast with a readable message rather than a TypeError from os.path.join.
if not base_path:
    raise ValueError(
        "PATH is not defined in the .env file; cannot locate the HTML file."
    )

# Construct full path to HTML file
path = os.path.join(base_path, "3.htm")

print(f"HTML file path: {path}")
print(f"Download directory: {download_path}")

In [None]:
# Check if file exists before reading
if not os.path.exists(path):
    print(f"Error: HTML file not found at {path}")
else:
    try:
        # Read HTML file. pd.read_html never returns an empty list: when the
        # document contains no <table> it raises ValueError instead, so that
        # case has to be handled as an exception.
        tables = pd.read_html(path)
    except ValueError as e:
        # Reset the name so a stale result from an earlier run is not reused.
        tables = []
        print(f"Error reading HTML: {e}")
    else:
        # Defensive check kept in case a parser ever yields an empty result.
        if not tables:
            print("No tables found in the HTML file.")
        else:
            print(f"Found {len(tables)} tables in the HTML file.")
            for i, table in enumerate(tables, 1):
                print(f"Table {i}: {table.shape[0]} rows x {table.shape[1]} columns")

In [None]:
def html_to_excel(html_file, excel_file):
    """Convert every table in an HTML file into a sheet of an Excel workbook.

    Each table found by ``pd.read_html`` is written to its own sheet named
    ``Sheet1``, ``Sheet2``, ... (row index omitted) using the openpyxl engine.
    Failures are reported with ``print`` rather than raised, so the function
    always returns ``None``.

    Reading and writing are handled in separate ``try`` blocks so that a
    problem with the output location (for example a missing directory) is not
    misreported as the HTML input being missing or unreadable.

    Args:
        html_file: Path of the HTML document to read.
        excel_file: Path of the ``.xlsx`` workbook to create.

    Returns:
        None.
    """
    # --- Phase 1: read the HTML tables -------------------------------------
    try:
        tables = pd.read_html(html_file)
    except FileNotFoundError:
        print(f"Error: File '{html_file}' not found.")
        return
    except ValueError as e:
        # pd.read_html raises ValueError when no tables can be parsed.
        print(f"Error reading HTML: {e}")
        return
    except Exception as e:
        print(f"Unexpected error: {e}")
        return

    if not tables:
        print("No tables found in the HTML file.")
        return

    # --- Phase 2: write one sheet per table --------------------------------
    try:
        with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:
            for idx, table in enumerate(tables, start=1):
                sheet_name = f"Sheet{idx}"
                table.to_excel(writer, sheet_name=sheet_name, index=False)
    except OSError as e:
        # Covers a missing output directory, permission problems, etc.
        print(f"Error writing Excel file '{excel_file}': {e}")
        return
    except Exception as e:
        print(f"Unexpected error: {e}")
        return

    print(f"Conversion successful! Saved to '{excel_file}'")

# Convert the HTML file to Excel.
# Each precondition is checked explicitly so that a missing setting produces a
# readable message instead of a TypeError from os.path.join(None, ...).
if not os.path.exists(path):
    print("Cannot convert: HTML file not found.")
elif not download_path:
    print("Cannot convert: DOWNLOAD_PATH is not set.")
elif not os.path.isdir(download_path):
    print(f"Cannot convert: download directory not found at {download_path}")
else:
    excel_output = os.path.join(download_path, "converted_tables.xlsx")
    html_to_excel(path, excel_output)