# In [None]:
import tabula
import pandas as pd
import os  # Added for file existence check

# Define the PDF file path
pdf_file_path = "1.pdf"

# Check if the PDF file exists
if not os.path.exists(pdf_file_path):
    print("File does not exist.")
else:
    # Extract tables from the PDF and store them in a list of DataFrames
    tables = tabula.read_pdf(pdf_file_path, pages='all', multiple_tables=True)

    # Check if the number of tables found is equal to 9
    if len(tables) != 9:
        print("The PDF file does not have all 9 tables.")
    else:
        # Function to format numeric columns
        def format_numeric_columns(df):
            """Convert text columns that contain only numbers to float, in place.

            A text column is converted when every non-missing value, once
            spaces are removed, is a plain number: an optional sign, digits
            with optional comma thousands separators, and an optional decimal
            fraction (for example "1 234", "1,234" or "-12.5"). Missing values
            in a converted column become NaN. Every other column is left
            exactly as it was, so free text such as timestamps keeps its
            spaces.

            Args:
                df: DataFrame to modify in place.

            Returns:
                None.
            """
            # Anchored at the end (str.match anchors the start) so partially
            # numeric text such as "12 units" or "1,2,3" is never converted.
            number_pattern = r'[+-]?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?\Z'

            for col in df.columns:
                column = df[col]
                is_text = (pd.api.types.is_object_dtype(column)
                           or pd.api.types.is_string_dtype(column))
                if not is_text:
                    continue

                present = column.notna()
                if not present.any():
                    continue  # Nothing to inspect in an empty/all-missing column

                # Work on a cleaned copy; the real column is only replaced
                # once every present value has been confirmed numeric.
                compact = column.astype(str).str.replace(' ', '', regex=False)
                if not compact[present].str.match(number_pattern).all():
                    continue

                digits = compact.str.replace(',', '', regex=False)
                # Mask the stringified missing values back to NaN before casting.
                df[col] = digits.where(present).astype(float)

        # Output file stem for every table that gets exported, keyed by the
        # table's 1-based position in the list extracted from the PDF.
        table_names = {
            2: "weekly stats - Elland",
            4: "weekly stats - Runcron",
            6: "Delivered messages split by attempt (%) Total for the week",
            8: "Delivered messages split by attempt - Totals",
        }

        # Export exactly the tables that have a custom name (2, 4, 6 and 8),
        # in ascending order.
        tables_to_keep = sorted(table_names)

        # Replacement column headers for each exported table, keyed by table
        # number. Tables 2 and 4 share the same weekly-stats layout.
        weekly_stats_columns = ["time stamp", "SYSTEM_NAME", "Messages submitted", "Messages delivered", "Success Rate (%)", "Messages Undelivered", "Delivered after 5 minutes", "Messages expired"]
        columns_by_table = {
            2: weekly_stats_columns,
            4: weekly_stats_columns,
            6: ["NR_DEL_ATT", "Count % of All records"],
            8: ["NR_DEL_ATT", "timestamp", "Count of All records"],
        }

        # For every selected table: clean up its numeric columns, apply the
        # replacement headers (if any are defined for it) and write it to
        # "<custom name>.csv" in the current working directory.
        for table_number in tables_to_keep:
            table = tables[table_number - 1]  # Table numbers are 1-based
            format_numeric_columns(table)
            # Fall back to a generic name when no custom one is configured
            custom_name = table_names.get(table_number, f"table_{table_number}")

            replacement_columns = columns_by_table.get(table_number)
            if replacement_columns is not None:
                table.columns = replacement_columns

            # Header row is written; the DataFrame index is not
            table.to_csv(f'{custom_name}.csv', index=False, header=True)

        print("Selected tables extracted, processed, and saved as CSV files with custom names and correct column names.")
