In [1]:
import pandas as pd
import pdfplumber
import os

In [7]:
def tableExtractor(pdf_path, output_folder):
    """Extract every table found in a PDF and save each as its own Excel file.

    Tables are numbered consecutively across the whole document and written
    as ``table_<n>.xlsx`` (no header row, no index column). Progress and
    errors are reported with ``print``; nothing is raised for the handled
    error cases.

    Parameters
    ----------
    pdf_path : str
        Path of the PDF to read. If it does not exist, an error is printed
        and the function returns without doing anything else.
    output_folder : str
        Existing directory to write the workbooks into. If it is not an
        existing directory, the fallback folder ``output_tables_folder``
        (relative to the current working directory) is used instead and a
        warning naming the requested path is printed.

    Returns
    -------
    None
    """
    default_folder = "output_tables_folder"

    if not os.path.exists(pdf_path):
        print(f"Error: File '{pdf_path}' not found.")
        return

    if not os.path.isdir(output_folder):
        # Remember what the caller asked for so the warning names the path
        # that was actually missing, not the fallback we switch to.
        requested_folder = output_folder
        output_folder = default_folder
        # exist_ok=True: the fallback folder may be left over from an earlier
        # run; that must not abort the extraction with FileExistsError.
        os.makedirs(output_folder, exist_ok=True)
        print(f"Warning: File '{requested_folder}' not found, hence folder named: 'output_tables_folder' created")

    # The context manager closes the PDF on every exit path, including the
    # early return for an empty document and any exception while writing.
    with pdfplumber.open(pdf_path) as pdf:
        if len(pdf.pages) == 0:
            print("Error: This PDF is empty.")
            return

        table_count = 0
        for page_no, page in enumerate(pdf.pages, start=1):
            # `or []` keeps pages without tables from needing a special case.
            for table in page.extract_tables() or []:
                if not table:
                    continue

                table_count += 1
                file_path = os.path.join(output_folder, f"table_{table_count}.xlsx")

                pd.DataFrame(table).to_excel(file_path, index=False, header=False)
                print(f"Extracted Table {table_count} from Page {page_no} -> {file_path}")

    print(f"Extraction complete. {table_count} tables saved in '{output_folder}'.")

In [9]:
# Ask for both paths interactively. input() already returns a str, so no
# conversion is needed; surrounding whitespace and quotes (common when a path
# is pasted from a file manager) are removed so the existence checks inside
# tableExtractor see the real path.
pdf_path = input("Enter the path of PDF file: ").strip().strip("\"'")
output_folder = input("Enter the folder path for excel files: ").strip().strip("\"'")
tableExtractor(pdf_path, output_folder)

Enter the path of PDF file: stock_market_dataset.pdf
Enter the folder path for excel files: sdlkjfa;sijf
Extracted Table 1 from Page 2 -> output_tables_folder\table_1.xlsx
Extracted Table 2 from Page 3 -> output_tables_folder\table_2.xlsx
Extracted Table 3 from Page 4 -> output_tables_folder\table_3.xlsx
Extracted Table 4 from Page 5 -> output_tables_folder\table_4.xlsx
Extracted Table 5 from Page 6 -> output_tables_folder\table_5.xlsx
Extracted Table 6 from Page 7 -> output_tables_folder\table_6.xlsx
Extracted Table 7 from Page 9 -> output_tables_folder\table_7.xlsx
Extracted Table 8 from Page 10 -> output_tables_folder\table_8.xlsx
Extracted Table 9 from Page 11 -> output_tables_folder\table_9.xlsx
Extracted Table 10 from Page 12 -> output_tables_folder\table_10.xlsx
Extracted Table 11 from Page 13 -> output_tables_folder\table_11.xlsx
Extracted Table 12 from Page 14 -> output_tables_folder\table_12.xlsx
Extracted Table 13 from Page 16 -> output_tables_folder\table_13.xlsx
Extracted 