In [6]:
import pdfplumber

def extract_tables_from_page(pdf_path, page_number):
    """Extract tables from a specified page in the PDF."""
    with pdfplumber.open(pdf_path) as pdf:
        if page_number < 0 or page_number >= len(pdf.pages):
            return None  # Return None if the page number is out of range
        page = pdf.pages[page_number]
        tables = page.extract_tables()
        return tables

def format_table_output(tables):
    """Format the output of the extracted tables for display."""
    output = ""
    if tables:
        for i, table in enumerate(tables):
            output += f"\nTable {i + 1}:\n"
            for row in table:
                output += " | ".join(str(cell) for cell in row) + "\n"
    else:
        output += "No tables found on this page."
    return output

def handle_query(user_query, pdf_path):
    """Handle the user's query to extract tables from the specified page."""
    try:
        # Extract the page number from the user query
        page_number = int(user_query.split("page")[1].strip()) - 1  # Convert to zero-based index
    except (IndexError, ValueError):
        return "Invalid query format. Please specify a valid page number."

    # Extract tables from the specified page
    tables = extract_tables_from_page(pdf_path, page_number)
    if tables is None:
        return f"The specified page {page_number + 1} does not exist in the PDF."

    # Prepare the response with formatted table output
    response = f"Tables found on page {page_number + 1}:\n"
    response += format_table_output(tables)
    return response

if __name__ == "__main__":
    # Path to your PDF file
    pdf_path = r"C:\Users\Sande\OneDrive\文档\indu\lavanya aiml\Tables.pdf"  # Replace with your PDF file path

    # Prompt the user for a query
    user_query = input("Enter your query (e.g., 'from page 3 get tables data'): ")
    response = handle_query(user_query, pdf_path)
    print(response)


Enter your query (e.g., 'from page 3 get tables data'):  page6


Tables found on page 6:

Table 1:
Year | 2010 | 2011 | 2012 | 2013 | 2014 | 2015
All Industries | 26093515 | 27535971 | 28663246 | 29601191 | 30895407 | 31397023
Manufacturing | 4992521 | 5581942 | 5841608 | 5953299 | 6047477 | 5829554
Finance,
Insurance, Real
Estate, Rental,
Leasing | 4522451 | 4618678 | 4797313 | 5031881 | 5339678 | 5597018
Arts,
Entertainment,
Recreation,
Accommodation,
and Food Service | 964032 | 1015238 | 1076249 | 1120496 | 1189646 | 1283813
Other | 15614511 | 16320113 | 16948076 | 17495515 | 18318606 | 18686638



In [2]:
pip install pdfplumber



