In [1]:
import os
import pandas as pd
from indoxMiner import (
    Extractor,
    ProcessingConfig,
    DocumentProcessor,
    AutoSchema,
    NerdTokenApi
)

# --- Configuration ---------------------------------------------------------
# Read the API key from the NERDTOKEN environment variable so a real token
# never has to be pasted into (and shared with) the notebook. The placeholder
# is kept as the fallback, so editing this line by hand still works as before.
NERDTOKEN = os.environ.get("NERDTOKEN", "YOUR_NERD_TOKEN")

# Initialize Indox API and extractor configuration
indox_api = NerdTokenApi(api_key=NERDTOKEN, model='gpt-4o-mini')
config = ProcessingConfig(
    ocr_for_images=True,
    ocr_model='easyocr'  # or 'tesseract' or 'paddle'
)

# Step 1: Initialize AutoSchema for automatic field detection
auto_schema = AutoSchema()

# Step 2: Initialize the Extractor with the Indox API and AutoSchema
extractor = Extractor(llm=indox_api, schema=auto_schema)

# Step 3: Define the directory containing images and specify target images.
# The default below is a machine-specific path; set INDOX_IMAGE_DIR to point
# the notebook at your own copy of the example images without editing code.
image_directory = os.environ.get(
    "INDOX_IMAGE_DIR",
    r'C:\My Files\IndoxM\IndoxMiner\examples',
)
target_images = ['invoice_Aaron Bergman_36258-1.png']
image_paths = [os.path.join(image_directory, img) for img in target_images]

# Fail fast with a readable message if an input image is missing, instead of
# discovering it later inside the OCR / LLM steps.
missing_images = [path for path in image_paths if not os.path.isfile(path)]
if missing_images:
    raise FileNotFoundError(f"Image file(s) not found: {missing_images}")

# Step 4: Process the documents to extract text and metadata
processor = DocumentProcessor(image_paths)
results = processor.process(config)

# Step 5: Extract structured data from processed documents
extracted_data = extractor.extract(results)

# Step 6: Convert the extracted data to a DataFrame
df = extractor.to_dataframe(extracted_data)

# Display the final DataFrame
print("Extracted Data as DataFrame:")
print(df)


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Row added: {'invoice_number': 36258, 'date': '2012-03-06', 'ship_mode': 'First Class', 'balance_due': 50.1, 'subtotal': 48.71, 'shipping': 11.13, 'total': 50.1, 'notes': 'Thanks for your business!', 'items': [{'description': "Global Push Button Manager's Chair, Indigo", 'quantity': 1, 'rate': 48.71, 'amount': 48.71}, {'description': 'Chairs, Furniture, FUR-CH-4421', 'quantity': 1, 'rate': 48.71, 'amount': 48.71}]}
Extracted Data as DataFrame:
   invoice_number        date    ship_mode  balance_due  subtotal  shipping  \
0           36258  2012-03-06  First Class         50.1     48.71     11.13   

   total                      notes  \
0   50.1  Thanks for your business!   

                                               items  
0  [{'description': 'Global Push Button Manager's...  


In [2]:
# Bare last-line expression: the notebook renders `df` (the result of
# extractor.to_dataframe in the first cell) as the cell's output.
df

Unnamed: 0,invoice_number,date,ship_mode,balance_due,subtotal,shipping,total,notes,items
0,36258,2012-03-06,First Class,50.1,48.71,11.13,50.1,Thanks for your business!,[{'description': 'Global Push Button Manager's...


In [3]:
# Convert the same extracted data into a Markdown (pipe-delimited) table;
# the result is kept in `mk` so the next cell can print it.
mk = extractor.to_markdown(extracted_data)

In [4]:
# Print the Markdown table built in the previous cell.
print(mk)

 

 |   invoice_number | date       | ship_mode   |   balance_due |   subtotal |   shipping |   total | notes                     | items                                                                                                                                                                                                            |
|------------------|------------|-------------|---------------|------------|------------|---------|---------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|            36258 | 2012-03-06 | First Class |          50.1 |      48.71 |      11.13 |    50.1 | Thanks for your business! | [{'description': "Global Push Button Manager's Chair, Indigo", 'quantity': 1, 'rate': 48.71, 'amount': 48.71}, {'description': 'Chairs, Furniture, FUR-CH-4421', 'quantity': 1, 'rate': 48.71,

In [5]:
# Convert the same extracted data into a bordered plain-text table;
# the result is kept in `tb` so the next cell can print it.
tb = extractor.to_table(extracted_data)

In [6]:
# Print the plain-text table built in the previous cell.
print(tb)

 

 +------------------+------------+-------------+---------------+------------+------------+---------+---------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|   invoice_number | date       | ship_mode   |   balance_due |   subtotal |   shipping |   total | notes                     | items                                                                                                                                                                                                            |
|            36258 | 2012-03-06 | First Class |          50.1 |      48.71 |      11.13 |    50.1 | Thanks for your business! | [{'description': "Global Push Button Manager's Chair, Indigo", 'quantity': 1, 'rate': 48.71, 'amount': 48.71}, {'description': 'Chairs, Furniture, FUR-CH-4421', 'quantity': 1, 'rate': 48.71,