In [6]:
import os
import openpyxl
import glob

def extract_number_from_merged_cell(file_path):
    try:
        wb = openpyxl.load_workbook(file_path, data_only=True)
        # Assuming the data is in the first sheet; modify if necessary
        ws = wb.active

        # Columns AD to AI correspond to columns 30 to 35
        target_columns = list(range(30, 36))  # Excel columns are 1-indexed

        # Iterate through all merged cell ranges
        for merged_range in ws.merged_cells.ranges:
            # Get bounds of the merged cell
            min_col, min_row, max_col, max_row = merged_range.bounds

            # Check if the merged cell spans exactly columns AD to AI
            if min_col == 30 and max_col == 35:
                # Ensure it spans only one row (assuming "the only merged cell" implies single row)
                if min_row == max_row:
                    cell = ws.cell(row=min_row, column=min_col)
                    value = cell.value
                    if isinstance(value, (int, float)):
                        return value
                    else:
                        # Try to convert to float if it's not already a number
                        try:
                            return float(value)
                        except (ValueError, TypeError):
                            print(f"Non-numeric value in file {file_path}: {value}")
                            return None
        # If no matching merged cell is found
        print(f"No matching merged cell found in file: {file_path}")
        return None
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

def process_folder(folder_path):
    # Use glob to find all Excel files in the folder (including .xlsx and .xlsm)
    excel_files = glob.glob(os.path.join(folder_path, "*.xlsx")) + glob.glob(os.path.join(folder_path, "*.xlsm")) + glob.glob(os.path.join(folder_path, "*.xls"))
    
    # Dictionary to store results
    results = {}

    for file in excel_files:
        number = extract_number_from_merged_cell(file)
        if number is not None:
            results[os.path.basename(file)] = number
        else:
            results[os.path.basename(file)] = "No valid merged cell found or non-numeric value"

    return results

if __name__ == "__main__":
    # Specify the folder path containing Excel files
    folder = r"C:\Users\kings\Downloads\Invoices"  # Replace with your folder path

    # Process the folder and get results
    extracted_data = process_folder(folder)

    # Print the results
    for file, value in extracted_data.items():
        print(f"{file}: {value}")

    # Optionally, write results to a new Excel or CSV file
    # Example: Writing to a CSV file
    import csv
    output_csv = "extracted_numbers.csv"
    with open(output_csv, mode='w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["File Name", "Extracted Number"])
        for file, value in extracted_data.items():
            writer.writerow([file, value])
    
    print(f"Extraction complete. Results saved to {output_csv}")

No matching merged cell found in file: C:\Users\kings\Downloads\Invoices\Invoice I.23.0005a Soho vintage furniture repair -  HPG.xlsx
Error processing file C:\Users\kings\Downloads\Invoices\I.20.0001 The Aberdeen Marina Club (Buru Umbrella Protective Cover).xls: openpyxl does not support the old .xls file format, please use xlrd to read this file, or convert it to the more recent .xlsx file format.
Error processing file C:\Users\kings\Downloads\Invoices\I.20.0002 Hong Kong Golf & Tennis Academy (Sofa Cushion Cover).xls: openpyxl does not support the old .xls file format, please use xlrd to read this file, or convert it to the more recent .xlsx file format.
Error processing file C:\Users\kings\Downloads\Invoices\I.20.0003 Eclipse Management Ltd (Repair Awning - McSorley's DB).xls: openpyxl does not support the old .xls file format, please use xlrd to read this file, or convert it to the more recent .xlsx file format.
Error processing file C:\Users\kings\Downloads\Invoices\I.20.0004 Thic