In [1]:
import openpyxl
from openpyxl.styles import PatternFill, Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
import os
import logging

# Set up logging
logging.basicConfig(filename='excel_processing.log', level=logging.INFO)

def clean_excel_data(excel_file_path):
    try:
        wb = openpyxl.load_workbook(excel_file_path)
        ws = wb.active

        if ws.max_row == 1 and ws.max_column == 1 and ws.cell(row=1, column=1).value is None:
            print(f"Warning: Sheet is empty in {excel_file_path}. Skipping this file.")
            return

        # Insert a column at the beginning (column A)
        ws.insert_cols(1) 

        # Unmerge all cells within the sheet's dimensions
        merged_cells_copy = [
            rng for rng in ws.merged_cells.ranges 
            if rng.min_row <= ws.max_row and rng.max_col <= ws.max_column
        ]
        for merged_cell_range in merged_cells_copy:
            try:
                ws.unmerge_cells(str(merged_cell_range))
            except KeyError as e:
                print(f"Warning: KeyError encountered while unmerging cells (likely due to invalid range): {e}")
                print(f"Problematic merged cell range: {merged_cell_range}")  # Print details of the range

        # Copy row 24 and paste special to row 18
        for col_idx in range(1, ws.max_column + 1):
            ws.cell(row=18, column=col_idx).value = ws.cell(row=24, column=col_idx).value

        # Copying Project Names
        proj_name = None 
        for row_idx in range(18, ws.max_row + 1):
            crr_cell = ws.cell(row=row_idx, column=2)  # Adjusted column index due to insertion
            color_int = crr_cell.fill.start_color.index 
            if color_int == "00703084" and crr_cell.value == "Bank Account Name":
                proj_name = ws.cell(row=row_idx - 4, column=5).value  # Adjusted column index
                ws.cell(row=row_idx, column=1).value = proj_name
            elif color_int == "00FFFFFF" and crr_cell.value and ws.cell(row=row_idx, column=3).value:  # Adjusted column index
                ws.cell(row=row_idx, column=1).value = proj_name

        # Find the last row with data in column A, handle empty column case
        last_row = ws.max_row
        while last_row > 0 and ws.cell(row=last_row, column=1).value is None:
            last_row -= 1

        if last_row == 0:  # Handle empty column A
            print(f"Warning: Column A is empty in {excel_file_path}. Skipping this file.")
            return  # Skip further processing for this file

        # Set cell A18 
        cell_a18 = ws.cell(row=18, column=1)
        cell_a18.value = "Project"
        cell_a18.fill = PatternFill(start_color="00703084", end_color="00703084", fill_type="solid")
        cell_a18.font = Font(color="FFFFFF") 

        # Set cell in the next row to "END"
        ws.cell(row=last_row + 1, column=1).value = "END"

        # Deleting excess rows (iterate in reverse to avoid index issues)
        for row_idx in range(ws.max_row, 18, -1): 
            crr_cell = ws.cell(row=row_idx, column=1)
            if (
                crr_cell.value is None 
                or ws.cell(row=row_idx, column=2).value is None
                or ws.cell(row=row_idx, column=2).value == "Bank Account Name"
                or ws.cell(row=row_idx, column=2).value == "Owner Name "
            ):
                ws.delete_rows(row_idx)

        # Find the new last row after deletions
        last_row = ws.max_row
        while ws.cell(row=last_row, column=1).value is None:
            last_row -= 1

        # Delete 50 rows after the last row
        ws.delete_rows(last_row + 1, 50)

        # Find the final last row
        last_row = ws.max_row
        while ws.cell(row=last_row, column=1).value is None:
            last_row -= 1

        # Apply formatting
        range_to_format = ws["A18": get_column_letter(15) + str(last_row)]
        thin_border = Border(left=Side(style='thin'), 
                            right=Side(style='thin'), 
                            top=Side(style='thin'), 
                            bottom=Side(style='thin'))

        for row in range_to_format:
            for cell in row:
                cell.border = thin_border
                cell.alignment = Alignment(horizontal="center")

        # Autofit columns
        for col in ws.columns:
            max_length = 0
            column = col[0].column_letter 
            for cell in col:
                try:
                    if len(str(cell.value)) > max_length:
                        max_length = len(cell.value)
                except:
                    pass
            adjusted_width = (max_length + 2) * 1.2
            ws.column_dimensions[column].width = adjusted_width

        wb.save(excel_file_path)
    except Exception as e:
        logging.error(f"Error processing {excel_file_path}: {e}")
        print(f"Error processing {excel_file_path}. See log for details.")

# Get the directory where your Jupyter Notebook is located
notebook_directory = os.path.dirname(os.path.abspath("__file__"))

# Find all Excel files in the directory
excel_files = [f for f in os.listdir(notebook_directory) if f.endswith('.xlsx') or f.endswith('.xls')]

# Process each Excel file
for excel_file in excel_files:
    excel_file_path = os.path.join(notebook_directory, excel_file)
    clean_excel_data(excel_file_path)
    print(f"Processed: {excel_file}")

Problematic merged cell range: A3987:A3988
Problematic merged cell range: A4460:A4461
Problematic merged cell range: A4760:A4761
Problematic merged cell range: A748:A749
Problematic merged cell range: A4814:A4815
Problematic merged cell range: A569:A570
Problematic merged cell range: A796:A797
Problematic merged cell range: A3339:A3340
Problematic merged cell range: A840:A841
Problematic merged cell range: A4172:A4173
Problematic merged cell range: A1951:A1952
Problematic merged cell range: A1652:A1653
Problematic merged cell range: A4824:A4825
Problematic merged cell range: A4525:A4526
Problematic merged cell range: A1662:A1663
Problematic merged cell range: A182:A183
Problematic merged cell range: A2779:A2780
Problematic merged cell range: A704:A705
Problematic merged cell range: A3769:A3770
Problematic merged cell range: A3448:A3449
Problematic merged cell range: A4133:A4134
Problematic merged cell range: A1707:A1708
Problematic merged cell range: A2392:A2393
Problematic merged cell