# Excel File Merger

This notebook provides a robust function that merges two Excel files with the same column structure by appending the rows of the second file below the rows of the first.

In [ ]:
from pathlib import Path

import pandas as pd

In [ ]:
def merge_xlsx_files(file1_path, file2_path, output_path) -> pd.DataFrame:
    """
    Merge two Excel files with the same column structure.

    The rows of the second file are appended below the rows of the first
    and the result is written to ``output_path`` without the index column.
    A file without data rows is tolerated (a warning is printed), but when
    both files carry a header row the headers must be identical and in the
    same order, even if one of the files has no data rows.

    Args:
        file1_path: Path to first Excel file
        file2_path: Path to second Excel file
        output_path: Path for merged output file; missing parent
            directories are created

    Returns:
        pd.DataFrame: Merged dataframe

    Raises:
        FileNotFoundError: If input files don't exist
        ValueError: If files have incompatible structures
    """
    # Convert to Path objects
    file1 = Path(file1_path)
    file2 = Path(file2_path)
    output = Path(output_path)

    # Validate input files exist
    if not file1.exists():
        raise FileNotFoundError(f"File not found: {file1}")
    if not file2.exists():
        raise FileNotFoundError(f"File not found: {file2}")

    # Create output directory if needed
    output.parent.mkdir(parents=True, exist_ok=True)

    try:
        # Read files
        df1 = pd.read_excel(file1)
        df2 = pd.read_excel(file2)

        columns1 = list(df1.columns)
        columns2 = list(df2.columns)

        # Compare headers before looking at row counts: a header-only file
        # still declares a structure and must not bypass the check. A file
        # with no header at all has nothing to compare and is let through.
        if columns1 and columns2 and columns1 != columns2:
            raise ValueError(
                f"Column mismatch - File 1: {columns1}, "
                f"File 2: {columns2}"
            )

        if df1.empty and df2.empty:
            print("Warning: Both files are empty")
            # Keep whichever header row exists so the output file retains
            # the column structure instead of becoming a blank sheet.
            merged_df = (df1 if columns1 else df2).copy()
        elif df1.empty:
            print("Warning: First file is empty, using second file")
            merged_df = df2.copy()
        elif df2.empty:
            print("Warning: Second file is empty, using first file")
            merged_df = df1.copy()
        else:
            # Merge dataframes
            merged_df = pd.concat([df1, df2], ignore_index=True)

        # Save result
        merged_df.to_excel(output, index=False)
        print(f"Successfully merged {len(df1)} + {len(df2)} = {len(merged_df)} rows")
        print(f"Output saved to: {output}")

        return merged_df

    except Exception as e:
        # Report the failure for interactive use, then let the caller decide
        print(f"Error processing files: {e}")
        raise

## Usage Example

Update the file paths below to match your Excel files:

In [ ]:
# Input and output locations -- point these at your own workbooks
file1_path = Path('file1.xlsx')
file2_path = Path('file2.xlsx')
output_path = Path('merged.xlsx')

# Run the merge and report each failure category under its own prefix;
# the success message is printed only when no exception was raised.
try:
    merged_data = merge_xlsx_files(file1_path, file2_path, output_path)
except FileNotFoundError as missing:
    print(f"File error: {missing}")
except ValueError as bad_data:
    print(f"Data error: {bad_data}")
except Exception as other:
    print(f"Unexpected error: {other}")
else:
    print("Merge completed successfully!")

## Optional: Preview the merged data

In [ ]:
# Preview the leading rows, but only when a non-empty merge result exists
# (the name is absent if the merge cell failed or has not been run).
if 'merged_data' not in locals() or merged_data.empty:
    print("No merged data available to display")
else:
    display(merged_data.head())

In [ ]:
# Summarise the merged dataset (shape, column names, dtype/memory info),
# falling back to a notice when there is no non-empty result to inspect.
if 'merged_data' not in locals() or merged_data.empty:
    print("No merged data available to analyze")
else:
    print(f"Shape: {merged_data.shape}")
    print(f"Columns: {list(merged_data.columns)}")
    merged_data.info()