In [3]:
import pandas as pd
import os

# Locations of the two workbooks to combine, relative to the working directory.
data_folder = "data"
file1 = os.path.join(data_folder, "data.xlsx")
file2 = os.path.join(data_folder, "data-2.xlsx")

# Columns whose combined values identify one row; after stacking the two
# frames, only the first row for each combination is kept.
KEY_COLUMNS = ('BudgetCode', 'ProjectName', 'DecisionMoment')


def load_excel_or_none(path):
    """Read an Excel workbook into a DataFrame, or return None if it is missing.

    Args:
        path: Path of the workbook, passed straight to ``pd.read_excel``.

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist.

    Only ``FileNotFoundError`` is handled here; any other failure propagates
    to the caller so it is not silently hidden.
    """
    try:
        frame = pd.read_excel(path)
    except FileNotFoundError:
        print(f"Error: {path} not found.")
        return None
    print(f"Successfully loaded {path} into a DataFrame.")
    return frame


def concat_and_dedupe(first, second, key_columns=KEY_COLUMNS):
    """Stack two DataFrames row-wise and keep the first row per key combination.

    Args:
        first: DataFrame whose rows come first (and win on duplicate keys).
        second: DataFrame appended below ``first``.
        key_columns: Column names that define a duplicate row.

    Returns:
        A new DataFrame; the inputs are not modified.

    Raises:
        KeyError: If any of ``key_columns`` is absent from the stacked frame.
    """
    # concat aligns the frames by column name; a column present in only one
    # frame is kept and filled with NaN for the other frame's rows.
    stacked = pd.concat([first, second], ignore_index=True)

    # Fail with an explicit message instead of the bare pandas KeyError.
    missing = [col for col in key_columns if col not in stacked.columns]
    if missing:
        raise KeyError(
            f"Cannot drop duplicates: key column(s) {missing} not found in the merged data."
        )

    deduped = stacked.drop_duplicates(subset=list(key_columns))

    # Keep only the first occurrence of any repeated column label.
    return deduped.loc[:, ~deduped.columns.duplicated()]


# Read the Excel files into DataFrames (None when a file is missing)
df1 = load_excel_or_none(file1)
df2 = load_excel_or_none(file2)

# Merge the DataFrames if both are loaded
if df1 is not None and df2 is not None:
    # Columns shared by both frames, in df1's column order. Informational
    # only: the concat below does not take this list as input.
    common_columns = [col for col in df1.columns if col in df2.columns]

    # Stack the rows of both frames and drop rows repeating the key columns
    merged_df = concat_and_dedupe(df1, df2)

    print("\nMerged DataFrame:")
    print(merged_df.head())

    # Optional: Save the merged DataFrame to a CSV file
    merged_df.to_csv(os.path.join(data_folder, "merged_data.csv"), index=False)
    print(f"Merged DataFrame saved as merged_data.csv in {data_folder}.")
else:
    print("One or both DataFrames could not be loaded. Merging skipped.")

Successfully loaded data\data.xlsx into a DataFrame.
Successfully loaded data\data-2.xlsx into a DataFrame.

Merged DataFrame:
  BudgetCode                            ProjectName costCenter DecisionMoment  \
0      CM109                EPREP REGIONAL CAMEROUN      EPREP        2022-09   
1      CM109                EPREP REGIONAL CAMEROUN      EPREP        2022-10   
2      CM109                EPREP REGIONAL CAMEROUN      EPREP        2022-11   
3      CM109                EPREP REGIONAL CAMEROUN      EPREP        2022-13   
4      KE202  SUPPLY KENYA INTERSECTION DEPARTEMENT     Coordo        2022-09   

  Period            LastUpdate OriginCostCode whatLVL1Desc whatCode  \
0    NaN                   NaN            FIN    EQUIPMENT    AADAP   
1    NaN                   NaN            FIN    EQUIPMENT    AADAP   
2    NaN                   NaN           UNIF    EQUIPMENT    AADAP   
3    NaN  Last Update POST-MYR            FIN    EQUIPMENT    AADAP   
4    NaN                   NaN 

In [None]:
{
    "cells": [
        {
            "cell_type": "code",
            "metadata": {
                "id": "2549644f",
                "language": "python"
            },
            "source": [
                "import pandas as pd",
                "import os",
                "",
                "# Define the file paths",
                "data_folder = \"data\"",
                "file1 = os.path.join(data_folder, \"data.xlsx\")",
                "file2 = os.path.join(data_folder, \"data-2.xlsx\")",
                "",
                "# Read the Excel files into DataFrames",
                "try:",
                "    df1 = pd.read_excel(file1)",
                "    print(f\"Successfully loaded {file1} into a DataFrame.\")",
                "except FileNotFoundError:",
                "    print(f\"Error: {file1} not found.\")",
                "    df1 = None",
                "",
                "try:",
                "    df2 = pd.read_excel(file2)",
                "    print(f\"Successfully loaded {file2} into a DataFrame.\")",
                "except FileNotFoundError:",
                "    print(f\"Error: {file2} not found.\")",
                "    df2 = None",
                "",
                "# Merge the DataFrames if both are loaded",
                "if df1 is not None and df2 is not None:",
                "    # Find common columns between the two DataFrames",
                "    common_columns = list(set(df1.columns).intersection(set(df2.columns)))",
                "",
                "    # Merge the DataFrames on the common columns",
                "    merged_df = pd.concat([df1, df2], ignore_index=True)",
                "",
                "    # Drop duplicate rows based on the key columns",
                "    merged_df = merged_df.drop_duplicates(subset=['BudgetCode', 'ProjectName', 'DecisionMoment'])",
                "",
                "    print(\"\\nMerged DataFrame:\")",
                "    print(merged_df.head())",
                "",
                "    # Optional: Save the merged DataFrame to a CSV file",
                "    merged_df.to_csv(os.path.join(data_folder, \"merged_data.csv\"), index=False)",
                "    print(f\"Merged DataFrame saved as merged_data.csv in {data_folder}.\")",
                "else:",
                "    print(\"One or both DataFrames could not be loaded. Merging skipped.\")"
            ]
        }
    ]
}