In [24]:
import pandas as pd

2022 source file spec

In [25]:
# Location of the raw 2022 workbook on the local machine
file_path = r'C:\Users\domin\Documents\GitHub\xai_budgeting\data\raw\kdkf_2022_raw.xlsx'

# Open the workbook once so that its sheet names can be listed
xls = pd.ExcelFile(file_path)

# Years used to label the value columns of the loaded sheets
previous_year, start_year, target_year = 2021, 2022, 2023

# Render floats with five fixed decimal places in displayed frames
pd.set_option('display.float_format', '{:.5f}'.format)

Master Loop

In [26]:
# Build the master table from every canton sheet ("HRM2_KT_*") of the workbook.
#
# The per-sheet frames are collected in a list and concatenated once at the
# end: growing a DataFrame with pd.concat inside the loop re-copies all rows
# loaded so far on every iteration.
canton_frames = []

# Generic workbook headers -> year-specific column names. The mapping is the
# same for every sheet, so it is built once. Headers that are not listed here
# are left unchanged by rename().
rename_dict = {
    'Unnamed: 2': 'in 1000 CHF',
    'Rechnung': f'Rechnung {previous_year}',
    'Budget*': f'Budget {start_year}',
    'Anpassung Budget**': f'Anpassung Budget {start_year}',
    'Budgetplus***': f'Budgetplus {start_year}',
    'Rechnung.1': f'Rechnung {start_year}',
    'Budget': f'Budget {target_year}'
}

# Loop over each sheet in the Excel file
for sheet_name in xls.sheet_names:
    # Only sheets whose name starts with "HRM2_KT_" are loaded
    if not sheet_name.startswith("HRM2_KT_"):
        continue

    # Read from the already opened ExcelFile instead of re-opening the
    # workbook by path for every sheet; the first three rows are skipped
    df = pd.read_excel(xls, sheet_name=sheet_name, skiprows=3)

    # The first data row holds the names of the first two columns; combine
    # them with the remaining parsed headers, then drop that row
    df.columns = df.iloc[0, :2].tolist() + df.columns[2:].tolist()
    df = df.iloc[1:].reset_index(drop=True)

    # Apply the year-specific header names
    df = df.rename(columns=rename_dict)

    # Keep only rows whose 'Referenz-ID' contains 'HRM2' (missing IDs are
    # dropped). The explicit copy makes the column assignment below operate
    # on an independent frame rather than on a slice of the unfiltered one,
    # which can trigger SettingWithCopyWarning.
    df = df[df['Referenz-ID'].str.contains('HRM2', na=False)].copy()

    # Tag every row with the sheet it was read from
    df['Kanton'] = sheet_name

    canton_frames.append(df)

# Single concatenation; fall back to an empty frame when no sheet matched
# (pd.concat raises ValueError on an empty list)
master_2022 = (
    pd.concat(canton_frames, ignore_index=True) if canton_frames else pd.DataFrame()
)

# Display the first few rows of the master DataFrame
master_2022.head()

Unnamed: 0,Referenz-ID,HRM 2,in 1000 CHF,Rechnung 2021,Budget 2022,Anpassung Budget 2022,Budgetplus 2022,Rechnung 2022,Budget 2023,Kanton,Compte,Adaptation Budget**,Compte.1
0,HRM2_ER0030,30,Personalaufwand,26152620.02857,26630564.07622,308222.46998,26938786.5462,26985594.56335,28032655.05247,HRM2_KT_alle,,,
1,HRM2_ER0031,31,Sach- und übriger Betriebsaufwand,10287100.48293,10415701.9807,648662.0455,11064364.0262,10806538.02481,10798381.31853,HRM2_KT_alle,,,
2,HRM2_ER0314,davon 314,baulicher und betrieblicher Unterhalt,1138027.15383,1111854.518,19209.295,1131063.813,1118726.4112,1149341.785,HRM2_KT_alle,,,
3,HRM2_ER0318,davon 3180,Wertberichtigungen auf Forderungen,753.92456,52483.266,3469.8,55953.066,108041.00892,60863.81,HRM2_KT_alle,,,
4,HRM2_neu_ER0033,33,Abschreibungen VV,2829988.7358,2898268.65011,5258.4,2903527.05011,2839873.69033,2926517.17731,HRM2_KT_alle,,,


Translating FR columns to DE: filling the German columns from their French-labelled counterparts

In [27]:
# NOTE(review): this inspection code is disabled, yet the cell still shows a
# stored table of HRM2_KT_VD rows — presumably from an earlier run. Re-enable
# it or clear the output so the notebook reproduces on Restart & Run All.
# # Display rows where the column 'Kanton' is 'HRM2_KT_VD'
# filtered_rows = master_2022[master_2022['Kanton'] == 'HRM2_KT_VD']

# # Display the filtered rows
# filtered_rows.head()

Unnamed: 0,Referenz-ID,HRM 2,in 1000 CHF,Rechnung 2021,Budget 2022,Anpassung Budget 2022,Budgetplus 2022,Rechnung 2022,Budget 2023,Kanton,Compte,Adaptation Budget**,Compte.1
4531,HRM2_ER0030,30,Charges de personnel,,2684390.2,,2722671.4,,2780493.2,HRM2_KT_VD,2611669.4,38281.2,2669600.8
4532,HRM2_ER0031,31,Charges de biens et services et autres charges...,,755688.7,,800717.4,,762133.1,HRM2_KT_VD,719110.5,45028.7,749076.9
4533,HRM2_ER0314,de cela 314,Gros entretien et entretien courant,,70895.7,,77325.6,,73206.9,HRM2_KT_VD,76538.7,6429.9,75891.4
4534,HRM2_ER0318,de cela 3180,Réévaluations sur créances,,0.0,,0.0,,9.0,HRM2_KT_VD,-21734.0,0.0,-10857.0
4535,HRM2_neu_ER0033,33,Amortissements du patrimoine administratif,,188613.6,,188613.6,,217926.5,HRM2_KT_VD,158472.0,0.0,184990.8


In [28]:
# Fill gaps in the German-labelled value columns of 'master_2022' from the
# French-labelled columns that hold the same figures.
#
# For each pair, a missing value in the German column is replaced by the value
# of the French column in the same row; values that are already present are
# never overwritten, and rows where both are missing stay missing.
# The column names are derived from the year variables so that they stay in
# step with the headers assigned when the sheets were loaded.
fr_to_de_columns = {
    'Compte': f'Rechnung {previous_year}',
    'Adaptation Budget**': f'Anpassung Budget {start_year}',
    'Compte.1': f'Rechnung {start_year}',
}

for fr_col, de_col in fr_to_de_columns.items():
    # Without this guard a missing French column would raise a KeyError;
    # in that case there is nothing to fill from.
    if fr_col not in master_2022.columns:
        continue

    # Vectorized equivalent of the row-wise "take the French value only when
    # the German one is NaN" rule; fillna aligns both columns on the index.
    master_2022[de_col] = master_2022[de_col].fillna(master_2022[fr_col])

# Display the first few rows of the updated DataFrame
master_2022.head()


Unnamed: 0,Referenz-ID,HRM 2,in 1000 CHF,Rechnung 2021,Budget 2022,Anpassung Budget 2022,Budgetplus 2022,Rechnung 2022,Budget 2023,Kanton,Compte,Adaptation Budget**,Compte.1
0,HRM2_ER0030,30,Personalaufwand,26152620.02857,26630564.07622,308222.46998,26938786.5462,26985594.56335,28032655.05247,HRM2_KT_alle,,,
1,HRM2_ER0031,31,Sach- und übriger Betriebsaufwand,10287100.48293,10415701.9807,648662.0455,11064364.0262,10806538.02481,10798381.31853,HRM2_KT_alle,,,
2,HRM2_ER0314,davon 314,baulicher und betrieblicher Unterhalt,1138027.15383,1111854.518,19209.295,1131063.813,1118726.4112,1149341.785,HRM2_KT_alle,,,
3,HRM2_ER0318,davon 3180,Wertberichtigungen auf Forderungen,753.92456,52483.266,3469.8,55953.066,108041.00892,60863.81,HRM2_KT_alle,,,
4,HRM2_neu_ER0033,33,Abschreibungen VV,2829988.7358,2898268.65011,5258.4,2903527.05011,2839873.69033,2926517.17731,HRM2_KT_alle,,,


In [29]:
# NOTE(review): this inspection code is disabled, yet the cell still shows a
# stored table of HRM2_KT_VD rows — presumably from an earlier run. Re-enable
# it or clear the output so the notebook reproduces on Restart & Run All.
# # Display rows where the column 'Kanton' is 'HRM2_KT_VD'
# filtered_rows = master_2022[master_2022['Kanton'] == 'HRM2_KT_VD']

# # Display the filtered rows
# filtered_rows.head()

Unnamed: 0,Referenz-ID,HRM 2,in 1000 CHF,Rechnung 2021,Budget 2022,Anpassung Budget 2022,Budgetplus 2022,Rechnung 2022,Budget 2023,Kanton,Compte,Adaptation Budget**,Compte.1
4531,HRM2_ER0030,30,Charges de personnel,2611669.4,2684390.2,38281.2,2722671.4,2669600.8,2780493.2,HRM2_KT_VD,2611669.4,38281.2,2669600.8
4532,HRM2_ER0031,31,Charges de biens et services et autres charges...,719110.5,755688.7,45028.7,800717.4,749076.9,762133.1,HRM2_KT_VD,719110.5,45028.7,749076.9
4533,HRM2_ER0314,de cela 314,Gros entretien et entretien courant,76538.7,70895.7,6429.9,77325.6,75891.4,73206.9,HRM2_KT_VD,76538.7,6429.9,75891.4
4534,HRM2_ER0318,de cela 3180,Réévaluations sur créances,-21734.0,0.0,0.0,0.0,-10857.0,9.0,HRM2_KT_VD,-21734.0,0.0,-10857.0
4535,HRM2_neu_ER0033,33,Amortissements du patrimoine administratif,158472.0,188613.6,0.0,188613.6,184990.8,217926.5,HRM2_KT_VD,158472.0,0.0,184990.8
