In [111]:
import os
import pandas as pd
import numpy as np

# Import Files

### List the sheets (tabs) in each Excel file

In [112]:
def list_tabs_in_excel_files(folder_path):
    """Collect the sheet names of every Excel workbook in a folder.

    Parameters
    ----------
    folder_path : str or os.PathLike
        Directory to scan (non-recursively) for ``.xlsx`` / ``.xls`` files.
        The extension is matched case-insensitively.

    Returns
    -------
    list of tuple
        One ``(file_name, sheet_names)`` pair per workbook that could be
        opened, ordered alphabetically by file name.

    Raises
    ------
    OSError
        If ``folder_path`` cannot be listed (the ``os.listdir`` call is not
        guarded).

    Notes
    -----
    A workbook that fails to open is reported with ``print`` and skipped, so
    one bad file does not abort the whole scan.
    """
    excel_files_and_tabs = []

    # os.listdir returns entries in arbitrary order; sort for a reproducible listing
    for file in sorted(os.listdir(folder_path)):
        # Check for both .xlsx and .xls files, regardless of letter case
        if file.lower().endswith(('.xlsx', '.xls')):
            file_path = os.path.join(folder_path, file)
            try:
                # The context manager closes the workbook handle as soon as the
                # sheet names are read (otherwise the file stays open/locked)
                with pd.ExcelFile(file_path) as xls:
                    excel_files_and_tabs.append((file, xls.sheet_names))
            except Exception as e:
                print(f"Error processing file {file}: {e}")

    return excel_files_and_tabs

In [113]:
# Folder holding the raw .xls / .xlsx workbooks (raw string, so the
# backslashes in the Windows path are taken literally)
folder_path = r'C:\Users\domin\Documents\GitHub\xai_budgeting\data\raw'

# One (file name, sheet names) pair per workbook found in that folder
tabs_in_files = list_tabs_in_excel_files(folder_path)

# Report every workbook together with its sheets
for entry in tabs_in_files:
    file_name, tabs = entry
    print(f"File: {file_name}, Tabs: {tabs}")


File: kdkf_2001-2002_raw.xls, Tabs: ['ZH', 'BE', 'LU', 'UR', 'SZ', 'OW', 'NW', 'GL', 'ZG', 'FR', 'SO', 'BS', 'BL', 'SH', 'AR', 'AI', 'SG', 'GR', 'AG', 'TG', 'TI', 'VD', 'VS', 'NE', 'GE', 'JU', 'CHF', 'CHD', 'Abschlusszahlen Budget 2001', 'Abschlusszahlen Rechnung 2001', 'Abschlusszahlen Budgets 2002', 'Abschlusszahlen Rechnung 2002', 'Übersicht Saldo L. R. ', 'Finanzierungsfehlbetrag', 'Selbstfinanzierungsgrad', 'Erläuterung Kennzahlen']
File: kdkf_2003-2004_raw.xls, Tabs: ['ZH', 'BE', 'LU', 'UR', 'SZ', 'OW', 'NW', 'GL', 'ZG', 'FR', 'SO', 'BS', 'BL', 'SH', 'AR', 'AI', 'SG', 'GR', 'AG', 'TG', 'TI', 'VD', 'VS', 'NE', 'GE', 'JU', 'CHF', 'CHD', 'AbsschlusszahlenBudget 2003', 'Abschlusszahlen Rechnung 03', 'Abschlusszahlen Budgets 2004', 'Abschlusszahlen Rechnung 2004', 'Übersicht Saldo L. R. ', 'Finanzierungsfehlbetrag', 'Selbstfinanzierungsgrad', 'Erläuterung Kennzahlen']
File: kdkf_2005-2006_raw.xls, Tabs: ['ZH', 'BE', 'LU', 'UR', 'SZ', 'OW', 'NW', 'GL', 'ZG', 'FR', 'SO', 'BS', 'BL', 'SH

### Load a test file and extract data

In [114]:
# Worksheet used as the test case, and the workbook it lives in
# (raw string, so the backslashes in the Windows path are taken literally)
sheet_name = 'HRM2_KT_AG'
file_path = r'C:\Users\domin\Documents\GitHub\xai_budgeting\data\raw\kdkf_2022_raw.xlsx'

# Read that single worksheet; the first three rows are skipped because,
# per the original author's note, they only contain header text
hrm2_kt_ag = pd.read_excel(
    file_path,
    sheet_name=sheet_name,
    skiprows=3,
)

# Preview the first rows of the loaded frame
hrm2_kt_ag.head()


Unnamed: 0.1,Unnamed: 0,Aargau,Unnamed: 2,Rechnung,Budget*,Anpassung Budget**,Budgetplus***,Rechnung.1,Budget
0,Referenz-ID,HRM 2,in 1 000 Franken,2021.0,2022.0,2022.0,2022.0,2022.0,2023.0
1,ER,ERFOLGSRECHNUNG,,,,,,,
2,HRM2_ER0030,30,Personalaufwand,1710857.49536,1824800.673,47003.566,1871804.239,1769407.06614,1913008.922
3,HRM2_ER0031,31,Sach- und übriger Betriebsaufwand,465105.56175,486337.94,66409.523,552747.463,471235.72459,540895.2865
4,HRM2_ER0314,davon 314,baulicher und betrieblicher Unterhalt,40709.52698,38381.362,1000.0,39381.362,36143.20582,38863.247


In [115]:
# Render floats in fixed-point notation with five decimal places when displayed
pd.set_option('display.float_format', '{:.5f}'.format)

In [116]:
# Promote the first data row into the header for the two leading columns;
# every other column keeps the name it already has.
# NOTE: this cell is not idempotent - each run consumes one more row.
leading_labels = hrm2_kt_ag.iloc[0, :2].tolist()
remaining_labels = hrm2_kt_ag.columns[2:].tolist()
hrm2_kt_ag.columns = leading_labels + remaining_labels

# The promoted row is no longer data: drop it and renumber the index from 0
hrm2_kt_ag = hrm2_kt_ag.iloc[1:].reset_index(drop=True)

# Preview the result
hrm2_kt_ag.head()


Unnamed: 0,Referenz-ID,HRM 2,Unnamed: 2,Rechnung,Budget*,Anpassung Budget**,Budgetplus***,Rechnung.1,Budget
0,ER,ERFOLGSRECHNUNG,,,,,,,
1,HRM2_ER0030,30,Personalaufwand,1710857.49536,1824800.673,47003.566,1871804.239,1769407.06614,1913008.922
2,HRM2_ER0031,31,Sach- und übriger Betriebsaufwand,465105.56175,486337.94,66409.523,552747.463,471235.72459,540895.2865
3,HRM2_ER0314,davon 314,baulicher und betrieblicher Unterhalt,40709.52698,38381.362,1000.0,39381.362,36143.20582,38863.247
4,HRM2_ER0318,davon 3180,Wertberichtigungen auf Forderungen,1524.78485,1550.0,0.0,1550.0,-1948.07897,1483.5


In [119]:
# Update the column headers of the DataFrame

# Years used to label the value columns (see rename_dict below for which
# column receives which year)
previous_year = 2021
start_year = 2022
target_year = 2023

# Mapping of old column names to new column names; the footnote markers
# ('*', '**', '***') and the pandas de-duplication suffix ('.1') are replaced
# by an explicit year
rename_dict = {
    'Unnamed: 2': 'in 1000 CHF',
    'Rechnung': f'Rechnung {previous_year}',
    'Budget*': f'Budget {start_year}',
    'Anpassung Budget**': f'Anpassung Budget {start_year}',
    'Budgetplus***': f'Budgetplus {start_year}',
    'Rechnung.1': f'Rechnung {start_year}',
    'Budget': f'Budget {target_year}'
}

# Rename by reassignment rather than inplace=True: rename() returns a new
# frame, which keeps the step chainable and avoids mutating an object that
# other names may still reference
hrm2_kt_ag = hrm2_kt_ag.rename(columns=rename_dict)

# Display the first few rows of the DataFrame with updated column names
hrm2_kt_ag.head()



Unnamed: 0,Referenz-ID,HRM 2,in 1000 CHF,Rechnung 2021,Budget 2022,Anpassung Budget 2022,Budgetplus 2022,Rechnung 2022,Budget 2023
0,ER,ERFOLGSRECHNUNG,,,,,,,
1,HRM2_ER0030,30,Personalaufwand,1710857.49536,1824800.673,47003.566,1871804.239,1769407.06614,1913008.922
2,HRM2_ER0031,31,Sach- und übriger Betriebsaufwand,465105.56175,486337.94,66409.523,552747.463,471235.72459,540895.2865
3,HRM2_ER0314,davon 314,baulicher und betrieblicher Unterhalt,40709.52698,38381.362,1000.0,39381.362,36143.20582,38863.247
4,HRM2_ER0318,davon 3180,Wertberichtigungen auf Forderungen,1524.78485,1550.0,0.0,1550.0,-1948.07897,1483.5


In [121]:
# Boolean mask: True where 'Referenz-ID' contains 'HRM2'
# (rows with a missing ID count as non-matching via na=False)
has_hrm2_id = hrm2_kt_ag['Referenz-ID'].str.contains('HRM2', na=False)

# Keep only the matching rows; the original index labels are preserved
hrm2_kt_ag = hrm2_kt_ag.loc[has_hrm2_id]

# Show the filtered frame to verify the result
hrm2_kt_ag

Unnamed: 0,Referenz-ID,HRM 2,in 1000 CHF,Rechnung 2021,Budget 2022,Anpassung Budget 2022,Budgetplus 2022,Rechnung 2022,Budget 2023
1,HRM2_ER0030,30,Personalaufwand,1710857.49536,1824800.67300,47003.56600,1871804.23900,1769407.06614,1913008.92200
2,HRM2_ER0031,31,Sach- und übriger Betriebsaufwand,465105.56175,486337.94000,66409.52300,552747.46300,471235.72459,540895.28650
3,HRM2_ER0314,davon 314,baulicher und betrieblicher Unterhalt,40709.52698,38381.36200,1000.00000,39381.36200,36143.20582,38863.24700
4,HRM2_ER0318,davon 3180,Wertberichtigungen auf Forderungen,1524.78485,1550.00000,0.00000,1550.00000,-1948.07897,1483.50000
5,HRM2_neu_ER0033,33,Abschreibungen VV,264389.83000,201998.18638,0.00000,201998.18638,176835.45829,162266.45182
...,...,...,...,...,...,...,...,...,...
205,HRM2_KZ18_12,HRM2-Tabelle 18.12,Gesamteinnahmen,5749681.16529,5424135.51185,0.00000,5468190.44485,5826118.76115,5389544.72900
206,HRM2_KZ18_15,HRM2-Tabelle 18.15,Laufende Ausgaben,5072349.35397,5066781.14300,0.00000,5223709.47600,5294503.70695,5361025.20850
207,HRM2_KZ18_11,HRM2-Tabelle 18.11,Gesamtausgaben,5390875.82542,5422015.95805,0.00000,5586768.01205,5584185.98046,5666084.24485
208,HRM2_KZEFRLZ,,Ergebnis Finanzrechnung Laufende Zahlungen,594052.34468,273521.21085,0.00000,160647.81085,452644.19696,-41249.59750
