In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Apply seaborn's "whitegrid" style to the figures created after this cell.
sns.set_style(style='whitegrid')

In [3]:
# Relative path of the merged CSV that the read_csv cell loads into `df`.
# Being relative, it is resolved against the kernel's current working directory.
file_path = "../data/final/merged_double_digit.csv"

In [4]:
# Load the merged dataset. The first line of the file supplies the column names
# and no column is promoted to the index, so rows keep the default integer index.
df = pd.read_csv(
    filepath_or_buffer=file_path,
    header=0,
    index_col=None,
)

In [5]:
# Preview the first five rows of the loaded data.
df.head(n=5)

In [6]:
# Show the dtype pandas inferred for each column of df.
df.dtypes

In [7]:
# Print df's structural summary (columns, non-null counts, dtypes, memory usage).
df.info()

In [8]:
# Display summary statistics for df's columns as computed by DataFrame.describe().
df.describe()

In [9]:
# Collect every row that exactly repeats an earlier row (the first occurrence is
# not counted as a duplicate) and display the result for inspection.
duplicate_rows_df = df.loc[df.duplicated(keep='first')]
duplicate_rows_df

In [10]:
# Inspect 10 randomly drawn rows. The fixed seed makes the same rows come back on
# every Restart & Run All, so the displayed output is reproducible.
df.sample(n=10, random_state=42)

In [11]:
# Total 'Realized' and 'Budget y' for every (Year, Region, Acc-ID) combination,
# returned as a flat frame with the grouping keys as ordinary columns.
agg_year_region_df = (
    df
    .groupby(['Year', 'Region', 'Acc-ID'])[['Realized', 'Budget y']]
    .sum()
    .reset_index()
)

In [12]:
# Plot realized vs. budgeted totals per year, one figure per region.
#
# agg_year_region_df holds one row per (Year, Region, Acc-ID), so a region has
# several rows for the same year. Plotting those rows directly draws a line that
# zig-zags through every account at each year; the account-level rows are
# therefore summed to a single total per year before plotting.
for region in agg_year_region_df['Region'].unique():
    region_df = agg_year_region_df[agg_year_region_df['Region'] == region]

    # groupby sorts its keys, so the resulting rows (and the line) run in year order.
    region_yearly_df = region_df.groupby('Year', as_index=False)[['Realized', 'Budget y']].sum()

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(region_yearly_df['Year'], region_yearly_df['Realized'], label='Realized', marker='o')
    ax.plot(region_yearly_df['Year'], region_yearly_df['Budget y'], label='Budget y', marker='x')

    ax.set_title(f"Realized vs Budget y Over Time for Region: {region}")
    ax.set_xlabel("Year")
    ax.set_ylabel("CHF")
    ax.legend()
    plt.show()

    # Release the figure once shown so figures do not pile up across regions.
    plt.close(fig)

In [13]:
# Total 'Slack' for every (Year, Region) pair, with the keys kept as columns so
# the frame can be passed straight to seaborn.
agg_region_slack_df = (
    df
    .groupby(['Year', 'Region'])['Slack']
    .agg('sum')
    .reset_index()
)

In [16]:
# Total slack per year, drawn as one line per region.
# pyplot and seaborn are already imported in the notebook's first cell, so they
# are not re-imported here.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=agg_region_slack_df, x='Year', y='Slack', hue='Region', marker='o', ax=ax)

ax.set_title("Total Slack of Every Region for Each Year", fontsize=16)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("Total Slack", fontsize=12)

# Print plain numbers on the y-axis instead of scientific notation.
ax.ticklabel_format(style='plain', axis='y')

# Anchor the legend just outside the right edge of the axes so it does not
# cover the lines.
ax.legend(title='Region', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)

plt.show()


In [17]:
# Keep only the rows of account 30, which the chart titles below label as
# personnel expenses.
personel_expenses_df = df[df['Acc-ID'] == 30]

# Yearly totals of both measures, summed over every region in the subset.
grouped_data = (
    personel_expenses_df
    .groupby('Year')[['Realized', 'Budget y']]
    .sum()
    .reset_index()
)

# Tick labels and bar heights.
years = grouped_data['Year']
realized_values = grouped_data['Realized']
budget_values = grouped_data['Budget y']

bar_width = 0.35

# One slot per year: the 'Realized' bar sits on the slot, the 'Budget y' bar one
# bar-width to its right, and the tick label is centred between the two.
x = range(len(years))
budget_positions = [slot + bar_width for slot in x]
tick_positions = [slot + bar_width / 2 for slot in x]

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(x, realized_values, width=bar_width, label='Realized', align='center')
ax.bar(budget_positions, budget_values, width=bar_width, label='Budget y', align='center')

# Axis labels, year ticks and title.
ax.set_xlabel('Year')
ax.set_xticks(tick_positions)
ax.set_xticklabels(years)
ax.set_ylabel('CHF')
ax.set_title('Comparison of Realized and Budget y by Year for Cantons of Personnel Expenses')

ax.legend()

# Tighten the layout, then render the figure.
fig.tight_layout()
plt.show()

In [18]:
# Per-canton version of the personnel-expenses chart: one grouped bar chart of
# yearly 'Realized' vs 'Budget y' totals for every region in personel_expenses_df.

# Width of each bar; identical for every canton, so it is defined once up front.
bar_width = 0.35

# Get unique cantons (regions). Rows without a region are skipped: a missing
# value never matches the equality filter below, so it would only produce an
# empty chart.
cantons = personel_expenses_df['Region'].dropna().unique()

# Create a bar plot for each canton separately
for canton in cantons:
    canton_data = personel_expenses_df[personel_expenses_df['Region'] == canton]

    # Group the data by year and calculate the sum of 'Realized' and 'Budget y' for each year
    grouped_data = canton_data.groupby('Year')[['Realized', 'Budget y']].sum().reset_index()

    # Tick labels and bar heights
    years = grouped_data['Year']
    realized_values = grouped_data['Realized']
    budget_values = grouped_data['Budget y']

    # One slot per year: the 'Realized' bar sits on the slot, the 'Budget y' bar
    # one bar-width to its right, and the tick label is centred between the two.
    x = np.arange(len(years))

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(x, realized_values, width=bar_width, label='Realized', align='center')
    ax.bar(x + bar_width, budget_values, width=bar_width, label='Budget y', align='center')

    # Set x-axis labels and title
    ax.set_xlabel('Year')
    ax.set_xticks(x + bar_width / 2)
    ax.set_xticklabels(years)
    ax.set_ylabel('CHF')
    ax.set_title(f'Comparison of Realized and Budget y by Year for {canton} in Personnel Expenses')

    # Add a legend
    ax.legend()

    # Show the plot for each canton separately
    fig.tight_layout()
    plt.show()

    # Release the figure once shown so one figure per canton does not stay in memory.
    plt.close(fig)