# Enterprise Size

In [3]:
import pandas as pd
import string

# Sector letters A through U; one CSV file is read per letter.
sector_letters = list(string.ascii_uppercase)[:21]

# Dutch quarter labels found in the 'Kwartaal' column -> quarter number.
kwartaal_map = {
    'Eerste kwartaal': 1,
    'Tweede kwartaal': 2,
    'Derde kwartaal': 3,
    'Vierde kwartaal': 4
}

# Reference period: the latest quarter for which every sector has data.
# It is selected explicitly (rather than by row position) so that a file with
# a different number of rows cannot silently contribute another quarter.
TARGET_YEAR = 2025
TARGET_QUARTER = 4

# Size-class columns carried over from every sector file.
cols_to_keep = [
    '1 werkzaam persoon',
    '2 tot 10 werkzame personen',
    '10 tot 50 werkzame personen',
    '50 tot 250 werkzame personen'
]


def _load_sector_sizes(letter):
    """Return the size-class counts of one sector for the reference period.

    Parameters
    ----------
    letter : str
        Sector letter; selects "Enterprise_size/Aantal bedrijven naar omvang
        <letter>.csv" (semicolon-separated).

    Returns
    -------
    pandas.DataFrame
        A single-row frame holding the `cols_to_keep` columns plus a 'Sector'
        column set to `letter`.

    Raises
    ------
    ValueError
        If the file does not contain exactly one row for
        TARGET_YEAR / TARGET_QUARTER.
    """
    filename = f"Aantal bedrijven naar omvang {letter}.csv"
    df = pd.read_csv(f"Enterprise_size/{filename}", sep=';')

    # Compare on the first four digits so that an int (2025), a float (2025.0)
    # or a flagged label such as "2025*" all resolve to the same year.
    year = df['Jaar'].astype(str).str.extract(r'(\d{4})', expand=False)
    quarter = df['Kwartaal'].astype(str).str.strip().map(kwartaal_map)

    in_period = (year == str(TARGET_YEAR)) & (quarter == TARGET_QUARTER)
    n_matches = int(in_period.sum())
    if n_matches != 1:
        raise ValueError(
            f"Expected exactly one row for {TARGET_YEAR} Q{TARGET_QUARTER} "
            f"in '{filename}', found {n_matches}"
        )

    subset = df.loc[in_period, cols_to_keep].copy()
    subset['Sector'] = letter
    return subset


dfs = [_load_sector_sizes(letter) for letter in sector_letters]

# Stack the per-sector rows, translate the column labels to English and index
# the result by sector letter.
all_sectors = (
    pd.concat(dfs, ignore_index=True)
    .rename(columns={
        '1 werkzaam persoon': '1 worker',
        '2 tot 10 werkzame personen': '2-10 workers',
        '10 tot 50 werkzame personen': '10-50 workers',
        '50 tot 250 werkzame personen': '50-250 workers'
    })
    .set_index('Sector')
)

all_sectors

Unnamed: 0_level_0,1 worker,2-10 workers,10-50 workers,50-250 workers
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,42065,35865,1785,180
B,520,70,40,35
C,61955,16795,5915,2015
D,2535,305,70,35
E,1490,360,190,90
F,235160,30060,4565,775
G,199405,81470,11930,2255
H,52830,12050,2650,725
I,44295,29640,6470,470
J,110025,14595,3195,725


In [4]:
# Aggregated sector label -> the sector letters that are summed into it.
sector_groups = {
    "Agriculture, forestry and fishing (A)": ["A"],
    "Industry and energy (B-E)": ["B", "C", "D", "E"],
    "Construction (F)": ["F"],
    "Trade, transport and hospitality (G-I)": ["G", "H", "I"],
    "Information and communication (J)": ["J"],
    "Financial services (K)": ["K"],
    "Real estate (L)": ["L"],
    "Business services (M-N)": ["M", "N"],
    "Government and care (O-Q)": ["O", "P", "Q"],
    "Culture, recreation and other (R-U)": ["R", "S", "T", "U"]
}

# One summed Series per group, named after the group so that it becomes the
# row label when the list is turned into a DataFrame.
new_data = [
    all_sectors.loc[members].sum().rename(group_label)
    for group_label, members in sector_groups.items()
]
combined_df = pd.DataFrame(new_data)

# Totals: the '*TOTAL*' row is added first so that the '*TOTAL*' column
# (computed next, across columns) also carries the grand total.
combined_df.loc['*TOTAL*'] = combined_df.sum(axis=0)
combined_df['*TOTAL*'] = combined_df.sum(axis='columns')

# The business economy groups are the only ones represented in the
# enterprise-age data, so they also get their own subtotal row.
BE_sectors = [
    "Industry and energy (B-E)",
    "Construction (F)",
    "Trade, transport and hospitality (G-I)",
    "Information and communication (J)",
    "Real estate (L)",
    "Business services (M-N)",
]
business_economy_total = combined_df.loc[BE_sectors].sum()
combined_df.loc['Business Economy (B-N, excl. K)'] = business_economy_total

combined_df

Unnamed: 0,1 worker,2-10 workers,10-50 workers,50-250 workers,*TOTAL*
"Agriculture, forestry and fishing (A)",42065,35865,1785,180,79895
Industry and energy (B-E),66500,17530,6215,2175,92420
Construction (F),235160,30060,4565,775,270560
"Trade, transport and hospitality (G-I)",296530,123160,21050,3450,444190
Information and communication (J),110025,14595,3195,725,128540
Financial services (K),33300,5680,790,245,40015
Real estate (L),29275,7990,525,180,37970
Business services (M-N),539060,60940,9900,2225,612125
Government and care (O-Q),342305,32735,5970,1665,382675
"Culture, recreation and other (R-U)",237905,26250,2630,525,267310


# Enterprise Age


In [15]:
# Number of enterprises per age bracket.
# NOTE(review): brackets are presumably in years, and the source/period of
# these figures is not recorded here — confirm and document.
enterprise_age = {"0-3" : 348175,
                  "3-5" : 261480,
                  "5-10" : 389545,
                  "10+" : 618635}

# Display the total across all age brackets (the value shown as this cell's
# output), so the output is reproduced on a fresh run.
sum(enterprise_age.values())

1617835