In [10]:
from numpy import int64
import pandas as pd

# Read the population table from its CSV export. Only the first three
# columns are used and the first one becomes the index. 'regional_key' is
# parsed as str so that keys with leading zeros (e.g. '01001') are preserved.
df = pd.read_csv('./data/12411-01-01-4.csv',
                 sep=';', header=0, index_col=0, usecols=[0, 1, 2],
                 encoding="iso-8859-1", engine='python',
                 converters={'regional_key': str})

# Clean the frame in a single non-mutating chain:
#   1. coerce 'total' to numeric; cells that are not numbers become NaN
#   2. drop the rows whose 'total' could not be parsed
#   3. store the remaining totals as int64 (safe now that no NaN is left)
#   4. remove leading whitespace from the area names
# No downcast is requested in step 1: the column is cast to int64 in step 3
# anyway. Building new frames with assign() also avoids assigning columns
# onto the result of dropna(), which can raise SettingWithCopyWarning.
df = (
    df.assign(total=lambda frame: pd.to_numeric(frame['total'], errors='coerce'))
      .dropna(subset=['total'])
      .astype({'total': int64})
      .assign(area=lambda frame: frame['area'].str.lstrip())
)

def rearrange_city_types(df, column):
    """Move a trailing district-type suffix in front of the area name, in place.

    A value such as ``"Flensburg, kreisfreie Stadt"`` is rewritten to
    ``"Kreisfreie Stadt Flensburg"``. Only the text after the *rightmost*
    comma is inspected, so ``"Kiel, Landeshauptstadt, kreisfreie Stadt"``
    becomes ``"Kreisfreie Stadt Kiel, Landeshauptstadt"``.

    Values are left untouched when they contain no comma, when the text after
    the last comma names none of the known district types, or when they are
    not strings (e.g. missing values).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``column`` is overwritten with the rearranged names.
    column : str
        Name of the column holding the area names.

    Returns
    -------
    None
        ``df`` is modified in place.
    """
    # Substrings that identify the part after the last comma as a district
    # type. The check is case-sensitive, so "Landkreis" needs its own entry:
    # the "Kreis" marker (capital K) does not match it.
    district_markers = ("kreisfreie Stadt", "Landkreis", "Kreis")

    def rearrange(s):
        # Non-string cells (NaN/None) and comma-free names pass through as-is
        if not isinstance(s, str) or ',' not in s:
            return s

        # Split at the rightmost comma: "<name>, <suffix>"
        name, suffix = s.rsplit(',', 1)

        # Each marker needs its own membership test. A bare string literal in
        # an `or` chain is always truthy and would make this check a no-op.
        if not any(marker in suffix for marker in district_markers):
            return s

        # Strip surrounding whitespace, title-case both parts and put the
        # district type first
        return f"{suffix.strip().title()} {name.strip().title()}"

    # Apply the function to the specified column
    df[column] = df[column].apply(rearrange)

# Rearrange the names in the 'area' column of the population dataframe.
# The function overwrites the column on `df` itself and returns nothing.
rearrange_city_types(df, 'area')



In [11]:
# Display the cleaned population table (indexed by regional_key)
df

Unnamed: 0_level_0,area,total
regional_key,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Deutschland,83237124
01,Schleswig-Holstein,2922005
01001,Kreisfreie Stadt Flensburg,91113
01002,"Kreisfreie Stadt Kiel, Landeshauptstadt",246243
01003,"Kreisfreie Stadt Lübeck, Hansestadt",216277
...,...,...
16073,Kreis Saalfeld-Rudolstadt,100969
16074,Saale-Holzland-Kreis,82513
16075,Saale-Orla-Kreis,79030
16076,Kreis Greiz,96102


In [12]:
# Spot check: look up the population stored for regional key '09663'.
# The key is a string because regional_key was read with a str converter.
df.loc['09663', 'total']

126933

In [14]:
# Write the cleaned table to a semicolon-separated, UTF-8 encoded CSV file.
# NOTE: when reading this file back, parse regional_key as str again,
# otherwise keys with leading zeros (e.g. '01001') are likely to be read as integers.
df.to_csv('./data/kreise_data.csv', sep=';', encoding='utf-8')