In [None]:
import pandas as pd
import json

In [None]:
# Read the postal-district records (UTF-8 encoded JSON) from the backend data folder.
with open("../../web/backend/data/berlin_postal_districts.json", encoding="utf-8") as f:
    data = json.loads(f.read())

In [None]:
# The file may be either a bare list of records or a GeoJSON-style
# FeatureCollection whose records sit under the "features" key. Normalise both
# shapes to a plain list so that len() counts records rather than top-level
# keys and features[0] is always the first record.
if isinstance(data, dict) and "features" in data:
    features = data["features"]
else:
    features = data
print(f"Number of features: {len(features)}")

Number of features: 225


In [None]:
# Drop the bulky geometry entries ("postal_geometry" and
# "neighborhood_geometry") from every feature so the remaining fields are
# readable when inspected. The feature dicts are modified in place.
for feature in features:
    for geometry_key in ("postal_geometry", "neighborhood_geometry"):
        if geometry_key in feature:
            del feature[geometry_key]


# Remember the geometries look like this:
# "postal_geometry": {
#       "type": "MultiPolygon",
#       "coordinates": [
#         [
#           [
#             [
#               389373.0621017474,
#               5821922.280477124
#             ],
#             [
#               389357.215288683,
#               5821947.676252604
#             ],
#             [
#               389333.0156890175,
#               5821930.752676883...

In [12]:
# Display the first record to see which keys each entry carries and what
# their values look like.
features[0]


{'postal_code': '10115',
 'district': 'Mitte',
 'neighborhood': 'Mitte',
 'east_west': 'east',
 'postal_area_km2': 2.308054731806728,
 'neighborhood_area_km2': 10.638748,
 'area_ratio': 0.2169479652875252,
 'centroid_x': 390425.4069628683,
 'centroid_y': 5821466.267325718}

In [13]:
# Build a DataFrame for easier analysis. GeoJSON-style entries keep their
# attributes under "properties"; flat records are passed through unchanged.
df = pd.DataFrame(
    [entry["properties"] for entry in features]
    if "properties" in features[0]
    else features
)
df.head()



Unnamed: 0,postal_code,district,neighborhood,east_west,postal_area_km2,neighborhood_area_km2,area_ratio,centroid_x,centroid_y
0,10115,Mitte,Mitte,east,2.308055,10.638748,0.216948,390425.406963,5821466.0
1,10117,Friedrichshain-Kreuzberg,Kreuzberg,west,3.439319,10.339178,0.332649,390565.623141,5819761.0
2,10119,Pankow,Prenzlauer Berg,east,0.974947,10.969869,0.088875,391826.784028,5821237.0
3,10178,Friedrichshain-Kreuzberg,Friedrichshain,east,1.857715,9.912352,0.187414,392096.474061,5820211.0
4,10179,Friedrichshain-Kreuzberg,Kreuzberg,west,2.199622,10.339178,0.212746,392529.179825,5819186.0


In [14]:
# Summarize column names, non-null counts, dtypes and memory usage;
# a quick way to spot columns with missing values.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225 entries, 0 to 224
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   postal_code            225 non-null    object 
 1   district               225 non-null    object 
 2   neighborhood           225 non-null    object 
 3   east_west              225 non-null    object 
 4   postal_area_km2        225 non-null    float64
 5   neighborhood_area_km2  212 non-null    float64
 6   area_ratio             212 non-null    float64
 7   centroid_x             225 non-null    float64
 8   centroid_y             225 non-null    float64
dtypes: float64(5), object(4)
memory usage: 15.9+ KB


In [15]:
# Summary statistics for every column, not only the numeric ones:
# include="all" adds count/unique/top/freq for the text columns next to the
# numeric mean/std/quantiles (statistics that do not apply to a column are NaN).
df.describe(include="all")


Unnamed: 0,postal_code,district,neighborhood,east_west,postal_area_km2,neighborhood_area_km2,area_ratio,centroid_x,centroid_y
count,225.0,225,225,225,225.0,212.0,212.0,225.0,225.0
unique,225.0,13,81,2,,,,,
top,10115.0,Treptow-Köpenick,Lichterfelde,west,,,,,
freq,1.0,31,13,131,,,,,
mean,,,,,10.913758,11.85896,0.764735,391221.775171,5818376.0
std,,,,,24.441203,7.176074,1.744525,9539.565636,8578.913
min,,,,,0.445718,1.090465,0.042044,367219.91602,5793776.0
25%,,,,,1.707014,7.092692,0.180788,385465.301257,5813449.0
50%,,,,,3.454044,10.441907,0.327712,390331.213819,5818308.0
75%,,,,,8.117784,13.613629,0.735632,395919.330625,5823195.0


In [16]:
# Print the distinct postal codes; nothing is printed if the column is absent.
if "postal_code" in df:
    print(df.loc[:, "postal_code"].unique())

['10115' '10117' '10119' '10178' '10179' '10243' '10245' '10247' '10249'
 '10315' '10317' '10318' '10319' '10365' '10367' '10369' '10405' '10407'
 '10409' '10435' '10437' '10439' '10551' '10553' '10555' '10557' '10559'
 '10585' '10587' '10589' '10623' '10625' '10627' '10629' '10707' '10709'
 '10711' '10713' '10715' '10717' '10719' '10777' '10779' '10781' '10783'
 '10785' '10787' '10789' '10823' '10825' '10827' '10829' '10961' '10963'
 '10965' '10967' '10969' '10997' '10999' '12043' '12045' '12047' '12049'
 '12051' '12053' '12055' '12057' '12059' '12099' '12101' '12103' '12105'
 '12107' '12109' '12157' '12159' '12161' '12163' '12165' '12167' '12169'
 '12203' '12205' '12207' '12209' '12247' '12249' '12277' '12279' '12305'
 '12307' '12309' '12347' '12349' '12351' '12353' '12355' '12357' '12359'
 '12435' '12437' '12439' '12459' '12487' '12489' '12524' '12526' '12527'
 '12529' '12555' '12557' '12559' '12587' '12589' '12619' '12621' '12623'
 '12627' '12629' '12679' '12681' '12683' '12685' '1

In [17]:
# Print the distinct district names; nothing is printed if the column is absent.
if "district" in df:
    print(df.loc[:, "district"].unique())

['Mitte' 'Friedrichshain-Kreuzberg' 'Pankow' 'Treptow-Köpenick'
 'Lichtenberg' 'Charlottenburg-Wilmersdorf' 'Tempelhof-Schöneberg'
 'Neukölln' 'Steglitz-Zehlendorf' 'Marzahn-Hellersdorf' 'Reinickendorf'
 'Spandau' 'Brandenburg']


In [18]:
# Print the distinct neighborhood names; nothing is printed if the column is absent.
if "neighborhood" in df:
    print(df.loc[:, "neighborhood"].unique())

['Mitte' 'Kreuzberg' 'Prenzlauer Berg' 'Friedrichshain' 'Plänterwald'
 'Lichtenberg' 'Rummelsburg' 'Köpenick' 'Moabit' 'Charlottenburg-Nord'
 'Tiergarten' 'Charlottenburg' 'Halensee' 'Wilmersdorf' 'Schmargendorf'
 'Friedenau' 'Schöneberg' 'Tempelhof' 'Neukölln' 'Alt-Treptow' 'Britz'
 'Lankwitz' 'Buckow' 'Steglitz' 'Dahlem' 'Lichterfelde' 'Lichtenrade'
 'Rudow' 'Altglienicke' 'Johannisthal' 'Baumschulenweg' 'Adlershof'
 'Grünau' 'Bohnsdorf' 'Schmöckwitz' 'Müggelheim' 'Kaulsdorf' 'Mahlsdorf'
 'Hellersdorf' 'Marzahn' 'Friedrichsfelde' 'Biesdorf' 'Weißensee'
 'Malchow' 'Fennpfuhl' 'Stadtrandsiedlung Malchow' 'Pankow'
 'Niederschönhausen' 'Französisch Buchholz' 'Wedding' 'Reinickendorf'
 'Wittenau' 'Tegel' 'Blankenfelde' 'Haselhorst' 'Wilhelmstadt' 'Spandau'
 'Staaken' 'Gatow' 'Westend' 'Grunewald' 'Wannsee' 'Zehlendorf'
 'Potsdam-Süd' 'Strausberg' 'Petershagen-Eggersdorf-Fredersdorf'
 'Rüdersdorf' 'Rahnsdorf' 'Königs Wusterhausen' 'Wildau'
 'Blankenfelde-Mahlow' 'Rangsdorf-Groß Machnow' 'R