# Testing

In this notebook you can explore and test the resulting database tables from our Dagster ETL process.
Here you can analyze


In [58]:
import duckdb
import polars as pl

# from IPython import display
import geopandas as gpd
from shapely import wkt
import plotly.express as px

### Create Database Connection


In [59]:
%load_ext sql
conn = duckdb.connect(database="../dsp-dagster/data_systems_project.duckdb")
%sql conn --alias duckdb

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [60]:
%sql SHOW ALL TABLES; # shows all available tables

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,data_systems_project,public,grond_data,"[geometry, id, locatie, amNummer, typeOnderzoe...","[VARCHAR, BIGINT, VARCHAR, VARCHAR, VARCHAR, V...",False
1,data_systems_project,public,service_areas,"[H_Verzorgingsgebied_ID, Verzorgingsgebied, LA...","[BIGINT, VARCHAR, DOUBLE, DOUBLE, VARCHAR]",False
2,data_systems_project,public,storm_deployments,"[Deployment_ID, Incident_ID, Vehicle_Type, Veh...","[BIGINT, BIGINT, VARCHAR, VARCHAR, VARCHAR, VA...",False
3,data_systems_project,public,storm_incidents,"[Incident_ID, Date, Incident_Starttime, Incide...","[BIGINT, TIMESTAMP_MS, TIME, TIME, TIME, DOUBL...",False
4,data_systems_project,public,tree_data,"[geometry, id, gbdBuurtId, typeBeheerderPlus, ...","[VARCHAR, BIGINT, VARCHAR, VARCHAR, VARCHAR, V...",False


##### Drop Tables


In [61]:
# %sql DROP TABLE joined.incident_deployments_vehicles_weather;
# %sql DROP TABLE joined.incident_deployments_vehicles_wijken;
# %sql DROP TABLE joined.incident_deployments_vehicles;
# %sql DROP TABLE joined.incidents_buurten;
# %sql DROP TABLE joined.buurten_trees;
# %sql DROP TABLE public.cbs_wijken;

##### Retrieve Tables as Polars DataFrame


In [62]:
cbs_buurten = conn.execute(
    """
    SELECT * FROM public.cbs_buurten """
).pl()

tree_data = conn.execute(
    """
    SELECT * FROM public.tree_data
    """
).pl()

grond_data = conn.execute(
    """
    SELECT * FROM public.tree_data
    """
).pl()


# Close the database connection
conn.close()

In [63]:
def convert_to_geodf(polars_df: pl.DataFrame) -> gpd.GeoDataFrame:
    """
    Convert a Polars DataFrame to a GeoDataFrame using WKB or WKT transformation.
    """

    # Convert Polars DataFrame to Pandas DataFrame
    df = polars_df.to_pandas()

    # Convert geometry strings back to geometry objects
    if "geometry" in df.columns:
        df["geometry"] = df["geometry"].apply(wkt.loads)

    else:
        raise ValueError("No 'geometry' column found in the DataFrame")

    # Convert back to GeoDataFrame
    return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")


def convert_to_polars(gdf: gpd.GeoDataFrame) -> pl.DataFrame:
    """
    Convert a GeoDataFrame to a Polars DataFrame, converting geometries to WKB strings.
    """
    # If geometry conversion is necessary, uncomment the following line
    # gdf["geometry"] = gdf["geometry"].apply(lambda geom: wkb_dumps(geom, hex=True))
    gdf["geometry"] = gdf["geometry"].apply(wkt.dumps)

    # Convert to Polars DataFrame
    return pl.from_pandas(gdf)

In [64]:
# Filter out Totals
cbs_buuurten = cbs_buurten.filter(pl.col("buurtnaam") != " ")
gdf_buurten = convert_to_geodf(cbs_buuurten)
gdf_buurten.head()

Unnamed: 0,geometry,buurtcode,buurtnaam,wijkcode,gemeentecode,gemeentenaam,indelingswijzigingWijkenEnBuurten,water,meestVoorkomendePostcode,dekkingspercentage,...,percentageUitMarokko,percentageUitNederlandseAntillenEnAruba,percentageUitSuriname,percentageUitTurkije,percentageOverigeNietwestersemigratieachtergrond,oppervlakteTotaalInHa,oppervlakteLandInHa,oppervlakteWaterInHa,jrstatcode,jaar
0,"MULTIPOLYGON (((4.82752 52.43741, 4.83173 52.4...",BU04792130,Het Eiland,WK047921,GM0479,Zaanstad,1,NEE,1506,1,...,1,1,4,3,5,28,13,15,2022BU04792130,2022
1,"MULTIPOLYGON (((4.77229 52.50569, 4.77270 52.5...",BU04796120,Noorderhoofdbuurt,WK047961,GM0479,Zaanstad,1,NEE,1561,1,...,2,0,2,3,4,46,41,5,2022BU04796120,2022
2,"MULTIPOLYGON (((4.85730 52.30633, 4.85730 52.3...",BU03620401,Stadshart,WK036204,GM0362,Amstelveen,1,NEE,1181,1,...,1,1,2,1,32,55,55,0,2022BU03620401,2022
3,"MULTIPOLYGON (((4.87899 52.37831, 4.87869 52.3...",BU03630604,Zaagpoortbuurt,WK036306,GM0363,Amsterdam,1,NEE,1015,1,...,4,2,5,2,14,4,4,0,2022BU03630604,2022
4,"MULTIPOLYGON (((4.86733 52.39319, 4.86742 52.3...",BU03631305,Spaarndammerbuurt Noordwest,WK036313,GM0363,Amsterdam,1,NEE,1013,1,...,9,1,9,3,13,12,12,0,2022BU03631305,2022


In [65]:
gdf_tree_data = convert_to_geodf(tree_data)
gdf_tree_data.head()

Unnamed: 0,geometry,id,gbdBuurtId,typeBeheerderPlus,boomhoogteklasseActueel,typeEigenaarPlus,jaarVanAanleg,soortnaam,stamdiameterklasse,standplaatsGedetailleerd,typeObject,typeSoortnaam,soortnaamKort,soortnaamTop
0,POINT (4.90467 52.33981),919933,3630980000301,Stadsdeel Zuid,e. 15 tot 18 m.,Gemeente Amsterdam,1948,Tilia americana,,,Boom niet vrij uitgroeiend,Bomen,Tilia,Linde (Tilia)
1,POINT (4.90269 52.34009),919934,3630980000301,Stadsdeel Zuid,c. 9 tot 12 m.,Gemeente Amsterdam,1978,Ulmus hollandica 'Vegeta',,Tegels,Boom niet vrij uitgroeiend,Bomen,Ulmus,Iep (Ulmus)
2,POINT (4.85521 52.33198),919935,3630980000311,Stadsdeel Zuid,c. 9 tot 12 m.,Gemeente Amsterdam,1990,Fraxinus excelsior 'Westhof's Glorie',"0,2 tot 0,3 m.",,Boom niet vrij uitgroeiend,Bomen,Fraxinus,Es (Fraxinus)
3,POINT (4.90367 52.34884),919936,3630980000297,Stadsdeel Zuid,b. 6 tot 9 m.,Gemeente Amsterdam,2002,Ulmus glabra 'Lutescens',,,Boom niet vrij uitgroeiend,Bomen,Ulmus,Iep (Ulmus)
4,POINT (4.87589 52.34106),919937,3630980000306,Stadsdeel Zuid,b. 6 tot 9 m.,Gemeente Amsterdam,1985,Quercus robur,,,Boom niet vrij uitgroeiend,Bomen,Quercus,Eik (Quercus)


In [66]:
# Plotting
fig = px.scatter_mapbox(
    gdf_tree_data,
    lat=gdf_tree_data.geometry.y,
    lon=gdf_tree_data.geometry.x,
    color="soortnaamTop",
    size_max=15,
    zoom=10,
)
fig.update_layout(mapbox_style="open-street-map")
fig.show()

In [70]:
# Do a spatial join
result = gdf_buurten.sjoin(gdf_tree_data)

# Convert tot Polars
result = convert_to_polars(result)
result.head()


Geometry column does not contain geometry.



geometry,buurtcode,buurtnaam,wijkcode,gemeentecode,gemeentenaam,indelingswijzigingWijkenEnBuurten,water,meestVoorkomendePostcode,dekkingspercentage,omgevingsadressendichtheid,stedelijkheidAdressenPerKm2,bevolkingsdichtheidInwonersPerKm2,aantalInwoners,mannen,vrouwen,percentagePersonen0Tot15Jaar,percentagePersonen15Tot25Jaar,percentagePersonen25Tot45Jaar,percentagePersonen45Tot65Jaar,percentagePersonen65JaarEnOuder,percentageOngehuwd,percentageGehuwd,percentageGescheid,percentageVerweduwd,aantalHuishoudens,percentageEenpersoonshuishoudens,percentageHuishoudensZonderKinderen,percentageHuishoudensMetKinderen,gemiddeldeHuishoudsgrootte,percentageWesterseMigratieachtergrond,percentageNietWesterseMigratieachtergrond,percentageUitMarokko,percentageUitNederlandseAntillenEnAruba,percentageUitSuriname,percentageUitTurkije,percentageOverigeNietwestersemigratieachtergrond,oppervlakteTotaalInHa,oppervlakteLandInHa,oppervlakteWaterInHa,jrstatcode,jaar,index_right,id,gbdBuurtId,typeBeheerderPlus,boomhoogteklasseActueel,typeEigenaarPlus,jaarVanAanleg,soortnaam,stamdiameterklasse,standplaatsGedetailleerd,typeObject,typeSoortnaam,soortnaamKort,soortnaamTop
str,str,str,str,str,str,i64,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,i64,i64,i64,str,str,str,str,i64,str,str,str,str,str,str,str
"""MULTIPOLYGON (…","""BU03639000""","""Gelderlandplei…","""WK036390""","""GM0363""","""Amsterdam""",1,"""NEE""","""1082""",1,4279,1,10383,4730,2195,2535,11,11,36,18,23,57,28,8,7,2865,58,24,18,1.7,33,28,3,1,3,2,18,46,46,1,"""2022BU03639000…",2022,538,920476,"""03630980000314…","""Stadsdeel Zuid…","""e. 15 tot 18 m…","""Gemeente Amste…",1991,"""Platanus hispa…",,,"""Boom niet vrij…","""Bomen""","""Platanus""","""Plataan (Plata…"
"""MULTIPOLYGON (…","""BU03639000""","""Gelderlandplei…","""WK036390""","""GM0363""","""Amsterdam""",1,"""NEE""","""1082""",1,4279,1,10383,4730,2195,2535,11,11,36,18,23,57,28,8,7,2865,58,24,18,1.7,33,28,3,1,3,2,18,46,46,1,"""2022BU03639000…",2022,522,920459,"""03630980000314…","""Stadsdeel Zuid…","""c. 9 tot 12 m.…","""Gemeente Amste…",1988,"""Platanus hispa…",,,"""Boom niet vrij…","""Bomen""","""Platanus""","""Plataan (Plata…"
"""MULTIPOLYGON (…","""BU03639000""","""Gelderlandplei…","""WK036390""","""GM0363""","""Amsterdam""",1,"""NEE""","""1082""",1,4279,1,10383,4730,2195,2535,11,11,36,18,23,57,28,8,7,2865,58,24,18,1.7,33,28,3,1,3,2,18,46,46,1,"""2022BU03639000…",2022,479,920415,"""03630980000314…","""Stadsdeel Zuid…","""c. 9 tot 12 m.…","""Gemeente Amste…",1988,"""Platanus hispa…",,,"""Boom niet vrij…","""Bomen""","""Platanus""","""Plataan (Plata…"
"""MULTIPOLYGON (…","""BU03639000""","""Gelderlandplei…","""WK036390""","""GM0363""","""Amsterdam""",1,"""NEE""","""1082""",1,4279,1,10383,4730,2195,2535,11,11,36,18,23,57,28,8,7,2865,58,24,18,1.7,33,28,3,1,3,2,18,46,46,1,"""2022BU03639000…",2022,561,920499,"""03630980000314…","""Stadsdeel Zuid…","""d. 12 tot 15 m…","""Gemeente Amste…",1970,"""Tilia europaea…",,,"""Boom niet vrij…","""Bomen""","""Tilia""","""Linde (Tilia)"""
"""MULTIPOLYGON (…","""BU03639000""","""Gelderlandplei…","""WK036390""","""GM0363""","""Amsterdam""",1,"""NEE""","""1082""",1,4279,1,10383,4730,2195,2535,11,11,36,18,23,57,28,8,7,2865,58,24,18,1.7,33,28,3,1,3,2,18,46,46,1,"""2022BU03639000…",2022,478,920414,"""03630980000314…","""Stadsdeel Zuid…","""c. 9 tot 12 m.…","""Gemeente Amste…",1970,"""Tilia europaea…",,"""Tegels""","""Boom niet vrij…","""Bomen""","""Tilia""","""Linde (Tilia)"""


In [None]:
result.group_by("buurtcode").agg()