# Testing

In this notebook you can explore, test, and analyze the database tables produced by our Dagster ETL process.


In [37]:
import duckdb
import polars as pl

# from IPython import display
import geopandas as gpd
from shapely import wkt

### Create Database Connection


In [38]:
%load_ext sql
conn = duckdb.connect(database="../dsp-dagster/data_systems_project.duckdb")
%sql conn --alias duckdb

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [39]:
%sql SHOW ALL TABLES; # shows all available tables

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,data_systems_project,public,fire_stations_and_vehicles,"[Fire_Station, Vehicle, Vehicle_Type]","[VARCHAR, VARCHAR, VARCHAR]",False
1,data_systems_project,public,grond_data,"[geometry, id, locatie, amNummer, typeOnderzoe...","[VARCHAR, BIGINT, VARCHAR, VARCHAR, VARCHAR, V...",False
2,data_systems_project,public,service_areas,"[H_Verzorgingsgebied_ID, Verzorgingsgebied, LA...","[BIGINT, VARCHAR, DOUBLE, DOUBLE, VARCHAR]",False
3,data_systems_project,public,storm_deployments,"[Deployment_ID, Incident_ID, Vehicle_Type, Veh...","[BIGINT, BIGINT, VARCHAR, VARCHAR, VARCHAR, VA...",False
4,data_systems_project,public,storm_incidents,"[Incident_ID, Date, Incident_Starttime, Incide...","[BIGINT, TIMESTAMP_MS, TIME, TIME, TIME, DOUBL...",False
5,data_systems_project,public,tree_data,"[geometry, id, gbdBuurtId, typeBeheerderPlus, ...","[VARCHAR, BIGINT, VARCHAR, VARCHAR, VARCHAR, V...",False


##### Drop Tables


In [40]:
# Destructive maintenance statements; presumably left commented out on purpose.
# Uncomment only the line(s) for the table(s) you intend to drop, run the cell,
# then comment them out again.
# NOTE(review): a plain DROP TABLE fails with a Catalog Error when the table is
# absent (the saved output shows this for buurten_incidents_trees); consider
# DROP TABLE IF EXISTS if this cell should be safely re-runnable.
# %sql DROP TABLE joined.incident_deployments_vehicles_weather;
# %sql DROP TABLE joined.incident_deployments_vehicles_wijken;
# %sql DROP TABLE joined.incident_deployments_vehicles;
# %sql DROP TABLE joined.incidents_buurten;
# %sql DROP TABLE joined.buurten_trees;
# %sql DROP TABLE cleaned.cleaned_storm_incidents;
# %sql DROP TABLE joined.buurten_incidents_trees;

RuntimeError: If using snippets, you may pass the --with argument explicitly.
For more details please refer: https://jupysql.ploomber.io/en/latest/compose.html#with-argument


Original error message from DB driver:
Catalog Error: Table with name buurten_incidents_trees does not exist!
Did you mean "buurten_incidents"?

If you need help solving this issue, send us a message: https://ploomber.io/community


##### Retrieve Tables as Polars DataFrame


In [36]:
# SQL for the two joined tables loaded below.
BUURTEN_TREES_SQL = """
    SELECT * FROM joined.buurten_trees
    """
BUURTEN_INCIDENTS_SQL = """
    SELECT * FROM joined.buurten_incidents
    """

# Run each query and materialize its result as a Polars DataFrame.
buurten_trees = conn.execute(BUURTEN_TREES_SQL).pl()
buurten_incidents = conn.execute(BUURTEN_INCIDENTS_SQL).pl()

# Both frames are in memory now, so release the database connection.
# NOTE: `conn` is unusable after this; re-run the connection cell before
# issuing further queries or re-running this cell.
conn.close()

In [25]:
def convert_to_geodf(
    polars_df: pl.DataFrame, crs: str = "EPSG:4326"
) -> gpd.GeoDataFrame:
    """
    Convert a Polars DataFrame with WKT geometry strings to a GeoDataFrame.

    Args:
        polars_df: Frame containing a "geometry" column of WKT strings.
        crs: Coordinate reference system assigned to the resulting
            GeoDataFrame. Defaults to "EPSG:4326", the value that was
            previously hard-coded, so existing calls behave the same.

    Returns:
        A GeoDataFrame with the same columns, whose "geometry" column holds
        the geometry objects parsed from the WKT strings.

    Raises:
        ValueError: If the frame has no "geometry" column.
    """
    # Validate on the Polars frame so a wrong input fails before paying for
    # the full Polars -> pandas conversion.
    if "geometry" not in polars_df.columns:
        raise ValueError("No 'geometry' column found in the DataFrame")

    df = polars_df.to_pandas()

    # Parse each WKT string back into a shapely geometry object.
    df["geometry"] = df["geometry"].apply(wkt.loads)

    return gpd.GeoDataFrame(df, geometry="geometry", crs=crs)


def convert_to_polars(gdf: gpd.GeoDataFrame) -> pl.DataFrame:
    """
    Convert a GeoDataFrame to a Polars DataFrame, serializing geometries to WKT.

    The input frame is left untouched: the WKT conversion is applied to a
    copy, so the caller's "geometry" column keeps its geometry objects.

    Args:
        gdf: GeoDataFrame with a "geometry" column of geometry objects.

    Returns:
        A Polars DataFrame with the same columns, where "geometry" holds the
        WKT string of each geometry.
    """
    # Work on a copy; assigning the strings directly on `gdf` would overwrite
    # the caller's geometry column with plain text.
    converted = gdf.copy()
    converted["geometry"] = converted["geometry"].apply(wkt.dumps)

    return pl.from_pandas(converted)

### Buurten Incidents


In [29]:
# List the column names, then preview the first five rows.
incident_columns = buurten_incidents.columns
print(incident_columns)
buurten_incidents.head(5)

['geometry', 'buurtcode', 'buurtnaam', 'wijkcode', 'gemeentecode', 'gemeentenaam', 'indelingswijzigingWijkenEnBuurten', 'water', 'meestVoorkomendePostcode', 'dekkingspercentage', 'omgevingsadressendichtheid', 'stedelijkheidAdressenPerKm2', 'bevolkingsdichtheidInwonersPerKm2', 'aantalInwoners', 'mannen', 'vrouwen', 'percentagePersonen0Tot15Jaar', 'percentagePersonen15Tot25Jaar', 'percentagePersonen25Tot45Jaar', 'percentagePersonen45Tot65Jaar', 'percentagePersonen65JaarEnOuder', 'percentageOngehuwd', 'percentageGehuwd', 'percentageGescheid', 'percentageVerweduwd', 'aantalHuishoudens', 'percentageEenpersoonshuishoudens', 'percentageHuishoudensZonderKinderen', 'percentageHuishoudensMetKinderen', 'gemiddeldeHuishoudsgrootte', 'percentageWesterseMigratieachtergrond', 'percentageNietWesterseMigratieachtergrond', 'percentageUitMarokko', 'percentageUitNederlandseAntillenEnAruba', 'percentageUitSuriname', 'percentageUitTurkije', 'percentageOverigeNietwestersemigratieachtergrond', 'oppervlakteTot

geometry,buurtcode,buurtnaam,wijkcode,gemeentecode,gemeentenaam,indelingswijzigingWijkenEnBuurten,water,meestVoorkomendePostcode,dekkingspercentage,omgevingsadressendichtheid,stedelijkheidAdressenPerKm2,bevolkingsdichtheidInwonersPerKm2,aantalInwoners,mannen,vrouwen,percentagePersonen0Tot15Jaar,percentagePersonen15Tot25Jaar,percentagePersonen25Tot45Jaar,percentagePersonen45Tot65Jaar,percentagePersonen65JaarEnOuder,percentageOngehuwd,percentageGehuwd,percentageGescheid,percentageVerweduwd,aantalHuishoudens,percentageEenpersoonshuishoudens,percentageHuishoudensZonderKinderen,percentageHuishoudensMetKinderen,gemiddeldeHuishoudsgrootte,percentageWesterseMigratieachtergrond,percentageNietWesterseMigratieachtergrond,percentageUitMarokko,percentageUitNederlandseAntillenEnAruba,percentageUitSuriname,percentageUitTurkije,percentageOverigeNietwestersemigratieachtergrond,oppervlakteTotaalInHa,oppervlakteLandInHa,oppervlakteWaterInHa,jrstatcode,jaar,index_right,Incident_ID,Date,Incident_Starttime,Incident_Endtime,Incident_Duration,Incident_Priority,Service_Area,Municipality,Damage_Type,Incident_Starttime_Hour,Incident_Starttime_Minute
str,str,str,str,str,str,i64,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,i64,i64,i64,datetime[ms],time,time,time,f64,str,str,str,i8,i8
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,55,55,0,"""2022BU03620401…",2022,2404,326790,2019-08-10 00:00:00,06:50:21,08:55:29,02:05:08,2.0,"""Amstelveen""","""Amstelveen""","""Building""",6,50
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,55,55,0,"""2022BU03620401…",2022,2552,340707,2020-02-09 00:00:00,12:44:28,13:59:47,01:15:19,2.0,"""Amstelveen""","""Amstelveen""","""Building""",12,44
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,55,55,0,"""2022BU03620401…",2022,2530,340671,2020-02-09 00:00:00,11:22:07,12:02:51,00:40:44,5.0,"""Amstelveen""","""Amstelveen""","""Fence, Road si…",11,22
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,55,55,0,"""2022BU03620401…",2022,541,87157,2009-10-04 00:00:00,00:28:01,01:27:33,00:59:32,2.0,"""Amstelveen""","""AMSTELVEEN""","""Unknown""",0,28
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,55,55,0,"""2022BU03620401…",2022,2581,340744,2020-02-09 00:00:00,14:15:34,14:26:53,00:11:19,5.0,"""Amstelveen""","""Amstelveen""","""Fence, Road si…",14,15


In [31]:
# Count incidents per neighbourhood code, date, start hour and damage type.
df = buurten_incidents.group_by(
    [
        "buurtcode",
        "Date",
        "Incident_Starttime_Hour",
        "Damage_Type",
    ]
).agg(pl.col("Incident_ID").count().alias("Totaal"))

# An assignment alone renders nothing; end the cell with an expression so the
# preview is reproduced on a fresh Restart & Run All.
df.head()

buurtcode,Date,Incident_Starttime_Hour,Damage_Type,Totaal
str,datetime[ms],i8,str,u32
"""BU03620401""",2018-08-09 00:00:00,21,"""Tree""",1
"""BU03631305""",2009-05-17 00:00:00,8,"""Tree""",1
"""BU03639000""",2022-02-18 00:00:00,18,"""Building""",1
"""BU03840201""",2022-02-19 00:00:00,11,"""Building""",1
"""BU03635801""",2023-03-25 00:00:00,16,"""Building""",1
