# Testing

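In this notebook you can explore and test the database tables produced by our Dagster ETL process.
Here you can analyze the joined tables (for example `joined.buurten_trees` and `joined.buurten_incidents`) by loading them as Polars DataFrames and converting them to GeoDataFrames.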


In [41]:
import duckdb
import polars as pl

# from IPython import display
import geopandas as gpd
from shapely import wkt

### Create Database Connection


In [42]:
# Load the `sql` IPython extension so the %sql magic is available in this kernel.
%load_ext sql
# Open the project's DuckDB database file; `conn` is reused by later cells.
conn = duckdb.connect(database="../dsp-dagster/data_systems_project.duckdb")
# Hand the open connection to the %sql magic under the alias "duckdb".
%sql conn --alias duckdb

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [43]:
# List every table in the connected database, with its schema, column names and column types.
%sql SHOW ALL TABLES; # shows all available tables

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,data_systems_project,joined,buurten_trees,"[geometry, buurtcode, buurtnaam, wijkcode, gem...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, ...",False
1,data_systems_project,public,bag_panden,"[geometry, identificatie, rdf_seealso, bouwjaa...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, VARCHAR, V...",False
2,data_systems_project,public,cbs_buurten,"[geometry, buurtcode, buurtnaam, wijkcode, gem...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, ...",False
3,data_systems_project,public,cbs_wijken,"[geometry, wijkcode, wijknaam, gemeentecode, g...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, ...",False
4,data_systems_project,public,fire_stations_and_vehicles,"[Fire_Station, Vehicle, Vehicle_Type]","[VARCHAR, VARCHAR, VARCHAR]",False
5,data_systems_project,public,grond_data,"[geometry, id, locatie, amNummer, typeOnderzoe...","[VARCHAR, BIGINT, VARCHAR, VARCHAR, VARCHAR, V...",False
6,data_systems_project,public,service_areas,"[H_Verzorgingsgebied_ID, Verzorgingsgebied, LA...","[BIGINT, VARCHAR, DOUBLE, DOUBLE, VARCHAR]",False
7,data_systems_project,public,storm_deployments,"[Deployment_ID, Incident_ID, Vehicle_Type, Veh...","[BIGINT, BIGINT, VARCHAR, VARCHAR, VARCHAR, VA...",False
8,data_systems_project,public,storm_incidents,"[Incident_ID, Date, Incident_Starttime, Incide...","[BIGINT, TIMESTAMP_MS, TIME, TIME, TIME, DOUBL...",False
9,data_systems_project,public,tree_data,"[geometry, id, gbdBuurtId, typeBeheerderPlus, ...","[VARCHAR, BIGINT, VARCHAR, VARCHAR, VARCHAR, V...",False


##### Drop Tables


In [44]:
# Destructive cleanup statements, left commented out. Uncomment a single line to
# drop that table. Note that joined.buurten_trees and joined.buurten_incidents
# are queried by the retrieval cell below, so dropping them will break it.
# %sql DROP TABLE joined.incident_deployments_vehicles_weather;
# %sql DROP TABLE joined.incident_deployments_vehicles_wijken;
# %sql DROP TABLE joined.incident_deployments_vehicles;
# %sql DROP TABLE joined.incidents_buurten;
# %sql DROP TABLE joined.buurten_trees;
# %sql DROP TABLE cleaned.cleaned_storm_incidents;
# %sql DROP TABLE joined.buurten_incidents;

##### Retrieve Tables as Polars DataFrame


In [45]:
# SQL text for the two joined tables that are pulled into memory.
trees_query = """
    SELECT * FROM joined.buurten_trees
    """
incidents_query = """
    SELECT * FROM joined.buurten_incidents
    """

# Run each query and materialise the result as a Polars DataFrame.
buurten_trees = conn.execute(trees_query).pl()
buurten_incidents = conn.execute(incidents_query).pl()

# Both frames are in memory now, so the DuckDB connection can be released.
conn.close()

In [46]:
def convert_to_geodf(
    polars_df: pl.DataFrame, crs: str = "EPSG:4326"
) -> gpd.GeoDataFrame:
    """
    Convert a Polars DataFrame with WKT geometry strings to a GeoDataFrame.

    Parameters
    ----------
    polars_df : pl.DataFrame
        Frame with a "geometry" column whose values are parsed with
        ``shapely.wkt.loads``.
    crs : str, default "EPSG:4326"
        Coordinate reference system assigned to the resulting GeoDataFrame.
        The geometries are labelled with this CRS, not reprojected.

    Returns
    -------
    gpd.GeoDataFrame
        All columns of the input, with "geometry" holding shapely objects
        and set as the active geometry column.

    Raises
    ------
    ValueError
        If the input has no "geometry" column.
    """
    # Fail fast, before paying for the Polars -> pandas conversion.
    if "geometry" not in polars_df.columns:
        raise ValueError("No 'geometry' column found in the DataFrame")

    # to_pandas() builds a new frame, so the assignment below cannot
    # affect the caller's Polars DataFrame.
    df = polars_df.to_pandas()

    # Parse the WKT strings back into shapely geometry objects.
    df["geometry"] = df["geometry"].apply(wkt.loads)

    return gpd.GeoDataFrame(df, geometry="geometry", crs=crs)


def convert_to_polars(gdf: gpd.GeoDataFrame) -> pl.DataFrame:
    """
    Convert a GeoDataFrame to a Polars DataFrame, serialising geometries as WKT strings.

    The input GeoDataFrame is left untouched: the WKT conversion is applied
    to a copy, so the caller keeps its shapely geometry objects.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Frame with a "geometry" column of shapely geometries.

    Returns
    -------
    pl.DataFrame
        All columns of the input, with "geometry" replaced by the output of
        ``shapely.wkt.dumps`` for each geometry.
    """
    # Work on a copy: assigning the WKT strings onto `gdf` itself would
    # overwrite the caller's geometry objects as a side effect.
    frame = gdf.copy()

    # WKT is the format that convert_to_geodf parses back with wkt.loads.
    frame["geometry"] = frame["geometry"].apply(wkt.dumps)

    # Convert to Polars DataFrame
    return pl.from_pandas(frame)

### Buurten Incidents


In [48]:
# Rebuild shapely geometries from the stored WKT strings, then preview the first five rows.
gdf_buurten_incidents = convert_to_geodf(polars_df=buurten_incidents)
gdf_buurten_incidents.head()

Unnamed: 0,geometry,buurtcode,buurtnaam,wijkcode,gemeentecode,gemeentenaam,indelingswijzigingWijkenEnBuurten,water,meestVoorkomendePostcode,dekkingspercentage,...,Date,Incident_Starttime,Incident_Endtime,Incident_Duration,Incident_Priority,Service_Area,Municipality,Damage_Type,Incident_Starttime_Hour,Incident_Starttime_Minute
0,"MULTIPOLYGON (((4.85730 52.30633, 4.85730 52.3...",BU03620401,Stadshart,WK036204,GM0362,Amstelveen,1,NEE,1181,1,...,2019-08-10,06:50:21,08:55:29,02:05:08,2.0,Amstelveen,Amstelveen,Building,6,50
1,"MULTIPOLYGON (((4.85730 52.30633, 4.85730 52.3...",BU03620401,Stadshart,WK036204,GM0362,Amstelveen,1,NEE,1181,1,...,2020-02-09,12:44:28,13:59:47,01:15:19,2.0,Amstelveen,Amstelveen,Building,12,44
2,"MULTIPOLYGON (((4.85730 52.30633, 4.85730 52.3...",BU03620401,Stadshart,WK036204,GM0362,Amstelveen,1,NEE,1181,1,...,2020-02-09,11:22:07,12:02:51,00:40:44,5.0,Amstelveen,Amstelveen,"Fence, Road signs, Scaffolding",11,22
3,"MULTIPOLYGON (((4.85730 52.30633, 4.85730 52.3...",BU03620401,Stadshart,WK036204,GM0362,Amstelveen,1,NEE,1181,1,...,2009-10-04,00:28:01,01:27:33,00:59:32,2.0,Amstelveen,AMSTELVEEN,Unknown,0,28
4,"MULTIPOLYGON (((4.85730 52.30633, 4.85730 52.3...",BU03620401,Stadshart,WK036204,GM0362,Amstelveen,1,NEE,1181,1,...,2020-02-09,14:15:34,14:26:53,00:11:19,5.0,Amstelveen,Amstelveen,"Fence, Road signs, Scaffolding",14,15
