# Testing

In this notebook you can explore and test the resulting database tables from our Dagster ETL process.
Here you can analyze


In [172]:
import duckdb
import polars as pl

# from IPython import display
import geopandas as gpd
from shapely import wkt

### Create Database Connection


In [173]:
%load_ext sql
conn = duckdb.connect(database="../dsp-dagster/data_systems_project.duckdb")
%sql conn --alias duckdb

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [174]:
%sql SHOW ALL TABLES; # shows all available tables

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,data_systems_project,public,storm_deployments,"[Deployment_ID, Incident_ID, Vehicle_Type, Veh...","[BIGINT, BIGINT, VARCHAR, VARCHAR, VARCHAR, VA...",False
1,data_systems_project,public,storm_incidents,"[Incident_ID, Date, Incident_Starttime, Incide...","[BIGINT, TIMESTAMP_MS, TIME, TIME, TIME, DOUBL...",False
2,data_systems_project,public,tree_data,"[geometry, id, gbdBuurtId, typeBeheerderPlus, ...","[VARCHAR, BIGINT, VARCHAR, VARCHAR, VARCHAR, V...",False


##### Drop Tables


In [175]:
# %sql DROP TABLE joined.incident_deployments_vehicles_weather;
# %sql DROP TABLE joined.incident_deployments_vehicles_wijken;
# %sql DROP TABLE joined.incident_deployments_vehicles;
# %sql DROP TABLE joined.incidents_buurten;
# %sql DROP TABLE joined.buurten_trees;
# %sql DROP TABLE public.cbs_wijken;

##### Retrieve Tables as Polars DataFrame


In [176]:
cbs_buurten = conn.execute(
    """
    SELECT * FROM public.cbs_buurten """
).pl()

tree_data = conn.execute(
    """
    SELECT * FROM public.tree_data
    """
).pl()

grond_data = conn.execute(
    """
    SELECT * FROM public.tree_data
    """
).pl()


# Close the database connection
conn.close()

In [177]:
def convert_to_geodf(polars_df: pl.DataFrame) -> gpd.GeoDataFrame:
    """
    Convert a Polars DataFrame to a GeoDataFrame using WKB or WKT transformation.
    """

    # Convert Polars DataFrame to Pandas DataFrame
    df = polars_df.to_pandas()

    # Convert geometry strings back to geometry objects
    if "geometry" in df.columns:
        df["geometry"] = df["geometry"].apply(wkt.loads)

    else:
        raise ValueError("No 'geometry' column found in the DataFrame")

    # Convert back to GeoDataFrame
    return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

In [178]:
# def convert_to_geodf(polars_df: pl.DataFrame) -> gpd.GeoDataFrame:
#     """
#     Convert a Polars DataFrame to a GeoDataFrame using WKB or WKT transformation.
#     """

#     # Check if the 'geometry' column exists
#     if "geometry" in polars_df.columns:
#         # Convert WKT strings to geometry objects and reassign to polars_df
#         polars_df = polars_df.with_columns(
#             pl.col("geometry").map_elements(wkt.loads).alias("geometry")
#         )
#     else:
#         raise ValueError("No 'geometry' column found in the DataFrame")

#     # Convert to GeoDataFrame
#     return gpd.GeoDataFrame(polars_df.to_pandas(), geometry="geometry", crs="EPSG:4326")

In [179]:
# Filter out Totals
cbs_buuurten = cbs_buurten.filter(pl.col("buurtnaam") != " ")
gdf_buurten = convert_to_geodf(cbs_buuurten)
gdf_buurten.head()

Unnamed: 0,geometry,buurtcode,buurtnaam,wijkcode,gemeentecode,gemeentenaam,indelingswijzigingWijkenEnBuurten,water,meestVoorkomendePostcode,dekkingspercentage,...,percentageUitMarokko,percentageUitNederlandseAntillenEnAruba,percentageUitSuriname,percentageUitTurkije,percentageOverigeNietwestersemigratieachtergrond,oppervlakteTotaalInHa,oppervlakteLandInHa,oppervlakteWaterInHa,jrstatcode,jaar
0,"MULTIPOLYGON (((4.82752 52.43741, 4.83173 52.4...",BU04792130,Het Eiland,WK047921,GM0479,Zaanstad,1,NEE,1506,1,...,1,1,4,3,5,28,13,15,2022BU04792130,2022
1,"MULTIPOLYGON (((4.77229 52.50569, 4.77270 52.5...",BU04796120,Noorderhoofdbuurt,WK047961,GM0479,Zaanstad,1,NEE,1561,1,...,2,0,2,3,4,46,41,5,2022BU04796120,2022
2,"MULTIPOLYGON (((4.85730 52.30633, 4.85730 52.3...",BU03620401,Stadshart,WK036204,GM0362,Amstelveen,1,NEE,1181,1,...,1,1,2,1,32,55,55,0,2022BU03620401,2022
3,"MULTIPOLYGON (((4.87899 52.37831, 4.87869 52.3...",BU03630604,Zaagpoortbuurt,WK036306,GM0363,Amsterdam,1,NEE,1015,1,...,4,2,5,2,14,4,4,0,2022BU03630604,2022
4,"MULTIPOLYGON (((4.86733 52.39319, 4.86742 52.3...",BU03631305,Spaarndammerbuurt Noordwest,WK036313,GM0363,Amsterdam,1,NEE,1013,1,...,9,1,9,3,13,12,12,0,2022BU03631305,2022


In [180]:
gdf_tree_data = convert_to_geodf(tree_data)
gdf_tree_data.head()

Unnamed: 0,geometry,id,gbdBuurtId,typeBeheerderPlus,boomhoogteklasseActueel,typeEigenaarPlus,jaarVanAanleg,soortnaam,stamdiameterklasse,standplaatsGedetailleerd,typeObject,typeSoortnaam,soortnaamKort,soortnaamTop
0,POINT (3.31360 47.97524),919933,3630980000301,Stadsdeel Zuid,e. 15 tot 18 m.,Gemeente Amsterdam,1948,Tilia americana,,,Boom niet vrij uitgroeiend,Bomen,Tilia,Linde (Tilia)
1,POINT (3.31360 47.97524),919934,3630980000301,Stadsdeel Zuid,c. 9 tot 12 m.,Gemeente Amsterdam,1978,Ulmus hollandica 'Vegeta',,Tegels,Boom niet vrij uitgroeiend,Bomen,Ulmus,Iep (Ulmus)
2,POINT (3.31360 47.97524),919935,3630980000311,Stadsdeel Zuid,c. 9 tot 12 m.,Gemeente Amsterdam,1990,Fraxinus excelsior 'Westhof's Glorie',"0,2 tot 0,3 m.",,Boom niet vrij uitgroeiend,Bomen,Fraxinus,Es (Fraxinus)
3,POINT (3.31360 47.97524),919936,3630980000297,Stadsdeel Zuid,b. 6 tot 9 m.,Gemeente Amsterdam,2002,Ulmus glabra 'Lutescens',,,Boom niet vrij uitgroeiend,Bomen,Ulmus,Iep (Ulmus)
4,POINT (3.31360 47.97524),919937,3630980000306,Stadsdeel Zuid,b. 6 tot 9 m.,Gemeente Amsterdam,1985,Quercus robur,,,Boom niet vrij uitgroeiend,Bomen,Quercus,Eik (Quercus)


In [183]:
gdf_buurten.sjoin(gdf_tree_data)

Unnamed: 0,geometry,buurtcode,buurtnaam,wijkcode,gemeentecode,gemeentenaam,indelingswijzigingWijkenEnBuurten,water,meestVoorkomendePostcode,dekkingspercentage,...,boomhoogteklasseActueel,typeEigenaarPlus,jaarVanAanleg,soortnaam,stamdiameterklasse,standplaatsGedetailleerd,typeObject,typeSoortnaam,soortnaamKort,soortnaamTop
0,"MULTIPOLYGON (((4.82752 52.43741, 4.83173 52.4...",BU04792130,Het Eiland,WK047921,GM0479,Zaanstad,1,NEE,1506,1,...,,,,,,,,,,
1,"MULTIPOLYGON (((4.77229 52.50569, 4.77270 52.5...",BU04796120,Noorderhoofdbuurt,WK047961,GM0479,Zaanstad,1,NEE,1561,1,...,,,,,,,,,,
2,"MULTIPOLYGON (((4.85730 52.30633, 4.85730 52.3...",BU03620401,Stadshart,WK036204,GM0362,Amstelveen,1,NEE,1181,1,...,,,,,,,,,,
3,"MULTIPOLYGON (((4.87899 52.37831, 4.87869 52.3...",BU03630604,Zaagpoortbuurt,WK036306,GM0363,Amsterdam,1,NEE,1015,1,...,,,,,,,,,,
4,"MULTIPOLYGON (((4.86733 52.39319, 4.86742 52.3...",BU03631305,Spaarndammerbuurt Noordwest,WK036313,GM0363,Amsterdam,1,NEE,1013,1,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
615,"MULTIPOLYGON (((4.93955 52.36313, 4.93958 52.3...",BU03633101,Noordwestkwadrant Indische buurt Zuid,WK036331,GM0363,Amsterdam,1,NEE,1094,1,...,,,,,,,,,,
616,"MULTIPOLYGON (((4.96276 52.37197, 4.96292 52.3...",BU03633402,Zeeburgereiland Zuidoost,WK036334,GM0363,Amsterdam,1,NEE,1095,1,...,,,,,,,,,,
617,"MULTIPOLYGON (((4.88274 52.36892, 4.88357 52.3...",BU03630203,Leidsegracht Noord,WK036302,GM0363,Amsterdam,1,NEE,1016,1,...,,,,,,,,,,
618,"MULTIPOLYGON (((4.99176 52.32428, 4.99203 52.3...",BU03639410,Kantershof,WK036394,GM0363,Amsterdam,1,NEE,1104,1,...,,,,,,,,,,


In [None]:
# cbs_wijken.filter(
#     [
#         pl.col("gemeentenaam") == "Amsterdam",
#     ]
# ).head()

In [None]:
# incident_deployments_vehicles_wijken.head()

In [None]:
# incident_deployments_vehicles_weather.head()