# Testing

In this notebook you can explore and test the database tables produced by our Dagster ETL process.
Here you can inspect the available tables, load them into Polars DataFrames, and analyze their contents.


In [None]:
import duckdb
import polars as pl

# from IPython import display
import geopandas as gpd
from shapely import wkt

### Create Database Connection


In [None]:
# Load the SQL magic extension so the `%sql` line magic is available.
%load_ext sql
# Open the DuckDB database file; the path is relative, so it depends on the
# directory the notebook kernel is started from.
conn = duckdb.connect(database="../dsp-dagster/data_systems_project.duckdb")
# Hand the open connection to the SQL magic under the alias "duckdb", so `%sql`
# cells and `conn.execute(...)` calls share the same connection object.
%sql conn --alias duckdb

In [None]:
%sql SHOW ALL TABLES; # shows all available tables

##### Drop Tables


In [None]:
# %sql DROP TABLE joined.incident_deployments_vehicles_weather;
# %sql DROP TABLE joined.incident_deployments_vehicles_wijken;
# %sql DROP TABLE joined.incident_deployments_vehicles;
# %sql DROP TABLE joined.incidents_buurten;
%sql DROP TABLE joined.knmi_weather_txt;
# %sql DROP TABLE joined.buurten_trees;
# %sql DROP TABLE cleaned.cleaned_storm_incidents;
# %sql DROP TABLE joined.buurten_incidents_trees;

##### Retrieve Tables as Polars DataFrame


In [None]:
# Read the full `joined.buurten_trees` table and materialise it as a Polars
# DataFrame via DuckDB's `.pl()` result conversion.
buurten_trees = conn.execute(
    """
    SELECT * FROM joined.buurten_trees
    """
).pl()

# Same for `joined.buurten_incidents`; this frame is used by the cells below.
buurten_incidents = conn.execute(
    """
    SELECT * FROM joined.buurten_incidents
    """
).pl()

# Close the database connection now that both tables are held in memory.
# NOTE(review): the `%sql` magic was given this same connection object, so
# SQL-magic cells and further `conn.execute` calls presumably fail after this
# point until the connection cell is re-run — confirm.
conn.close()

In [None]:
def convert_to_geodf(
    polars_df: pl.DataFrame, crs: str = "EPSG:4326"
) -> gpd.GeoDataFrame:
    """
    Convert a Polars DataFrame with WKT geometry strings to a GeoDataFrame.

    Args:
        polars_df: Frame holding a "geometry" column of WKT strings.
        crs: Coordinate reference system assigned to the result. Defaults to
            "EPSG:4326", the value this function previously hard-coded.

    Returns:
        A GeoDataFrame with the same columns, where "geometry" holds parsed
        geometry objects and is set as the active geometry column.

    Raises:
        ValueError: If the frame has no "geometry" column.
    """
    # Fail fast, before paying for the Polars -> pandas conversion.
    if "geometry" not in polars_df.columns:
        raise ValueError("No 'geometry' column found in the DataFrame")

    # Convert Polars DataFrame to Pandas DataFrame
    df = polars_df.to_pandas()

    # Parse the WKT text back into geometry objects. Null rows are kept as
    # missing geometries instead of crashing inside `wkt.loads`.
    df["geometry"] = df["geometry"].apply(
        lambda text: None if text is None else wkt.loads(text)
    )

    return gpd.GeoDataFrame(df, geometry="geometry", crs=crs)


def convert_to_polars(gdf: gpd.GeoDataFrame) -> pl.DataFrame:
    """
    Convert a GeoDataFrame to a Polars DataFrame with WKT geometry strings.

    The "geometry" column is serialised with `shapely.wkt.dumps`, which is the
    inverse of the WKT parsing done in `convert_to_geodf`.

    Args:
        gdf: GeoDataFrame with a "geometry" column of geometry objects.
            It is left unmodified.

    Returns:
        A Polars DataFrame with the same columns, where "geometry" holds WKT
        strings.
    """
    # Work on a copy: assigning the WKT strings directly on `gdf` would replace
    # the caller's geometry objects with text as a side effect.
    frame = gdf.copy()
    frame["geometry"] = frame["geometry"].apply(wkt.dumps)

    # Convert to Polars DataFrame
    return pl.from_pandas(frame)

### Buurten Incidents


In [None]:
# Print the column names first; the frame preview below may not show every
# column when the table is wide.
print(buurten_incidents.columns)
# Last expression of the cell, so the notebook renders the first rows as a table.
buurten_incidents.head()

In [None]:
# One row per (buurtcode, Date, Incident_Starttime_Hour, Damage_Type) group,
# with the count of Incident_ID values in that group stored as "Totaal".
df = buurten_incidents.group_by(
    "buurtcode", "Date", "Incident_Starttime_Hour", "Damage_Type"
).agg(pl.col("Incident_ID").count().alias("Totaal"))