# Testing

In this notebook you can explore and test the database tables produced by our Dagster ETL process.
Here you can analyze the joined tables and the Parquet exports built from them.


In [1]:
import duckdb
import polars as pl

# from IPython import display
import geopandas as gpd
from shapely import wkt

### Create Database Connection


In [None]:
%load_ext sql
conn = duckdb.connect(database="../dsp-dagster/data_systems_project.duckdb")
%sql conn --alias duckdb

In [None]:
%sql SHOW ALL TABLES; # shows all available tables

##### Drop Tables


In [None]:
# %sql DROP TABLE joined.incident_deployments_vehicles_weather;
# %sql DROP TABLE joined.incident_deployments_vehicles_wijken;
# %sql DROP TABLE joined.incident_deployments_vehicles;
# %sql DROP TABLE joined.incidents_buurten;
%sql DROP TABLE joined.knmi_weather_txt;
# %sql DROP TABLE joined.buurten_trees;
# %sql DROP TABLE cleaned.cleaned_storm_incidents;
# %sql DROP TABLE joined.buurten_incidents_trees;

In [None]:
# Read the whole joined.buurten_incidents_trees table into a Polars DataFrame.
buurten_incidents_trees_query = """
    SELECT * FROM joined.buurten_incidents_trees
    """
buurten_incidents_trees = conn.execute(buurten_incidents_trees_query).pl()

# The result is materialized above, so the connection can be closed here.
conn.close()

In [None]:
def convert_to_geodf(polars_df: pl.DataFrame) -> gpd.GeoDataFrame:
    """
    Build a GeoDataFrame from a Polars DataFrame whose "geometry" column holds WKT text.

    The frame is converted to pandas, every "geometry" value is parsed with
    ``shapely.wkt.loads``, and the result is wrapped in a GeoDataFrame tagged
    with CRS "EPSG:4326".

    Args:
        polars_df: Polars DataFrame containing a "geometry" column.

    Returns:
        A GeoDataFrame using "geometry" as its active geometry column.

    Raises:
        ValueError: If the converted frame has no "geometry" column.
    """
    pandas_frame = polars_df.to_pandas()

    # Guard clause: without a geometry column there is nothing to parse.
    if "geometry" not in pandas_frame.columns:
        raise ValueError("No 'geometry' column found in the DataFrame")

    # Turn the geometry text back into shapely geometry objects.
    pandas_frame["geometry"] = pandas_frame["geometry"].apply(wkt.loads)

    return gpd.GeoDataFrame(pandas_frame, geometry="geometry", crs="EPSG:4326")


def convert_to_polars(gdf: gpd.GeoDataFrame) -> pl.DataFrame:
    """
    Convert a GeoDataFrame to a Polars DataFrame with geometries serialized as WKT strings.

    The conversion runs on a copy, so the caller's GeoDataFrame keeps its
    geometry objects and can be passed to this function again.

    Args:
        gdf: GeoDataFrame with a "geometry" column of shapely geometries.

    Returns:
        A Polars DataFrame in which "geometry" holds the output of
        ``shapely.wkt.dumps`` for each row.
    """
    # Copy first: writing WKT text into the caller's frame would replace its
    # geometry objects, and a later call would then receive strings instead
    # of geometries.
    frame = gdf.copy()
    frame["geometry"] = frame["geometry"].apply(wkt.dumps)

    # Convert to Polars DataFrame
    return pl.from_pandas(frame)

### Buurten Incidents


In [None]:
# Print the column names, then display the first rows of `buurten_incidents`.
# NOTE(review): `buurten_incidents` is not assigned in any cell visible in this
# notebook (only `buurten_incidents_trees` is) — confirm where it is loaded,
# otherwise this cell raises NameError on a fresh kernel.
print(buurten_incidents.columns)
buurten_incidents.head()

In [None]:
# Count Incident_ID values per buurtcode, date, start hour and damage type;
# the count is stored in the "Totaal" column.
# NOTE(review): `buurten_incidents` is not assigned in any cell visible in this
# notebook — confirm where it is loaded.
df = buurten_incidents.group_by(
    "buurtcode",
    "Date",
    "Incident_Starttime_Hour",
    "Damage_Type",
).agg(Totaal=pl.col("Incident_ID").count())

### Importing dataframes from parquet

In [3]:
# Read the combined buurten export and cast Incidents_Date to a Date column
# in one chained expression, then display the frame.
buurten_combined = pl.read_parquet(
    "../dsp-dagster/data/buurten_combined.parquet"
).with_columns(pl.col("Incidents_Date").cast(pl.Date))
buurten_combined

geometry,buurtcode,buurtnaam,wijkcode,gemeentecode,gemeentenaam,indelingswijzigingWijkenEnBuurten,water,meestVoorkomendePostcode,dekkingspercentage,omgevingsadressendichtheid,stedelijkheidAdressenPerKm2,bevolkingsdichtheidInwonersPerKm2,aantalInwoners,mannen,vrouwen,percentagePersonen0Tot15Jaar,percentagePersonen15Tot25Jaar,percentagePersonen25Tot45Jaar,percentagePersonen45Tot65Jaar,percentagePersonen65JaarEnOuder,percentageOngehuwd,percentageGehuwd,percentageGescheid,percentageVerweduwd,aantalHuishoudens,percentageEenpersoonshuishoudens,percentageHuishoudensZonderKinderen,percentageHuishoudensMetKinderen,gemiddeldeHuishoudsgrootte,percentageWesterseMigratieachtergrond,percentageNietWesterseMigratieachtergrond,percentageUitMarokko,percentageUitNederlandseAntillenEnAruba,percentageUitSuriname,percentageUitTurkije,percentageOverigeNietwestersemigratieachtergrond,…,Trees_soortnaamTop_Albizia,Trees_soortnaamTop_Tetracentron,Trees_soortnaamTop_Oxydendrum,Trees_soortnaamTop_Cladrastis,Trees_standplaatsGedetailleerd_Straatbaksteen,Trees_standplaatsGedetailleerd_Lijnvormige-haag,Trees_standplaatsGedetailleerd_Gazon,Trees_standplaatsGedetailleerd_Tegels,Trees_standplaatsGedetailleerd_Bomen-en-struikvormers,Trees_standplaatsGedetailleerd_Fijne-sierheester,Trees_standplaatsGedetailleerd_Vaste-planten,Trees_standplaatsGedetailleerd_Struikvormers,Trees_standplaatsGedetailleerd_Betonstraatstenen,Trees_standplaatsGedetailleerd_Ruw-gras,Trees_standplaatsGedetailleerd_Bloemrijk-gras,Trees_standplaatsGedetailleerd_Bodembedekkende-heesters,Trees_standplaatsGedetailleerd_Cultuurrozen,Trees_standplaatsGedetailleerd_Ruigte,Trees_standplaatsGedetailleerd_Los,Trees_standplaatsGedetailleerd_Sierbestrating,Trees_standplaatsGedetailleerd_Samenhangend,Trees_standplaatsGedetailleerd_Boomvormers,Trees_standplaatsGedetailleerd_Dichte-deklagen,Trees_standplaatsGedetailleerd_Ongewapend-nietverdeuveld-beton,Trees_standplaatsGedetailleerd_Blokhaag,Trees_standplaatsGedetailleerd_Grove-sierheester,Tree
s_standplaatsGedetailleerd_Heesterrozen,Trees_standplaatsGedetailleerd_Botanische-rozen,"Trees_stamdiameterklasse_0,1-tot-0,2-m.","Trees_stamdiameterklasse_0,5-tot-1-m.","Trees_stamdiameterklasse_0,3-tot-0,5-m.","Trees_stamdiameterklasse_0,2-tot-0,3-m.","Trees_stamdiameterklasse_1,0-tot-1,5-m.",Incidents_Date,Incidents_Incident_Starttime_Hour,Incidents_Damage_Type,Incidents_Total
str,str,str,str,str,str,i64,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,i64,i64,i64,i64,i64,i64,i64,…,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,date,i8,str,u32
"""MULTIPOLYGON (…","""BU04792130""","""Het Eiland""","""WK047921""","""GM0479""","""Zaanstad""",1,"""NEE""","""1506""",1,3064,1,9972,1265,630,635,13,14,13,33,27,41,46,7,7,540,27,34,39,2.3,12,13,1,1,4,3,5,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""MULTIPOLYGON (…","""BU04796120""","""Noorderhoofdbu…","""WK047961""","""GM0479""","""Zaanstad""",1,"""NEE""","""1561""",1,2171,2,5036,2040,1020,1020,13,8,26,28,26,48,33,12,7,1140,52,24,24,1.8,8,12,2,0,2,3,4,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023-03-13,15,"""Building""",1
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022-02-18,15,"""Fence, Road si…",1
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019-03-07,23,"""Unknown""",1
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019-08-10,6,"""Building""",1
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021-05-22,10,"""Tree""",1
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021-03-14,1,"""Tree""",1
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018-08-09,21,"""Tree""",1
"""MULTIPOLYGON (…","""BU03620401""","""Stadshart""","""WK036204""","""GM0362""","""Amstelveen""",1,"""NEE""","""1181""",1,3693,1,6297,3480,1780,1700,17,7,36,23,18,47,43,6,4,1780,47,25,29,2.0,31,37,1,1,2,1,32,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-09,12,"""Building""",1


In [4]:
# Read the KNMI weather export and display it.
knmi_weather_data = pl.read_parquet(
    source="../dsp-dagster/data/knmi_weather_data.parquet"
)
knmi_weather_data

STN,YYYYMMDD,HH,DD,FH,FF,FX,T,T10N,TD,SQ,Q,DR,RH,P,VV,N,U,WW,IX,M,R,S,O,Y
i64,date,i8,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0
240,2024-01-22,17,240,100,100,160,87,""" """,44,0,0,0,0,10090,66,1,74,""" """,5,0,0,0,0,0
240,2024-01-22,16,240,110,110,160,90,""" """,48,3,13,0,0,10085,65,6,75,""" """,5,0,0,0,0,0
240,2024-01-22,15,240,120,120,190,94,""" """,50,10,43,0,0,10073,64,2,74,""" """,5,0,0,0,0,0
240,2024-01-22,14,240,130,130,190,98,""" """,52,10,79,0,0,10069,66,5,73,""" """,5,0,0,0,0,0
240,2024-01-22,13,240,120,120,200,100,""" """,57,10,97,0,0,10065,64,1,74,""" """,5,0,0,0,0,0
240,2024-01-22,12,250,120,120,180,104,""" 89""",63,10,101,0,0,10060,65,0,76,""" 2""",7,0,0,0,0,0
240,2024-01-22,11,250,120,130,190,101,""" """,71,10,82,0,0,10050,63,1,81,""" """,7,0,0,0,0,0
240,2024-01-22,10,240,100,100,160,96,""" """,73,0,18,0,-1,10042,60,7,85,""" 23""",7,0,1,0,0,0
240,2024-01-22,9,240,120,120,190,105,""" """,84,0,3,0,-1,10032,58,8,86,""" 22""",7,0,1,0,0,0


In [5]:
# Keep only weather rows dated 2023-01-01 or later.
# YYYYMMDD is a Date column, so compare it against a Date literal instead of
# relying on an implicit Date -> Datetime upcast.
knmi_weather_data_2023 = knmi_weather_data.filter(
    pl.col("YYYYMMDD") >= pl.date(2023, 1, 1)
)

In [6]:
# Keep only rows whose incident date is 2023-01-01 or later.
# Incidents_Date is cast to Date when the frame is loaded, so compare it
# against a Date literal instead of relying on an implicit Datetime upcast.
# NOTE(review): no later cell visible in this notebook reads
# `buurten_combined_2023` — confirm whether the joins below should use it.
buurten_combined_2023 = buurten_combined.filter(
    pl.col("Incidents_Date") >= pl.date(2023, 1, 1)
)

In [7]:
# Single-column frame holding each buurtcode exactly once.
distinct_buurten = buurten_combined.select(pl.col("buurtcode")).unique()
distinct_buurten

buurtcode
str
"""BU03633404"""
"""BU03637003"""
"""BU03630002"""
"""BU03635801"""
"""BU03633706"""
"""BU03636401"""
"""BU04798140"""
"""BU03634400"""
"""BU03639201"""
"""BU03635804"""


In [8]:
# Cross join: pair every filtered weather row with every distinct buurtcode.
knmi_weather_data_2023_buurten = knmi_weather_data_2023.join(
    other=distinct_buurten, how="cross"
)
knmi_weather_data_2023_buurten

STN,YYYYMMDD,HH,DD,FH,FF,FX,T,T10N,TD,SQ,Q,DR,RH,P,VV,N,U,WW,IX,M,R,S,O,Y,buurtcode
i64,date,i8,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64,str
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03633404"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03637003"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03630002"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03635801"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03633706"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03636401"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU04798140"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03634400"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03639201"""
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03635804"""


In [10]:
# Attach incident columns to the weather x buurt grid, matching on date, hour
# and buurtcode. The left join keeps grid rows that have no matching incident.
# NOTE(review): Trees_Total is taken from the incident rows, so it is null
# wherever no incident matches — confirm this is intended before filling nulls.
model_data = knmi_weather_data_2023_buurten.join(
    buurten_combined.select(
        [
            "Incidents_Date",
            "Incidents_Incident_Starttime_Hour",
            "buurtcode",
            "Trees_Total",
            "Incidents_Damage_Type",
            "Incidents_Total",
        ]
    ),
    left_on=["YYYYMMDD", "HH", "buurtcode"],
    right_on=["Incidents_Date", "Incidents_Incident_Starttime_Hour", "buurtcode"],
    how="left",
)
model_data

STN,YYYYMMDD,HH,DD,FH,FF,FX,T,T10N,TD,SQ,Q,DR,RH,P,VV,N,U,WW,IX,M,R,S,O,Y,buurtcode,Trees_Total,Incidents_Damage_Type,Incidents_Total
i64,date,i8,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64,str,u32,str,u32
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03633404""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03637003""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03630002""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03635801""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03633706""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03636401""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU04798140""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03634400""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03639201""",,,
240,2024-01-22,18,240,100,100,160,85,""" 76""",48,0,0,0,0,10099,65,0,77,""" """,7,0,0,0,0,0,"""BU03635804""",,,


In [None]:
# Add the buurt name and geometry to every model_data row.
# buurten_combined repeats a buurtcode once per incident row, so the lookup is
# reduced to one row per buurtcode first; joining the raw frame would multiply
# each model_data row by the number of incident rows of its buurt.
# NOTE(review): assumes buurtnaam and geometry are constant per buurtcode — confirm.
model_data_buurt = model_data.join(
    buurten_combined.select(["buurtcode", "buurtnaam", "geometry"]).unique(
        subset=["buurtcode"], keep="first"
    ),
    on="buurtcode",
    how="left",
)
model_data_buurt

In [None]:
# Display model_data with nulls filled by 0. The result is not assigned, so
# `model_data` itself is left unchanged.
model_data.fill_null(value=0)