# Testing

In this notebook you can explore and test the resulting database tables from our Dagster ETL process.
Here you can analyze


In [9]:
import duckdb
import polars as pl
from IPython import display

### Create Database Connection


In [10]:
# Use SQL magic
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = True
%config SqlMagic.displaycon = False

%load_ext sql
conn = duckdb.connect(database="../dsp-dagster/data_systems_project.duckdb")
%sql conn --alias duckdb
%sql SHOW ALL TABLES; # shows all available tables

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,data_systems_project,joined,incident_deployments_vehicles,"[Incident_ID, Date, Incident_Starttime, Incide...","[BIGINT, DATE, TIME, TIME, TIME, BIGINT, VARCH...",False
1,data_systems_project,joined,incident_deployments_vehicles_weather,"[Station_code, Date, Hour, Dd, Fh, Ff, Fx, T, ...","[BIGINT, DATE, TINYINT, BIGINT, BIGINT, BIGINT...",False
2,data_systems_project,joined,incident_deployments_vehicles_wijken,"[Incident_ID, Date, Incident_Starttime, Incide...","[BIGINT, TIMESTAMP_MS, TIME, TIME, TIME, BIGIN...",False
3,data_systems_project,public,bag_panden,"[geometry, identificatie, rdf_seealso, bouwjaa...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, VARCHAR, V...",False
4,data_systems_project,public,cbs_buurten,"[geometry, buurtcode, buurtnaam, wijkcode, gem...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, ...",False
5,data_systems_project,public,cbs_wijken,"[geometry, wijkcode, wijknaam, gemeentecode, g...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, ...",False
6,data_systems_project,public,fire_stations_and_vehicles,"[Fire_Station, Vehicle, Vehicle_Type]","[VARCHAR, VARCHAR, VARCHAR]",False
7,data_systems_project,public,knmi_weather_data,"[station_code, date, hour, DD, FH, FF, FX, T, ...","[BIGINT, VARCHAR, BIGINT, BIGINT, BIGINT, BIGI...",False
8,data_systems_project,public,service_areas,"[H_Verzorgingsgebied_ID, Verzorgingsgebied, LA...","[BIGINT, VARCHAR, DOUBLE, DOUBLE, VARCHAR]",False
9,data_systems_project,public,storm_deployments,"[Deployment_ID, Incident_ID, Vehicle_Type, Veh...","[BIGINT, BIGINT, VARCHAR, VARCHAR, VARCHAR, VA...",False


##### Drop Tables


In [11]:
# %sql DROP TABLE public.cbs_buurten;
# %sql DROP TABLE public.cbs_wijken;

##### Retrieve Tables as Polars DataFrame


In [12]:
cbs_wijken = conn.execute(
    """
    SELECT * FROM public.cbs_wijken """
).pl()

service_areas = conn.execute(
    """
    SELECT * FROM public.service_areas """
).pl()

tree_data = conn.execute(
    """
    SELECT * FROM public.tree_data
    """
).pl()


incident_deployments_vehicles = conn.execute(
    """
    SELECT * FROM joined.incident_deployments_vehicles"""
).pl()


cleaned_knmi_weather_data = conn.execute(
    """
    SELECT * FROM cleaned.cleaned_knmi_weather_data
    """
).pl()


incident_deployments_vehicles_wijken = conn.execute(
    """
    SELECT * FROM joined.incident_deployments_vehicles_wijken
    """
).pl()

incident_deployments_vehicles_weather = conn.execute(
    """
    SELECT * FROM joined.incident_deployments_vehicles_weather
    """
).pl()


# Close the database connection
conn.close()

In [32]:
df = tree_data.to_pandas().drop(columns=["_links"])
print(df.head()["geometrie"])
# df["Coordinates"] = gpd.GeoSeries.from_wkt(df["geometrie"])

# df = gpd.GeoDataFrame.from_features(tree_data.to_pandas())

0    {'type': 'Point', 'coordinates': [122115.11, 4...
1    {'type': 'Point', 'coordinates': [121980.46, 4...
2    {'type': 'Point', 'coordinates': [118737.81, 4...
3    {'type': 'Point', 'coordinates': [122053.62, 4...
4    {'type': 'Point', 'coordinates': [120154.43, 4...
Name: geometrie, dtype: object


In [6]:
# cbs_wijken.filter(
#     [
#         pl.col("gemeentenaam") == "Amsterdam",
#     ]
# ).head()

In [7]:
# incident_deployments_vehicles_wijken.head()

In [8]:
incident_deployments_vehicles_weather.head()

Station_code,Date,Hour,Dd,Fh,Ff,Fx,T,T10n,Td,Sq,Q,Dr,Rh,P,Vv,N,U,Ww,Ix,M,R,S,O,Y,Incident_ID,Incident_Starttime,Incident_Endtime,Incident_Duration,Incident_Priority,Service_Area,Municipality,Damage_Type,LON,LAT,Incident_Endtime_Hour,Incident_Duration_Hour,Incident_Starttime_Minute,Incident_Endtime_Minute,Incident_Duration_Minute,Deployment_ID,Vehicle_Type,Vehicle_Role,Fire_Station,Fire_Station_Service_Status,Driving_Time_To_Incident,Vehicle
i64,date,i8,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,time,time,time,i64,str,str,str,f64,f64,i8,i8,i8,i8,i8,i64,str,str,str,str,str,str
240,2005-01-01,1,260,40,30,60,68,,57,0,0,0,0,10246,57,8,93,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,
240,2005-01-01,2,230,30,30,60,65,,52,0,0,0,0,10244,58,8,91,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,
240,2005-01-01,3,230,40,30,50,43,,34,0,0,0,0,10241,40,1,94,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,
240,2005-01-01,4,220,40,40,50,38,,32,0,0,0,0,10239,12,0,96,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,
240,2005-01-01,5,230,40,40,50,38,,34,0,0,0,0,10237,14,3,97,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,


In [4]:
# Select all rows where Incidents happened (by filtering on Incident_ID != null)
selected_df_2005 = incident_deployments_vehicles_weather.filter(
    pl.col("Incident_ID").is_not_null(), pl.col("Date").dt.year() == 2005
)
display(selected_df_2005.head())
print(selected_df_2005.columns)

TypeError: 'module' object is not callable

In [1]:
incident_deployments_vehicles_weather.head()

NameError: name 'incident_deployments_vehicles_weather' is not defined