## Modelling

In this notebook we start modelling with data from our local database.

- To do this, we connect to our local database using the `duckdb` library.
- Once a connection has been made, we can start retrieving data from the database.


### Setup


In [6]:
# Import the libraries used throughout this notebook
import duckdb
import polars as pl
# NOTE(review): this binds the name `display` to the `IPython.display` module,
# not to the `display()` function. Calling `display(obj)` would require
# `from IPython.display import display` instead - confirm which one is intended.
from IPython import display

In [2]:
# Use SQL magic
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = True
%config SqlMagic.displaycon = False

%load_ext sql
conn = duckdb.connect(database="../dsp-dagster/data_systems_project.duckdb")
%sql conn --alias duckdb
# %sql SHOW ALL TABLES; # shows all available tables

[32mDeploy AI and data apps for free on Ploomber Cloud! Learn more: https://docs.cloud.ploomber.io/en/latest/quickstart/signup.html[0m


In [3]:
## Option 1: use the SQL magic to load a table from the DB into the variable `res`:
# %sql res << SELECT * FROM joined.incident_deployments_vehicles_weather
# res

In [3]:
# Option 2: query through the DuckDB Python API.

# The joined table combines KNMI weather data with Fire Department data.
joined_table_query = """
    SELECT * FROM joined.incident_deployments_vehicles_weather """

# Run the query and materialise the result as a Polars DataFrame
query_result = conn.execute(joined_table_query)
df = query_result.pl()

# The data now lives in memory, so the database connection can be released
conn.close()

In [4]:
# Preview the first five rows of the joined table
df.head(n=5)

Station_code,Date,Hour,Dd,Fh,Ff,Fx,T,T10n,Td,Sq,Q,Dr,Rh,P,Vv,N,U,Ww,Ix,M,R,S,O,Y,Incident_ID,Incident_Starttime,Incident_Endtime,Incident_Duration,Incident_Priority,Service_Area,Municipality,Damage_Type,LON,LAT,Incident_Endtime_Hour,Incident_Duration_Hour,Incident_Starttime_Minute,Incident_Endtime_Minute,Incident_Duration_Minute,Deployment_ID,Vehicle_Type,Vehicle_Role,Fire_Station,Fire_Station_Service_Status,Driving_Time_To_Incident,Vehicle
i64,date,i8,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,time,time,time,i64,str,str,str,f64,f64,i8,i8,i8,i8,i8,i64,str,str,str,str,str,str
240,2005-01-01,1,260,40,30,60,68,,57,0,0,0,0,10246,57,8,93,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,
240,2005-01-01,2,230,30,30,60,65,,52,0,0,0,0,10244,58,8,91,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,
240,2005-01-01,3,230,40,30,50,43,,34,0,0,0,0,10241,40,1,94,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,
240,2005-01-01,4,220,40,40,50,38,,32,0,0,0,0,10239,12,0,96,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,
240,2005-01-01,5,230,40,40,50,38,,34,0,0,0,0,10237,14,3,97,10,7,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,


### Feature Selection


In [12]:
# Keep only the rows that are linked to an incident (non-null Incident_ID).
# NOTE(review): the captured output of this cell contains identical rows
# (e.g. Deployment_ID 98 appears twice) - check whether the join duplicates records.
has_incident = pl.col("Incident_ID").is_not_null()
selected_df = df.filter(has_incident)
selected_df

Station_code,Date,Hour,Dd,Fh,Ff,Fx,T,T10n,Td,Sq,Q,Dr,Rh,P,Vv,N,U,Ww,Ix,M,R,S,O,Y,Incident_ID,Incident_Starttime,Incident_Endtime,Incident_Duration,Incident_Priority,Service_Area,Municipality,Damage_Type,LON,LAT,Incident_Endtime_Hour,Incident_Duration_Hour,Incident_Starttime_Minute,Incident_Endtime_Minute,Incident_Duration_Minute,Deployment_ID,Vehicle_Type,Vehicle_Role,Fire_Station,Fire_Station_Service_Status,Driving_Time_To_Incident,Vehicle
i64,date,i8,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,time,time,time,i64,str,str,str,f64,f64,i8,i8,i8,i8,i8,i64,str,str,str,str,str,str
240,2005-04-07,12,240,160,160,210,99,69,13,8,195,0,0,9993,74,6,55,2,7,0,0,0,0,0,11333,12:09:51,13:38:19,01:28:28,3,"""IJsbrand""","""AMSTERDAM""","""Unknown""",4.887098,52.418767,13,1,9,38,28,94,"""TS""","""TS""","""IJsbrand""","""Professional""","""NULL""","""Tankautospuit"""
240,2005-04-07,12,240,160,160,210,99,69,13,8,195,0,0,9993,74,6,55,2,7,0,0,0,0,0,11333,12:09:51,13:38:19,01:28:28,3,"""IJsbrand""","""AMSTERDAM""","""Unknown""",4.887098,52.418767,13,1,9,38,28,95,"""TS""","""TS""","""Zebra""","""Professional""","""808""","""Tankautospuit"""
240,2005-04-07,12,240,160,160,210,99,69,13,8,195,0,0,9993,74,6,55,2,7,0,0,0,0,0,11337,12:53:52,13:57:46,01:03:54,2,"""Pieter""","""AMSTERDAM""","""Tree""",4.826198,52.352551,13,1,53,57,3,97,"""RV""","""RV""","""Teunis""","""Professional""","""753""","""HV met kraan"""
240,2005-04-07,12,240,160,160,210,99,69,13,8,195,0,0,9993,74,6,55,2,7,0,0,0,0,0,11337,12:53:52,13:57:46,01:03:54,2,"""Pieter""","""AMSTERDAM""","""Tree""",4.826198,52.352551,13,1,53,57,3,98,"""TS""","""TS""","""Osdorp""","""Professional""","""633""","""Tankautospuit"""
240,2005-04-07,12,240,160,160,210,99,69,13,8,195,0,0,9993,74,6,55,2,7,0,0,0,0,0,11337,12:53:52,13:57:46,01:03:54,2,"""Pieter""","""AMSTERDAM""","""Tree""",4.826198,52.352551,13,1,53,57,3,98,"""TS""","""TS""","""Osdorp""","""Professional""","""633""","""Tankautospuit"""
240,2005-11-25,7,290,160,160,260,45,,34,0,0,10,24,9784,58,8,93,62,7,0,1,0,0,0,24028,07:25:02,07:41:25,00:16:23,3,"""Dirk""","""AMSTERDAM""","""Unknown""",4.894682,52.359381,7,0,25,41,16,99,"""TS""","""TS""","""Dirk""","""Professional""","""157""","""Tankautospuit"""
240,2006-03-26,7,240,100,100,150,109,,98,2,22,0,0,10058,65,6,93,,5,0,0,0,0,0,30843,07:42:58,14:32:53,06:49:55,1,"""Victor""","""AMSTERDAM""","""Fence, Road si…",4.93092,52.358682,14,6,42,32,49,105,"""TS""","""TS""","""Victor""","""Professional""","""NULL""","""Tankautospuit"""
240,2006-03-26,7,240,100,100,150,109,,98,2,22,0,0,10058,65,6,93,,5,0,0,0,0,0,30843,07:42:58,14:32:53,06:49:55,1,"""Victor""","""AMSTERDAM""","""Tree""",4.93092,52.358682,14,6,42,32,49,105,"""TS""","""TS""","""Victor""","""Professional""","""NULL""","""Tankautospuit"""
240,2006-03-27,17,230,160,160,250,131,,78,10,67,0,0,9989,65,2,70,,5,0,0,0,0,0,30913,17:58:03,20:20:43,02:22:40,2,"""Nico""","""AMSTERDAM""","""Unknown""",4.925121,52.375628,20,2,58,20,22,107,"""TS""","""TS""","""Nico""","""Professional""","""194""","""Tankautospuit"""
240,2006-03-27,18,230,150,130,230,118,110,73,5,16,0,0,9996,64,7,74,3,7,0,0,0,0,0,30914,18:03:27,20:06:44,02:03:17,2,"""Hendrik""","""AMSTERDAM""","""Unknown""",4.857239,52.36996,20,2,3,6,3,108,"""TS""","""TS""","""Hendrik""","""Professional""","""295""","""Tankautospuit"""
