# A quick scan of the data from the fire brigade department

In [None]:
import pandas as pd
import numpy as np
from pyproj import Proj, transform
from math import radians, cos, sin, asin, sqrt,atan2

## Load the data sets

In [None]:
# Forward slashes keep the paths portable across operating systems and avoid
# the invalid "\J", "\i" and "\k" escape sequences of the backslash literals.
# The CSV files are read as semicolon-delimited with a decimal comma.
df_in = pd.read_csv('data/JADS/incidenten_2017.csv', sep=';', decimal=',')
df_dep = pd.read_csv('data/JADS/inzetten_2017.csv', sep=';', decimal=',')
locations = pd.read_excel('data/JADS/kazernepositie en voertuigen.xlsx', sheet_name='adressen')

In [None]:
#(in) incident starts = dim_incident_start_datumtijd
#(in) incident finish = dim_incident_eind_datumtijd
#(dep) alarm is activated = inzet_gealarmeerd_datumtijd
#(dep) leave = inzet_uitgerukt_datumtijd
#(dep) arrive = inzet_terplaatse_datumtijd


In [None]:
# Display the table read from the 'adressen' sheet.
locations

In [None]:
# Transposed view of the incidents frame: one row per column, easier to scan.
df_in.T

In [None]:
# Transposed view of the deployments frame: one row per column, easier to scan.
df_dep.T

In [None]:
# Size and time coverage of both data sets.
print("the shape of the incidents dataset is {}".format(df_in.shape))
print("the shape of the deployment dataset is {}".format(df_dep.shape))
print("the time span of the data set incidents is: {}".format((min(df_in['dim_datum_datum']), max(df_in['dim_datum_datum']))))
# One print only: the former print(print(...)) also emitted a stray "None" line.
print("the time span of the data set deploy is: {}".format((min(df_dep['inzet_gealarmeerd_datumtijd']),
                                                            max(df_dep['inzet_gealarmeerd_datumtijd']))))

# Incident ids that appear in only one of the two data sets.
# Each id set is built once and reused for both differences.
incident_ids = set(df_in['dim_incident_id'].unique())
deploy_ids = set(df_dep['hub_incident_id'].unique())
incidents_df_in = list(incident_ids - deploy_ids)
incidents_df_dep = list(deploy_ids - incident_ids)

print('the following id are only at incidents {}'.format(incidents_df_in))
print('the following id are only at deploy {}'.format(incidents_df_dep))

In [None]:
# Constant helper column (all ones) and the list of every column label,
# including 'Aux'; both feed the disabled null-count expression below.
df_in['Aux'] = 1
names = df_in.columns.tolist()
# df_in.groupby('Aux')[names].apply(lambda x: x.isnull().sum()).T

In [None]:
# describe() summary of the incidents, one row per described column;
# rows containing missing values are dropped and four statistics are kept.
df_in.describe().transpose().dropna().loc[:, ['mean', 'std', 'min', 'max']]

In [None]:
# describe() summary of the deployments, one row per described column;
# rows containing missing values are dropped and four statistics are kept.
df_dep.describe().transpose().dropna().loc[:, ['mean', 'std', 'min', 'max']]


# EDA on the components of the response time

In [None]:
#(in) incident starts = dim_incident_start_datumtijd
#(in) incident finish = dim_incident_eind_datumtijd
#(dep) alarm is activated = inzet_gealarmeerd_datumtijd
#(dep) leave = inzet_uitgerukt_datumtijd
#(dep) arrive = inzet_terplaatse_datumtijd

In [None]:
# List every column label of the incidents frame.
print(df_in.columns.tolist())

In [None]:
def pre_process_data(df_in, df_dep, locations):
    """
    Join deployments, incidents and station locations into one table.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Incidents; must contain every column listed in ``keep_in``.
    df_dep : pandas.DataFrame
        Deployments; must contain every column listed in ``keep_dep``.
    locations : pandas.DataFrame
        Station table with a ``kazerne`` column holding the station name.
        The caller's frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        Columns of ``locations`` followed by the selected deployment and
        incident columns. Only deployments whose incident id exists in
        ``df_in`` and whose lower-cased station name exists in
        ``locations`` are kept (both joins are inner joins). The
        ``kazerne`` and ``inzet_kazerne_naam`` columns are lower-cased.

    Raises
    ------
    KeyError
        If one of the required columns is missing.

    Notes
    -----
    ``st_x`` / ``st_y`` are passed through unchanged; no coordinate
    reprojection is performed here.
    """
    keep_in = ['dim_incident_id', 'st_x', 'st_y', 'dim_incident_incident_type', 'inc_dim_object_naam',
               'dim_incident_start_datumtijd', 'dim_incident_eind_datumtijd', 'dim_prioriteit_prio']

    keep_dep = ['hub_incident_id', 'inzet_uitgerukt_datumtijd', 'inzet_terplaatse_datumtijd', 'voertuig_groep',
                'kazerne_groep', 'inzet_kazerne_naam']

    # Inner join: deployments without a matching incident are dropped.
    deployments = df_dep[keep_dep].merge(
        df_in[keep_in], left_on='hub_incident_id', right_on='dim_incident_id', how='inner')

    # The vectorised .str.lower() leaves missing station names as NaN instead
    # of raising AttributeError the way a per-element x.lower() does.
    deployments['inzet_kazerne_naam'] = deployments['inzet_kazerne_naam'].str.lower()

    # assign() returns a new frame, so the caller's `locations` keeps its
    # original spelling of the station names.
    stations = locations.assign(kazerne=locations['kazerne'].str.lower())

    # Case-insensitive match of station names; unmatched rows are dropped.
    return stations.merge(deployments, left_on='kazerne', right_on='inzet_kazerne_naam', how='inner')

# Build the merged stations/deployments/incidents table and preview its first rows.
M = pre_process_data(df_in, df_dep, locations)
M.head()


Data issues:
(a) not all the stations have a dispatch time
(b) not all the stations (kazerne) are in the deploy dataset (inzet_kazerne_naam): ['anton', 'dirk', 'hendrik', 'ijsbrand', 'nico', 'osdorp', 'pieter', 'teunis', 'victor', 'willem', 'zebra']



In [None]:
def haversine(lon1, lat1, lon2, lat2, radius=6371):
    """
    Calculate the great-circle distance between two points on a sphere.

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Sphere radius; the result is in the same unit. Defaults to 6371,
        the Earth's radius in kilometres (use 3956 for miles).

    Returns
    -------
    float
        Distance along the great circle, in the unit of ``radius``.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push `a` marginally above 1 for near-antipodal points,
    # which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, a)
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return c * radius

# Element-wise distance between each row's (lon, lat) pair and its (st_y, st_x) pair.
# NOTE(review): haversine() already returns kilometres, so the extra /1000 does
# not match the "(Km)" label of the column — confirm the intended unit.
# NOTE(review): haversine() takes (lon, lat) in decimal degrees, yet st_y is
# passed in the longitude slot and st_x in the latitude slot. Verify the
# argument order and that st_x/st_y really are degrees rather than projected
# coordinates — TODO confirm against the data.
M['haversine_distance (Km)'] = np.vectorize(haversine)(M['lon'], M['lat'], M['st_y'], M['st_x'])/1000
# Transposed view of the enriched table.
M.T

In [None]:
# to transform latitud and longitud 