In [7]:
import numpy as np
import pandas as pd
import geopandas as gpd
import geopy as gpy
from geopy.extra.rate_limiter import RateLimiter
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from collections import OrderedDict

## Incident data

In [19]:
def rawData(address = './data/ambulance/virginiaBeach_ambulance_timeData.csv'):
    data = pd.read_csv(address)

    data['CallDateTime'] = pd.to_datetime(data['Call Date and Time'], format = "%Y-%m-%d %H:%M:%S")
    data['EntryDateTime'] = pd.to_datetime(data['Entry Date and Time'], format = "%Y-%m-%d %H:%M:%S")
    data['DispatchDateTime'] = pd.to_datetime(data['Dispatch Date and Time'], format = "%Y-%m-%d %H:%M:%S")
    data['EnRouteDateTime'] = pd.to_datetime(data['En route Date and Time'], format = "%Y-%m-%d %H:%M:%S")
    data['OnSceneDateTime'] = pd.to_datetime(data['On Scene Date and Time'], format = "%Y-%m-%d %H:%M:%S")
    data['CloseDateTime'] = pd.to_datetime(data['Close Date and Time'], format = "%Y-%m-%d %H:%M:%S")

    data['Country'] = 'USA'
    data['Address'] = data['Block Address'].str.cat([
        pd.Series(', ', index = data.index),
        data['City'], 
        pd.Series(', ', index = data.index),
        data['State'], 
        pd.Series(', ', index = data.index),
        data['Country']
    ], join="left")

    data['DispatchTime'] = (data['DispatchDateTime'] - data['CallDateTime']).astype("timedelta64[s]")
    data['EnRouteTime'] = (data['EnRouteDateTime'] - data['CallDateTime']).astype("timedelta64[s]")
    data['TravelTime'] = (data['OnSceneDateTime'] - data['EnRouteDateTime']).astype("timedelta64[s]")
    data['ResponseTime'] = (data['OnSceneDateTime'] - data['CallDateTime']).astype("timedelta64[s]")
    data['HourInDay'] = data['CallDateTime'].dt.hour
    data['DayOfWeek'] = data['CallDateTime'].dt.dayofweek
    
    return data

def addOrigin(data, rescueAddress):
    rescue = pd.read_csv(rescueAddress) 
    rescue = gpd.GeoDataFrame(rescue, geometry = gpd.points_from_xy(rescue['lon'], rescue['lat']))
    data = data[data['Rescue Squad Number'].isin(rescue.Number.to_list())]
    data = data.merge(rescue, how = 'left', left_on = 'Rescue Squad Number', right_on = 'Number')
    
    data = gpd.GeoDataFrame(data)
    data = data.set_index('CallDateTime')
    data = data.sort_index()
    
    data['geometry'] = gpd.GeoSeries(data['geometry'], crs = 'EPSG:4326', index = data.index)
    return data

def geoCoding(data):
    locator = gpy.geocoders.ArcGIS()
    geocode = RateLimiter(locator.geocode, min_delay_seconds = 0.1)

    data['IncidentFullInfo'] = data['Address'].apply(geocode)
    data['IncidentCoor'] = data['IncidentFullInfo'].apply(lambda loc: tuple(loc.point) if loc else None)
    data['IncidentFullInfo'] = data['IncidentFullInfo'].astype(str)
    data[['IncidentLat', 'IncidentLon', 'IncidentElevation']] = pd.DataFrame(data['IncidentCoor'].tolist(), index = data.index)
    data['IncidentPoint'] = gpd.GeoSeries(gpd.points_from_xy(y = data.IncidentLat, x = data.IncidentLon), index = data.index, crs = "EPSG:4326")
    
    return data

def organizeData(data):
    data['CallDateTime'] = data.index
    data = data.reset_index(drop = True)
    data = data.loc[:, [
        'Call Priority',
        'CallDateTime', 'EntryDateTime', 'DispatchDateTime', 'EnRouteDateTime', 'OnSceneDateTime', 'CloseDateTime',
        'DispatchTime', 'EnRouteTime', 'TravelTime', 'ResponseTime', 'HourInDay', 'DayOfWeek', 
        'Rescue Squad Number', 'geometry', 
        'Address', 'IncidentFullInfo', 'IncidentPoint',] ]
    data = data.rename(columns = {"geometry": "RescueSquadPoint", 
                                  "Address": "IncidentAddress", 
                                  'Rescue Squad Number': 'RescueSquadNumber',
                                  'Call Priority': 'CallPriority'})
    data.set_geometry("IncidentPoint")
    return data

def string2Points(column, crs, index):
    x = [float(location.replace('POINT (', '').replace(')', '').split(' ')[0]) for location in list(data[column].values)]
    y = [float(location.replace('POINT (', '').replace(')', '').split(' ')[1]) for location in list(data[column].values)]
    return gpd.GeoSeries(gpd.points_from_xy(x = x, y = y), crs = crs, index = index)


In [21]:
# preprocess and save data
data = rawData('./data/ambulance/virginiaBeach_ambulance_timeData.csv')
data = addOrigin(data, './data/rescueTeamLocation/rescueStations.txt')
data = data.loc['2015-01-01' : '2015-12-31', :]
data = geoCoding(data)
data = organizeData(data)
data.to_csv('./data/geocoded_saved/20150101-20151231.csv', index = False)

In [3]:
# reload data and build geoDataFrame
data = pd.read_csv('./data/geocoded_saved/20160101-20161015.csv', index_col = 'CallDateTime')
data.index = pd.to_datetime(data.index, format = "%Y-%m-%d %H:%M:%S")
data = gpd.GeoDataFrame(data)
data['RescueSquadPoint'] = string2Points('RescueSquadPoint', "EPSG:4326", data.index)
data['IncidentPoint'] = string2Points('IncidentPoint', "EPSG:4326", data.index)

# Geographical data

In [8]:
import rasterio
from rasterio.plot import show
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import rasterio.mask
import pandas as pd
from shapely.geometry import Polygon, Point
import warnings
import networkx as nx
import momepy
import libpysal
import math
import contextily as cx

In [9]:
# visualzation
def showRoadsInundation(inundation):
    fig = plt.figure(figsize = (100, 50))
    ax = fig.add_subplot()
    ax = show(inundation, ax = ax, cmap = 'pink')
    roads.plot(ax = ax)
    plt.show()

def showInundation(inundation):
    plt.imshow(inundation.read()[0], cmap = 'hot')
    plt.colorbar()
    plt.show()
    
def showBridgesInundation():
    fig = plt.figure(figsize = (100, 50))
    ax = fig.add_subplot()
    ax = show(inundation, ax = ax, cmap = 'pink')
    bridges.plot(ax = ax)
    plt.show()

def showLinesSurfaces_withBounds(roads, roadSurfaces, bounds):
    fig = plt.figure(figsize = (100, 50))
    ax = fig.add_subplot()
    roadsBounded = roads.cx[bounds[0]: bounds[1], bounds[2]: bounds[3]]
    roadSurfacesBounded = roadSurfaces.cx[bounds[0]: bounds[1], bounds[2]: bounds[3]]
    roadsBounded.plot(ax = ax, color='red')
    roadSurfacesBounded.plot(ax = ax)
    plt.show()    

def showMidpointLineSurface(roads, roadSurfaces):
    fig = plt.figure(figsize = (400, 200))
    ax = fig.add_subplot()
    roads.line.plot(ax = ax, linewidth = .75, zorder = 0)
    # roads.midpoint.plot(ax = ax, zorder = 0)
    roadSurfaces.geometry.plot(ax = ax, color = 'red', zorder = 0)
    plt.show()

# utilites
def getGlobalBounds(gpd):
    # get global bounds of a geopandas df
    xmin = gpd.bounds.minx.min()
    xmax = gpd.bounds.maxx.max()
    ymin = gpd.bounds.miny.min()
    ymax = gpd.bounds.maxy.max()    
    return xmin, xmax, ymin, ymax

def getMiddleBounding(bounds, percent = 0.05):
    xmin = bounds[0]
    xmax = bounds[1]
    ymin = bounds[2]
    ymax = bounds[3]
    xminNew = xmin + ((xmax - xmin) * ((1 - percent) / 2))
    xmaxNew = xminNew + (xmax - xmin) * percent
    yminNew = ymin + ((ymax - ymin) * ((1 - percent) / 2))
    ymaxNew = yminNew + (ymax - ymin) * percent   
    return xminNew, xmaxNew, yminNew, ymaxNew

In [10]:
def moveDuplicates(joined):
    # move duplicates, keep the row with higher width
    u, c = np.unique(joined.OBJECTID_left.values, return_counts = True)
    duplicates = u[c > 1]
    joined_noDuplicates = joined.copy()
    for dup in duplicates:
        du = joined[joined.OBJECTID_left == dup]
        joined_noDuplicates = joined_noDuplicates[joined_noDuplicates.OBJECTID_left != dup]
        duOne = du[du.aveWidth == du.aveWidth.max()]
        joined_noDuplicates = pd.concat([joined_noDuplicates, duOne])
    return joined_noDuplicates.sort_values(by = ['OBJECTID_left'])

def createSurface4roads(roads, roadSurfaces):
    # USE: create a geoDataFrame containing the column of average width and full polygon (might include multiple road segments) for each road
    # spatial join road lines and surfaces
    if roads.crs != roadSurfaces.crs:
        return 'crs not consistent'
    roadSurfaces['aveWidth'] = roadSurfaces.Shapearea / roadSurfaces.Shapelen
    roads['midpoint'] = roads.geometry.interpolate(0.5, normalized = True)
    roads = roads.set_geometry("midpoint", crs = roadSurfaces.crs)
    roads = roads.rename(columns = {"geometry": "line"})
    joined = roads.sjoin(roadSurfaces, how = "left", predicate = 'within')
    # move duplicates/nan 
    joined_updated = moveDuplicates(joined)
    joined_updated.loc[np.isnan(joined_updated.aveWidth), ['aveWidth']] = joined_updated.aveWidth.mean() # assign width to missing roads
    # attach roadSurface polygons
    joined_updated['OBJECTID_right'] = joined_updated.OBJECTID_right.astype('Int64')
    roadSurfaces_temp = roadSurfaces[['OBJECTID', 'geometry']].rename({'OBJECTID': 'OBJECTID_right', 'geometry': 'surfacePolygon'}, axis = 1)
    roadSurfaces_temp.loc[len(roadSurfaces_temp)] = [np.nan, Polygon()]
    roadSurfaces_temp.OBJECTID_right = roadSurfaces_temp.OBJECTID_right.astype('Int64')
    joined_updated = joined_updated.merge(roadSurfaces_temp, how = 'left', on = 'OBJECTID_right')
    joined_updated = joined_updated.set_geometry('surfacePolygon').set_crs(roadSurfaces.crs)
    return joined_updated

roads = gpd.read_file('./data/roads/Streets.shp')
roads = roads.loc[-roads['geometry'].duplicated(), :]
roads['OBJECTID'] = list(range(1, len(roads) + 1))
roads = roads.reset_index(drop = True)

roadSurfaces = gpd.read_file('./data/roads/Road_Surfaces.shp')
surfaces4roads = createSurface4roads(roads, roadSurfaces)

# make buffer for lines
scale = 2.7
roads['aveWidth'] = surfaces4roads.aveWidth
roads['scaledRadius'] = roads['aveWidth'] / 2 * scale
roads['buffers'] = roads.geometry.buffer(roads['scaledRadius'])
roads['buffersUnscaled'] = roads.geometry.buffer(roads['aveWidth'] / 2 * 1.5) # may be some errors in raw data, roads look good when multiply by 1.5 
roads = roads.rename(columns = {"geometry": "line"})
roads = roads.set_geometry('buffers', crs = roadSurfaces.crs)

# intersect buffer with roadSurface polygon
roads['surface'] = [road.intersection(surface) if not road.intersection(surface).is_empty else roadUnscaled \
                            for road, surface, roadUnscaled in zip(roads.geometry, surfaces4roads.geometry, roads.buffersUnscaled)]
roads = roads.set_geometry('surface', crs = roadSurfaces.crs)

In [12]:
def inundationCutter(inundation, cut, all_touched, invert, addr = './data/inundation/croppedByBridge/croppedByBridge.tif'):
    if inundation.crs != cut.crs:
        return 'crs not consistent'
    # mask the inundation using bridges shp, remove the inundation under bridges
    out_array, _ = rasterio.mask.mask(inundation, cut.geometry, all_touched = all_touched, invert = invert)
    inundation_cropped = rasterio.open(
        addr,
        'w+',
        **inundation.meta
    )
    inundation_cropped.write(out_array)
    return inundation_cropped

def getMaxWaterDepth(roadGeometry, inundation):
    # roadGeometry should be series, inundation is raster
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        inundationOnRoad, _ = rasterio.mask.mask(inundation, roadGeometry)
        inundationOnRoad = np.where(inundationOnRoad == inundation.nodata, - inundationOnRoad, inundationOnRoad)
    return np.max(inundationOnRoad)


# determine disruption on road network (take one hour as example)
inundation = rasterio.open('./data/inundation/tifData/depth_objID_35.tif')
roads_updated_4getInundation = roads.copy().to_crs(str(inundation.crs))
inundation_cutByRoads = inundationCutter(inundation, roads_updated_4getInundation, False, False, './data/inundation/croppedByRoads/croppedByRoads.tif')

roads['waterDepth'] = roads_updated_4getInundation.loc[:, ['surface']] \
    .apply(lambda x: getMaxWaterDepth(x, inundation_cutByRoads), axis = 1, raw = True).replace(-inundation.nodata, 0)

In [None]:
# consider bridges in road network (PENDING)
# bridges = gpd.read_file('./data/bridges/bridgePolygon.shp').to_crs(str(inundation.crs))
# inundation_cropped = inundationCutter(inundation, bridges, True, True)

In [13]:
# create graph
def showGraphRoads(roads4graph, graph):
    f, ax = plt.subplots(1, 3, figsize = (100, 50), sharex = True, sharey = True)
    for i, facet in enumerate(ax):
        facet.set_title(("Streets", "Primal graph", "Overlay")[i])
        facet.axis("off")

    roads4graph.plot(color='#e32e00', ax = ax[0])
    nx.draw(graph, {key: [value.x, value.y] for key, value in nx.get_node_attributes(graph, 'midpoint').items()}, ax = ax[1], node_size = 1)
    roads4graph.plot(color = '#e32e00', ax = ax[2], zorder = -1)
    nx.draw(graph, {key: [value.x, value.y] for key, value in nx.get_node_attributes(graph, 'midpoint').items()}, ax = ax[2], node_size = 1)


roads4graph = roads.copy()
roads4graph['geometry'] = roads4graph['line'].to_crs(roads4graph.line.crs)
roads4graph = roads4graph.set_geometry("geometry")
graph = momepy.gdf_to_nx(roads4graph, approach = 'dual', multigraph = False, angles = False)
graph = nx.relabel_nodes(graph, nx.get_node_attributes(graph, 'OBJECTID'))
for edge in graph.edges():
    graph[edge[0]][edge[1]]['weight'] = (graph.nodes[edge[0]]['SHAPElen'] + graph.nodes[edge[1]]['SHAPElen']) / 2
# showGraphRoads(roads4graph, graph)

# NOTE: the graph is un-directed right now, the logic should be checked if changed to directed

In [14]:
def addPathLen2Graph(graph, rescue, weight, newAttribute_rescueSquad, newAttribute_path):
    # some roads are disconnected from all the rescue station even in normal time (as the raw data indicates)
    voronoi = nx.voronoi_cells(graph, set(rescue.OBJECTID_nearestRoad.unique()), weight = weight)
    for rescueSquad, destinations in zip(voronoi.keys(), voronoi.values()):
        if rescueSquad == 'unreachable':
            print(len(destinations), 'nodes are unreachable when building voronoi for', newAttribute_path)
            for des in destinations:
                graph.nodes[des][newAttribute_rescueSquad] = np.nan
                graph.nodes[des][newAttribute_path] = math.inf # set path len to inf if it's disconnected from rescues
#                 print('NOTE: node', des, 'is unreachable when building voronoi for', newAttribute_path)
        else:
            for des in destinations:
                shortestPath = nx.shortest_path_length(graph, source = rescueSquad, target = des, weight = weight)
                graph.nodes[des][newAttribute_path] = shortestPath
                graph.nodes[des][newAttribute_rescueSquad] = rescueSquad
                if shortestPath == 0:
                    graph.nodes[des][newAttribute_path] = 1
                if shortestPath == math.inf:
                    graph.nodes[des][newAttribute_rescueSquad] = math.inf
    return graph, voronoi

def addDisruption(graph, roads, newAttribute = 'weightWithDisruption', threshold = 3):
    nx.set_edge_attributes(graph, nx.get_edge_attributes(graph, "weight"), newAttribute)
    disruptedRoads = roads[roads['waterDepth'] >= threshold]['OBJECTID'].to_list()
    for disruption in disruptedRoads:
        for edge in graph.edges(disruption):
            graph.edges()[edge][newAttribute] = math.inf # set edge weight to inf if it's disrupted by inundation
    return graph

def changeValue4DisruptedRoad(roads, graph, threshold = 3):
    # the disrupted road itself is not disconnected, so assign the shortestPath of adjancent road to this road
    for disruption in roads[roads['waterDepth'] >= threshold]['OBJECTID'].to_list():
        pathLen = []
        edgeNum = []
        for edge in graph.edges(disruption):
            pathLen.append(graph.nodes()[edge[1]]['shortestPathLenWithDisruption'])
            edgeNum.append(edge[1])
        if pathLen != []: # in case there are disconnected single node
            graph.nodes()[disruption]['shortestPathLenWithDisruption'] = min(pathLen)
            if min(pathLen) != math.inf:
                graph.nodes()[disruption]['rescueAssignedWithDisruption'] = edgeNum[pathLen.index(min(pathLen))]
            else:
                graph.nodes()[disruption]['rescueAssignedWithDisruption'] = np.nan
    return graph


# read the location of rescue squads and attach them to nodes 
rescue = pd.read_csv('./data/rescueTeamLocation/rescueStations.txt') 
rescue = gpd.GeoDataFrame(rescue, geometry = gpd.points_from_xy(rescue['lon'], rescue['lat'])).set_crs('EPSG:4326').to_crs(roads.crs) 
rescue['OBJECTID_nearestRoad'] = rescue.geometry.apply(lambda x: x.distance(roads.line).sort_values().index[0] + 1)

In [15]:
# calculate ratios
graph, _ = addPathLen2Graph(graph, rescue, 'weight', 'rescueAssigned', 'shortestPathLen')
graphDisrupted = addDisruption(graph, roads, threshold = 1)
graph, _ = addPathLen2Graph(graphDisrupted, rescue, 'weightWithDisruption', 'rescueAssignedWithDisruption', 'shortestPathLenWithDisruption') 
graph = changeValue4DisruptedRoad(roads, graph, threshold = 1)

nx.set_node_attributes(graph, 
                       {x[0]: y[1]/x[1] if y[1]/x[1] != math.inf else np.nan \
                        for x, y in zip(nx.get_node_attributes(graph, "shortestPathLen").items(), 
                                        nx.get_node_attributes(graph, "shortestPathLenWithDisruption").items() ) },
                       'travelTimeIncreaseRatio')
roads['travelTimeIncreaseRatio'] = roads['OBJECTID'].map(nx.get_node_attributes(graph, "travelTimeIncreaseRatio"))

81 nodes are unreachable when building voronoi for shortestPathLen
81 nodes are unreachable when building voronoi for shortestPathLenWithDisruption


# Incident and geographical info intersection

In [147]:
def assignGraphEdge(data, roads, inColumn, outColumn1, outColumn2, max_distance = 500):
    # NOTE: there could be null value if no road is within the scope of search for a location
    roadLines = roads.loc[:, ['OBJECTID', 'line']].set_geometry('line')
    locations = data.loc[:, [inColumn]].set_geometry(inColumn).to_crs(roadLines.crs)
    match = locations.sjoin_nearest(roadLines, how = 'left', max_distance = max_distance, distance_col = 'distance')
    match = match.reset_index().drop_duplicates(subset = ['CallDateTime']).set_index('CallDateTime')
    data[outColumn1] = match['OBJECTID']
    data[outColumn2] = match['distance']
    return data

def nearestRescue4Incidents(data, rescue):
    # find nearest rescues for all incidents
    incidents = data.DestinationID.values
    voronoi = nx.voronoi_cells(graph, set(rescue.OBJECTID_nearestRoad.unique()), weight = 'weight')
    nearestRescue = []
    for incident in incidents:
        len1 = len(nearestRescue)
        if np.isnan(incident):
            nearestRescue.append(np.nan)
        else:
            for key, value in voronoi.items():
                if int(incident) in list(value):
                    if key == 'unreachable':
                        nearestRescue.append(np.nan)
                    else:
                        nearestRescue.append(key)
                    break 
        len2 = len(nearestRescue)
        if len2 == len1:
            print(incident, 'not in any')
    return nearestRescue

def generateDistDf(rescue, graph):
    nodeList = range(1, len(list(graph.nodes())) + 1)
    df = pd.DataFrame(nodeList, index = nodeList, columns =['NodeNames'])
    for res in rescue.values:
        resName = res[0]
        resRoadNumber = res[-1]
        distanceDict = nx.single_source_dijkstra_path_length(graph, resRoadNumber, weight='weight')
        orderedResRoadNumber = OrderedDict(sorted(distanceDict.items()))
        orderedResRoadNumberDf = pd.DataFrame.from_dict(orderedResRoadNumber, orient = 'index', columns = ['from' + resName])
        orderedResRoadNumberDf = orderedResRoadNumberDf.reset_index()
        df = df.merge(orderedResRoadNumberDf, how = 'left', left_on = 'NodeNames', right_on = 'index').drop(columns = 'index')
    return df

def obedianceOfShortestPrinciple(Series, distanceDataFrame):
    DestinationID = Series.DestinationID
    RescueSquadNumber = Series.RescueSquadNumber
    if len(distanceDataFrame[distanceDataFrame.NodeNames == DestinationID]) != 0:
        # NOTE:some incidents are not considered because no road around them
        allDist = list(np.sort(distanceDataFrame[distanceDataFrame.NodeNames == DestinationID].values[0][1:]))
        realDist = distanceDataFrame[distanceDataFrame.NodeNames == DestinationID]['from' + RescueSquadNumber].values[0]
        if np.isnan(realDist):
            # NOTE: some roads are disconnected even in normal time
            realDistRank = np.nan
            realDistIncreaseRatio = np.nan
        else:
            realDistRank = allDist.index(realDist) + 1
            if allDist[0] == 0:
                # NOTE: in case the incident is just beside the rescue station, set the dist to 1
                allDist[0] = 1
            realDistIncreaseRatio = realDist / allDist[0]
    else:
        realDistRank = np.nan
        realDistIncreaseRatio = np.nan
    return realDistRank, realDistIncreaseRatio

def shortestRouteLength_slow(row, graph, ifPrintError = False):
    try:
        length = nx.dijkstra_path_length(graph, row.OriginRoadID, row.DestinationID, weight = 'weight')
    except BaseException as ex:
        if ifPrintError == True:
            print(ex)
        length = np.nan
    return length

def shortestRouteLength(s, distanceDataFrame):
    if np.isnan(s.DestinationID):
        return np.nan
    else:
        return distanceDataFrame[distanceDataFrame.NodeNames == s.DestinationID]['from' + s.RescueSquadNumber].values[0]

In [148]:
# assign records to graph edges
data = assignGraphEdge(data, roads, 'RescueSquadPoint', 'OriginRoadID', 'Origin2RoadDist')
data = assignGraphEdge(data, roads, 'IncidentPoint', 'DestinationID', 'Destination2RoadDist')

# # find nearest rescue station
# data['NearestRescue'] = nearestRescue4Incidents(data, rescue)
# data = data.merge(rescue.loc[:, ["OBJECTID_nearestRoad", "Number"]], how = 'left', left_on = "NearestRescue", right_on = 'OBJECTID_nearestRoad')
# data = data.drop(columns = 'OBJECTID_nearestRoad').rename(columns={"Number": "NearestRescueNumber"})

# find the top nearest rescus stations
distanceDataFrame = generateDistDf(rescue, graph)
obediance = data.apply(obedianceOfShortestPrinciple, distanceDataFrame = distanceDataFrame, axis = 1, result_type = 'expand')
data['NearestOrder'] = obediance[0]
data['DisobediancePathIncrease'] = obediance[1]

# calculate shortest path length and ave speed
data['AssumedRouteLength'] = data.apply(shortestRouteLength, distanceDataFrame = distanceDataFrame, axis = 1)
data['AverageSpeed'] = data['AssumedRouteLength'] / data['TravelTime']

In [27]:
# build adjencency matrix for rescue stations


## Descriptive analysis

In [317]:
# basic
display(data.loc[:, ['DispatchTime', 'EnRouteTime', 'TravelTime', 'ResponseTime']].mean())
display(data['RescueSquadNumber'].value_counts())
display(data['CallPriority'].value_counts())
display(data['DayOfWeek'].value_counts())
display(data['HourInDay'].value_counts())

In [33]:
def incidentMap(data, timeSelectStart, timeSelectEnd, sizeMax, ifLog):
    # general spatial dist of incidents
    if ifLog == True:
        data['LogResponseTime'] = np.log2(data['ResponseTime'])
        colorData = "LogResponseTime"
        range_color = [0, 20]
    elif ifLog == False:
        colorData = "ResponseTime"
        range_color = [0, 45000]
    px.set_mapbox_access_token(open("mapboxToken.txt").read())
    dataSelected = data.loc[timeSelectStart: timeSelectEnd, :].dropna()
    fig = px.scatter_mapbox(lat = dataSelected.IncidentPoint.y, lon = dataSelected.IncidentPoint.x, color = dataSelected.ResponseTime,
                            color_continuous_scale = px.colors.sequential.Sunsetdark, range_color = range_color, 
                            size = dataSelected.ResponseTime,
                            size_max = sizeMax, 
                            zoom = 9.5, width = 750, height = 500)
    return fig

def responsTimeScatter(data):
    # show the surge of the response time
    fig = go.Figure(data = go.Scatter(x = data.index, y = data['ResponseTime'], mode='markers', marker_color = data['ResponseTime'],)) 
    fig.update_layout(xaxis_title = "Datatime", yaxis_title = "Response time (s)",)
    return fig

def processingTimeProportionDist(data):
    df = pd.DataFrame((data.EnRouteTime / data.ResponseTime))
    df = df.rename(columns = {0: "Proportion of Preparation Time"})
    fig = px.histogram(df, x = "Proportion of Preparation Time", 
                        nbins = 75, template = 'seaborn', histnorm = 'probability', opacity = 0.75,
                        width = 700, height = 500)
    fig.update_layout(yaxis_title = 'Probability')
    return fig

def proximityOrderDist(data):
    df = data.copy()
    df['Flooding'] = 'Normal'
    df.loc['2016-10-08 11:59:59': '2016-10-09 23:59:59', ['Flooding']] = 'Flooding'
    fig = px.histogram(df[df.NearestOrder < 10], 
                 x = "NearestOrder", 
    #              color = "Flooding", 
                 template = 'seaborn', 
                 histnorm = 'probability', 
                 barmode = "overlay",
                 opacity = 0.75, 
                 width = 700, height = 500,
                )
    fig.update_layout(yaxis_title = 'Probability', xaxis_title = 'Proximity Order of Origins')
    return fig

def distanceIncreaseRatioDist(data):
    fig = px.histogram(data[(data.DisobediancePathIncrease > 1)], 
                 x = "DisobediancePathIncrease", 
                 barmode = "overlay",
                 template = 'seaborn', histnorm = 'probability', 
                 opacity = 0.75, 
                 width = 700, height = 500,)
    fig.update_traces(xbins = dict(start = 1, end = 2, size = 0.1))
    fig.update_layout(yaxis_title = 'Probability', xaxis_title = 'Travel Distance Increase Percentage')
    return fig

def responseTimeWithCallPriorityDist(data):
    fig = px.histogram(data[data.ResponseTime < 5000], 
                 x = "ResponseTime", 
                 color = "CallPriority", 
                 barmode = "overlay",
                 template = 'seaborn', 
                 histnorm = 'probability', 
                 opacity = 0.75, 
                 width = 700, height = 500,
                 nbins = 200, 
                )
    fig.update_layout(yaxis_title = 'Probability', xaxis_title = 'Response Time')
    return fig

def averageSpeedPercentStd(data):
    # np.histogram(data.groupby(['OriginRoadID', 'DestinationID']).count().AverageSpeed.values, 10, range = (5, 100)) # keep about 25% of OD when set freqencey above 5
    dataSelect = data.loc[:, ['OriginRoadID', 'DestinationID', 'AverageSpeed']]
    groupByODCount = dataSelect.groupby(['OriginRoadID', 'DestinationID']).count() # any columns indicates count
    groupByODSpeed = dataSelect.groupby(['OriginRoadID', 'DestinationID']).mean()[groupByODCount.AverageSpeed >= 5].rename(columns = {'AverageSpeed': 'AverageSpeed_mean'})
    groupByODSpeed['AverageSpeed_std'] = dataSelect.groupby(['OriginRoadID', 'DestinationID']).std()[groupByODCount.AverageSpeed >= 5]
    groupByODSpeed['AverageSpeed_stdPercent'] = groupByODSpeed['AverageSpeed_std'] / groupByODSpeed['AverageSpeed_mean']
    df = groupByODSpeed['AverageSpeed_stdPercent'].reset_index()
    OriginNum = df.OriginRoadID
    for origin, num in zip(pd.unique(df.OriginRoadID), range(1, pd.unique(df.OriginRoadID).shape[0] + 1)):
        OriginNum = OriginNum.replace(origin, num)
    df['OriginNum'] = OriginNum
    fig = px.box(df, x = 'OriginNum', y = "AverageSpeed_stdPercent", template = 'seaborn', width = 500, height = 750, range_y = (0, 6.5), points = 'suspectedoutliers')
    fig.update_layout(yaxis_title = 'Average Speed Percentage Standard Deviation', xaxis_title = 'Rescue Squad Number')
    return fig

def showWaterOnRoads(roads, figsize = (100, 50), vmax = 6):
    fig, ax = plt.subplots(figsize = figsize)
    roadsLineWater = roads.loc[:, ['line', 'waterDepth']].set_geometry('line')
    ax = roadsLineWater.plot(ax = ax, 
                        column = 'waterDepth', 
                        zorder = 5, 
                        cmap = 'OrRd',
                        legend = True,
                        vmax = vmax,
                       )
    cx.add_basemap(ax, crs = roads.crs, source = cx.providers.CartoDB.Positron)
    ax.set_axis_off()
    
def showTravelUpRatioOnRoads(roads, figsize = (100, 50), vmax = 10):
    fig, ax = plt.subplots(figsize = figsize)
    roadsLineWater = roads.loc[:, ['line', 'travelTimeIncreaseRatio']].set_geometry('line')
    ax = roadsLineWater.plot(ax = ax, 
                        column = 'travelTimeIncreaseRatio', 
                        zorder = 5, 
                        cmap = 'OrRd',
                        legend = True,
                        vmax = vmax,
                        vmin = 1,
                       )
    cx.add_basemap(ax, crs = roads.crs, source = cx.providers.CartoDB.Positron)
    ax.set_axis_off()
    

In [34]:
# processingTimeProportionDist(data)
# proximityOrderDist(data)
# distanceIncreaseRatioDist(data)
# responseTimeWithCallPriorityDist(data)
# averageSpeedPercentStd(data)

# incidentMap(data, '2016-10-08', '2016-10-09', 16, False)
# responsTimeScatter(data)

# showWaterOnRoads(roads, (50, 25), 6)
# showTravelUpRatioOnRoads(roads, (20, 12), 10)

In [337]:
dataGroupByHour = data.groupby('HourInDay').mean()
dataGroupByDayOfWeek = data.groupby('DayOfWeek').mean()

fig5 = px.bar(dataGroupByHour.reset_index(), y = 'ResponseTime', x = 'HourInDay', text_auto='.3s',)
fig5.show()

dataGroupByDayOfWeek['DayOfWeek'] = ['Mon.', 'Tue.', 'Wed.', 'Thu.', 'Fri.', 'Sat.', 'Sun.']
fig6 = px.bar(dataGroupByDayOfWeek, y = 'ResponseTime', x = 'DayOf Week', text_auto = '.3s',)
fig6.show()


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [None]:
dataGroupByHour['Time period'] = ['0AM - 3AM', '0AM - 3AM', '0AM - 3AM',
                                  '3AM - 6AM', '3AM - 6AM', '3AM - 6AM', 
                                  '6AM - 9AM', '6AM - 9AM', '6AM - 9AM',
                                  '9AM - 12PM', '9AM - 12PM', '9AM - 12PM',
                                  '12PM - 3PM', '12PM - 3PM', '12PM - 3PM',
                                  '3PM - 6PM', '3PM - 6PM', '3PM - 6PM', 
                                  '6PM - 9PM', '6PM - 9PM','6PM - 9PM',
                                  '9PM - 12PM', '9PM - 12PM', '9PM - 12PM',] 

dataGroupByTimePeriod = dataGroupByHour.groupby('Time period').mean()
fig = px.bar(dataGroupByTimePeriod.reset_index(), y = 'Response Time', x = 'Time period', text_auto='.3s',)
fig.show()

In [None]:
fig6 = go.Figure()
hourList  = range(0, 24)
for hour in hourList:
    fig6.add_trace(go.Box(
        y = dataProcessed[:'2016-10-08'].loc[lambda row: row.index.hour == hour]['Response Time'].tolist(),
        name = 'Hour ' + str(hour),
        jitter=0.3,
        pointpos=-1.8,
        boxpoints=False, # represent all points
        marker_color='rgb(7,40,89)',
        line_color='rgb(7,40,89)'
    ))
fig6.show()

# Training and prediction

In [None]:
# preparation time prediction








In [None]:
# resample data
def makeODmatrix(dataOneHourIndex, timeType = 'ResponseTime'):
    dataOneHour = data.loc[dataOneHourIndex.index, :]
    dataOneHour = dataOneHour.loc[:, ['OriginRoadID', 'DestinationID', timeType]]
    dataOneHour = dataOneHour.groupby(by = ['OriginRoadID', 'DestinationID'], dropna = True).mean()
    
    ODmatrix_df = pd.DataFrame(index = rescue.OBJECTID_nearestRoad.values, columns = roads.OBJECTID.values)
    for indexes in dataOneHour.index:
        ODmatrix_df.loc[int(indexes[0]), int(indexes[1])] = dataOneHour.loc[indexes].values[0]
    return ODmatrix_df.to_numpy()

dataByHour = data.resample(pd.Timedelta(1, "hour"), closed = 'left', label = 'left', origin = 'end_day').apply(makeODmatrix)

In [306]:
dataByHour

CallDateTime
2016-01-01 00:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-01-01 01:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-01-01 02:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-01-01 03:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-01-01 04:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
                                             ...                        
2016-10-15 19:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-10-15 20:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-10-15 21:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-10-15 22:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-10-15 23:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
Freq: H, Length: 6936, dtype: object

In [30]:
print(torch.__version__)
print(torch.version.cuda)

1.13.1+cu117
11.7


# Experiments

In [None]:
plt.style.use('seaborn')
fig, ax = plt.subplots(figsize=(30, 7.5))
t = range(len(dataTest_y_Flood))
ax.plot(t, predictions, label = 'prediction')
ax.plot(t, dataTest_y_Flood, label = 'ground truth')
ax.set_xlabel('Incident', fontsize = 20)
ax.set_ylabel('If accessible', fontsize = 20)
ax.legend(fontsize = 20)
ax.tick_params(axis='both', labelsize = 15)
#ax.set_title('Prediction vs Ground truth', fontsize = 25)

In [None]:
from sklearn.metrics import confusion_matrix , classification_report
print(classification_report(dataTest_y_Flood, predictions))

In [None]:
def calculateWaste (row):
    if row['If accessible Real'] == 1 and row['If accessible Predicted'] == 0:
        return 1
    else:
        return 0

def calculateUnknownDanger (row):
    if row['If accessible Real'] == 0 and row['If accessible Predicted'] == 1:
        return 1
    else:
        return 0    
    
dataFlood = dataProcessed.loc['2016-10-09' : '2016-10-09']
dataFlood['If accessible Real'] = dataFlood['Accessibility'].astype('int64')
dataFlood['If accessible Predicted'] = [element[0] for element in predictions]
dataFlood['Waste'] = dataFlood.apply(calculateWaste, axis = 1)
dataFlood['Unknown Danger'] = dataFlood.apply(calculateUnknownDanger, axis = 1)
dataFlood['Error Type'] = (dataFlood['Waste'] + dataFlood['Unknown Danger'] * 2).astype('string') # 1 mean wastes, 2 means potential danger
dataFlood['Error Type'] = dataFlood['Error Type'].replace('1', 'Type 1').replace('2', 'Type 2')
pd.set_option('display.max_rows', 20)
display(dataFlood)

# visualization
px.set_mapbox_access_token(open("mapboxToken.txt").read())
fig1 = px.scatter_mapbox(dataFlood.loc[lambda df: df['Error Type'] != '0'], 
                        lat="latitude", lon="longitude",  
                        color = "Error Type", #size = "Response Time",
                        size_max = 15, zoom = 10, width = 575, height = 500)
fig1.show()

fig2 = px.scatter_mapbox(dataFlood.loc[lambda df: (df['Error Type'] != '0') &
                                      (df['Error Type'] != '1')], 
                        lat="latitude", lon="longitude",  
                        color = "Error Type", size = "Response Time",
                        size_max = 30, zoom = 10, width = 550, height = 500)
fig2.update_layout(showlegend=False)
fig2.show()

In [None]:
# import
import geopandas as gpd
from shapely.geometry import Point
shp = gpd.read_file('./data/VB_City_Boundary.geojson')
shp.crs = 'CRS84'

# generate all points
numOfPointsOneDimX = 50
deltaX = shp.bounds.maxx - shp.bounds.minx
deltaY = shp.bounds.maxy - shp.bounds.miny
numOfPointsOneDimY = numOfPointsOneDimX * (deltaY / deltaX)

xCorList = np.arange(float(shp.bounds.minx), float(shp.bounds.maxx), float(deltaX / numOfPointsOneDimX))
yCorList = np.arange(float(shp.bounds.miny), float(shp.bounds.maxy), float(deltaY / numOfPointsOneDimY))
xyPointList = [Point(x, y) for x in xCorList for y in yCorList]

# select points within the city
samplePoints = gpd.GeoSeries(xyPointList)
samplePoints.crs = 'CRS84'
withinOrNot = samplePoints.within(shp['geometry'].values[0])
gdf = pd.concat([samplePoints, withinOrNot], axis = 1)
gdf.crs = 'CRS84'
gdfSelected = gdf.loc[gdf[1] == True]
display(gdfSelected)

In [None]:
# do the prediction for these points
gdfSelected['latitude'] = gdfSelected[0].values.y
gdfSelected['longitude'] = gdfSelected[0].values.x

def addTimeFeature(gdf, hourInDay, dayOfWeek):
    gdf_out = gdf.copy()
    gdf_out['Hour in Day'] = hourInDay
    gdf_out['Day of Week'] = dayOfWeek
    return gdf_out

gdfSelected_withTime = addTimeFeature(gdfSelected, 0, 6) #The day of week is 1, because it is the normalized value, flooding day is Sunday
for hour in range(23):
    gdfSelected_withTime = pd.concat([gdfSelected_withTime, addTimeFeature(gdfSelected, hour + 1, 6)])

gdfForPrediction = gdfSelected_withTime.reset_index().loc[:,['latitude', 'longitude', 'Hour in Day']]
gdfForPrediction_norm = normalizer(gdfForPrediction.copy())
gdfForPrediction_norm['Day of Week'] = 1 #The day of week is 1, because it is the normalized value, flooding day is Sunday
gdfForPrediction_norm

In [None]:
predictionsFull = model.predict(gdfForPrediction_norm.values)
predictionsFull = np.where(predictionsFull < 0.5, 0, 1).tolist()
print(len(predictionsFull))

In [None]:
dataFloodFull = gdfForPrediction.copy()
dataFloodFull['If accessible'] = [element[0] for element in predictionsFull]
dataFloodFull['If accessible'] = dataFloodFull['If accessible'].astype('string').replace('1', 'Accessible').replace('0', 'Inaccessible')
display(dataFloodFull)

# visualization
px.set_mapbox_access_token(open("mapboxToken.txt").read())
fig = px.scatter_mapbox(dataFloodFull.loc[dataFloodFull['Hour in Day'] == 23], 
                        lat = "latitude", lon = "longitude",  
                        color = "If accessible", #size = "Response Time",
                        zoom = 9.5, opacity = 0.7, width = 600, height =700)
fig.show()