In [2]:
from preprocess import *

## Data preprocessing

In [None]:
# preprocess and save data
# NOTE(review): this cell geocodes the 2013 slice and passes a 20130101-20131231.csv path,
# while the following cell reloads 20160101-20161015.csv — confirm which date range is intended.
# Step 1: load the raw ambulance time records.
data = rawData('./data/ambulance/virginiaBeach_ambulance_timeData.csv')
# Step 2: enrich the records using the rescue station file (exact columns added are defined in preprocess.addOrigin).
data = addOrigin(data, './data/rescueTeamLocation/rescueStations.txt')
# Step 3: geocode only rows whose index label lies in 2013; the CSV path is presumably where results are saved — TODO confirm.
data = geoCoding(data.loc['2013-01-01' : '2013-12-31', :], './data/geocoded_saved/20130101-20131231.csv')

In [3]:
# reload data and build geoDataFrame
# Reads a previously saved geocoded CSV (2016-01-01 to 2016-10-15 by its file name),
# presumably so the geocoding step above does not have to be re-run — TODO confirm.
data = reLoadData('./data/geocoded_saved/20160101-20161015.csv')

In [None]:
# Build the road table step by step; later cells read roads['OBJECTID'], roads['line'] and roads['waterDepth'].
roads = readRoads('./data/roads/Streets.shp')
# scale = 2.7 is forwarded to makeSurface4Lines; its meaning/unit is defined in preprocess — TODO document.
roads = makeSurface4Lines(roads, './data/roads/Road_Surfaces.shp', scale = 2.7)
# Takes one inundation raster and a croppedByRoads.tif path (presumably an intermediate/output raster — confirm in preprocess).
roads = getWaterDepthOnRoads(roads, './data/inundation/tifData/depth_objID_35.tif', './data/inundation/croppedByRoads/croppedByRoads.tif') 

In [None]:
# consider bridges in road network (PENDING)
# bridges = gpd.read_file('./data/bridges/bridgePolygon.shp').to_crs(str(inundation.crs))
# inundation_cropped = inundationCutter(inundation, bridges, True, True)

In [43]:
# create graph
# Later cells address graph nodes by road OBJECTID (e.g. graph.edges(<OBJECTID>)).
graph = roads2Graph(roads)
# showGraphRoads(roads, graph)
# NOTE: the graph is un-directed right now, the logic should be checked if changed to directed

# read the location of rescue squads and attach them to nodes 
# The routing code later reads rescue.OBJECTID_nearestRoad as the set of source nodes.
rescue = readRescue('./data/rescueTeamLocation/rescueStations.txt', 'EPSG:4326')

In [None]:
# additional info for descriptive analysis
# assign records to graph edges
# Each call names a point column to match against `roads` and two output column names
# (road ID and distance-to-road); 'OriginRoadID' and 'DestinationID' are used by later cells.
data = assignGraphEdge(data, roads, 'RescueSquadPoint', 'OriginRoadID', 'Origin2RoadDist')
data = assignGraphEdge(data, roads, 'IncidentPoint', 'DestinationID', 'Destination2RoadDist')
# # find nearest rescue station
# data = nearestRescueStation(data, rescue)
# find the top nearest rescue stations
# presumably adds the NearestOrder / DisobediancePathIncrease columns plotted below — TODO confirm in preprocess
data = nearnessObediance(data, rescue, graph)
# calculate shortest path length and ave speed
# presumably adds the AverageSpeed column used by averageSpeedPercentStd — TODO confirm in preprocess
data = assumedAveSpeed(data, rescue, graph)

# Descriptive analysis

In [317]:
# basic: mean of every time component, followed by record counts per category
display(
    data[['DispatchTime', 'EnRouteTime', 'TravelTime', 'ResponseTime']].mean(),
    data['RescueSquadNumber'].value_counts(),
    data['CallPriority'].value_counts(),
    data['DayOfWeek'].value_counts(),
    data['HourInDay'].value_counts(),
)

In [18]:
def incidentMap(data, timeSelectStart, timeSelectEnd, sizeMax, ifLog):
    """Scatter incidents from a time window on a Mapbox map, coloured by response time.

    Parameters
    ----------
    data : frame with a time index, a 'ResponseTime' column and an 'IncidentPoint'
        geometry column exposing .x / .y. It is NOT modified by this function.
    timeSelectStart, timeSelectEnd : index labels bounding the window (label slice via .loc).
    sizeMax : maximum marker size passed to plotly.
    ifLog : truthy -> colour by log2(ResponseTime) with colour range [0, 20];
        falsy -> colour by raw ResponseTime with colour range [0, 45000].

    Returns
    -------
    The plotly figure. Marker size always follows the raw ResponseTime.
    """
    # Plain truthiness: the original `== True` / `== False` pair left range_color
    # undefined (NameError) for any other value.
    if ifLog:
        colorColumn = "LogResponseTime"
        range_color = [0, 20]
    else:
        colorColumn = "ResponseTime"
        range_color = [0, 45000]

    # Close the token file deterministically and drop a trailing newline, if any.
    with open("mapboxToken.txt") as tokenFile:
        px.set_mapbox_access_token(tokenFile.read().strip())

    dataSelected = data.loc[timeSelectStart: timeSelectEnd, :]
    if ifLog:
        # Derive the log column on the selection only, so the caller's frame is untouched.
        dataSelected = dataSelected.assign(LogResponseTime = np.log2(dataSelected['ResponseTime']))
    dataSelected = dataSelected.dropna()

    fig = px.scatter_mapbox(lat = dataSelected.IncidentPoint.y, lon = dataSelected.IncidentPoint.x,
                            # colour by the column that matches range_color (was always raw ResponseTime)
                            color = dataSelected[colorColumn],
                            color_continuous_scale = px.colors.sequential.Sunsetdark, range_color = range_color,
                            size = dataSelected.ResponseTime,
                            size_max = sizeMax,
                            zoom = 9.5, width = 750, height = 500)
    return fig

def responsTimeScatter(data):
    """Return a marker-only scatter of 'ResponseTime' against the frame index.

    Markers are coloured by the response time itself, so surges stand out.
    """
    responseTimes = data['ResponseTime']
    markers = go.Scatter(x = data.index, y = responseTimes, mode = 'markers', marker_color = responseTimes)
    fig = go.Figure(data = markers)
    fig.update_layout(xaxis_title = "Datatime", yaxis_title = "Response time (s)")
    return fig

def processingTimeProportionDist(data):
    """Return a probability histogram of EnRouteTime / ResponseTime per record."""
    proportion = data.EnRouteTime / data.ResponseTime
    # The unnamed ratio series becomes column 0, which is then given a readable label.
    df = proportion.to_frame().rename(columns = {0: "Proportion of Preparation Time"})
    fig = px.histogram(
        df,
        x = "Proportion of Preparation Time",
        nbins = 75,
        histnorm = 'probability',
        template = 'seaborn',
        opacity = 0.75,
        width = 700,
        height = 500,
    )
    fig.update_layout(yaxis_title = 'Probability')
    return fig

def proximityOrderDist(data):
    """Return a probability histogram of 'NearestOrder' for records with NearestOrder < 10.

    A 'Flooding' label column is built on a copy of the input (rows between
    2016-10-08 11:59:59 and 2016-10-09 23:59:59 are tagged 'Flooding', all others
    'Normal'); it is not used by the plot while the colour split stays disabled.
    """
    labelled = data.assign(Flooding = 'Normal')
    labelled.loc['2016-10-08 11:59:59': '2016-10-09 23:59:59', ['Flooding']] = 'Flooding'
    nearOrigins = labelled[labelled.NearestOrder < 10]
    fig = px.histogram(
        nearOrigins,
        x = "NearestOrder",
        # color = "Flooding",   (disabled)
        histnorm = 'probability',
        template = 'seaborn',
        barmode = "overlay",
        opacity = 0.75,
        width = 700,
        height = 500,
    )
    fig.update_layout(xaxis_title = 'Proximity Order of Origins', yaxis_title = 'Probability')
    return fig

def distanceIncreaseRatioDist(data):
    """Return a probability histogram of 'DisobediancePathIncrease' for rows where it exceeds 1.

    Bins are fixed to [1, 2] in steps of 0.1.
    """
    longerTrips = data[data.DisobediancePathIncrease > 1]
    fig = px.histogram(
        longerTrips,
        x = "DisobediancePathIncrease",
        histnorm = 'probability',
        template = 'seaborn',
        barmode = "overlay",
        opacity = 0.75,
        width = 700,
        height = 500,
    )
    fig.update_traces(xbins = {'start': 1, 'end': 2, 'size': 0.1})
    fig.update_layout(xaxis_title = 'Travel Distance Increase Percentage', yaxis_title = 'Probability')
    return fig

def responseTimeWithCallPriorityDist(data):
    """Return overlaid probability histograms of 'ResponseTime', one per 'CallPriority'.

    Only records with ResponseTime < 5000 are plotted.
    """
    shortResponses = data[data.ResponseTime < 5000]
    histogramOptions = dict(
        x = "ResponseTime",
        color = "CallPriority",
        nbins = 200,
        histnorm = 'probability',
        barmode = "overlay",
        template = 'seaborn',
        opacity = 0.75,
        width = 700,
        height = 500,
    )
    fig = px.histogram(shortResponses, **histogramOptions)
    fig.update_layout(xaxis_title = 'Response Time', yaxis_title = 'Probability')
    return fig

def averageSpeedPercentStd(data):
    """Box-plot the relative spread (std / mean) of 'AverageSpeed' per origin.

    Records are grouped by ('OriginRoadID', 'DestinationID'); only pairs with at
    least 5 non-null speed values are kept. Each origin road ID is replaced on the
    x axis by an ordinal 1, 2, ... in order of first appearance.

    Returns the plotly figure.
    """
    # One pass over the groups instead of three separate groupby calls.
    odStats = data.groupby(['OriginRoadID', 'DestinationID'])['AverageSpeed'].agg(['count', 'mean', 'std'])
    odStats = odStats[odStats['count'] >= 5]  # keep OD pairs observed at least 5 times

    df = (odStats['std'] / odStats['mean']).rename('AverageSpeed_stdPercent').reset_index()

    # factorize maps every ID in one step. The former chain of Series.replace calls
    # re-replaced already assigned ordinals whenever an ordinal equalled a later
    # origin ID (e.g. IDs 0 and 1 both ended up as 2).
    df['OriginNum'] = pd.factorize(df['OriginRoadID'])[0] + 1

    fig = px.box(df, x = 'OriginNum', y = "AverageSpeed_stdPercent", template = 'seaborn', width = 500, height = 750, range_y = (0, 6.5), points = 'suspectedoutliers')
    fig.update_layout(yaxis_title = 'Average Speed Percentage Standard Deviation', xaxis_title = 'Rescue Squad Number')
    return fig

def showWaterOnRoads(roads, figsize = (100, 50), vmax = 6):
    """Draw the 'line' geometries of `roads` coloured by 'waterDepth' over a CartoDB Positron basemap.

    figsize is handed to matplotlib; vmax caps the colour scale. Nothing is returned.
    """
    _, axes = plt.subplots(figsize = figsize)
    depthLayer = roads[['line', 'waterDepth']].set_geometry('line')
    plotOptions = dict(column = 'waterDepth', cmap = 'OrRd', legend = True, zorder = 5, vmax = vmax)
    axes = depthLayer.plot(ax = axes, **plotOptions)
    cx.add_basemap(axes, crs = roads.crs, source = cx.providers.CartoDB.Positron)
    axes.set_axis_off()
    
def showTravelUpRatioOnRoads(roads, figsize = (100, 50), vmax = 10):
    """Draw the 'line' geometries of `roads` coloured by 'travelTimeIncreaseRatio' over a CartoDB Positron basemap.

    The colour scale runs from 1 to vmax. Nothing is returned.
    """
    _, axes = plt.subplots(figsize = figsize)
    ratioLayer = roads[['line', 'travelTimeIncreaseRatio']].set_geometry('line')
    plotOptions = dict(column = 'travelTimeIncreaseRatio', cmap = 'OrRd', legend = True, zorder = 5, vmin = 1, vmax = vmax)
    axes = ratioLayer.plot(ax = axes, **plotOptions)
    cx.add_basemap(axes, crs = roads.crs, source = cx.providers.CartoDB.Positron)
    axes.set_axis_off()

def showAveTime(time, by, source = None):
    """Bar chart of the mean of column `time` for each value of column `by`.

    Parameters
    ----------
    time : name of the numeric column to average.
    by : name of the grouping column.
    source : optional frame to read from. When omitted, the notebook-level `data`
        is used, which keeps existing two-argument calls working.

    Returns the plotly figure.
    """
    frame = data if source is None else source
    groupedMean = frame.loc[:, [time, by]].groupby(by).mean()
    fig = px.bar(groupedMean.reset_index(), y = time, x = by, text_auto='.3s',)
    return fig

In [9]:
# processingTimeProportionDist(data)
# proximityOrderDist(data)
# distanceIncreaseRatioDist(data)
# responseTimeWithCallPriorityDist(data)
# averageSpeedPercentStd(data)

# incidentMap(data, '2016-10-08', '2016-10-09', 16, False)
# responsTimeScatter(data)

# showWaterOnRoads(roads, (50, 25), 6)
# showTravelUpRatioOnRoads(roads, (20, 12), 10)

# showAveTime('ResponseTime', 'HourInDay')
# showAveTime('ResponseTime', 'DayOfWeek')

# Disruption analysis

In [44]:
def _addPathLen2Graph(graph, rescue, weight, newAttribute_rescueSquad, newAttribute_path):
    """Annotate every node with its Voronoi-assigned rescue node and the path length to it.

    Voronoi cells are built around the unique values of rescue.OBJECTID_nearestRoad
    using the edge attribute `weight`. For each node the function writes
    `newAttribute_rescueSquad` (the cell's centre node) and `newAttribute_path`
    (shortest path length from that centre).

    Returns (graph, voronoi); the graph is annotated in place.
    """
    # some roads are disconnected from all the rescue station even in normal time (as the raw data indicates)
    stationNodes = set(rescue.OBJECTID_nearestRoad.unique())
    voronoi = nx.voronoi_cells(graph, stationNodes, weight = weight)

    for rescueSquad, cell in voronoi.items():
        if rescueSquad == 'unreachable':
            print(len(cell), 'nodes are unreachable when building voronoi for', newAttribute_path)
            for node in cell:
                nodeAttrs = graph.nodes[node]
                nodeAttrs[newAttribute_rescueSquad] = np.nan
                nodeAttrs[newAttribute_path] = math.inf  # disconnected from every rescue node
            continue

        for node in cell:
            pathLen = nx.shortest_path_length(graph, source = rescueSquad, target = node, weight = weight)
            nodeAttrs = graph.nodes[node]
            # a zero length (the centre node itself) is stored as 1
            nodeAttrs[newAttribute_path] = 1 if pathLen == 0 else pathLen
            # an infinite length marks the assignment itself as inf
            nodeAttrs[newAttribute_rescueSquad] = math.inf if pathLen == math.inf else rescueSquad
    return graph, voronoi

def _addDisruption(graph, roads, newAttribute = 'weightWithDisruption', threshold = 3):
    """Copy edge 'weight' into `newAttribute`, then set it to inf on edges touching flooded roads.

    A road counts as flooded when its 'waterDepth' is >= threshold; its 'OBJECTID'
    is used as the graph node. The graph is modified in place and returned.
    """
    baseWeights = nx.get_edge_attributes(graph, "weight")
    nx.set_edge_attributes(graph, baseWeights, newAttribute)

    floodedRoadIDs = roads.loc[roads['waterDepth'] >= threshold, 'OBJECTID'].to_list()
    for roadID in floodedRoadIDs:
        for incidentEdge in graph.edges(roadID):
            # an infinite weight marks the edge as disrupted by inundation
            graph.edges[incidentEdge][newAttribute] = math.inf
    return graph

def _changeValue4DisruptedRoad(roads, graph, threshold = 3):
    """Give each flooded road the routing result of its best-connected adjacent road.

    A flooded road (waterDepth >= threshold) is itself still reachable, so it
    inherits from the adjacent node with the smallest 'shortestPathLenWithDisruption':
      * that node's path length, and
      * that node's 'rescueAssignedWithDisruption' (NaN when the length is inf).

    Nodes without any incident edge are left untouched. The graph is modified in
    place and returned.
    """
    nodes = graph.nodes()
    for disruption in roads[roads['waterDepth'] >= threshold]['OBJECTID'].to_list():
        neighbours = [edge[1] for edge in graph.edges(disruption)]
        if not neighbours:  # in case there are disconnected single node
            continue
        # min() keeps the first neighbour on ties, like list.index(min(...)) did
        nearest = min(neighbours, key = lambda node: nodes[node]['shortestPathLenWithDisruption'])
        bestLen = nodes[nearest]['shortestPathLenWithDisruption']
        nodes[disruption]['shortestPathLenWithDisruption'] = bestLen
        if bestLen != math.inf:
            # Inherit the neighbour's assigned rescue node. Previously the neighbour's own
            # node ID was stored here, which is not a rescue node.
            nodes[disruption]['rescueAssignedWithDisruption'] = nodes[nearest]['rescueAssignedWithDisruption']
        else:
            nodes[disruption]['rescueAssignedWithDisruption'] = np.nan
    return graph

def runRoutingWithDisruption(graph, rescue, roads, threshold = 1):
    """Annotate the graph with routing results for normal and flooded conditions.

    Steps:
      1. normal routing on edge attribute 'weight'
         -> node attributes 'rescueAssigned' / 'shortestPathLen';
      2. derive 'weightWithDisruption' (inf on edges touching roads whose
         waterDepth >= threshold);
      3. routing on the disrupted weights
         -> 'rescueAssignedWithDisruption' / 'shortestPathLenWithDisruption';
      4. patch the flooded roads themselves from their adjacent roads.

    Parameters
    ----------
    graph, rescue, roads : the road graph, rescue table and road table used by the helpers.
    threshold : water depth from which a road counts as disrupted. Defaults to 1,
        the value that was previously hard-coded in both helper calls.

    Returns the annotated graph.
    """
    graph, _ = _addPathLen2Graph(graph, rescue, 'weight', 'rescueAssigned', 'shortestPathLen')
    graphDisrupted = _addDisruption(graph, roads, threshold = threshold)
    graph, _ = _addPathLen2Graph(graphDisrupted, rescue, 'weightWithDisruption', 'rescueAssignedWithDisruption', 'shortestPathLenWithDisruption')
    # the same threshold must be used here so exactly the disrupted roads get patched
    graph = _changeValue4DisruptedRoad(roads, graph, threshold = threshold)
    return graph

def getDisruptionRatio(graph, roadsTable = None):
    """Store shortestPathLenWithDisruption / shortestPathLen on nodes and on the road table.

    The ratio is written to the node attribute 'travelTimeIncreaseRatio' and mapped
    onto the road table by 'OBJECTID'. An infinite ratio is stored as NaN. Nodes
    missing either path length get no ratio.

    Parameters
    ----------
    graph : graph whose nodes carry both path-length attributes.
    roadsTable : optional road table to receive the 'travelTimeIncreaseRatio' column.
        When omitted, the notebook-level `roads` is updated, as before.

    Returns the graph.
    """
    normalLen = nx.get_node_attributes(graph, "shortestPathLen")
    disruptedLen = nx.get_node_attributes(graph, "shortestPathLenWithDisruption")

    # Pair the two lengths by node key. Zipping the two dicts positionally would
    # silently misalign as soon as one node lacks one of the attributes.
    ratios = {}
    for node, baseLen in normalLen.items():
        if node not in disruptedLen:
            continue
        ratio = disruptedLen[node] / baseLen
        ratios[node] = np.nan if ratio == math.inf else ratio
    nx.set_node_attributes(graph, ratios, 'travelTimeIncreaseRatio')

    target = roads if roadsTable is None else roadsTable
    target['travelTimeIncreaseRatio'] = target['OBJECTID'].map(nx.get_node_attributes(graph, "travelTimeIncreaseRatio"))
    return graph


# calculate ratios
# Both calls annotate `graph` in place and hand back the same object, so re-running this cell
# recomputes the attributes on the already annotated graph.
graph = runRoutingWithDisruption(graph, rescue, roads)
# Also writes the 'travelTimeIncreaseRatio' column onto the notebook-level `roads` table.
graph = getDisruptionRatio(graph)

# Model and training

In [None]:
# preparation time prediction






In [None]:
# resample data
def makeODmatrix(dataOneHourIndex, timeType = 'ResponseTime'):
    """Build the origin x destination matrix of mean `timeType` for one resample bin.

    Only the index of `dataOneHourIndex` is used: the matching rows are looked up in
    the notebook-level `data`. Rows of the result follow rescue.OBJECTID_nearestRoad,
    columns follow roads.OBJECTID; cells without records stay NaN.

    NOTE(review): if `data` has repeated index labels, the label lookup may return
    repeated rows — confirm the index is unique.

    Returns the matrix as a numpy array.
    """
    hourRecords = data.loc[dataOneHourIndex.index, ['OriginRoadID', 'DestinationID', timeType]]
    odMeans = hourRecords.groupby(by = ['OriginRoadID', 'DestinationID'], dropna = True).mean()

    ODmatrix_df = pd.DataFrame(index = rescue.OBJECTID_nearestRoad.values, columns = roads.OBJECTID.values)
    for originID, destinationID in odMeans.index:
        # group keys are cast to int before being used as matrix labels
        ODmatrix_df.loc[int(originID), int(destinationID)] = odMeans.loc[(originID, destinationID)].values[0]
    return ODmatrix_df.to_numpy()

# One OD matrix per one-hour bin of the time index (bins closed and labelled on the left).
# NOTE(review): origin = 'end_day' controls where the bin grid is anchored — see the pandas resample docs.
dataByHour = data.resample(pd.Timedelta(1, "hour"), closed = 'left', label = 'left', origin = 'end_day').apply(makeODmatrix)

In [306]:
# preview the hourly series; each element is one OD matrix (object array)
dataByHour

CallDateTime
2016-01-01 00:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-01-01 01:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-01-01 02:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-01-01 03:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-01-01 04:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
                                             ...                        
2016-10-15 19:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-10-15 20:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-10-15 21:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-10-15 22:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
2016-10-15 23:00:00    [[nan, nan, nan, nan, nan, nan, nan, nan, nan,...
Freq: H, Length: 6936, dtype: object

In [30]:
# report the installed PyTorch version and the value of torch.version.cuda, one per line
print(torch.__version__, torch.version.cuda, sep = '\n')

1.13.1+cu117
11.7


# Experiments

In [None]:
# NOTE(review): `predictions` and `dataTest_y_Flood` are not defined in the visible cells of this
# notebook — make sure the cell that produces them runs before this one.
# The 'seaborn' style name was deprecated in matplotlib 3.6 (renamed 'seaborn-v0_8') and later
# removed; use the new name when it exists and fall back to the old one otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
fig, ax = plt.subplots(figsize=(30, 7.5))
t = range(len(dataTest_y_Flood))
# prediction and ground truth share the same x axis: one position per incident
ax.plot(t, predictions, label = 'prediction')
ax.plot(t, dataTest_y_Flood, label = 'ground truth')
ax.set_xlabel('Incident', fontsize = 20)
ax.set_ylabel('If accessible', fontsize = 20)
ax.legend(fontsize = 20)
ax.tick_params(axis='both', labelsize = 15)
#ax.set_title('Prediction vs Ground truth', fontsize = 25)

In [None]:
# NOTE(review): confusion_matrix is imported but not used in this cell; `dataTest_y_Flood` and
# `predictions` are not defined in the visible cells of this notebook.
from sklearn.metrics import confusion_matrix , classification_report
# text report comparing the ground truth (first argument) with the predictions (second argument)
print(classification_report(dataTest_y_Flood, predictions))

In [None]:
def calculateWaste (row):
    """Return 1 when the row is really accessible (1) but predicted inaccessible (0), else 0."""
    isWaste = row['If accessible Real'] == 1 and row['If accessible Predicted'] == 0
    return 1 if isWaste else 0

def calculateUnknownDanger (row):
    """Return 1 when the row is really inaccessible (0) but predicted accessible (1), else 0."""
    isUnknownDanger = row['If accessible Real'] == 0 and row['If accessible Predicted'] == 1
    return 1 if isUnknownDanger else 0
    
# NOTE(review): `dataProcessed` and `predictions` are not defined in the visible cells of this
# notebook — make sure the cells that produce them run before this one.
# Work on a copy of the flood-day slice so the new columns are not written into a view of dataProcessed.
dataFlood = dataProcessed.loc['2016-10-09' : '2016-10-09'].copy()
dataFlood['If accessible Real'] = dataFlood['Accessibility'].astype('int64')
dataFlood['If accessible Predicted'] = [element[0] for element in predictions]
dataFlood['Waste'] = dataFlood.apply(calculateWaste, axis = 1)
dataFlood['Unknown Danger'] = dataFlood.apply(calculateUnknownDanger, axis = 1)
# Encode the error as '0' (correct), '1' (waste) or '2' (unknown danger), then rename 1/2 to 'Type 1'/'Type 2'.
dataFlood['Error Type'] = (dataFlood['Waste'] + dataFlood['Unknown Danger'] * 2).astype('string') # 1 mean wastes, 2 means potential danger
dataFlood['Error Type'] = dataFlood['Error Type'].replace('1', 'Type 1').replace('2', 'Type 2')
pd.set_option('display.max_rows', 20)
display(dataFlood)

# visualization
# read the Mapbox token with a context manager so the file handle is closed
with open("mapboxToken.txt") as tokenFile:
    px.set_mapbox_access_token(tokenFile.read().strip())

# fig1: every mis-predicted incident, coloured by error type
fig1 = px.scatter_mapbox(dataFlood.loc[lambda df: df['Error Type'] != '0'], 
                        lat="latitude", lon="longitude",  
                        color = "Error Type", #size = "Response Time",
                        size_max = 15, zoom = 10, width = 575, height = 500)
fig1.show()

# fig2: only the 'Type 2' (unknown danger) errors, sized by response time.
# The labels were renamed above, so the former comparison against '1' never removed Type 1 rows.
fig2 = px.scatter_mapbox(dataFlood.loc[lambda df: df['Error Type'] == 'Type 2'], 
                        lat="latitude", lon="longitude",  
                        color = "Error Type", size = "Response Time",
                        size_max = 30, zoom = 10, width = 550, height = 500)
fig2.update_layout(showlegend=False)
fig2.show()

In [None]:
# import
import geopandas as gpd
from shapely.geometry import Point
shp = gpd.read_file('./data/VB_City_Boundary.geojson')
shp.crs = 'CRS84'

# generate all points
# Take the bounds from the same geometry that is used for the `within` test below. They come back
# as plain floats, so no float() call on a single-element Series is needed (deprecated in recent pandas).
cityBoundary = shp['geometry'].values[0]
minX, minY, maxX, maxY = cityBoundary.bounds
numOfPointsOneDimX = 50
deltaX = maxX - minX
deltaY = maxY - minY
# scale the point count along y so the grid spacing matches the x direction
numOfPointsOneDimY = numOfPointsOneDimX * (deltaY / deltaX)

xCorList = np.arange(minX, maxX, deltaX / numOfPointsOneDimX)
yCorList = np.arange(minY, maxY, deltaY / numOfPointsOneDimY)
xyPointList = [Point(x, y) for x in xCorList for y in yCorList]

# select points within the city
samplePoints = gpd.GeoSeries(xyPointList)
samplePoints.crs = 'CRS84'
withinOrNot = samplePoints.within(cityBoundary)
# column 0 holds the points, column 1 the within-city flag
gdf = pd.concat([samplePoints, withinOrNot], axis = 1)
gdf.crs = 'CRS84'
# copy so that later cells can add columns without writing into a filtered view of gdf
gdfSelected = gdf.loc[gdf[1] == True].copy()
display(gdfSelected)

In [None]:
# do the prediction for these points
# Column 0 holds the point geometries; expose their coordinates as plain columns.
# assign() builds a new frame instead of writing columns into the filtered selection,
# which avoids chained-assignment warnings and makes the cell safe to re-run.
gdfSelected = gdfSelected.assign(latitude = gdfSelected[0].values.y,
                                 longitude = gdfSelected[0].values.x)

def addTimeFeature(gdf, hourInDay, dayOfWeek):
    """Return a copy of `gdf` with constant 'Hour in Day' and 'Day of Week' columns added.

    The input frame is left unchanged.
    """
    timeColumns = {'Hour in Day': hourInDay, 'Day of Week': dayOfWeek}
    return gdf.assign(**timeColumns)

# Replicate the sample points once per hour of the day (0..23), each tagged with raw day-of-week 6.
# All 24 copies are concatenated in one call instead of growing the frame inside a loop.
gdfSelected_withTime = pd.concat([addTimeFeature(gdfSelected, hour, 6) for hour in range(24)])

# Only latitude, longitude and hour are kept here; the raw 'Day of Week' column is dropped
# by this selection and re-added below in normalized form.
gdfForPrediction = gdfSelected_withTime.reset_index().loc[:,['latitude', 'longitude', 'Hour in Day']]
# NOTE(review): `normalizer` is not defined in the visible cells of this notebook.
gdfForPrediction_norm = normalizer(gdfForPrediction.copy())
gdfForPrediction_norm['Day of Week'] = 1 #The day of week is 1, because it is the normalized value, flooding day is Sunday
gdfForPrediction_norm

In [None]:
# NOTE(review): `model` is not defined in the visible cells of this notebook — make sure the
# cell that trains or loads it runs before this one.
predictionsFull = model.predict(gdfForPrediction_norm.values)
# threshold the raw outputs at 0.5: below -> 0, otherwise -> 1 (1 is labelled 'Accessible' in the next cell)
predictionsFull = np.where(predictionsFull < 0.5, 0, 1).tolist()
print(len(predictionsFull))

In [None]:
# Attach the thresholded predictions to the (un-normalized) grid points and label them.
dataFloodFull = gdfForPrediction.copy()
dataFloodFull['If accessible'] = [element[0] for element in predictionsFull]
dataFloodFull['If accessible'] = dataFloodFull['If accessible'].astype('string').replace('1', 'Accessible').replace('0', 'Inaccessible')
display(dataFloodFull)

# visualization
# read the Mapbox token with a context manager so the file handle is closed
with open("mapboxToken.txt") as tokenFile:
    px.set_mapbox_access_token(tokenFile.read().strip())
# only the grid points for hour 23 are drawn
fig = px.scatter_mapbox(dataFloodFull.loc[dataFloodFull['Hour in Day'] == 23], 
                        lat = "latitude", lon = "longitude",  
                        color = "If accessible", #size = "Response Time",
                        zoom = 9.5, opacity = 0.7, width = 600, height =700)
fig.show()