Author: Joshua, Will, Ethan <br />
Summary: Makes a coverage matrix {0,1} if an ambulance station covers a region

#big patch! -> you need to adjust coverage by the regression.

In [7]:
import csv
import collections
import pandas as pd
import numpy as np
import math
import json
import matplotlib.pyplot as plt

In [24]:
REGRESSION_FLAG = True #do you want to include a regression?

In [25]:
# This is the grid object, which is used throughout all data preprocessing.
# It represents the city of Austin through a series of grids.
# It thus makes a tractable way to compute distance between grids, ect. 
class Grid():
    def __init__(self, grid_json):
        self.grid = grid_json
        self.min_lat = self.grid["latitude_min"]
        self.min_lon = self.grid["longitude_min"]
        self.max_lat = self.grid["latitude_max"]
        self.max_lon = self.grid["longitude_max"]
        self.latitude_delta = self.grid["latitude_step"]
        self.longitude_delta = self.grid["longitude_step"]
        self.nrows = math.ceil((self.max_lat - self.min_lat) / self.latitude_delta)
        self.ncols = math.ceil((self.max_lon - self.min_lon) / self.longitude_delta)
        self.times = self.grid["time_matrix"]
        self.census_tract_region_map = self.grid["census_tract_region_mapping"]
        self.region_to_tract = collections.defaultdict(list)
        for census_tract in self.census_tract_region_map:
            for region in self.census_tract_region_map[census_tract]:
                self.region_to_tract[region].append(census_tract)
    def map_point_to_region(self, latitude, longitude):
        return math.floor((latitude-self.min_lat)/self.latitude_delta) * self.ncols  + math.floor((longitude-self.min_lon)/self.longitude_delta)
    def get_representative(self, region_num):
        row_num = region_num//self.ncols
        col_num = region_num - row_num*self.ncols
        lat = self.min_lat + row_num * self.latitude_delta + 0.5*self.latitude_delta
        lon = self.min_lon + col_num * self.longitude_delta + 0.5*self.longitude_delta
        return [lon, lat]
    def get_time(self, region1, region2):
        try:
            return self.times[region1][region2]
        except IndexError:
            return -1
    def region_to_census_tract(self, region):
        try:
            return self.region_to_tract[region]
        except KeyError:
            return "0_0"

In [10]:
# Using smaller distance matrix for hopefully faster runtime of Julia code
with open("../Input_Data/grid_info_multiple.json", "r") as f:
    grid_json = json.load(f)
    
g = Grid(grid_json)


In [26]:
stations = pd.read_csv("../Input_Data/austin_data/stations.csv")

#stations = stations[stations["FACILITY_TYPE"].str.contains("Medic Station")]

numstations = stations["LATITUDE"].size # Should be 51
numregions = g.nrows * g.ncols

coverage_times = np.zeros(shape=(numstations, numregions))
coverage = np.zeros(shape=(numstations, numregions))
print(numregions)
numstations

3233


44

In [27]:
region_numbers = range(1, numregions + 1)

station_numbers = ["".join(["x", str(i)]) for i in range(1, numstations + 1)]

coverage = pd.DataFrame(data=coverage, index=station_numbers, columns=region_numbers)
coverage_times = pd.DataFrame(data=coverage_times, index=station_numbers, columns=region_numbers)

In [28]:
max_travel_time = 10 #grid time get time is in seconds. So the threshhold is 15 minutes times 60 seconds

#make 0 if travel time is greater than shortfall time
#travel time is none
#travel time = 0 and not at yourself location
print(str(REGRESSION_FLAG))
for station in range(0, numstations):
    region_of_station = g.map_point_to_region(stations["LATITUDE"][station], stations["LONGITUDE"][station])
    for region in range(0, numregions):
        travel_time = g.get_time(region_of_station, region) 
        
        #REGRESSION LINE
        if (travel_time is not None ):
            travel_time = travel_time / 60
            
        if (travel_time is not None ) and (travel_time > 0) and (REGRESSION_FLAG):
            slope = 0.23868463365149437 
            intercept = 1.261777659720721
            ax = np.log(travel_time)*slope
            travel_time = np.exp(ax + intercept)      
           
            
        # travel_times[station][region] = travel_time
        # covered if <= 600 seconds = 10 minutes
        # not covered if >= 600 seconds, not reachable (travel_time is None), 
        #    or invalid region (travel_time == 0 and region_of_station != region)
        if (travel_time is None) or ( travel_time > max_travel_time ) or (travel_time == 0 and region_of_station != region):
            coverage.at["x"+str(station+1), region+1] = 0
        else:
            coverage.at["x"+str(station+1), region+1] = 1


True


In [29]:
coverage_np = coverage.to_numpy()
sum(sum(coverage_np))

85200.0

#### 
(total count = 142252) <br>
Original -> regression = 2074.0 -> 83748 <br>
taking out -1 travel times: 85200.0 -> 83748 <br>

## Make all regions with no coverage fully covered (why?)
##### Is this related to the 0 min grid travel times? I need to filter out calls outside of the grid region.

In [30]:
for col in coverage:
    print(col, sum(coverage[col]))
    if sum(coverage[col]) == 0:
        coverage[col] += 1
        print("\t->", sum(coverage[col]))

1 0.0
	-> 44.0
2 0.0
	-> 44.0
3 0.0
	-> 44.0
4 0.0
	-> 44.0
5 0.0
	-> 44.0
6 0.0
	-> 44.0
7 0.0
	-> 44.0
8 0.0
	-> 44.0
9 0.0
	-> 44.0
10 0.0
	-> 44.0
11 0.0
	-> 44.0
12 0.0
	-> 44.0
13 0.0
	-> 44.0
14 0.0
	-> 44.0
15 0.0
	-> 44.0
16 0.0
	-> 44.0
17 0.0
	-> 44.0
18 0.0
	-> 44.0
19 0.0
	-> 44.0
20 0.0
	-> 44.0
21 0.0
	-> 44.0
22 0.0
	-> 44.0
23 0.0
	-> 44.0
24 0.0
	-> 44.0
25 0.0
	-> 44.0
26 0.0
	-> 44.0
27 0.0
	-> 44.0
28 0.0
	-> 44.0
29 0.0
	-> 44.0
30 0.0
	-> 44.0
31 0.0
	-> 44.0
32 0.0
	-> 44.0
33 44.0
34 44.0
35 44.0
36 44.0
37 44.0
38 44.0
39 44.0
40 44.0
41 0.0
	-> 44.0
42 0.0
	-> 44.0
43 0.0
	-> 44.0
44 0.0
	-> 44.0
45 0.0
	-> 44.0
46 0.0
	-> 44.0
47 0.0
	-> 44.0
48 0.0
	-> 44.0
49 0.0
	-> 44.0
50 0.0
	-> 44.0
51 0.0
	-> 44.0
52 0.0
	-> 44.0
53 0.0
	-> 44.0
54 0.0
	-> 44.0
55 0.0
	-> 44.0
56 0.0
	-> 44.0
57 0.0
	-> 44.0
58 0.0
	-> 44.0
59 0.0
	-> 44.0
60 0.0
	-> 44.0
61 0.0
	-> 44.0
62 0.0
	-> 44.0
63 0.0
	-> 44.0
64 0.0
	-> 44.0
65 0.0
	-> 44.0
66 0.0
	-> 44.0
67 0.0
	-> 44.0
6

1064 44.0
1065 44.0
1066 44.0
1067 44.0
1068 44.0
1069 44.0
1070 44.0
1071 44.0
1072 44.0
1073 44.0
1074 44.0
1075 44.0
1076 44.0
1077 44.0
1078 44.0
1079 44.0
1080 44.0
1081 44.0
1082 44.0
1083 44.0
1084 44.0
1085 44.0
1086 44.0
1087 44.0
1088 44.0
1089 44.0
1090 44.0
1091 42.0
1092 0.0
	-> 44.0
1093 0.0
	-> 44.0
1094 0.0
	-> 44.0
1095 0.0
	-> 44.0
1096 0.0
	-> 44.0
1097 0.0
	-> 44.0
1098 0.0
	-> 44.0
1099 42.0
1100 42.0
1101 42.0
1102 42.0
1103 42.0
1104 42.0
1105 43.0
1106 43.0
1107 43.0
1108 43.0
1109 44.0
1110 44.0
1111 43.0
1112 44.0
1113 44.0
1114 44.0
1115 0.0
	-> 44.0
1116 44.0
1117 44.0
1118 43.0
1119 43.0
1120 44.0
1121 44.0
1122 44.0
1123 44.0
1124 44.0
1125 44.0
1126 44.0
1127 44.0
1128 44.0
1129 44.0
1130 44.0
1131 44.0
1132 44.0
1133 44.0
1134 44.0
1135 44.0
1136 44.0
1137 44.0
1138 44.0
1139 44.0
1140 44.0
1141 44.0
1142 44.0
1143 44.0
1144 44.0
1145 44.0
1146 44.0
1147 44.0
1148 44.0
1149 44.0
1150 44.0
1151 44.0
1152 42.0
1153 0.0
	-> 44.0
1154 0.0
	-> 44.0
1155 0.0
	

2169 44.0
2170 44.0
2171 44.0
2172 44.0
2173 44.0
2174 44.0
2175 44.0
2176 44.0
2177 44.0
2178 44.0
2179 44.0
2180 44.0
2181 44.0
2182 44.0
2183 44.0
2184 44.0
2185 44.0
2186 44.0
2187 44.0
2188 43.0
2189 44.0
2190 44.0
2191 44.0
2192 44.0
2193 42.0
2194 44.0
2195 0.0
	-> 44.0
2196 0.0
	-> 44.0
2197 0.0
	-> 44.0
2198 0.0
	-> 44.0
2199 0.0
	-> 44.0
2200 0.0
	-> 44.0
2201 0.0
	-> 44.0
2202 0.0
	-> 44.0
2203 0.0
	-> 44.0
2204 42.0
2205 36.0
2206 8.0
2207 16.0
2208 17.0
2209 8.0
2210 5.0
2211 29.0
2212 44.0
2213 44.0
2214 44.0
2215 44.0
2216 44.0
2217 44.0
2218 44.0
2219 44.0
2220 44.0
2221 44.0
2222 44.0
2223 44.0
2224 44.0
2225 44.0
2226 44.0
2227 44.0
2228 44.0
2229 44.0
2230 44.0
2231 44.0
2232 44.0
2233 44.0
2234 44.0
2235 44.0
2236 44.0
2237 44.0
2238 44.0
2239 44.0
2240 44.0
2241 44.0
2242 44.0
2243 44.0
2244 44.0
2245 44.0
2246 44.0
2247 44.0
2248 44.0
2249 44.0
2250 44.0
2251 44.0
2252 44.0
2253 44.0
2254 44.0
2255 44.0
2256 0.0
	-> 44.0
2257 0.0
	-> 44.0
2258 0.0
	-> 44.0
2259 0.

2738 0.0
	-> 44.0
2739 0.0
	-> 44.0
2740 0.0
	-> 44.0
2741 0.0
	-> 44.0
2742 0.0
	-> 44.0
2743 0.0
	-> 44.0
2744 0.0
	-> 44.0
2745 0.0
	-> 44.0
2746 0.0
	-> 44.0
2747 0.0
	-> 44.0
2748 0.0
	-> 44.0
2749 0.0
	-> 44.0
2750 0.0
	-> 44.0
2751 0.0
	-> 44.0
2752 0.0
	-> 44.0
2753 0.0
	-> 44.0
2754 0.0
	-> 44.0
2755 0.0
	-> 44.0
2756 0.0
	-> 44.0
2757 0.0
	-> 44.0
2758 0.0
	-> 44.0
2759 0.0
	-> 44.0
2760 0.0
	-> 44.0
2761 41.0
2762 42.0
2763 44.0
2764 44.0
2765 44.0
2766 44.0
2767 44.0
2768 44.0
2769 44.0
2770 44.0
2771 44.0
2772 0.0
	-> 44.0
2773 0.0
	-> 44.0
2774 0.0
	-> 44.0
2775 0.0
	-> 44.0
2776 0.0
	-> 44.0
2777 0.0
	-> 44.0
2778 0.0
	-> 44.0
2779 0.0
	-> 44.0
2780 0.0
	-> 44.0
2781 0.0
	-> 44.0
2782 0.0
	-> 44.0
2783 0.0
	-> 44.0
2784 0.0
	-> 44.0
2785 0.0
	-> 44.0
2786 0.0
	-> 44.0
2787 0.0
	-> 44.0
2788 0.0
	-> 44.0
2789 0.0
	-> 44.0
2790 0.0
	-> 44.0
2791 0.0
	-> 44.0
2792 0.0
	-> 44.0
2793 0.0
	-> 44.0
2794 0.0
	-> 44.0
2795 0.0
	-> 44.0
2796 0.0
	-> 44.0
2797 0.0
	-> 44.0
2798 0.0

In [31]:
coverage.to_csv("../Output_Data/austin_data_3200/coverage_regression.csv")
coverage.transpose()
print(numregions)
coverage.shape
# travel_times.to_csv("coverage_times.csv")

3233


(44, 3233)

In [32]:
#sanity check
#this is checking that a station covers at least one region. Which is somewhat obvious
for index, row in coverage.iterrows():
    if sum(row) == 0:
        print("NO COVERAGE: (region#: ", index, "), ", "(census tract: ", g.region_to_census_tract(index), "), ", g.get_representative(index))
    

In [17]:
#coverage.to_csv("Austin_data/coverage_real.csv", header=None, index=False)