#### Summary: austin_test_calls_update1 - remove any negative indices
v2 - remove regions greater than the max region count (2); remove regions that are not covered (150?); remove regions that are outside of the grid (0)

In [72]:
#test_calls = CSV.File("../test/austin-data/austin_test_calls_update1.csv")|> DataFrame 

In [33]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import collections
import pandas as pd
import math
import json

In [34]:
# This is the grid object, which is used throughout all data preprocessing.
# It represents the city of Austin through a series of grids.
# It thus provides a tractable way to compute distances between grid regions, etc.
class Grid():
    """Rectangular grid built from a grid-description dictionary.

    The dictionary must provide the keys ``latitude_min``, ``longitude_min``,
    ``latitude_max``, ``longitude_max``, ``latitude_step``,
    ``longitude_step``, ``time_matrix`` and ``census_tract_region_mapping``.

    Regions are numbered row by row: ``region = row * ncols + col``, where
    the row is derived from the latitude and the column from the longitude.
    """

    def __init__(self, grid_json):
        """Store the grid bounds and steps and build the region -> tract map.

        Args:
            grid_json: dictionary holding the keys listed in the class
                docstring (typically the result of ``json.load``).
        """
        self.grid = grid_json
        self.min_lat = self.grid["latitude_min"]
        self.min_lon = self.grid["longitude_min"]
        self.max_lat = self.grid["latitude_max"]
        self.max_lon = self.grid["longitude_max"]
        self.latitude_delta = self.grid["latitude_step"]
        self.longitude_delta = self.grid["longitude_step"]
        # Number of grid rows/columns needed to span the bounding box.
        self.nrows = math.ceil((self.max_lat - self.min_lat) / self.latitude_delta)
        self.ncols = math.ceil((self.max_lon - self.min_lon) / self.longitude_delta)
        self.times = self.grid["time_matrix"]
        self.census_tract_region_map = self.grid["census_tract_region_mapping"]
        # Invert the tract -> regions mapping into region -> tracts.
        self.region_to_tract = collections.defaultdict(list)
        for census_tract, regions in self.census_tract_region_map.items():
            for region in regions:
                self.region_to_tract[region].append(census_tract)

    def map_point_to_region(self, latitude, longitude):
        """Return the region index of the cell containing the given point.

        No bounds check is performed: a point outside the grid produces an
        index that does not correspond to a real cell (possibly negative).
        """
        row = math.floor((latitude - self.min_lat) / self.latitude_delta)
        col = math.floor((longitude - self.min_lon) / self.longitude_delta)
        return row * self.ncols + col

    def get_representative(self, region_num):
        """Return ``[lon, lat]`` of the center of region ``region_num``."""
        row_num = region_num // self.ncols
        col_num = region_num - row_num * self.ncols
        lat = self.min_lat + row_num * self.latitude_delta + 0.5 * self.latitude_delta
        lon = self.min_lon + col_num * self.longitude_delta + 0.5 * self.longitude_delta
        return [lon, lat]

    def get_time(self, region1, region2):
        """Return ``time_matrix[region1][region2]``, or -1 for invalid regions.

        Negative indices are rejected explicitly: plain sequence indexing
        would silently wrap around and return the entry of another region.
        """
        if region1 < 0 or region2 < 0:
            return -1
        try:
            return self.times[region1][region2]
        except IndexError:
            return -1

    def region_to_census_tract(self, region):
        """Return the list of census tracts of ``region``, or "0_0" if unknown.

        ``region_to_tract`` is a ``defaultdict``, so indexing it never raises
        ``KeyError`` and would insert an empty list for every unknown region;
        a membership test keeps the "0_0" fallback reachable and leaves the
        mapping untouched.
        """
        if region in self.region_to_tract:
            return self.region_to_tract[region]
        return "0_0"

In [35]:
# Load the grid description and build the Grid object used by the cells below.
# The path uses forward slashes only: the previous "..\Input_Data" contained
# the invalid escape sequence "\I" and only resolved on Windows.
with open("../Input_Data/grid_info_3200_v3.json", "r") as f:
    grid_json = json.load(f)
g = Grid(grid_json)

In [36]:
# Read the test calls twice: once as numbers (for comparisons) and once as
# strings (so rows can be written back unchanged), plus the coverage table.
data = np.genfromtxt(
    "../Output_Data/austin_data_3200/austin_test_calls_v3.csv",
    delimiter=",",
)
data_s = np.genfromtxt(
    "../Output_Data/austin_data_3200/austin_test_calls_v3.csv",
    delimiter=",",
    dtype=str,
)
coverage = np.genfromtxt(
    "../Output_Data/austin_data_3200/coverage_regression.csv",
    delimiter=",",
)

In [37]:
# Show the first row of the string array (the column names), then the name
# of column 1, which is the column used as the region index further down.
print(data_s[0])
data_s[0,1]

['interarrival_seconds' 'neighborhood' 'dow' 'Longitude_Of_Emergency'
 'Latitude_Of_Emergency' 'stn1_min' 'stn2_min' 'stn3_min' 'stn4_min'
 'stn5_min' 'stn6_min' 'stn7_min' 'stn8_min' 'stn9_min' 'stn10_min'
 'stn11_min' 'stn12_min' 'stn13_min' 'stn14_min' 'stn15_min' 'stn16_min'
 'stn17_min' 'stn18_min' 'stn19_min' 'stn20_min' 'stn21_min' 'stn22_min'
 'stn23_min' 'stn24_min' 'stn25_min' 'stn26_min' 'stn27_min' 'stn28_min'
 'stn29_min' 'stn30_min' 'stn31_min' 'stn32_min' 'stn33_min' 'stn34_min'
 'stn35_min' 'stn36_min' 'stn37_min' 'stn38_min' 'stn39_min' 'stn40_min'
 'stn41_min' 'stn42_min' 'stn43_min' 'stn44_min' 'hosp1_min' 'hosp2_min'
 'hosp3_min' 'hosp4_min' 'hosp5_min' 'hosp6_min' 'hosp7_min' 'hosp8_min'
 'hosp9_min' 'hosp10_min' 'hosp11_min' 'hosp12_min']


'neighborhood'

In [38]:
# Dimensions (rows, columns) of the numeric call array.
data.shape

(30105, 61)

In [39]:
# Build the filtered call list: keep the header row, then keep only the calls
# that (1) lie inside the grid, (2) have a valid neighborhood (region) index
# and (3) fall in a region that is covered at least once.
MAX_REGION = 3200      # largest neighborhood index accepted (same threshold as before)
grid_skipped = 0       # calls whose coordinates fall outside the grid
maxmin_skipped = 0     # calls whose neighborhood index is missing or out of range
covered_skipped = 0    # calls whose region has no coverage at all
data_update = []
data_update.append(data_s[0, :])
for i in range(1, data.shape[0]):
    # Call should be within the grid. Columns 3 and 4 are
    # 'Longitude_Of_Emergency' and 'Latitude_Of_Emergency'; they must be read
    # from row i (reading the header row only yields NaN, so the check could
    # never trigger).
    lon = data[i, 3]
    lat = data[i, 4]
    if (lon > g.max_lon) or (lon < g.min_lon) or (lat > g.max_lat) or (lat < g.min_lat):
        print("invalid grid")
        grid_skipped = grid_skipped + 1
        continue
    # Neighborhood index must be a number in [0, MAX_REGION] that also
    # addresses an existing coverage column; otherwise int() or the column
    # lookup below would raise.
    region = data[i, 1]
    if np.isnan(region) or (region < 0) or (region > MAX_REGION) or (region >= coverage.shape[1]):
        maxmin_skipped = maxmin_skipped + 1
        continue
    # Coverage: row 0 of the coverage table is skipped (it appears to hold
    # the region labels, not coverage values); a zero sum over the remaining
    # rows means the region is never covered.
    covered = np.sum(coverage[1:, int(region)])
    if covered == 0:
        covered_skipped = covered_skipped + 1
        continue
    data_update.append(data_s[i, :])

print(maxmin_skipped)
print("covered_skipped:", covered_skipped)
print("grid_skipped:", grid_skipped)

0


In [40]:
# Number of rows kept by the filter, including the header row appended first.
len(data_update)

30105

In [41]:
# Convert the list of kept rows into a 2-D string array and display its
# first row (the header) as a sanity check.
data_update = np.array(data_update)
data_update[0]

array(['interarrival_seconds', 'neighborhood', 'dow',
       'Longitude_Of_Emergency', 'Latitude_Of_Emergency', 'stn1_min',
       'stn2_min', 'stn3_min', 'stn4_min', 'stn5_min', 'stn6_min',
       'stn7_min', 'stn8_min', 'stn9_min', 'stn10_min', 'stn11_min',
       'stn12_min', 'stn13_min', 'stn14_min', 'stn15_min', 'stn16_min',
       'stn17_min', 'stn18_min', 'stn19_min', 'stn20_min', 'stn21_min',
       'stn22_min', 'stn23_min', 'stn24_min', 'stn25_min', 'stn26_min',
       'stn27_min', 'stn28_min', 'stn29_min', 'stn30_min', 'stn31_min',
       'stn32_min', 'stn33_min', 'stn34_min', 'stn35_min', 'stn36_min',
       'stn37_min', 'stn38_min', 'stn39_min', 'stn40_min', 'stn41_min',
       'stn42_min', 'stn43_min', 'stn44_min', 'hosp1_min', 'hosp2_min',
       'hosp3_min', 'hosp4_min', 'hosp5_min', 'hosp6_min', 'hosp7_min',
       'hosp8_min', 'hosp9_min', 'hosp10_min', 'hosp11_min', 'hosp12_min'],
      dtype='<U22')

In [42]:
# Dimensions of the filtered array; compare with data.shape to see how many
# rows were dropped.
data_update.shape

(30105, 61)

In [32]:
# Write the filtered rows back to CSV as plain strings.
# NOTE(review): this is the same path the calls are loaded from, so the
# input file is overwritten in place — confirm this is intended.
np.savetxt('../Output_Data/austin_data_3200/austin_test_calls_v3.csv', data_update, delimiter=',', fmt='%s')

In [18]:
# Inspect column 461 of the coverage table (all rows, including row 0).
coverage[:,461]

array([461.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
         1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
         1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
         1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
         1.])

In [43]:
#### New issue: what if the times are in seconds for some reason?