Author: Joshua, Will, Ethan <br />
Summary: Collect calls and group them into calls by hour. Then it takes 


In [48]:
import csv
import collections
import pandas as pd
import numpy as np
import math
import json
from datetime import datetime, timedelta


In [49]:
# Straight up grid
class Grid():
    def __init__(self, grid_json):
        self.grid = grid_json
        self.min_lat = self.grid["latitude_min"]
        self.min_lon = self.grid["longitude_min"]
        self.max_lat = self.grid["latitude_max"]
        self.max_lon = self.grid["longitude_max"]
        self.latitude_delta = self.grid["latitude_step"]
        self.longitude_delta = self.grid["longitude_step"]
        self.nrows = math.ceil((self.max_lat - self.min_lat) / self.latitude_delta)
        self.ncols = math.ceil((self.max_lon - self.min_lon) / self.longitude_delta)
        self.times = self.grid["time_matrix"]
        self.census_tract_region_map = self.grid["census_tract_region_mapping"]
        self.region_to_tract = collections.defaultdict(list)
        for census_tract in self.census_tract_region_map:
            for region in self.census_tract_region_map[census_tract]:
                self.region_to_tract[region].append(census_tract)
    def map_point_to_region(self, latitude, longitude):
        return math.floor((latitude-self.min_lat)/self.latitude_delta) * self.ncols  + math.floor((longitude-self.min_lon)/self.longitude_delta)
    def get_representative(self, region_num):
        row_num = region_num//self.ncols
        col_num = region_num - row_num*self.ncols
        lat = self.min_lat + row_num * self.latitude_delta + 0.5*self.latitude_delta
        lon = self.min_lon + col_num * self.longitude_delta + 0.5*self.longitude_delta
        return [lon, lat]
    def get_time(self, region1, region2):
        try:
            return self.times[region1][region2]
        except IndexError:
            return -1
    def region_to_census_tract(self, region):
        try:
            return self.region_to_tract[region]
        except KeyError:
            return "0_0"

In [50]:
# Using smaller distance matrix for hopefully faster runtime in Julia code
with open("grid_info_smaller.json", "r") as f:
    grid_json = json.load(f)
g = Grid(grid_json)

In [51]:
g = Grid(grid_json)

In [52]:
data = pd.read_csv("cleaned_data.csv")

In [53]:
data

Unnamed: 0.1,Unnamed: 0,IncidentForeignKey,Radio_Name,Longitude_At_Assign_Time,Latitude_At_Assign_Time,Time_Assigned,Time_Enroute,Time_ArrivedAtScene,Time_Depart_Scene,Time_Arrive_Destination,Time_Available,Time_Call_Cleared,Call_Disposition,Longitude_Of_Emergency,Latitude_Of_Emergency,transport_time,grid_time
0,1,23397873,M09,-97.972866,30.333385,2019-01-01 19:43:36.000,2019-01-01 19:45:27.030,2019-01-01 19:48:39.770,2019-01-01 20:19:00.870,2019-01-01 20:24:43.093,2019-01-01 20:51:12.877,2019-01-01 20:51:12.877,Baylor Scott & White - Lakeway,-97.961,30.325,192,222.72
1,3,23408585,DM03,-97.751235,30.246114,2019-01-03 14:20:41.000,2019-01-03 14:20:54.110,2019-01-03 14:27:27.613,,,2019-01-03 14:35:14.657,2019-01-03 14:35:14.657,False Alarm Call,-97.777,30.251,393,343.52
2,4,23408585,DMO02,-97.751966,30.245411,2019-01-03 14:20:58.117,2019-01-03 14:20:58.140,2019-01-03 14:26:59.083,,,2019-01-03 14:34:24.343,2019-01-03 14:34:24.343,Dual w/ other ATCEMS Unit,-97.777,30.251,360,343.52
3,8,23439455,M10,-97.742590,30.418169,2019-01-07 17:28:48.000,2019-01-07 17:30:12.067,2019-01-07 17:47:53.030,,,2019-01-07 17:56:40.097,2019-01-07 17:56:40.097,False Alarm Call,-97.747,30.407,1060,412.58
4,10,23444785,M04,-97.725892,30.290517,2019-01-08 13:55:38.000,2019-01-08 13:55:55.987,2019-01-08 14:04:26.413,2019-01-08 14:20:11.810,2019-01-08 14:41:12.577,2019-01-08 15:09:21.000,2019-01-08 15:09:21.000,Saint Davids Med Ctr,-97.626,30.294,510,997.13
5,14,23461774,M20,-97.823258,30.139617,2019-01-11 01:06:11.000,2019-01-11 01:07:45.857,2019-01-11 01:15:10.577,,,2019-01-11 01:45:02.457,2019-01-11 01:45:02.457,Refusal,-97.788,30.141,444,450.83
6,15,23474281,M31,-97.898073,30.183457,2019-01-12 23:22:09.000,2019-01-12 23:23:06.587,2019-01-12 23:32:01.403,,,2019-01-12 23:39:05.350,2019-01-12 23:39:05.350,False Alarm Call,-97.859,30.222,534,566.48
7,16,23482328,M15,-97.797480,30.177219,2019-01-14 11:43:03.000,2019-01-14 11:43:07.000,2019-01-14 11:55:32.857,,,2019-01-14 11:56:34.443,2019-01-14 11:56:34.443,False Alarm Call,-97.837,30.187,745,446.88
8,18,23486206,M20,-97.823259,30.139661,2019-01-14 23:42:54.000,2019-01-14 23:44:39.300,2019-01-14 23:53:18.880,,,2019-01-15 00:02:34.830,2019-01-15 00:02:34.830,No Patient,-97.838,30.176,519,657.70
9,22,23509088,M06,-97.739610,30.266114,2019-01-18 11:41:00.000,2019-01-18 11:41:40.780,2019-01-18 11:52:10.143,,,2019-01-18 11:56:10.190,2019-01-18 11:56:10.190,False Alarm Call,-97.779,30.253,629,657.16


In [54]:
data["Time_Arrived"] = data.apply(lambda x: datetime.strptime(x["Time_ArrivedAtScene"], "%Y-%m-%d %H:%M:%S.%f"), axis=1)

In [55]:
data.sort_values(by=['Time_Arrived'], inplace=True, ascending=True)

In [56]:
data["timedelta"] = data["Time_Arrived"] - data["Time_Arrived"].values[0]

In [57]:
data["arrival_seconds"] = data.apply(lambda x: x["timedelta"].days*86400 + x["timedelta"].seconds, axis=1)

In [58]:
data["interarrival_seconds"] =  np.hstack((np.array([0]), data["arrival_seconds"].values[1:] - data["arrival_seconds"].values[:-1]))

In [59]:
data["neighborhood"] = data.apply(lambda x: g.map_point_to_region(x["Latitude_Of_Emergency"], x["Longitude_Of_Emergency"]), axis=1)

In [60]:
weekdays = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
data["dow"] = data.apply(lambda x: weekdays[datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").weekday()], axis=1)

## Adding travel time from call to each station

In [61]:
stations = pd.read_csv("Austin_Data/stations.csv")

In [62]:
station_regions = [g.map_point_to_region(lat, long) for lat, long in zip(stations['LATITUDE'], stations['LONGITUDE'])]

In [63]:
for i, region in enumerate(station_regions):
    data["stn{}_min".format(i+1)] = data.apply(lambda x: g.get_time(x["neighborhood"], region), axis=1)

In [64]:
data

Unnamed: 0.1,Unnamed: 0,IncidentForeignKey,Radio_Name,Longitude_At_Assign_Time,Latitude_At_Assign_Time,Time_Assigned,Time_Enroute,Time_ArrivedAtScene,Time_Depart_Scene,Time_Arrive_Destination,...,stn35_min,stn36_min,stn37_min,stn38_min,stn39_min,stn40_min,stn41_min,stn42_min,stn43_min,stn44_min
51208,66520,23391588,M02,-97.774677,30.227350,2019-01-01 00:07:26.000,2019-01-01 00:07:37.947,2019-01-01 00:10:01.923,,,...,818.84,1568.36,970.32,1313.30,1415.33,999.42,1996.32,1472.08,887.81,1295.48
43271,56533,23391562,M33,-97.708630,30.299894,2019-01-01 00:05:58.000,2019-01-01 00:06:18.037,2019-01-01 00:11:37.950,,,...,1265.26,1040.96,1434.89,2027.50,2129.53,300.00,1545.87,837.98,1352.38,1241.57
165765,216073,23391302,DMO04,-97.697770,30.337490,2019-01-01 00:12:27.563,2019-01-01 00:12:27.630,2019-01-01 00:15:34.313,,,...,1265.26,1040.96,1434.89,2027.50,2129.53,300.00,1545.87,837.98,1352.38,1241.57
141575,185388,23391695,M01,-97.767595,30.229402,2019-01-01 00:14:06.000,2019-01-01 00:15:33.120,2019-01-01 00:19:32.527,,,...,1068.27,1343.99,1328.09,1419.83,1602.57,775.05,1661.35,1317.83,1245.58,1641.61
6459,9646,23391664,M03,-97.740107,30.268286,2019-01-01 00:15:50.000,2019-01-01 00:15:57.387,2019-01-01 00:20:02.367,,,...,1118.66,1244.19,875.45,1880.90,1982.93,603.90,1749.10,1218.02,792.94,1152.54
137918,180690,23391655,M08,-97.735108,30.275810,2019-01-01 00:13:30.000,2019-01-01 00:14:54.727,2019-01-01 00:22:25.390,2019-01-01 00:50:15.500,2019-01-01 01:03:01.393,...,1367.14,749.07,1468.89,2014.77,2197.51,641.56,1223.15,1029.21,1386.38,1430.00
29474,38959,23391680,M17,-97.772190,30.277188,2019-01-01 00:16:07.000,2019-01-01 00:17:20.400,2019-01-01 00:24:58.907,2019-01-01 00:32:42.447,2019-01-01 00:48:43.000,...,1068.27,1343.99,1328.09,1419.83,1602.57,775.05,1661.35,1317.83,1245.58,1641.61
124367,163757,23391795,DMO04,-97.689198,30.319857,2019-01-01 00:22:44.420,2019-01-01 00:22:44.480,2019-01-01 00:26:29.027,,,...,1265.26,1040.96,1434.89,2027.50,2129.53,300.00,1545.87,837.98,1352.38,1241.57
126787,166973,23391824,M33,-97.734738,30.275694,2019-01-01 00:24:38.000,2019-01-01 00:24:48.467,2019-01-01 00:26:30.070,2019-01-01 00:45:27.167,2019-01-01 00:52:34.910,...,1068.27,1343.99,1328.09,1419.83,1602.57,775.05,1661.35,1317.83,1245.58,1641.61
124366,163756,23391795,M05,-97.725892,30.290517,2019-01-01 00:22:16.000,2019-01-01 00:22:22.470,2019-01-01 00:26:32.947,2019-01-01 00:59:06.573,2019-01-01 01:09:02.017,...,1265.26,1040.96,1434.89,2027.50,2129.53,300.00,1545.87,837.98,1352.38,1241.57


## Adding travel time from call to each hospital

In [65]:
hospitals = pd.read_csv("Austin_Data/hospitals.csv")

In [66]:
hospital_regions = [g.map_point_to_region(lat, long) for lat, long in zip(hospitals['LATITUDE'], hospitals['LONGITUDE'])]

In [67]:
for i, region in enumerate(hospital_regions):
    data["hosp{}_min".format(i+1)] = data.apply(lambda x: g.get_time(x["neighborhood"], region), axis=1)

In [68]:
data.columns

Index(['Unnamed: 0', 'IncidentForeignKey', 'Radio_Name',
       'Longitude_At_Assign_Time', 'Latitude_At_Assign_Time', 'Time_Assigned',
       'Time_Enroute', 'Time_ArrivedAtScene', 'Time_Depart_Scene',
       'Time_Arrive_Destination', 'Time_Available', 'Time_Call_Cleared',
       'Call_Disposition', 'Longitude_Of_Emergency', 'Latitude_Of_Emergency',
       'transport_time', 'grid_time', 'Time_Arrived', 'timedelta',
       'arrival_seconds', 'interarrival_seconds', 'neighborhood', 'dow',
       'stn1_min', 'stn2_min', 'stn3_min', 'stn4_min', 'stn5_min', 'stn6_min',
       'stn7_min', 'stn8_min', 'stn9_min', 'stn10_min', 'stn11_min',
       'stn12_min', 'stn13_min', 'stn14_min', 'stn15_min', 'stn16_min',
       'stn17_min', 'stn18_min', 'stn19_min', 'stn20_min', 'stn21_min',
       'stn22_min', 'stn23_min', 'stn24_min', 'stn25_min', 'stn26_min',
       'stn27_min', 'stn28_min', 'stn29_min', 'stn30_min', 'stn31_min',
       'stn32_min', 'stn33_min', 'stn34_min', 'stn35_min', 'stn36_min'

In [69]:
data["hour"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").hour, axis=1)
data["month"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").month, axis=1)
data["year"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").year, axis=1)
data["day"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").day, axis=1)
data["minute"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").minute, axis=1)
data["second"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").second, axis=1)


In [70]:
data.columns

Index(['Unnamed: 0', 'IncidentForeignKey', 'Radio_Name',
       'Longitude_At_Assign_Time', 'Latitude_At_Assign_Time', 'Time_Assigned',
       'Time_Enroute', 'Time_ArrivedAtScene', 'Time_Depart_Scene',
       'Time_Arrive_Destination', 'Time_Available', 'Time_Call_Cleared',
       'Call_Disposition', 'Longitude_Of_Emergency', 'Latitude_Of_Emergency',
       'transport_time', 'grid_time', 'Time_Arrived', 'timedelta',
       'arrival_seconds', 'interarrival_seconds', 'neighborhood', 'dow',
       'stn1_min', 'stn2_min', 'stn3_min', 'stn4_min', 'stn5_min', 'stn6_min',
       'stn7_min', 'stn8_min', 'stn9_min', 'stn10_min', 'stn11_min',
       'stn12_min', 'stn13_min', 'stn14_min', 'stn15_min', 'stn16_min',
       'stn17_min', 'stn18_min', 'stn19_min', 'stn20_min', 'stn21_min',
       'stn22_min', 'stn23_min', 'stn24_min', 'stn25_min', 'stn26_min',
       'stn27_min', 'stn28_min', 'stn29_min', 'stn30_min', 'stn31_min',
       'stn32_min', 'stn33_min', 'stn34_min', 'stn35_min', 'stn36_min'

In [71]:
data_without_unneeded_cols = data.drop(columns=['IncidentForeignKey', 
                              'Radio_Name',
                              'Longitude_At_Assign_Time',
                              'Latitude_At_Assign_Time',
                              'Time_Assigned',
                              'Time_Enroute',
                              'Time_ArrivedAtScene',
                              'Time_Depart_Scene',
                              'Time_Arrive_Destination',
                              'Time_Available',
                              'Time_Call_Cleared',
                              'Call_Disposition',
                              'Longitude_Of_Emergency',
                              'Latitude_Of_Emergency',
                              'transport_time',
                              'grid_time',
                              'Time_Arrived',
                              'timedelta',
                              'Unnamed: 0'])

data_without_unneeded_cols.to_csv("calls_w_StnHospTimes.csv")

## Remove Weekends from Calls

In [72]:
data = data[data.dow != "Sat"]
data = data[data.dow != "Sun"]

## Add hour/month/year fields

In [73]:
data["hour"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").hour, axis=1)
data["month"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").month, axis=1)
data["year"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").year, axis=1)
data["day"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").day, axis=1)
data


Unnamed: 0.1,Unnamed: 0,IncidentForeignKey,Radio_Name,Longitude_At_Assign_Time,Latitude_At_Assign_Time,Time_Assigned,Time_Enroute,Time_ArrivedAtScene,Time_Depart_Scene,Time_Arrive_Destination,...,hosp9_min,hosp10_min,hosp11_min,hosp12_min,hour,month,year,day,minute,second
51208,66520,23391588,M02,-97.774677,30.227350,2019-01-01 00:07:26.000,2019-01-01 00:07:37.947,2019-01-01 00:10:01.923,,,...,783.95,999.42,1527.79,783.95,0,1,2019,1,7,26
43271,56533,23391562,M33,-97.708630,30.299894,2019-01-01 00:05:58.000,2019-01-01 00:06:18.037,2019-01-01 00:11:37.950,,,...,807.03,300.00,1897.91,807.03,0,1,2019,1,5,58
165765,216073,23391302,DMO04,-97.697770,30.337490,2019-01-01 00:12:27.563,2019-01-01 00:12:27.630,2019-01-01 00:15:34.313,,,...,807.03,300.00,1897.91,807.03,0,1,2019,1,12,27
141575,185388,23391695,M01,-97.767595,30.229402,2019-01-01 00:14:06.000,2019-01-01 00:15:33.120,2019-01-01 00:19:32.527,,,...,438.15,775.05,1258.36,438.15,0,1,2019,1,14,6
6459,9646,23391664,M03,-97.740107,30.268286,2019-01-01 00:15:50.000,2019-01-01 00:15:57.387,2019-01-01 00:20:02.367,,,...,821.15,603.90,1710.09,821.15,0,1,2019,1,15,50
137918,180690,23391655,M08,-97.735108,30.275810,2019-01-01 00:13:30.000,2019-01-01 00:14:54.727,2019-01-01 00:22:25.390,2019-01-01 00:50:15.500,2019-01-01 01:03:01.393,...,793.20,641.56,1729.53,793.20,0,1,2019,1,13,30
29474,38959,23391680,M17,-97.772190,30.277188,2019-01-01 00:16:07.000,2019-01-01 00:17:20.400,2019-01-01 00:24:58.907,2019-01-01 00:32:42.447,2019-01-01 00:48:43.000,...,438.15,775.05,1258.36,438.15,0,1,2019,1,16,7
124367,163757,23391795,DMO04,-97.689198,30.319857,2019-01-01 00:22:44.420,2019-01-01 00:22:44.480,2019-01-01 00:26:29.027,,,...,807.03,300.00,1897.91,807.03,0,1,2019,1,22,44
126787,166973,23391824,M33,-97.734738,30.275694,2019-01-01 00:24:38.000,2019-01-01 00:24:48.467,2019-01-01 00:26:30.070,2019-01-01 00:45:27.167,2019-01-01 00:52:34.910,...,438.15,775.05,1258.36,438.15,0,1,2019,1,24,38
124366,163756,23391795,M05,-97.725892,30.290517,2019-01-01 00:22:16.000,2019-01-01 00:22:22.470,2019-01-01 00:26:32.947,2019-01-01 00:59:06.573,2019-01-01 01:09:02.017,...,807.03,300.00,1897.91,807.03,0,1,2019,1,22,16


## Split 80/20

In [74]:
# Split 80/20

first_80_percent = (data.head(math.floor(len(data) * 4 / 5))).copy(deep=True)
last_20_percent = (data.tail(math.ceil(len(data) / 5))).copy(deep=True)

len(first_80_percent) + len(last_20_percent)

150979

## Test call stuff
### remove unneccesary test call columns

In [75]:
last_20_percent.drop(columns=['IncidentForeignKey', 
                              'Radio_Name',
                              'Longitude_At_Assign_Time',
                              'Latitude_At_Assign_Time',
                              'Time_Assigned',
                              'Time_Enroute',
                              'Time_ArrivedAtScene',
                              'Time_Depart_Scene',
                              'Time_Arrive_Destination',
                              'Time_Available',
                              'Time_Call_Cleared',
                              'Call_Disposition',
                              'Longitude_Of_Emergency',
                              'Latitude_Of_Emergency',
                              'transport_time',
                              'grid_time',
                              'Time_Arrived',
                              'timedelta',
                              'arrival_seconds',
                              'day',
                              'Unnamed: 0'])

Unnamed: 0,interarrival_seconds,neighborhood,dow,stn1_min,stn2_min,stn3_min,stn4_min,stn5_min,stn6_min,stn7_min,...,hosp8_min,hosp9_min,hosp10_min,hosp11_min,hosp12_min,hour,month,year,minute,second
93941,38,83,Wed,300.00,1619.74,647.46,790.88,647.46,603.90,2503.11,...,647.46,821.15,603.90,1710.09,821.15,20,4,2020,25,53
96996,537,38,Wed,1012.38,1580.48,1369.76,1379.66,1369.76,1568.98,2946.06,...,1369.76,1554.31,1568.98,2298.14,1554.31,20,4,2020,25,50
140031,98,129,Wed,1291.87,2280.92,1335.77,1106.28,1335.77,1046.35,2601.02,...,1335.77,1321.60,1046.35,2353.39,1321.60,20,4,2020,35,32
122890,18,107,Wed,2655.15,2258.48,2196.05,3050.22,2196.05,2764.35,1082.07,...,2196.05,2172.70,2764.35,2496.61,2172.70,20,4,2020,37,22
181726,475,113,Wed,879.18,1770.93,923.08,805.69,923.08,641.56,1977.15,...,923.08,793.20,641.56,1729.53,793.20,20,4,2020,42,13
114179,21,51,Wed,1263.60,832.90,1133.15,1886.70,1133.15,1372.80,2560.18,...,1133.15,1254.78,1372.80,2055.05,1254.78,20,4,2020,43,18
122891,263,76,Wed,2613.10,2044.65,2188.84,3133.33,2188.84,2722.30,1979.69,...,2188.84,2165.49,2722.30,2991.13,2165.49,20,4,2020,41,49
55848,128,67,Wed,890.21,1069.46,576.63,1384.38,576.63,999.42,2175.70,...,576.63,783.95,999.42,1527.79,783.95,20,4,2020,55,38
36182,62,52,Wed,1074.69,1369.15,1051.54,1670.57,1051.54,1183.89,2627.84,...,1051.54,1236.08,1183.89,1979.92,1236.08,20,4,2020,49,45
55849,198,67,Wed,890.21,1069.46,576.63,1384.38,576.63,999.42,2175.70,...,576.63,783.95,999.42,1527.79,783.95,20,4,2020,55,38


### Save last 20 percent as our austin_test_calls.csv

In [76]:
test = pd.DataFrame()

In [77]:
test["interarrival_seconds"] = last_20_percent["interarrival_seconds"]
test["neighborhood"] = last_20_percent["neighborhood"]
test["dow"] = last_20_percent["dow"]

for i in range(1, 45):
    test["stn{}_min".format(i)] = last_20_percent["stn{}_min".format(i)]
    
for i in range(1, 13):
    test["hosp{}_min".format(i)] = last_20_percent["hosp{}_min".format(i)]

In [78]:
test

Unnamed: 0,interarrival_seconds,neighborhood,dow,stn1_min,stn2_min,stn3_min,stn4_min,stn5_min,stn6_min,stn7_min,...,hosp3_min,hosp4_min,hosp5_min,hosp6_min,hosp7_min,hosp8_min,hosp9_min,hosp10_min,hosp11_min,hosp12_min
93941,38,83,Wed,300.00,1619.74,647.46,790.88,647.46,603.90,2503.11,...,1152.23,1196.41,2361.74,1371.38,603.90,647.46,821.15,603.90,1710.09,821.15
96996,537,38,Wed,1012.38,1580.48,1369.76,1379.66,1369.76,1568.98,2946.06,...,1826.82,1399.17,2564.49,1574.13,1568.98,1369.76,1554.31,1568.98,2298.14,1554.31
140031,98,129,Wed,1291.87,2280.92,1335.77,1106.28,1335.77,1046.35,2601.02,...,767.74,1857.59,2935.34,2032.55,1046.35,1335.77,1321.60,1046.35,2353.39,1321.60
122890,18,107,Wed,2655.15,2258.48,2196.05,3050.22,2196.05,2764.35,1082.07,...,2716.38,2120.80,610.13,1684.74,2764.35,2196.05,2172.70,2764.35,2496.61,2172.70
181726,475,113,Wed,879.18,1770.93,923.08,805.69,923.08,641.56,1977.15,...,586.79,1444.90,2311.47,1585.96,641.56,923.08,793.20,641.56,1729.53,793.20
114179,21,51,Wed,1263.60,832.90,1133.15,1886.70,1133.15,1372.80,2560.18,...,1849.79,869.99,2178.61,985.50,1372.80,1133.15,1254.78,1372.80,2055.05,1254.78
122891,263,76,Wed,2613.10,2044.65,2188.84,3133.33,2188.84,2722.30,1979.69,...,2844.56,2078.75,1598.12,1548.90,2722.30,2188.84,2165.49,2722.30,2991.13,2165.49
55848,128,67,Wed,890.21,1069.46,576.63,1384.38,576.63,999.42,2175.70,...,1463.03,628.81,1794.13,803.78,999.42,576.63,783.95,999.42,1527.79,783.95
36182,62,52,Wed,1074.69,1369.15,1051.54,1670.57,1051.54,1183.89,2627.84,...,1660.88,1051.58,2246.27,1241.25,1183.89,1051.54,1236.08,1183.89,1979.92,1236.08
55849,198,67,Wed,890.21,1069.46,576.63,1384.38,576.63,999.42,2175.70,...,1463.03,628.81,1794.13,803.78,999.42,576.63,783.95,999.42,1527.79,783.95


In [79]:
test.to_csv("Austin_Data/austin_test_calls.csv", index=False)

## TODO: Remake weekday calls CSV

## Sudeep's Code from WeekdayCalls

## Now let's try to see how our grid information compares to this

In [80]:
data = first_80_percent.copy(deep=True)

In [81]:
data["S"] = 1

In [82]:
d = data.groupby(["year", "month", "day", "hour", "neighborhood"])["S"].sum()

In [83]:
d.keys()

MultiIndex(levels=[[2019, 2020], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [-8528, -234, -200, -10, -6, 7, 8, 9, 21, 22, 23, 24, 25, 28, 30, 35, 36, 37, 38, 39, 40, 41, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 166, 167, 168, 169, 170, 171, 172, 173, 174, 178, 182, 183, 184, 188, 189, 219, 239, 262]],
           labels=[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [92]:
p = pd.DataFrame(columns=["year", "month", "day", "hour"]+[i+1 for i in range(g.nrows*g.ncols + 1)])

In [93]:
p

Unnamed: 0,year,month,day,hour,1,2,3,4,5,6,...,202,203,204,205,206,207,208,209,210,211


In [86]:
year = d.keys()[0][0]
month = d.keys()[0][1]
day = d.keys()[0][2]
hour = d.keys()[0][3]
curr = datetime(year=year, month=month, day=day, hour=hour)
t = timedelta(hours=1)
years = []
months = []
days = []
hours = []
while not (curr.year == d.keys()[-1][0] and curr.month == d.keys()[-1][1] and curr.day == d.keys()[-1][2]):
    if curr.weekday() < 5:
        years.append(curr.year)
        months.append(curr.month)
        days.append(curr.day)
        hours.append(curr.hour)
    curr += t
    
    

In [94]:
p["year"] = years
p["month"] = months
p["day"] = days
p["hour"] = hours

In [95]:
for i in range(1, g.nrows*g.ncols+1):
    p[i] = 0

In [96]:
#for i in range(3201, ):
    #p[i] = 0

In [97]:
#p.to_csv("intermediate.csv")

In [98]:
p

Unnamed: 0,year,month,day,hour,1,2,3,4,5,6,...,202,203,204,205,206,207,208,209,210,211
0,2019,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
1,2019,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
2,2019,1,1,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
3,2019,1,1,3,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
4,2019,1,1,4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
5,2019,1,1,5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
6,2019,1,1,6,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
7,2019,1,1,7,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
8,2019,1,1,8,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
9,2019,1,1,9,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,


In [99]:
for k in d.keys():
    p.loc[(p["year"] == k[0]) & (p["month"] == k[1]) & (p["day"] == k[2]) & (p["hour"] == k[3]), k[4]] = d[k]

In [100]:
for col in p.columns[228:]:
    p = p.drop(col, axis=1)

In [101]:
p

Unnamed: 0,year,month,day,hour,1,2,3,4,5,6,...,210,211,-8528,219,239,262,-200,-6,-234,-10
0,2019,1,1,0,0,0,0,0,0,0,...,0,,,,,,,,,
1,2019,1,1,1,0,0,0,0,0,0,...,0,,,,,,,,,
2,2019,1,1,2,0,0,0,0,0,0,...,0,,,,,,,,,
3,2019,1,1,3,0,0,0,0,0,0,...,0,,,,,,,,,
4,2019,1,1,4,0,0,0,0,0,0,...,0,,,,,,,,,
5,2019,1,1,5,0,0,0,0,0,0,...,0,,,,,,,,,
6,2019,1,1,6,0,0,0,0,0,0,...,0,,,,,,,,,
7,2019,1,1,7,0,0,0,0,0,0,...,0,,,,,,,,,
8,2019,1,1,8,0,0,0,0,0,0,...,0,,,,,,,,,
9,2019,1,1,9,0,0,0,0,0,0,...,0,,,,,,,,,


In [102]:
p.to_csv("Austin_Data/WeekdayCalls.csv", index=False)