Author: Joshua, Will, Ethan <br />
Summary: Collect calls and group them into calls by hour. Generates 
hospitals.csv <br>
stations.csv <br>
austin_test_calls.csv <br>
WeekdayCalls.csv <br>


In [12]:
import csv
import collections
import pandas as pd
import numpy as np
import math
import json
from datetime import datetime, timedelta


In [13]:
# Straight up grid
class Grid():
    def __init__(self, grid_json):
        self.grid = grid_json
        self.min_lat = self.grid["latitude_min"]
        self.min_lon = self.grid["longitude_min"]
        self.max_lat = self.grid["latitude_max"]
        self.max_lon = self.grid["longitude_max"]
        self.latitude_delta = self.grid["latitude_step"]
        self.longitude_delta = self.grid["longitude_step"]
        self.nrows = math.ceil((self.max_lat - self.min_lat) / self.latitude_delta)
        self.ncols = math.ceil((self.max_lon - self.min_lon) / self.longitude_delta)
        self.times = self.grid["time_matrix"]
        self.census_tract_region_map = self.grid["census_tract_region_mapping"]
        self.region_to_tract = collections.defaultdict(list)
        for census_tract in self.census_tract_region_map:
            for region in self.census_tract_region_map[census_tract]:
                self.region_to_tract[region].append(census_tract)
    def map_point_to_region(self, latitude, longitude):
        return math.floor((latitude-self.min_lat)/self.latitude_delta) * self.ncols  + math.floor((longitude-self.min_lon)/self.longitude_delta)
    def get_representative(self, region_num):
        row_num = region_num//self.ncols
        col_num = region_num - row_num*self.ncols
        lat = self.min_lat + row_num * self.latitude_delta + 0.5*self.latitude_delta
        lon = self.min_lon + col_num * self.longitude_delta + 0.5*self.longitude_delta
        return [lon, lat]
    def get_time(self, region1, region2):
        try:
            return self.times[region1][region2]
        except IndexError:
            return -1
    def region_to_census_tract(self, region):
        try:
            return self.region_to_tract[region]
        except KeyError:
            return "0_0"

In [14]:
# Using smaller distance matrix for hopefully faster runtime in Julia code
with open("..\Input_Data\grid_info_multiple.json", "r") as f:
    grid_json = json.load(f)
g = Grid(grid_json)

In [15]:
len(g.times)

3200

In [16]:
data = pd.read_csv("..\Input_Data\cleaned_data.csv")

In [17]:
data

Unnamed: 0.1,Unnamed: 0,IncidentForeignKey,Radio_Name,Longitude_At_Assign_Time,Latitude_At_Assign_Time,Time_Assigned,Time_Enroute,Time_ArrivedAtScene,Time_Depart_Scene,Time_Arrive_Destination,Time_Available,Time_Call_Cleared,Call_Disposition,Longitude_Of_Emergency,Latitude_Of_Emergency,transport_time,grid_time
0,1,23397873,M09,-97.972866,30.333385,2019-01-01 19:43:36.000,2019-01-01 19:45:27.030,2019-01-01 19:48:39.770,2019-01-01 20:19:00.870,2019-01-01 20:24:43.093,2019-01-01 20:51:12.877,2019-01-01 20:51:12.877,Baylor Scott & White - Lakeway,-97.961,30.325,192,222.72
1,3,23408585,DM03,-97.751235,30.246114,2019-01-03 14:20:41.000,2019-01-03 14:20:54.110,2019-01-03 14:27:27.613,,,2019-01-03 14:35:14.657,2019-01-03 14:35:14.657,False Alarm Call,-97.777,30.251,393,343.52
2,4,23408585,DMO02,-97.751966,30.245411,2019-01-03 14:20:58.117,2019-01-03 14:20:58.140,2019-01-03 14:26:59.083,,,2019-01-03 14:34:24.343,2019-01-03 14:34:24.343,Dual w/ other ATCEMS Unit,-97.777,30.251,360,343.52
3,8,23439455,M10,-97.742590,30.418169,2019-01-07 17:28:48.000,2019-01-07 17:30:12.067,2019-01-07 17:47:53.030,,,2019-01-07 17:56:40.097,2019-01-07 17:56:40.097,False Alarm Call,-97.747,30.407,1060,412.58
4,10,23444785,M04,-97.725892,30.290517,2019-01-08 13:55:38.000,2019-01-08 13:55:55.987,2019-01-08 14:04:26.413,2019-01-08 14:20:11.810,2019-01-08 14:41:12.577,2019-01-08 15:09:21.000,2019-01-08 15:09:21.000,Saint Davids Med Ctr,-97.626,30.294,510,997.13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210102,279971,27262452,CHP11,-97.769524,30.231354,2020-06-29 14:57:29.090,2020-06-29 14:57:29.090,2020-06-29 15:33:16.447,,,2020-06-29 16:50:48.823,2020-06-29 16:50:48.823,Other,-97.654,30.379,2147,1318.73
210103,279972,27317236,CHP11,-97.774368,30.224984,2020-07-07 10:03:20.000,2020-07-07 10:03:55.657,2020-07-07 11:17:14.023,,,2020-07-07 12:10:23.387,2020-07-07 12:10:23.387,Other,-97.789,30.204,4398,304.24
210104,279973,27336505,CHP21,-97.734738,30.275694,2020-07-10 09:10:57.000,2020-07-10 09:11:46.533,2020-07-10 09:22:19.883,,,2020-07-10 09:36:30.773,2020-07-10 09:36:30.773,Other,-97.767,30.190,633,754.85
210105,279974,27413735,CHP08,-97.770123,30.191624,2020-07-22 13:10:46.880,2020-07-22 13:10:46.880,2020-07-22 13:42:29.970,,,2020-07-22 14:13:23.757,2020-07-22 14:13:23.757,CHP Intervention(CHP use only),-97.694,30.227,1903,757.36


In [18]:
#collect headers?
data["Time_Arrived"] = data.apply(lambda x: datetime.strptime(x["Time_ArrivedAtScene"], "%Y-%m-%d %H:%M:%S.%f"), axis=1)
data.sort_values(by=['Time_Arrived'], inplace=True, ascending=True)
data["timedelta"] = data["Time_Arrived"] - data["Time_Arrived"].values[0]
data["arrival_seconds"] = data.apply(lambda x: x["timedelta"].days*86400 + x["timedelta"].seconds, axis=1)
data["interarrival_seconds"] =  np.hstack((np.array([0]), data["arrival_seconds"].values[1:] - data["arrival_seconds"].values[:-1]))
data["neighborhood"] = data.apply(lambda x: g.map_point_to_region(x["Latitude_Of_Emergency"], x["Longitude_Of_Emergency"]), axis=1)
weekdays = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
data["dow"] = data.apply(lambda x: weekdays[datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").weekday()], axis=1)

## Adding travel time from call to each station

In [19]:
stations = pd.read_csv("../Input_Data/Austin_Data/stations.csv")

In [20]:
station_regions = [g.map_point_to_region(lat, long) for lat, long in zip(stations['LATITUDE'], stations['LONGITUDE'])]
print(len(station_regions))
station_regions

44


[1011,
 452,
 948,
 1381,
 1070,
 1377,
 1789,
 886,
 700,
 1132,
 1136,
 1377,
 1071,
 1682,
 1435,
 1240,
 1924,
 818,
 890,
 1563,
 1379,
 579,
 1857,
 1129,
 1745,
 2042,
 333,
 1126,
 1967,
 1511,
 155,
 2339,
 1535,
 1481,
 644,
 1809,
 772,
 571,
 875,
 1256,
 2289,
 1507,
 468,
 960]

In [21]:
for i, region in enumerate(station_regions):
    data["stn{}_min".format(i+1)] = data.apply(lambda x: g.get_time(x["neighborhood"], region), axis=1)

In [22]:
data

Unnamed: 0.1,Unnamed: 0,IncidentForeignKey,Radio_Name,Longitude_At_Assign_Time,Latitude_At_Assign_Time,Time_Assigned,Time_Enroute,Time_ArrivedAtScene,Time_Depart_Scene,Time_Arrive_Destination,...,stn35_min,stn36_min,stn37_min,stn38_min,stn39_min,stn40_min,stn41_min,stn42_min,stn43_min,stn44_min
51208,66520,23391588,M02,-97.774677,30.227350,2019-01-01 00:07:26.000,2019-01-01 00:07:37.947,2019-01-01 00:10:01.923,,,...,379.51,1340.31,763.02,975.34,1040.42,745.18,1884.75,1290.50,1031.45,1202.24
43271,56533,23391562,M33,-97.708630,30.299894,2019-01-01 00:05:58.000,2019-01-01 00:06:18.037,2019-01-01 00:11:37.950,,,...,1091.42,789.11,1198.70,1448.26,1513.34,178.74,1333.56,718.14,1467.14,1177.87
165765,216073,23391302,DMO04,-97.697770,30.337490,2019-01-01 00:12:27.563,2019-01-01 00:12:27.630,2019-01-01 00:15:34.313,,,...,1179.77,762.08,1139.11,1536.61,1601.69,387.67,1297.74,603.35,1407.55,1133.07
141575,185388,23391695,M01,-97.767595,30.229402,2019-01-01 00:14:06.000,2019-01-01 00:15:33.120,2019-01-01 00:19:32.527,,,...,753.21,1385.62,883.47,941.49,1006.57,790.49,1860.06,1335.81,1151.91,1322.69
6459,9646,23391664,M03,-97.740107,30.268286,2019-01-01 00:15:50.000,2019-01-01 00:15:57.387,2019-01-01 00:20:02.367,,,...,1000.70,985.26,952.09,1357.54,1422.62,382.24,1529.71,935.46,1220.52,1149.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121625,160313,27627818,M04,-97.684206,30.277996,2020-08-24 09:02:45.000,2020-08-24 09:04:08.563,2020-08-24 09:14:07.160,,,...,448.10,1551.79,630.26,1147.90,1212.98,956.66,2091.80,1352.85,898.69,1069.48
207974,276637,27627793,CHP11,-97.734581,30.275668,2020-08-24 08:58:32.373,2020-08-24 08:58:32.417,2020-08-24 09:23:21.343,,,...,694.33,1480.67,824.59,809.18,874.26,885.55,1797.87,1430.87,1093.02,1263.80
165764,216072,27627946,M15,-97.793500,30.174652,2020-08-24 09:22:27.000,2020-08-24 09:23:12.147,2020-08-24 09:27:30.790,,,...,991.99,1866.41,1324.33,901.82,1142.98,1271.28,2410.85,1816.60,1592.76,1763.55
24424,32064,27627921,M35,-97.613164,30.347335,2020-08-24 09:19:02.000,2020-08-24 09:19:47.220,2020-08-24 09:28:30.100,,,...,1271.88,1135.89,1113.58,1920.35,1985.43,740.41,1570.20,777.49,1382.01,963.11


## Adding travel time from call to each hospital

In [23]:
hospitals = pd.read_csv("../Input_Data/austin_data/hospitals.csv")

In [24]:
hospital_regions = [g.map_point_to_region(lat, long) for lat, long in zip(hospitals['LATITUDE'], hospitals['LONGITUDE'])]

In [25]:
for i, region in enumerate(hospital_regions):
    data["hosp{}_min".format(i+1)] = data.apply(lambda x: g.get_time(x["neighborhood"], region), axis=1)

In [26]:
data.columns

Index(['Unnamed: 0', 'IncidentForeignKey', 'Radio_Name',
       'Longitude_At_Assign_Time', 'Latitude_At_Assign_Time', 'Time_Assigned',
       'Time_Enroute', 'Time_ArrivedAtScene', 'Time_Depart_Scene',
       'Time_Arrive_Destination', 'Time_Available', 'Time_Call_Cleared',
       'Call_Disposition', 'Longitude_Of_Emergency', 'Latitude_Of_Emergency',
       'transport_time', 'grid_time', 'Time_Arrived', 'timedelta',
       'arrival_seconds', 'interarrival_seconds', 'neighborhood', 'dow',
       'stn1_min', 'stn2_min', 'stn3_min', 'stn4_min', 'stn5_min', 'stn6_min',
       'stn7_min', 'stn8_min', 'stn9_min', 'stn10_min', 'stn11_min',
       'stn12_min', 'stn13_min', 'stn14_min', 'stn15_min', 'stn16_min',
       'stn17_min', 'stn18_min', 'stn19_min', 'stn20_min', 'stn21_min',
       'stn22_min', 'stn23_min', 'stn24_min', 'stn25_min', 'stn26_min',
       'stn27_min', 'stn28_min', 'stn29_min', 'stn30_min', 'stn31_min',
       'stn32_min', 'stn33_min', 'stn34_min', 'stn35_min', 'stn36_min'

In [27]:
data["hour"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").hour, axis=1)
data["month"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").month, axis=1)
data["year"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").year, axis=1)
data["day"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").day, axis=1)
data["minute"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").minute, axis=1)
data["second"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").second, axis=1)


In [28]:
data.columns

Index(['Unnamed: 0', 'IncidentForeignKey', 'Radio_Name',
       'Longitude_At_Assign_Time', 'Latitude_At_Assign_Time', 'Time_Assigned',
       'Time_Enroute', 'Time_ArrivedAtScene', 'Time_Depart_Scene',
       'Time_Arrive_Destination', 'Time_Available', 'Time_Call_Cleared',
       'Call_Disposition', 'Longitude_Of_Emergency', 'Latitude_Of_Emergency',
       'transport_time', 'grid_time', 'Time_Arrived', 'timedelta',
       'arrival_seconds', 'interarrival_seconds', 'neighborhood', 'dow',
       'stn1_min', 'stn2_min', 'stn3_min', 'stn4_min', 'stn5_min', 'stn6_min',
       'stn7_min', 'stn8_min', 'stn9_min', 'stn10_min', 'stn11_min',
       'stn12_min', 'stn13_min', 'stn14_min', 'stn15_min', 'stn16_min',
       'stn17_min', 'stn18_min', 'stn19_min', 'stn20_min', 'stn21_min',
       'stn22_min', 'stn23_min', 'stn24_min', 'stn25_min', 'stn26_min',
       'stn27_min', 'stn28_min', 'stn29_min', 'stn30_min', 'stn31_min',
       'stn32_min', 'stn33_min', 'stn34_min', 'stn35_min', 'stn36_min'

In [29]:
data_without_unneeded_cols = data.drop(columns=['IncidentForeignKey', 
                              'Radio_Name',
                              'Longitude_At_Assign_Time',
                              'Latitude_At_Assign_Time',
                              'Time_Assigned',
                              'Time_Enroute',
                              'Time_ArrivedAtScene',
                              'Time_Depart_Scene',
                              'Time_Arrive_Destination',
                              'Time_Available',
                              'Time_Call_Cleared',
                              'Call_Disposition',
                              'Longitude_Of_Emergency',
                              'Latitude_Of_Emergency',
                              'transport_time',
                              'grid_time',
                              'Time_Arrived',
                              'timedelta',
                              'Unnamed: 0'])

data_without_unneeded_cols.to_csv("calls_w_StnHospTimes.csv")

## Remove Weekends from Calls

In [30]:
data = data[data.dow != "Sat"]
data = data[data.dow != "Sun"]

## Add hour/month/year fields

In [31]:
data["hour"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").hour, axis=1)
data["month"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").month, axis=1)
data["year"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").year, axis=1)
data["day"] = data.apply(lambda x: datetime.strptime(x["Time_Assigned"], "%Y-%m-%d %H:%M:%S.%f").day, axis=1)
data


Unnamed: 0.1,Unnamed: 0,IncidentForeignKey,Radio_Name,Longitude_At_Assign_Time,Latitude_At_Assign_Time,Time_Assigned,Time_Enroute,Time_ArrivedAtScene,Time_Depart_Scene,Time_Arrive_Destination,...,hosp9_min,hosp10_min,hosp11_min,hosp12_min,hour,month,year,day,minute,second
51208,66520,23391588,M02,-97.774677,30.227350,2019-01-01 00:07:26.000,2019-01-01 00:07:37.947,2019-01-01 00:10:01.923,,,...,943.36,593.20,1087.50,865.53,0,1,2019,1,7,26
43271,56533,23391562,M33,-97.708630,30.299894,2019-01-01 00:05:58.000,2019-01-01 00:06:18.037,2019-01-01 00:11:37.950,,,...,645.42,293.91,1560.42,368.89,0,1,2019,1,5,58
165765,216073,23391302,DMO04,-97.697770,30.337490,2019-01-01 00:12:27.563,2019-01-01 00:12:27.630,2019-01-01 00:15:34.313,,,...,733.77,382.26,1641.00,507.13,0,1,2019,1,12,27
141575,185388,23391695,M01,-97.767595,30.229402,2019-01-01 00:14:06.000,2019-01-01 00:15:33.120,2019-01-01 00:19:32.527,,,...,775.65,638.51,1044.30,910.84,0,1,2019,1,14,6
6459,9646,23391664,M03,-97.740107,30.268286,2019-01-01 00:15:50.000,2019-01-01 00:15:57.387,2019-01-01 00:20:02.367,,,...,594.33,201.90,1448.84,510.49,0,1,2019,1,15,50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121625,160313,27627818,M04,-97.684206,30.277996,2020-08-24 09:02:45.000,2020-08-24 09:04:08.563,2020-08-24 09:14:07.160,,,...,1115.92,804.68,1260.06,1077.01,9,8,2020,24,2,45
207974,276637,27627793,CHP11,-97.734581,30.275668,2020-08-24 08:58:32.373,2020-08-24 08:58:32.417,2020-08-24 09:23:21.343,,,...,713.46,733.57,857.60,876.43,8,8,2020,24,58,32
165764,216072,27627946,M15,-97.793500,30.174652,2020-08-24 09:22:27.000,2020-08-24 09:23:12.147,2020-08-24 09:27:30.790,,,...,1408.50,1119.30,1609.06,1391.63,9,8,2020,24,22,27
24424,32064,27627921,M35,-97.613164,30.347335,2020-08-24 09:19:02.000,2020-08-24 09:19:47.220,2020-08-24 09:28:30.100,,,...,1183.83,879.64,1913.46,932.11,9,8,2020,24,19,2


## Split 80/20

In [32]:
# Split 80/20

first_80_percent = (data.head(math.floor(len(data) * 4 / 5))).copy(deep=True)
last_20_percent = (data.tail(math.ceil(len(data) / 5))).copy(deep=True)

len(first_80_percent) + len(last_20_percent)

150979

## Test call stuff
### remove unneccesary test call columns

In [33]:
last_20_percent.drop(columns=['IncidentForeignKey', 
                              'Radio_Name',
                              'Longitude_At_Assign_Time',
                              'Latitude_At_Assign_Time',
                              'Time_Assigned',
                              'Time_Enroute',
                              'Time_ArrivedAtScene',
                              'Time_Depart_Scene',
                              'Time_Arrive_Destination',
                              'Time_Available',
                              'Time_Call_Cleared',
                              'Call_Disposition',
                              'transport_time',
                              'grid_time',
                              'Time_Arrived',
                              'timedelta',
                              'arrival_seconds',
                              'day',
                              'Unnamed: 0'])

 
#'Longitude_Of_Emergency', 'Latitude_Of_Emergency',

Unnamed: 0,Longitude_Of_Emergency,Latitude_Of_Emergency,interarrival_seconds,neighborhood,dow,stn1_min,stn2_min,stn3_min,stn4_min,stn5_min,...,hosp8_min,hosp9_min,hosp10_min,hosp11_min,hosp12_min,hour,month,year,minute,second
93941,-97.711,30.260,38,1012,Wed,142.25,1437.24,568.46,784.89,480.71,...,466.25,856.99,472.13,1508.41,744.46,20,4,2020,25,53
96996,-97.695,30.115,537,220,Wed,1246.60,1331.43,1204.43,1258.80,1380.22,...,1360.06,1630.49,1319.24,1774.62,1591.57,20,4,2020,25,50
140031,-97.654,30.379,98,1687,Wed,1101.43,2032.91,1181.98,723.57,1103.53,...,963.75,1197.39,845.88,2003.56,988.27,20,4,2020,35,32
122890,-98.059,30.350,18,1536,Wed,2664.60,2456.50,2459.60,3013.84,2450.99,...,2665.14,2467.13,2700.84,1788.26,2630.09,20,4,2020,37,22
181726,-97.716,30.362,475,1621,Wed,925.46,1678.98,1006.02,573.26,927.56,...,787.79,808.14,669.92,1436.58,716.16,20,4,2020,42,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121625,-97.702,30.228,1142,829,Mon,533.50,1283.71,630.99,1016.74,865.66,...,845.50,1115.92,804.68,1260.06,1077.01,9,8,2020,2,45
207974,-97.796,30.232,554,884,Mon,697.32,977.69,452.53,1209.32,595.39,...,774.39,713.46,733.57,857.60,876.43,8,8,2020,58,32
165764,-97.816,30.173,249,516,Mon,1083.06,617.89,1004.49,1595.05,1180.28,...,1160.12,1408.50,1119.30,1609.06,1391.63,9,8,2020,22,27
24424,-97.658,30.311,60,1321,Mon,969.73,2056.16,1215.73,301.51,1137.28,...,985.93,1183.83,879.64,1913.46,932.11,9,8,2020,19,2


### Save last 20 percent as our austin_test_calls.csv

In [34]:
test = pd.DataFrame()

In [42]:
test["interarrival_seconds"] = last_20_percent["interarrival_seconds"]
test["neighborhood"] = last_20_percent["neighborhood"]
test["dow"] = last_20_percent["dow"]
test["Longitude_Of_Emergency"] = last_20_percent["Longitude_Of_Emergency"]
test["Latitude_Of_Emergency"] = last_20_percent["Latitude_Of_Emergency"]

for i in range(1, 45):
    test["stn{}_min".format(i)] = last_20_percent["stn{}_min".format(i)]
    
for i in range(1, 13):
    test["hosp{}_min".format(i)] = last_20_percent["hosp{}_min".format(i)]

In [43]:
test

Unnamed: 0,interarrival_seconds,neighborhood,dow,stn1_min,stn2_min,stn3_min,stn4_min,stn5_min,stn6_min,stn7_min,...,hosp5_min,hosp6_min,hosp7_min,hosp8_min,hosp9_min,hosp10_min,hosp11_min,hosp12_min,Longitude_Of_Emergency,Latitude_Of_Emergency
93941,38,1012,Wed,142.25,1437.24,568.46,784.89,480.71,769.69,2627.85,...,2201.33,1469.84,519.56,466.25,856.99,472.13,1508.41,744.46,-97.711,30.260
96996,537,220,Wed,1246.60,1331.43,1204.43,1258.80,1380.22,1548.16,2918.94,...,2459.24,1727.74,1465.25,1360.06,1630.49,1319.24,1774.62,1591.57,-97.695,30.115
140031,98,1687,Wed,1101.43,2032.91,1181.98,723.57,1103.53,848.98,2524.48,...,2707.44,2065.51,904.72,963.75,1197.39,845.88,2003.56,988.27,-97.654,30.379
122890,18,1536,Wed,2664.60,2456.50,2459.60,3013.84,2450.99,2770.78,1485.64,...,1001.02,1913.20,2852.82,2665.14,2467.13,2700.84,1788.26,2630.09,-98.059,30.350
181726,475,1621,Wed,925.46,1678.98,1006.02,573.26,927.56,574.34,1957.49,...,2140.45,1642.80,728.75,787.79,808.14,669.92,1436.58,716.16,-97.716,30.362
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121625,1142,829,Mon,533.50,1283.71,630.99,1016.74,865.66,1102.24,2404.38,...,1944.67,1213.18,956.66,845.50,1115.92,804.68,1260.06,1077.01,-97.702,30.228
207974,554,884,Mon,697.32,977.69,452.53,1209.32,595.39,1017.11,2021.19,...,1561.48,874.45,885.55,774.39,713.46,733.57,857.60,876.43,-97.796,30.232
165764,249,516,Mon,1083.06,617.89,1004.49,1595.05,1180.28,1416.86,2537.40,...,2077.69,1049.18,1271.28,1160.12,1408.50,1119.30,1609.06,1391.63,-97.816,30.173
24424,60,1321,Mon,969.73,2056.16,1215.73,301.51,1137.28,711.40,2434.37,...,2617.34,1985.62,740.41,985.93,1183.83,879.64,1913.46,932.11,-97.658,30.311


In [44]:
test.to_csv("../Output_Data/austin_data_3200/austin_test_calls.csv", index=False)

## TODO: Remake weekday calls CSV

## Sudeep's Code from WeekdayCalls

## Now let's try to see how our grid information compares to this

In [48]:
data = first_80_percent.copy(deep=True)

In [49]:
data["S"] = 1

In [50]:
d = data.groupby(["year", "month", "day", "hour", "neighborhood"])["S"].sum()

In [51]:
d.keys()

MultiIndex([(2019, 1,  1,  0,  335),
            (2019, 1,  1,  0,  529),
            (2019, 1,  1,  0,  576),
            (2019, 1,  1,  0,  765),
            (2019, 1,  1,  0,  876),
            (2019, 1,  1,  0,  947),
            (2019, 1,  1,  0, 1009),
            (2019, 1,  1,  0, 1010),
            (2019, 1,  1,  0, 1070),
            (2019, 1,  1,  0, 1071),
            ...
            (2020, 4, 22, 20,  822),
            (2020, 4, 22, 20,  879),
            (2020, 4, 22, 20,  883),
            (2020, 4, 22, 20,  889),
            (2020, 4, 22, 20, 1072),
            (2020, 4, 22, 20, 1316),
            (2020, 4, 22, 20, 1378),
            (2020, 4, 22, 20, 1437),
            (2020, 4, 22, 20, 1868),
            (2020, 4, 22, 20, 1990)],
           names=['year', 'month', 'day', 'hour', 'neighborhood'], length=106725)

In [52]:
p = pd.DataFrame(columns=["year", "month", "day", "hour"]+[i+1 for i in range(g.nrows*g.ncols + 1)])

In [53]:
p

Unnamed: 0,year,month,day,hour,1,2,3,4,5,6,...,3225,3226,3227,3228,3229,3230,3231,3232,3233,3234


In [54]:
year = d.keys()[0][0]
month = d.keys()[0][1]
day = d.keys()[0][2]
hour = d.keys()[0][3]
curr = datetime(year=year, month=month, day=day, hour=hour)
t = timedelta(hours=1)
years = []
months = []
days = []
hours = []
while not (curr.year == d.keys()[-1][0] and curr.month == d.keys()[-1][1] and curr.day == d.keys()[-1][2]):
    if curr.weekday() < 5:
        years.append(curr.year)
        months.append(curr.month)
        days.append(curr.day)
        hours.append(curr.hour)
    curr += t
    
    

In [None]:
p["year"] = years
p["month"] = months
p["day"] = days
p["hour"] = hours

In [None]:
for i in range(1, g.nrows*g.ncols+1):
    p[i] = 0

In [None]:
#for i in range(3201, ):
    #p[i] = 0

In [None]:
#p.to_csv("intermediate.csv")

In [None]:
p

In [None]:
for k in d.keys():
    p.loc[(p["year"] == k[0]) & (p["month"] == k[1]) & (p["day"] == k[2]) & (p["hour"] == k[3]), k[4]] = d[k]

In [None]:
for col in p.columns[228:]:
    p = p.drop(col, axis=1)

In [None]:
p

In [57]:
p.to_csv("../Output_Data/austin_data_3200/WeekdayCalls.csv", index=False)