In [1]:
import io
import pandas as pd
import requests
import numpy as np
import h3
from os import listdir

# Load the Virginia Beach OHCA incident table, preferring a local copy of
# VBOHCAR.xlsx in the current directory and downloading it from GitHub otherwise.
if 'VBOHCAR.xlsx' in listdir():
    excel_source = 'VBOHCAR.xlsx'
else:
    # fetch the raw workbook bytes from the GitHub repository
    url = 'https://github.com/INFORMSJoC/2020.1022/blob/master/results/VBOHCAR.xlsx?raw=true'
    # a timeout keeps the cell from hanging forever on a stalled connection
    file = requests.get(url, timeout=60)
    # fail loudly on an HTTP error instead of handing an error page to read_excel
    file.raise_for_status()
    file_bytes = io.BytesIO(file.content)
    excel_source = file_bytes
# sheet_name is zero-based, so 3 selects the fourth sheet of the workbook
vb_ohca_in = pd.read_excel(excel_source, sheet_name=3)
vb_ohca_in.head()

Unnamed: 0,ID_OHCA,ReceivedTime,MinimumResponseTime,Latitude,Longitude,Incident_Location,X_OHCA,Y_OHCA,Z_OHCA
0,1,2017-01-01 00:49:54,5.666667,36.862471,-76.024169,1000 LINDSLEY DR_x000D_,1232.449963,-4951.984772,3826.080166
1,2,2017-01-01 06:35:29,20.75,36.766897,-76.042337,2600 SPRINGHAVEN DR_x000D_,1232.417401,-4958.562301,3817.562796
2,3,2017-01-01 06:56:25,9.65,36.766897,-76.042337,2600 SPRINGHAVEN DR_x000D_,1232.417401,-4958.562301,3817.562796
3,4,2017-01-01 17:05:56,11.083333,36.90588,-76.118769,4200 SHORE DR_x000D_,1223.576354,-4951.197334,3829.945177
4,5,2017-01-01 19:53:20,0.0,36.62085,-76.09009,5200 BLACKWATER RD_x000D_,1230.619605,-4969.017372,3804.526862


In [2]:
# Tally the Virginia Beach OHCA incidents per H3 cell (resolution argument 9).
hexid_ohca_cnt = {}
for lat, lng in zip(vb_ohca_in['Latitude'], vb_ohca_in['Longitude']):
    # map the incident coordinates to the id of the hexagon that contains them
    cell_id = h3.latlng_to_cell(np.float64(lat), np.float64(lng), 9)
    # start unseen cells at zero, then count this incident
    hexid_ohca_cnt[cell_id] = hexid_ohca_cnt.get(cell_id, 0) + 1

# one row per hexagon id (used as the index) with a single 'OHCA' count column
main_ohca_df = pd.DataFrame.from_dict(hexid_ohca_cnt, orient='index', columns=['OHCA'])
# total number of incidents that were counted
print(main_ohca_df['OHCA'].sum())
main_ohca_df.head()

2706


Unnamed: 0,OHCA
892af636e43ffff,3
892af0c96abffff,2
892af63720bffff,4
892af051523ffff,2
892af6266bbffff,2


In [3]:
# lat	lng	desc	zip	title	timeStamp	twp	addr	e
# 0	40.297876	-75.581294	REINDEER CT & DEAD END; NEW HANOVER; Station ...	19525.0	EMS: BACK PAINS/INJURY	2015-12-10 17:10:52	NEW HANOVER	REINDEER CT & DEAD END	1
# 1	40.258061	-75.264680	BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...	19446.0	EMS: DIABETIC EMERGENCY	2015-12-10 17:29:21	HATFIELD TOWNSHIP	BRIAR PATH & 


mtgmry_ohca_df = pd.read_csv('montgomery/mtgmry_unfiltered.csv')
# keep only calls whose 'title' contains 'CARDIAC ARREST'; na=False treats a
# missing title as a non-match instead of producing a NaN mask that cannot be
# used for boolean indexing
mtgmry_ohca_df = mtgmry_ohca_df[mtgmry_ohca_df['title'].str.contains('CARDIAC ARREST', na=False)]
# keep only rows whose timeStamp contains 2017, 2018 or 2019
mtgmry_ohca_df = mtgmry_ohca_df[mtgmry_ohca_df['timeStamp'].str.contains('2017|2018|2019', na=False)]
# create a dictionary to hold the counts of OHCA in each hex_id
hexid_ohca_cnt = {}
# iterate through the rows of the dataframe
for i, row in mtgmry_ohca_df.iterrows():
    # get the hex_id for each row
    hex_id = h3.latlng_to_cell(np.float64(row['lat']), np.float64(row['lng']), 9)
    # start unseen hex_ids at zero, then count this incident
    hexid_ohca_cnt[hex_id] = hexid_ohca_cnt.get(hex_id, 0) + 1

# one row per hex_id (used as the index) with a single 'OHCA' count column
mtgmry_ohca_df = pd.DataFrame.from_dict(hexid_ohca_cnt, orient='index', columns=['OHCA'])
print(sum(mtgmry_ohca_df['OHCA']))
# append the Montgomery counts to the main dataframe; rows for these hex_ids
# left over from an earlier run of this cell are dropped first, so re-running
# the cell does not create duplicate index entries
main_ohca_df = pd.concat(
    [main_ohca_df.drop(index=mtgmry_ohca_df.index, errors='ignore'), mtgmry_ohca_df],
    ignore_index=False,
    axis=0,
)
main_ohca_df.head()

4773


Unnamed: 0,OHCA
892af636e43ffff,3
892af0c96abffff,2
892af63720bffff,4
892af051523ffff,2
892af6266bbffff,2


In [4]:
# Load the Virginia Beach OSM feature table.
# The column pandas read in as 'Unnamed: 0' is relabelled 'hex_id' and becomes the index.
main_hexagon_df = (
    pd.read_csv('virginia_beach_osm.csv')
    .rename(columns={'Unnamed: 0': 'hex_id'})
    .set_index('hex_id')
)
main_hexagon_df.head()

Unnamed: 0_level_0,animal_boarding,atm,bank_x,bar_x,bbq,bench,bicycle_parking,bicycle_rental,bicycle_repair_station,cafe_x,...,storage_tank_neighbour_count,studio_neighbour_count,terrace_neighbour_count,theatre_y_neighbour_count,toilets_y_neighbour_count,townhall_neighbour_count,university_y_neighbour_count,veterinary_y_neighbour_count,warehouse_neighbour_count,yes_neighbour_count
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
892af0500a7ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
892af05019bffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
892af0501b3ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
892af0504c7ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
892af050527ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Load the Montgomery OSM feature table the same way: the 'Unnamed: 0' column
# is relabelled 'hex_id' and becomes the index.
mtgmry_hexagon_df = (
    pd.read_csv('montgomery_osm.csv')
    .rename(columns={'Unnamed: 0': 'hex_id'})
    .set_index('hex_id')
)
# stack the Montgomery rows below the rows already in main_hexagon_df; feature
# columns present in only one of the two frames come out as NaN for the other
main_hexagon_df = pd.concat([main_hexagon_df, mtgmry_hexagon_df], axis=0)
main_hexagon_df.head()

Unnamed: 0_level_0,animal_boarding,atm,bank_x,bar_x,bbq,bench,bicycle_parking,bicycle_rental,bicycle_repair_station,cafe_x,...,lighthouse_neighbour_count,no_neighbour_count,recycling_y_neighbour_count,shopping_center_neighbour_count,social_facility_y_neighbour_count,stable_neighbour_count,synagogue_neighbour_count,temple_neighbour_count,tower_neighbour_count,university_neighbour_count
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
892af0500a7ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
892af05019bffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
892af0501b3ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
892af0504c7ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
892af050527ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [6]:
# Attach the per-hexagon OHCA counts as an extra column (aligned on the index),
# then replace every NaN left by the concatenations with 0.
main_hexagon_df = pd.concat([main_hexagon_df, main_ohca_df], axis=1).fillna(0)
# persist the combined feature/target table
main_hexagon_df.to_csv('main_hexagon_df.csv')
main_hexagon_df.head()

Unnamed: 0,animal_boarding,atm,bank_x,bar_x,bbq,bench,bicycle_parking,bicycle_rental,bicycle_repair_station,cafe_x,...,no_neighbour_count,recycling_y_neighbour_count,shopping_center_neighbour_count,social_facility_y_neighbour_count,stable_neighbour_count,synagogue_neighbour_count,temple_neighbour_count,tower_neighbour_count,university_neighbour_count,OHCA
892af0500a7ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
892af05019bffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
892af0501b3ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
892af0504c7ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
892af050527ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# fixed seed so the shuffle, the train/test split and the model are reproducible
SEED = 42

# shuffle the rows of the dataframe
main_hexagon_df = main_hexagon_df.sample(frac=1, random_state=SEED)

# features are every column except the OHCA count, which is the target
X = main_hexagon_df.drop(['OHCA'], axis=1)
y = main_hexagon_df['OHCA']
# hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)
# create the model
model = XGBRegressor(max_depth=1, n_jobs=-1, random_state=SEED)
# fit the model
model.fit(X_train, y_train)
# make predictions
y_pred = model.predict(X_test)
# root mean squared error on the held-out rows; taking np.sqrt of the MSE
# avoids the `squared=False` keyword, which newer scikit-learn releases reject
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE: %.2f' % rmse)

MSE: 2.40


In [8]:
import h2o
from h2o.automl import H2OAutoML

# Start and connect to a local H2O cluster
h2o.init()
# copy the combined pandas table into an H2OFrame
h2o_df = h2o.H2OFrame(main_hexagon_df)
y = "OHCA"   # Target column
# Features: every column except the target
x = [col for col in main_hexagon_df.columns if col != y]
# 80/20 split with a fixed seed
train, valid = h2o_df.split_frame(ratios=[0.8], seed=1234)
aml = H2OAutoML(max_runtime_secs=60, seed=1)  # You can adjust the parameters
# NOTE(review): `valid` is passed to training and also scored below, so it is
# not an untouched hold-out set — confirm this is intended
aml.train(x=x, y=y, training_frame=train, validation_frame=valid)
lb = aml.leaderboard
print(lb)
best_model = aml.leader
predictions = best_model.predict(valid)
# NOTE(review): the directory is named "h2o_model_mojo" but h2o.save_model is
# called here, not a MOJO export — confirm which format is wanted
model_path = h2o.save_model(model=best_model, path="h2o_model_mojo", force=True)

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "17.0.9" 2023-10-17; OpenJDK Runtime Environment (build 17.0.9+9-Debian-1deb12u1); OpenJDK 64-Bit Server VM (build 17.0.9+9-Debian-1deb12u1, mixed mode, sharing)
  Starting server from /home/radekaadek/myaed/.venv/lib/python3.11/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmp6hhu3ihy
  JVM stdout: /tmp/tmp6hhu3ihy/h2o_radekaadek_started_from_python.out
  JVM stderr: /tmp/tmp6hhu3ihy/h2o_radekaadek_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,01 secs
H2O_cluster_timezone:,Europe/Warsaw
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.3
H2O_cluster_version_age:,17 days
H2O_cluster_name:,H2O_from_python_radekaadek_psq56a
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,1.848 Gb
H2O_cluster_total_cores:,16
H2O_cluster_allowed_cores:,16


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |
18:48:03.787: _train param, Dropping bad and constant columns: [pub_x_neighbour_count, shelter_y_neighbour_count, community_centre_x_neighbour_count, vending_machine_neighbour_count, residential_neighbour_count, ship, shower_y_neighbour_count, dojo_x_neighbour_count, waste_disposal_neighbour_count, theatre, boathouse, nightclub_y_neighbour_count, dojo_neighbour_count, carport_neighbour_count, kindergarten_y_neighbour_count, compressed_air, dormitory_neighbour_count, post_box_neighbour_count, marketplace_neighbour_count, prison, loading_dock_neighbour_count, gambling_x_neighbour_count, cafe_y, arts_centre, bunker, clinic_x_neighbour_count, animal_shelter_neighbour_count, synagogue, police_x_neighbour_count, office_neighbour_count, chapel, warehouse, childcare_x_neighbour_count, shower_y, theatre_y_neighbour_count, commercial;yes, pharmacy_x_neighbour_count, shed_neighbour_co

In [9]:

# Load the Lublin OSM feature table.
# The column pandas read in as 'Unnamed: 0' is relabelled 'hex_id' and becomes the index.
poland_df = (
    pd.read_csv('lublin_osm.csv')
    .rename(columns={'Unnamed: 0': 'hex_id'})
    .set_index('hex_id')
)
poland_df.head()

Unnamed: 0_level_0,animal_breeding,atm,bank_x,bar_x,bbq,bench,bicycle_parking_x,bicycle_rental,bicycle_repair_station,boat_rental_x,...,townhall_neighbour_count,train_station_neighbour_count,transportation_neighbour_count,university_neighbour_count,vehicle_inspection_y_neighbour_count,veterinary_y_neighbour_count,warehouse_neighbour_count,waste_disposal_y_neighbour_count,waste_transfer_station_neighbour_count,yes_neighbour_count
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
891e2d08003ffff,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
891e2d08007ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
891e2d0800bffff,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
891e2d0800fffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
891e2d0801bffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# shutdown the h2o cluster
