In [96]:
import aquire_data
import h3
import pandas as pd
import numpy as np
import overpass
from tpot import TPOTRegressor

# Load data
# import data from VBOHCAR.xlsx to a pandas dataframe
import io
import pandas as pd
import requests
from os import listdir

# Load the OHCA incident table from VBOHCAR.xlsx: use the copy in the current
# working directory when it exists, otherwise download it from GitHub.
# NOTE: sheet_name=3 is a zero-based position, i.e. the FOURTH sheet of the workbook.
if 'VBOHCAR.xlsx' in listdir():
    df = pd.read_excel('VBOHCAR.xlsx', sheet_name=3)
else:
    url = 'https://github.com/INFORMSJoC/2020.1022/blob/master/results/VBOHCAR.xlsx?raw=true'
    # A timeout keeps the cell from hanging forever on a stalled connection.
    file = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of handing an error page to read_excel.
    file.raise_for_status()
    file_bytes = io.BytesIO(file.content)
    df = pd.read_excel(file_bytes, sheet_name=3)
df.head()

Unnamed: 0,ID_OHCA,ReceivedTime,MinimumResponseTime,Latitude,Longitude,Incident_Location,X_OHCA,Y_OHCA,Z_OHCA
0,1,2017-01-01 00:49:54,5.666667,36.862471,-76.024169,1000 LINDSLEY DR_x000D_,1232.449963,-4951.984772,3826.080166
1,2,2017-01-01 06:35:29,20.75,36.766897,-76.042337,2600 SPRINGHAVEN DR_x000D_,1232.417401,-4958.562301,3817.562796
2,3,2017-01-01 06:56:25,9.65,36.766897,-76.042337,2600 SPRINGHAVEN DR_x000D_,1232.417401,-4958.562301,3817.562796
3,4,2017-01-01 17:05:56,11.083333,36.90588,-76.118769,4200 SHORE DR_x000D_,1223.576354,-4951.197334,3829.945177
4,5,2017-01-01 19:53:20,0.0,36.62085,-76.09009,5200 BLACKWATER RD_x000D_,1230.619605,-4969.017372,3804.526862


In [97]:
# Tally the OHCA incidents per H3 cell (resolution 9), keyed by cell id.
hexid_ohca_cnt = {}
for lat, lng in zip(df['Latitude'], df['Longitude']):
    hex_id = h3.latlng_to_cell(np.float64(lat), np.float64(lng), 9)
    # .get() supplies the starting count of 0 for a cell seen for the first time
    hexid_ohca_cnt[hex_id] = hexid_ohca_cnt.get(hex_id, 0) + 1

# One row per cell: the cell id is the index, the incident count is in 'OHCA'.
ohca_df = pd.DataFrame.from_dict(hexid_ohca_cnt, orient='index', columns=['OHCA'])
# Sanity check: the counts must add up to the number of incident rows.
print(sum(ohca_df['OHCA']))
ohca_df.head()

2706


Unnamed: 0,OHCA
892af636e43ffff,3
892af0c96abffff,2
892af63720bffff,4
892af051523ffff,2
892af6266bbffff,2


In [98]:
# Load the Virginia Beach per-hexagon feature table.
# A cached 'virginia_beach.csv' in the working directory is preferred; without
# it the data is fetched through aquire_data using an Overpass API client.
if 'virginia_beach.csv' not in listdir():
    api = overpass.API()
    vb_df = aquire_data.get_all_data('Virginia Beach', api)
else:
    vb_df = (
        pd.read_csv('virginia_beach.csv')
        # the CSV stores the hexagon ids in an unnamed first column
        .rename(columns={'Unnamed: 0': 'hex_id'})
        # use the hexagon id as the row index
        .set_index('hex_id')
    )
vb_df.head()

Unnamed: 0_level_0,accountant,administrative,amusement_arcade,animal_boarding,atm,bank,bar,bbq,bench,bicycle_parking,...,ship_neighbour_count,sports_centre_y_neighbour_count,stadium_neighbour_count,static_caravan_neighbour_count,storage_tank_neighbour_count,terrace_neighbour_count,toilets_y_neighbour_count,university_y_neighbour_count,warehouse_neighbour_count,yes_y_neighbour_count
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89f0430825bffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
89f04309047ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
89f04309053ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
89f04309057ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
89f0430907bffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [99]:
# Attach the per-hexagon OHCA counts to the Virginia Beach feature table.

# The join below keeps only hexagons that exist in vb_df, so incidents located in
# any other hexagon are dropped; count them so the loss is visible.
unmatched_ohca = int(ohca_df.loc[ohca_df.index.difference(vb_df.index), 'OHCA'].sum())

# Drop an 'OHCA' column left over from a previous run of this cell: joining twice
# would otherwise raise because the column names overlap.
vb_df = vb_df.drop(columns=['OHCA'], errors='ignore').join(ohca_df)
# Hexagons without any incident (and any other missing value) become 0.
vb_df = vb_df.fillna(0)

print(f"Amount of rows: {len(vb_df)}")
print(f"Amount of OHCA: {sum(vb_df['OHCA'])}")
print(f"OHCA outside the Virginia Beach hexagons (dropped): {unmatched_ohca}")
vb_df.head()

Ammount of rows: 5623
Amount of OHCA: 2699.0


Unnamed: 0_level_0,accountant,administrative,amusement_arcade,animal_boarding,atm,bank,bar,bbq,bench,bicycle_parking,...,sports_centre_y_neighbour_count,stadium_neighbour_count,static_caravan_neighbour_count,storage_tank_neighbour_count,terrace_neighbour_count,toilets_y_neighbour_count,university_y_neighbour_count,warehouse_neighbour_count,yes_y_neighbour_count,OHCA
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89f0430825bffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
89f04309047ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
89f04309053ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
89f04309057ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
89f0430907bffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0


In [105]:
# Load the per-hexagon feature table for the city the model will be applied to.
# NOTE(review): the frame is called warsaw_df, but the data loaded here is
# Lublin ('lublin.csv' / 'Lublin') - confirm which city is intended.
# A cached 'lublin.csv' in the working directory is preferred; without it the
# data is fetched through aquire_data using an Overpass API client.
if 'lublin.csv' not in listdir():
    api = overpass.API()
    warsaw_df = aquire_data.get_all_data('Lublin', api)
else:
    warsaw_df = (
        pd.read_csv('lublin.csv')
        # the CSV stores the hexagon ids in an unnamed first column
        .rename(columns={'Unnamed: 0': 'hex_id'})
        # use the hexagon id as the row index
        .set_index('hex_id')
    )
warsaw_df.head()

Unnamed: 0_level_0,accountant,administrative,adult_gaming_centre,advertising_agency,animal_breeding,architect,association,atm,bank,bar,...,synagogue_neighbour_count,temple_neighbour_count,terrace_neighbour_count,toilets_y_neighbour_count,train_station_neighbour_count,transportation_neighbour_count,university_neighbour_count,warehouse_neighbour_count,wayside_shrine_neighbour_count,yes_y_neighbour_count
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89534bc0003ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
89534bc0007ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
89534bc000bffff,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,...,0,0,0,0,0,0,0,0,0,0
89534bc000fffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
89534bc0013ffff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [106]:
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split

# The per-hexagon OHCA count is the regression target; all other columns are features.
y = vb_df['OHCA']
X = vb_df.drop(columns=['OHCA'])

# Configure the TPOT pipeline search (fixed random_state for repeatable runs).
tpot = TPOTRegressor(
    generations=5,
    population_size=20,
    verbosity=2,
    random_state=42,
)
# Run the search on the full Virginia Beach table.
tpot.fit(X, y)
# Write the best pipeline found as a standalone Python script.
tpot.export('tpot_vb_pipeline.py')

Optimization Progress:  12%|█▏        | 14/120 [00:48<05:31,  3.12s/pipeline]

In [None]:
# Train on Virginia Beach and predict the OHCA count for every hexagon of warsaw_df.
#
# The two feature tables do not have the same columns, so the model may only use
# the feature columns present in BOTH. 'OHCA' is the target and never a feature
# (it is also excluded so that re-running this cell after the prediction column
# has been added keeps working).
# Neither frame is cut down to the shared columns: later cells still need
# columns such as 'hospital' from warsaw_df.
shared_features = [
    col for col in vb_df.columns
    if col != 'OHCA' and col in warsaw_df.columns
]
if not shared_features:
    raise ValueError("vb_df and warsaw_df have no feature columns in common")

X = vb_df[shared_features]
y = vb_df['OHCA']

# create a TPOTRegressor
tpot = TPOTRegressor(generations=5, population_size=20, verbosity=2, random_state=42)
# fit the TPOTRegressor on the shared features only
tpot.fit(X, y)

# Predict with the same columns, in the same order, that the model was fitted on.
warsaw_df['OHCA'] = tpot.predict(warsaw_df[shared_features])
warsaw_df.head()

NameError: name 'warsaw_df' is not defined

In [None]:
# Add an 'aed_count' column: the number of defibrillators (AEDs) in each hexagon.
# Source: https://aed.openstreetmap.org.pl/aed_poland.geojson
import requests
from collections import Counter

aed_url = 'https://aed.openstreetmap.org.pl/aed_poland.geojson'
aed_file = requests.get(aed_url, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
aed_file.raise_for_status()
aed_json = aed_file.json()

# Count the AEDs per H3 cell (resolution 9, the same resolution as the OHCA tally).
aed_per_hexagon = Counter()
for aed in aed_json['features']:
    geometry = aed.get('geometry') or {}
    # Only point features carry a single coordinate pair.
    if geometry.get('type') != 'Point':
        continue
    # GeoJSON order is (longitude, latitude[, altitude]); h3 expects (lat, lng).
    # Dedicated names are used so the model target `y` is not overwritten.
    lon, lat = geometry['coordinates'][:2]
    # latlng_to_cell is the h3 v4 name, matching the call used for the OHCA tally.
    aed_per_hexagon[h3.latlng_to_cell(lat, lon, 9)] += 1

# Hexagons without any AED get 0; assigning the whole column keeps the cell re-runnable.
warsaw_df['aed_count'] = [aed_per_hexagon.get(hexagon, 0) for hexagon in warsaw_df.index]

In [None]:
# Draw every hexagon of warsaw_df on a map, coloured by predicted OHCA and coverage:
#   blue  - the 10 uncovered hexagons (no AED, no hospital) with the highest prediction
#   green - hexagons that contain an AED or a hospital
#   red   - all remaining hexagons, opacity proportional to the predicted OHCA count
import folium

# The predictions are stored in the 'OHCA' column of warsaw_df.
max_ohca = warsaw_df['OHCA'].max()

# get top 10 hexagons with the most predicted ohca that have no defibrillators or hospitals
uncovered = (warsaw_df['aed_count'] == 0) & (warsaw_df['hospital'] == 0)
top_10_hexagons = warsaw_df[uncovered].sort_values(by='OHCA', ascending=False).head(10)

# Centre the map on the data itself, so it is correct whichever city was loaded;
# the original fixed coordinates are only a fallback for an empty table.
centres = [h3.cell_to_latlng(hexagon) for hexagon in warsaw_df.index]
if centres:
    map_centre = [
        sum(lat for lat, _ in centres) / len(centres),
        sum(lng for _, lng in centres) / len(centres),
    ]
else:
    map_centre = [52.2297, 21.0122]
m = folium.Map(location=map_centre, zoom_start=11)

for hexagon in warsaw_df.index:
    if hexagon in top_10_hexagons.index:
        color = 'blue'
        opacity = 0.7
    elif warsaw_df.loc[hexagon, 'aed_count'] > 0 or warsaw_df.loc[hexagon, 'hospital'] > 0:
        # covered by a defibrillator or a hospital
        color = 'green'
        opacity = 0.7
    else:
        ohca = warsaw_df.loc[hexagon, 'OHCA']
        # Regression output may be <= 0: avoid dividing by a non-positive maximum
        # and keep the opacity inside the valid [0, 1] range.
        opacity = float(min(max(ohca / max_ohca, 0.0), 1.0)) if max_ohca > 0 else 0.0
        color = 'red'
    # cell_to_boundary (h3 v4 name) yields the (lat, lng) corners of the hexagon
    locs = [(lat, lng) for lat, lng in h3.cell_to_boundary(hexagon)]
    # create a polygon from the hexagon
    folium.Polygon(locations=locs, color=color, fill_color=color, fill_opacity=opacity).add_to(m)
m.save('warsaw_map.html')
m