In [406]:
import os
from collections import OrderedDict
import numpy as np
from joblib import dump, load
import pandas as pd
import pickle
import IPython
import ujson

In [225]:
# and this will help us with some colors for a choropleth map
from branca.colormap import linear

In [430]:
# https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive
import requests

def download_file_from_google_drive(id, destination):
    """
    Download a file shared on Google Drive and write it to ``destination``.

    A first request is sent for the file id. If that response carries a
    cookie whose name starts with ``download_warning``, the request is
    repeated with the cookie value passed as the ``confirm`` parameter.
    The final response body is streamed to disk in chunks.

    Parameters
    ----------
    id : str
        Google Drive file id. The name shadows the ``id`` builtin but is
        kept because callers pass it by keyword.
    destination : str
        Local path the downloaded bytes are written to.

    Raises
    ------
    requests.HTTPError
        If the final response has a 4xx/5xx status, so an error page is
        never saved in place of the requested file.
    """
    URL = "https://docs.google.com/uc?export=download"
    CHUNK_SIZE = 32768

    def get_confirm_token(response):
        # The confirmation token is the value of the download_warning* cookie.
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value

        return None

    def save_response_content(response, destination):
        with open(destination, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk: # filter out keep-alive new chunks
                    f.write(chunk)

    # The context manager closes the session (and its pooled connections)
    # even when a request or the file write fails.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # Release the unread first response before asking again.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly instead of writing an HTTP error body to disk.
        response.raise_for_status()
        save_response_content(response, destination)



In [5]:
# pickle has a bug that prevents it from saving very large (multi-GB) files, so joblib is used instead

def save_obj(obj, name, name_dir='data' ):
    """
    Save an object to ``<name_dir>/<name>.pkl`` with joblib.

    Parameters
    ----------
    obj : any object
        This can be a dictionary or ndarray.
    name : str
        The name for the object to be saved (without the ``.pkl`` suffix).
    name_dir : str, default 'data'
        Name of the directory. It is created if it does not exist.

    Returns
    -------
    No return.
        Save the pickle object to the local file system.
    """
    # exist_ok replaces the separate isdir() check and its check-then-create race.
    os.makedirs(name_dir, exist_ok=True)

    data_path = os.path.join(name_dir, name + '.pkl')

    with open(data_path, 'wb') as f:
        # Use the `dump` name that the notebook imports from joblib; the bare
        # `joblib` module name is never imported. The protocol must be passed
        # by keyword: joblib's third positional parameter is `compress`, so a
        # positional pickle.HIGHEST_PROTOCOL was read as a compression level.
        dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name, name_dir='data' ):
    """
    Load an object from ``<name_dir>/<name>.pkl`` with joblib.

    Parameters
    ----------
    name : str
        The name of the saved object (without the ``.pkl`` suffix).
    name_dir : str, default 'data'
        Name of the directory.

    Returns
    -------
    object
        Return an object such as a dictionary.

    Raises
    ------
    FileNotFoundError
        If the file does not exist.
    """
    data_path = os.path.join(name_dir, name + '.pkl')

    # SECURITY: joblib files are pickle-based, and unpickling can execute
    # arbitrary code. Only load files that come from a trusted source.
    with open(data_path, 'rb') as f:
        # Use the `load` name that the notebook imports from joblib; the bare
        # `joblib` module name is never imported.
        return load(f)

In [428]:
# Use offline plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
import plotly.graph_objs as go


def enable_plotly_in_cell():
    """
    Prepare the current output cell for offline plotly figures.

    Displays an HTML snippet containing a script tag for require.js, then
    calls ``init_notebook_mode(connected=False)``.
    """
    import IPython
    from plotly.offline import init_notebook_mode

    requirejs_snippet = IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
  ''')
    display(requirejs_snippet)
    init_notebook_mode(connected=False)

## Download Data Files
### Create a data directory named 'data'

In [431]:
# Create the local data directory; a no-op when it already exists.
os.makedirs('data', exist_ok=True)

### Download EMS dispatch data file. 
- More information can be found [here](https://data.wprdc.org/dataset/allegheny-county-911-dispatches-ems-and-fire). 
- The file we are going to download has already been cleaned up and had its missing values filled in.
- The file is hosted on Google Drive.

In [433]:
# https://drive.google.com/open?id=1OCt0jm--zCE_mTKAub_Ddo4W6BayKQ0B
# Fetch the Google Drive file with the id above and store it as data/df_ems.pkl.
download_file_from_google_drive(id='1OCt0jm--zCE_mTKAub_Ddo4W6BayKQ0B',
                                destination='data/df_ems.pkl')

### Load df_ems pickle file

In [6]:
# load_obj defaults to the 'data' directory, so this reads data/df_ems.pkl.
df_ems = load_obj('df_ems')

In [179]:
# Preview the first rows of the dispatch table.
df_ems.head()

Unnamed: 0,Call_ID_Hash,SERVICE,PRIORITY,PRIORITY_DESC,AGENCY,CALL_QUARTER,CALL_YEAR,DESCRIPTION_SHORT,CITY_CODE,CITY_NAME,GEOID
0,336ba345f5a0,EMS,E0,EMS ALS life threatening response w/ backup,E620,Q1,2015,HEMORRHAGE,PVU,PORT VUE,420035003002
1,f59d50d394fa,EMS,E0,EMS ALS life threatening response w/ backup,E380,Q1,2015,UNCONSCIOUS,PEN,PENN HILLS,420035235011
2,8d56b4f6f9fe,EMS,E0,EMS ALS life threatening response w/ backup,E620,Q1,2015,UNCONSCIOUS,MCK,MCKEESPORT,420035523003
3,e20045d2f6b4,EMS,E0,EMS ALS life threatening response w/ backup,E050,Q1,2015,"GUNSHOT, STABBING, OR OTHER WOUND",PGH,PITTSBURGH,420031306003
4,dbe1c9db46f6,EMS,E0,EMS ALS life threatening response w/ backup,E050,Q1,2015,ASSAULT,PGH,PITTSBURGH,420031609002


In [180]:
def get_geoid11(x):
    """Return the first 11 characters of ``str(x)`` converted to an int."""
    leading_digits = str(x)[:11]
    return int(leading_digits)
# Add a GEOID11 column by applying get_geoid11 to every GEOID value,
# then preview the result.
df_ems['GEOID11'] = df_ems['GEOID'].apply(get_geoid11)
df_ems.head()

Unnamed: 0,Call_ID_Hash,SERVICE,PRIORITY,PRIORITY_DESC,AGENCY,CALL_QUARTER,CALL_YEAR,DESCRIPTION_SHORT,CITY_CODE,CITY_NAME,GEOID,GEOID11
0,336ba345f5a0,EMS,E0,EMS ALS life threatening response w/ backup,E620,Q1,2015,HEMORRHAGE,PVU,PORT VUE,420035003002,42003500300
1,f59d50d394fa,EMS,E0,EMS ALS life threatening response w/ backup,E380,Q1,2015,UNCONSCIOUS,PEN,PENN HILLS,420035235011,42003523501
2,8d56b4f6f9fe,EMS,E0,EMS ALS life threatening response w/ backup,E620,Q1,2015,UNCONSCIOUS,MCK,MCKEESPORT,420035523003,42003552300
3,e20045d2f6b4,EMS,E0,EMS ALS life threatening response w/ backup,E050,Q1,2015,"GUNSHOT, STABBING, OR OTHER WOUND",PGH,PITTSBURGH,420031306003,42003130600
4,dbe1c9db46f6,EMS,E0,EMS ALS life threatening response w/ backup,E050,Q1,2015,ASSAULT,PGH,PITTSBURGH,420031609002,42003160900


## Group EMS calls per GEOID

In [212]:
# One row per GEOID11 with the number of dispatch rows in TOTAL_CALL.
call_geoid_df = (
    df_ems
    .groupby('GEOID11')['GEOID11']
    .count()
    .rename('TOTAL_CALL')
    .reset_index()
)
call_geoid_df.head()

Unnamed: 0,GEOID11,TOTAL_CALL
0,42003010300,6482
1,42003020100,19349
2,42003020300,1876
3,42003030500,2916
4,42003040200,2889


## Download and import the GeoJSON file for mapping
- This is an Allegheny County extract of the 2016 US Census Tracts available
[here](https://data.wprdc.org/dataset/allegheny-county-census-tracts-2016).

In [435]:
!wget http://openac-alcogis.opendata.arcgis.com/datasets/31a3233d728549458e68cb02cb5bc9bb_0.geojson \
    -O data/Allegheny_County_Census_Tracts_2016.geojson

--2019-03-02 07:57:37--  http://openac-alcogis.opendata.arcgis.com/datasets/31a3233d728549458e68cb02cb5bc9bb_0.geojson
Resolving openac-alcogis.opendata.arcgis.com (openac-alcogis.opendata.arcgis.com)... 54.165.78.223, 34.197.166.129
Connecting to openac-alcogis.opendata.arcgis.com (openac-alcogis.opendata.arcgis.com)|54.165.78.223|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/json]
Saving to: ‘data/Allegheny_County_Census_Tracts_2016.geojson’

data/Allegheny_Coun     [ <=>                ] 541.75K  --.-KB/s    in 0.1s    

2019-03-02 07:57:38 (4.78 MB/s) - ‘data/Allegheny_County_Census_Tracts_2016.geojson’ saved [554757]



In [438]:
# Parse the downloaded tract GeoJSON. The context manager closes the file
# handle, which a bare open(...) passed straight to ujson.load leaves open.
with open("data/Allegheny_County_Census_Tracts_2016.geojson") as geojson_file:
    allegheny_tracts = ujson.load(geojson_file)

In [407]:
def build_geoid_dict(tract_json):
    """
    Index GeoJSON features by their ``GEOID`` property.

    Parameters
    ----------
    tract_json : dict
        Parsed GeoJSON with a ``'features'`` list; every feature must have
        ``feature['properties']['GEOID']``.

    Returns
    -------
    OrderedDict
        Maps each GEOID to its feature, in order of first appearance. When
        a GEOID occurs more than once, the last feature wins.
    """
    return OrderedDict(
        (feature['properties']['GEOID'], feature)
        for feature in tract_json['features']
    )

# GEOID -> feature lookup for the tract GeoJSON loaded above.
alle_geoid_dict = build_geoid_dict(allegheny_tracts)
        
    

## Define face color

In [365]:
# Colormap built from branca's linear.viridis, scaled to span the smallest
# and largest TOTAL_CALL values.
facecolor = linear.viridis.scale(call_geoid_df['TOTAL_CALL'].min(), 
                                call_geoid_df['TOTAL_CALL'].max())

# Spot check: print the color the colormap returns for a value of 296.
print(facecolor(296))

# Bare expression so the notebook displays the colormap object itself.
facecolor

#450659


In [367]:
# Smallest and largest per-GEOID11 call totals.
min_call = call_geoid_df['TOTAL_CALL'].min()
max_call = call_geoid_df['TOTAL_CALL'].max()

In [368]:
# 'rgb' + the colormap's bytes tuple at each end of the call-count range.
colorbar_min = 'rgb{}'.format(facecolor.rgb_bytes_tuple(min_call))
colorbar_max = 'rgb{}'.format(facecolor.rgb_bytes_tuple(max_call))

In [369]:
# One 'rgb...' color string per row of call_geoid_df, in row order.
facecolor_tuple = [
    'rgb{byte}'.format(byte=facecolor.rgb_bytes_tuple(total_call))
    for total_call in call_geoid_df['TOTAL_CALL'].values
]

## Hover Text Over Map

In [386]:
# Hover label per row of call_geoid_df: tract id and its call total,
# separated by an HTML line break.
text = [
    ''.join(['GEOID11: ', str(tract_id), '<br>Total call: ', '{}'.format(n_calls)])
    for tract_id, n_calls in zip(call_geoid_df['GEOID11'].values,
                                 call_geoid_df['TOTAL_CALL'].values)
]

In [387]:
# Show the first two hover labels as a sanity check.
text[:2]

['GEOID11: 42003010300<br>Total call: 6482',
 'GEOID11: 42003020100<br>Total call: 19349']

## Import mapbox public key
- To use [Mapbox](https://www.mapbox.com) with Plotly, we have to register and get a token from Mapbox. 
- Registration is free, and you can make 50,000 web API calls per month. 
 

In [372]:
# Read the Mapbox token from the MAPBOX_ACCESS_TOKEN environment variable so
# a real key never has to be written into the notebook. When the variable is
# not set, the original placeholder string is used.
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN", "Your API Key") # load_obj('mapbox_key')

In [408]:
# Wrap each matching tract feature in its own single-feature
# FeatureCollection, following call_geoid_df row order.
sources=[]
for geoid in call_geoid_df['GEOID11']:
    # alle_geoid_dict is looked up with the string form of GEOID11.
    akey = str(geoid)
    if akey in alle_geoid_dict:
        feat = alle_geoid_dict[akey]
        sources.append({"type": "FeatureCollection", 
                        'features': [feat]})
    else:
        # NOTE: a skipped GEOID makes `sources` shorter than call_geoid_df,
        # so positions in `sources` no longer match call_geoid_df row positions.
        print("missings key ",akey)

In [424]:
# Look each fill color up by GEOID rather than by list position.
# facecolor_tuple has one entry per call_geoid_df row, while `sources` omits
# any row whose GEOID is missing from the GeoJSON. Indexing both with the same
# position would paint every tract after a missing one with the wrong color.
color_by_geoid = dict(zip(map(str, call_geoid_df['GEOID11']), facecolor_tuple))

# One filled Mapbox layer per tract, drawn below the "water" layer.
layers = [dict(sourcetype='geojson',
               source=source,
               below="water",
               type='fill',
               color=color_by_geoid[source['features'][0]['properties']['GEOID']],
               opacity=0.8
              ) for source in sources]

## Get lats and lons for the map from sources

In [421]:
def get_long_lat_from_sources(sources):
    """
    Compute the bounding-box center of each source's first feature.

    Parameters
    ----------
    sources : list of dict
        GeoJSON FeatureCollections. Only ``features[0]`` of each is read;
        its geometry must be a Polygon or a MultiPolygon.

    Returns
    -------
    (lons, lats) : tuple of list
        One longitude and one latitude per source: the midpoint between the
        minimum and maximum coordinate of the exterior ring(s).

    Notes
    -----
    For a Polygon, ``coordinates[0]`` is the exterior ring. For a
    MultiPolygon, ``coordinates[0]`` is a whole polygon (a list of rings), so
    indexing it like a ring gives wrong values or an error. MultiPolygons
    therefore use the exterior ring of every member polygon.
    """
    lons = []
    lats = []
    for source in sources:
        geometry = source['features'][0]['geometry']
        if geometry.get('type') == 'MultiPolygon':
            exterior_rings = [polygon[0] for polygon in geometry['coordinates']]
        else:
            exterior_rings = [geometry['coordinates'][0]]

        # Stack all exterior-ring points into one (n_points, >=2) array.
        points = np.concatenate([np.asarray(ring) for ring in exterior_rings])

        lon_min, lon_max = points[:, 0].min(), points[:, 0].max()
        lons.append(0.5 * (lon_min + lon_max))
        lat_min, lat_max = points[:, 1].min(), points[:, 1].max()
        lats.append(0.5 * (lat_min + lat_max))
    return lons, lats

In [422]:
# One (lon, lat) center per entry of `sources`.
alle_lons,alle_lats = get_long_lat_from_sources(sources)

In [423]:
# Scatter trace that places one size-1 marker at each tract center; it
# supplies the hover text and the color bar.
# NOTE(review): lat/lon have one entry per element of `sources`, while `text`
# and `facecolor_tuple` have one per row of call_geoid_df. They line up only
# when no GEOID was skipped while building `sources` — confirm.
allegheny = dict(type='scattermapbox',
             lat=alle_lats, 
             lon=alle_lons,
             mode='markers',
             text=text,
             marker=dict(size=1, 
                         color=facecolor_tuple,
                         showscale=True,  colorscale='Viridis',
#                          colorscale=[[0, colorbar_min],
#                                      [1, colorbar_max]],
                         # Color bar range runs from the smallest to the largest call total.
                         cmin=min_call,
                         cmax=max_call),
             hoverinfo='text')

In [425]:
# Inspect which keys a single layer dict carries.
layers[0].keys()

dict_keys(['sourcetype', 'source', 'below', 'type', 'color', 'opacity'])

In [452]:
# Figure layout: an 800x800 Mapbox view carrying the tract fill layers,
# centered on lat 40.44 / lon -79.99 at zoom 9.
layout = {
    'title': 'Mapbox Choropleth<br>Allegheny County EMS CALL',
    'font': {'family': 'Balto'},
    'autosize': True,
    'width': 800,
    'height': 800,
    'hovermode': 'closest',
    'mapbox': {
        'accesstoken': mapbox_access_token,
        'layers': layers,
        'bearing': 0,
        'center': {'lat': 40.44, 'lon': -79.99},
        'pitch': 0,
        'zoom': 9,
    },
}

# The figure is a plain dict: the scatter trace plus the layout above.
fig = {'data': [allegheny], 'layout': layout}

In [453]:
# Render the figure inline with offline plotly.
iplot(fig)

<img src="map.gif" width="618" height="546" />