This is a study of emergency (911) call data from Montgomery County, Pennsylvania, USA [[source](https://www.kaggle.com/datasets/mchirico/montcoalert/data)].
The data consists of the following information:
- title: Call title listing calls category and subcategory
- desc: Description of calls
- timeStamp: Calls time in 'Year-Month-Day hr:min:sec' format
- twp: Township of calls
- zip: ZIP code of calls
- lat: Latitude position of calls
- lng: Longitude position of calls
- addr: Address of calls
- e: Redundant variable that is always equal to 1

The Plotly library is used for visual exploration of the data.

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib as mpl 
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px
import plotly.graph_objects as go 
import cufflinks as cf 

# Load the raw 911-call records from the local CSV export.
csv_path = '/Users/siddhesh/Work/datasets/Emergency911Calls/911_original.csv'
data = pd.read_csv(csv_path)
# Ending the cell on the bare name renders the DataFrame in the notebook.
data

In [None]:
# Print a summary of the DataFrame: column names, non-null counts and dtypes.
data.info()

In [None]:
# Show descriptive statistics (count, mean, spread, quartiles) for the numeric columns.
data.describe()

In [None]:
# Report how many fully duplicated rows exist, then discard them in place
# (pandas keeps the first occurrence of each duplicated row by default).
duplicate_count = data.duplicated().sum()
print(f"Number of duplicate rows: {duplicate_count}. Drop duplicates.")
data.drop_duplicates(inplace=True)

In [None]:
# Count the missing values in each column.
data.isna().sum()

In [None]:
# Display the DataFrame again to inspect it after de-duplication.
data

The datatype of the 'timeStamp' column is string. Converting it to 'datetime' format lets us access the date components easily for our study.

In [None]:
# Show the Python type and value of the first timestamp before conversion.
# The `=` specifier in the f-string echoes each expression's source text
# followed by its value.
print(f"Datatype of 'timeStamp column: {type(data['timeStamp'].loc[0]) = } {data['timeStamp'].loc[0] = }")
# Parse the column into pandas datetimes so the `.dt` accessors can be used.
data['timeStamp'] = pd.to_datetime(data['timeStamp'])
# Uncomment to confirm the new dtype of 'timeStamp':
#data.info()

Separate the 'timeStamp' information into 'Year', 'Month', 'Day', 'Hour' and day-of-week columns.

In [None]:
# Break the parsed timestamp into calendar components for grouping and plotting.
data['Year'] = data['timeStamp'].dt.year
data['Month'] = data['timeStamp'].dt.month
data['Day'] = data['timeStamp'].dt.day
data['Hour'] = data['timeStamp'].dt.hour
# pandas numbers weekdays Monday=0 ... Sunday=6.
data['DayOfWeek'] = data['timeStamp'].dt.day_of_week
# Take the weekday names from pandas itself so they always agree with the
# Monday=0 numbering above (a hand-written lookup starting at Sunday would
# shift every label by one day).
data['DayOfWeekMpd'] = data['timeStamp'].dt.day_name()
data

Information in the 'title' column is stored in 'Category : subcategory' format. Add separate columns for 'Category' and 'Subcategory'.

In [None]:
print(f"Check first few row of 'title' column to understand its format: {data['title'].loc[0:10] = }")
# Split each title once on ':' and reuse the pieces for both new columns.
title_parts = data['title'].str.split(":")
data['Category'] = title_parts.str.get(0)
# Trim only the surrounding whitespace and dangling '-' from the subcategory;
# hyphens inside a subcategory name are kept intact.
data['Subcategory'] = title_parts.str.get(1).str.strip(' -')
data

Plot the number of calls per year using cufflinks

In [None]:
# Configure cufflinks (ggplot theme, offline rendering) before plotting.
cf.set_config_file(
    theme='ggplot',
    sharing='public',
    offline=True,
)
# Histogram of the number of calls falling in each year.
calls_year = data['Year']
calls_year.iplot(kind='hist')

Plot number of calls per year using plotly.express

In [None]:
# Per-year call totals; the chart below does its own counting from `data`.
dataCountsPerYear = data.value_counts(subset='Year')
# Grouped bars: one bar per call category within each year.
fig = px.histogram(
    data_frame=data,
    x='Year',
    color='Category',
    barmode='group',
)
fig.show()

Number of calls per month

In [None]:
# Histogram of calls per calendar month, coloured by call category.
px.histogram(data, x='Month', color='Category')

Number of calls per day of the month

In [None]:
# Histogram of calls per day of the month, coloured by call category.
px.histogram(data, x='Day', color='Category')

Number of calls per weekday

In [None]:
# Histogram of calls per weekday, coloured by call category.
# Plotly orders a categorical axis by the order in which values first appear
# in the data, so pin the weekday axis to calendar order explicitly.
px.histogram(
    data,
    x='DayOfWeekMpd',
    color='Category',
    category_orders={
        'DayOfWeekMpd': [
            'Monday', 'Tuesday', 'Wednesday', 'Thursday',
            'Friday', 'Saturday', 'Sunday',
        ],
    },
)

Number of calls per hour

In [None]:
# Histogram of calls per hour of day, with one grouped bar per call category.
px.histogram(data, x='Hour', color='Category', barmode='group')

Number of calls by weekday vs. hour

In [None]:
# Heatmap of call counts by hour (x) and weekday (y), with marginal
# histograms on both axes.
# Plotly orders a categorical axis by the order in which values first appear
# in the data, so pin the weekday axis to calendar order explicitly.
px.density_heatmap(
    data,
    x='Hour',
    y='DayOfWeekMpd',
    text_auto=True,
    marginal_x='histogram',
    marginal_y='histogram',
    category_orders={
        'DayOfWeekMpd': [
            'Monday', 'Tuesday', 'Wednesday', 'Thursday',
            'Friday', 'Saturday', 'Sunday',
        ],
    },
)

Number of calls in months over years

In [None]:
# Heatmap of call counts by month (x) and year (y), with the count printed in each cell.
px.density_heatmap(data, x='Month', y='Year', text_auto=True)

In [None]:
# Number of rows (calls) for each distinct ZIP code value.
data.value_counts('zip')

In [None]:
# Count calls per ZIP code. Selecting 'Category' before aggregating avoids
# counting every column only to keep one of them.
dataByZip = data.groupby('zip')['Category'].count().reset_index()
# Convert the ZIP codes to strings via int (presumably they were parsed as
# floats because of missing values -- verify against the CSV), then left-pad
# to five digits so ZIP codes with leading zeros keep their canonical form.
dataByZip['zip'] = dataByZip['zip'].astype(int).astype(str).str.zfill(5)
dataByZip

In [None]:
# Print column dtypes and non-null counts of the per-ZIP summary.
dataByZip.info()

In [None]:
# The per-ZIP count was taken on the 'Category' column; give it a name that
# says what it holds.
column_renames = {"Category": 'Calls'}
dataByZip.rename(columns=column_renames, inplace=True)
dataByZip

In [None]:
from urllib.request import urlopen
import json

# GeoJSON with Pennsylvania ZIP-code boundaries, used by the choropleth maps.
pa_zip_geojson_url = 'https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/refs/heads/master/pa_pennsylvania_zip_codes_geo.min.json'
with urlopen(pa_zip_geojson_url) as response:
    # Read the whole response body and parse it as JSON.
    pennsylvania_geojson = json.loads(response.read())

plotly.express choropleth() is working, but choropleth_map() is not working.

In [None]:
# Choropleth of call counts per ZIP code drawn on Plotly's geo axes.
# The colour scale runs from 0 up to the busiest ZIP code.
max_calls = dataByZip['Calls'].max()
fig = px.choropleth(
    data_frame=dataByZip,
    geojson=pennsylvania_geojson,
    featureidkey='properties.ZCTA5CE10',  # GeoJSON property matched against 'zip'
    locations='zip',
    color='Calls',
    color_continuous_scale='viridis',
    range_color=(0, max_calls),
)
# Zoom to the ZIP codes present in the data and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)
# Remove the figure margins.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# Tile-map choropleth of call counts per ZIP code on a carto-positron base map.
map_center = dict(lat=40.2517, lon=-75.5)
fig = px.choropleth_map(
    data_frame=dataByZip,
    geojson=pennsylvania_geojson,
    featureidkey="properties.ZCTA5CE10",  # GeoJSON property matched against 'zip'
    locations="zip",
    color="Calls",
    map_style="carto-positron",
    center=map_center,
    zoom=8,
)
# Remove the figure margins.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# Same per-ZIP choropleth, built directly from a graph_objects trace.
zip_trace = go.Choroplethmap(
    geojson=pennsylvania_geojson,
    featureidkey="properties.ZCTA5CE10",  # GeoJSON property matched against 'zip'
    locations=dataByZip['zip'],
    z=dataByZip['Calls'],
    colorscale="Viridis",
    zmin=0,
    zmax=dataByZip['Calls'].max(),
    marker_opacity=0.5,
    marker_line_width=0,
)
fig = go.Figure(zip_trace)
# Base-map settings and zero margins applied in a single layout update.
fig.update_layout(
    map_style="carto-positron",
    map_zoom=8,
    map_center={"lat": 40.2517, "lon": -75.5},
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()