In [54]:
# Import libraries necessary for this project
import numpy as np
import pandas as pd
from time import time
from IPython.display import display # Allows the use of display() for dataframes

# Import the packages for geospatial visualization
import folium # Pachage to visualize maps
from geopy.geocoders import Nominatim # convert an address into latitude and longitude
from folium import plugins
import geopandas as gpd

# Import supplementary visualization code visuals.py
import visuals as vs

# Pretty display for notebooks
%matplotlib inline

In [55]:
# Read the incident records; the first row of the csv file holds the column names
incidents = pd.read_csv('incidents.csv', header=0)

# Report the available columns and the (rows, columns) size of the table
for summary in (incidents.columns, incidents.shape):
    print(summary)

Index(['ID', 'Latitude_Decimal', 'Longitude_Decimal', 'Postal_Code',
       'Response_Date', 'Problem', 'assigned_station'],
      dtype='object')
(420031, 7)


In [56]:
# Read the location table of the 16 weather stations near San Diego
# (the first row of the csv file holds the column names)
weather_station_location = pd.read_csv('weather_stations.csv', header=0)

# Column names on the first output line, (rows, columns) on the second
print(weather_station_location.columns, weather_station_location.shape, sep='\n')

Index(['stations', 'abbreviations', 'latitude', 'longitude'], dtype='object')
(16, 4)


In [57]:
# Find the geographical coordinates of San Diego; they are used as the centre of the folium maps
address = 'San Diego, CA'

geolocator = Nominatim(user_agent="san_diego_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match for the address; stop with a
# clear message here instead of an AttributeError on the attribute access below
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of San Diego are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of San Diego are 32.7174209, -117.1627714.


## 2.1. Locations of all Weather Stations and Incidents

In [58]:
# Draw one blue marker per weather station on a map centred on San Diego
sd_map = folium.Map(location=[latitude, longitude], height=900, zoom_start=8)

station_coords = weather_station_location[['latitude','longitude']].values
station_labels = weather_station_location[['stations','abbreviations']].values
# Each popup carries the station's name and abbreviation
for (lat, lon), label in zip(station_coords, station_labels):
    station_marker = folium.Marker([lat, lon], popup=label, icon=folium.Icon(color='blue'))
    station_marker.add_to(sd_map)

sd_map.save('1-weather_stations.html')

In [91]:
# Draw all weather stations and all incidents on one map
sd_map = folium.Map(location=[latitude, longitude], height=900, zoom_start=8)
tooltip = 'Click me!'

# Weather stations: blue markers whose popup shows the station name and abbreviation
station_coords = weather_station_location[['latitude','longitude']].values
station_labels = weather_station_location[['stations','abbreviations']].values
for (lat, lon), label in zip(station_coords, station_labels):
    folium.Marker(
        [lat, lon],
        popup=label,
        icon=folium.Icon(color='blue'),
        tooltip=tooltip,
    ).add_to(sd_map)

# Incidents: one small dark-blue circle each
incident_coords = incidents[['Latitude_Decimal','Longitude_Decimal']].values
for incident_lat, incident_lon in incident_coords:
    folium.CircleMarker([incident_lat, incident_lon], radius=1, color='darkblue').add_to(sd_map)

sd_map.save('2-incidents_stations.html')

## 2.2. Assignment of All Incidents to the Nearest Weather Station

In [94]:
# Show the data for all weather stations which are assigned to at least one incident,
# ordered from the most to the least frequently assigned station.
# The values of 'assigned_station' are compared against the index labels of the station
# table elsewhere in this notebook, so the rows are selected by label (.loc) rather than
# by position (.iloc); the two only coincide while the station table keeps its default index.
assigned_station_ids = incidents['assigned_station'].value_counts().index
relevant_stations = weather_station_location.loc[assigned_station_ids]
relevant_stations

Unnamed: 0,stations,abbreviations,latitude,longitude
12,san_diego_lindbergh_field,KSAN,32.73361,-117.18306
14,san_diego_montgomery_field,KMYF,32.81444,-117.13639
13,san_diego_brown_field,KSDM,32.57528,-116.99306
11,ramona,KRNM,33.0375,-116.91583
0,carlsbad,KCRQ,33.1268,-117.27583
1,campo,KCZZ,32.62611,-116.46833
15,thermal_airport,KTRM,33.62667,-116.15944


In [None]:
# Draw every incident in the colour of the weather station it is assigned to
sd_map = folium.Map(location=[latitude, longitude], height=900, zoom_start=9)
colormap = ['lightgreen','purple','darkblue','gray','darkgreen','black','darkred']

incident_coords = incidents[['Latitude_Decimal','Longitude_Decimal']]
station_coords = relevant_stations[['latitude','longitude']].values
for i, (station_lat, station_lon) in enumerate(station_coords):
    station_color = colormap[i]
    folium.Marker([station_lat, station_lon], icon=folium.Icon(color=station_color)).add_to(sd_map)
    # Small circles for the incidents assigned to the station that was just marked
    assigned_here = incidents['assigned_station'] == relevant_stations.index[i]
    for incident_lat, incident_lon in incident_coords[assigned_here].values:
        folium.CircleMarker([incident_lat, incident_lon], radius=1, color=station_color).add_to(sd_map)

sd_map.save('3-assigned_stations.html')

## 2.3. Remove the Incidents That Are Not in the Area of Interest

In [60]:
# Count how many incidents are assigned to each weather station (most frequent first)
station_counts = incidents['assigned_station'].value_counts()
station_counts

12    217975
14    142253
13     43462
11     14082
0       2247
1          9
15         3
Name: assigned_station, dtype: int64

In [61]:
# Drop the incidents assigned to weather stations 1 and 15 (the last two entries of
# relevant_stations): they correspond to very few incidents far from all other incidents.
# The remaining incidents go into a new, re-indexed dataframe 'incidents_sd'.
remote_stations = relevant_stations.index[-2:]
in_area_of_interest = ~incidents['assigned_station'].isin(remote_stations)
incidents_sd = incidents[in_area_of_interest].reset_index(drop=True)

print(incidents_sd.shape)
print(incidents_sd['assigned_station'].value_counts())

(420019, 7)
12    217975
14    142253
13     43462
11     14082
0       2247
Name: assigned_station, dtype: int64


In [62]:
# Write all relevant incidents, which will be used in the project, to a csv file
# (the row index is not written)
incidents_sd_csv = 'incidents_sd.csv'
incidents_sd.to_csv(incidents_sd_csv, index=False)

In [96]:
# Divide the dataframe into the incidents assigned to each of the 5 weather stations
# and save every part to its own csv file (without the row index).
# Maps the id found in 'assigned_station' to the csv file of that station.
station_csv_files = {
    12: 'incidents_sdlf.csv',      # san_diego_lindbergh_field
    14: 'incidents_sdmf.csv',      # san_diego_montgomery_field
    13: 'incidents_sdbf.csv',      # san_diego_brown_field
    11: 'incidents_ramona.csv',    # Ramona
    0: 'incidents_carlsbad.csv',   # Carlsbad
}

incidents_by_station = {}
for station_id, csv_file in station_csv_files.items():
    station_incidents = incidents_sd[incidents_sd['assigned_station']==station_id]
    station_incidents.to_csv(csv_file, index=False)
    incidents_by_station[station_id] = station_incidents

# One named dataframe per weather station
incidents_sdlf = incidents_by_station[12]
incidents_sdmf = incidents_by_station[14]
incidents_sdbf = incidents_by_station[13]
incidents_ramona = incidents_by_station[11]
incidents_carlsbad = incidents_by_station[0]

In [92]:
# Show the remaining incidents and their assigned weather stations.
# The points are read from incidents_sd (the incidents kept for the project) so that this
# map is drawn from the same data that is saved and analysed, not from the unfiltered table.
sd_map = folium.Map(location=[latitude, longitude], height=900, zoom_start=10)
colormap = ['lightgreen','purple','darkblue','gray','darkgreen','black','darkred']

# All relevant stations except the last two, whose incidents were removed
stations_kept = relevant_stations.iloc[:-2]
incident_coords = incidents_sd[['Latitude_Decimal','Longitude_Decimal']]
for i, (station_lat, station_lon) in enumerate(stations_kept[['latitude','longitude']].values):
    station_color = colormap[i]
    folium.Marker([station_lat, station_lon], icon=folium.Icon(color=station_color)).add_to(sd_map)
    # Small circles, in the station's colour, for the incidents assigned to this station
    assigned_here = incidents_sd['assigned_station'] == stations_kept.index[i]
    for incident_lat, incident_lon in incident_coords[assigned_here].values:
        folium.CircleMarker([incident_lat, incident_lon], radius=1, color=station_color).add_to(sd_map)

sd_map.save('4-assigned_stations_sd.html')

In [64]:
# The 5 weather stations nearest to the incidents are the ones used in the project:
# keep every relevant station except the last two and save them (with their index) to a csv file
stations_analyzed = relevant_stations.iloc[:-2]
stations_analyzed_csv = 'stations_analyzed.csv'
stations_analyzed.to_csv(stations_analyzed_csv)

## 2.4. Heatmap for Incidents by Postal Code and Locations of Weather Stations

In [82]:
# Count the incidents per postal code and store the postal codes as integers
incidents_zipcode = (
    incidents_sd
    .groupby('Postal_Code', as_index=False)[['ID']]
    .count()
    .astype({'Postal_Code': 'int64'})
)

# Total number of incidents over all postal codes
incidents_zipcode['ID'].sum()

420019

In [83]:
# Load the zip code boundary shapes (file maintained by the LA Times in 2012)
fname = "./zipcode_boundary.geojson"
zipcode_shape = gpd.read_file(fname)

# Store the zip code of every shape as an integer
external_ids = zipcode_shape['external_id'].astype('int64')
zipcode_shape['external_id'] = external_ids

# Number of zip code shapes in the file
external_ids.shape

(107,)

In [84]:
# Find the zip codes that occur in the incident data but have no shape in the shape file.
# This may be due to an outdated shape file or to errors in OpenStreetMap's geocoding service.
# 92093, 92161 -> UCSD, 92136 -> near San Diego Naval Base, 92179 -> near Otay Mesa, 92182 -> near SDSU
incident_zipcodes = set(incidents_zipcode['Postal_Code'])
shape_zipcodes = set(zipcode_shape['external_id'])
missing_set = incident_zipcodes.difference(shape_zipcodes)
missing_set

{92093, 92136, 92161, 92179, 92182}

In [85]:
# Fold the incident counts of zip codes that have no shape in the shape file into a
# neighbouring zip code that has one, so that these incidents still show up on the heatmap:
#   92093 and 92161 (UCSD)            -> 92037 (La Jolla)
#   92136 (near San Diego Naval Base) -> 92113 (near Barrio Logan)
#   92182 (near SDSU)                 -> 92115 (near Mid-City)
# The single incident in zip code 92179 near Otay Mesa is not shown on the heatmap.
zipcode_merges = {92037: [92093, 92161], 92113: [92136], 92115: [92182]}

# Start from the unmerged per-zip-code counts so that re-running this cell
# does not add the same incidents a second time.
base_counts = incidents_sd.groupby('Postal_Code')['ID'].count()
base_counts.index = base_counts.index.astype('int64')

for target_zip, source_zips in zipcode_merges.items():
    # .loc writes into incidents_zipcode itself. The chained form
    # incidents_zipcode['ID'][mask].iloc[0] = ... assigns to a temporary copy
    # and leaves the dataframe unchanged.
    is_target = incidents_zipcode['Postal_Code'] == target_zip
    merged_count = base_counts.loc[target_zip] + base_counts.loc[source_zips].sum()
    incidents_zipcode.loc[is_target, 'ID'] = merged_count

In [90]:
# Keep only the zip code shapes for which incident counts exist
zipcode_shape = zipcode_shape[zipcode_shape['external_id'].isin(set(incidents_zipcode['Postal_Code']))]

# Base map
m = folium.Map(width=600,height=600,location=[latitude, longitude], zoom_start=9)

# Path of the shape file; the choropleth below is drawn from the filtered zipcode_shape instead
zip_geo = r'zipcode_boundary.geojson'

# Colour every zip code shape by its number of incidents.
# folium.Choropleth replaces the deprecated Map.choropleth method, and its `bins`
# argument replaces the deprecated `threshold_scale` argument.
folium.Choropleth(
    geo_data=zipcode_shape,
    name='choropleth',
    data=incidents_zipcode,
    columns=['Postal_Code', 'ID'],
    key_on='properties.external_id',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Incidents in San Diego from Sep 1st, 2016 to Aug 31st, 2019',
    bins=[0, 10000, 20000, 30000, 40000, 50000, 60000, 70000],
).add_to(m)
folium.LayerControl().add_to(m)

# Mark the weather stations used in the project (all relevant stations except the last two)
for lat, lon in relevant_stations[['latitude','longitude']][:-2].values:
    folium.Marker([lat, lon], icon=folium.Icon(color='darkblue')).add_to(m)
m

In [93]:
# Write the heatmap with the weather station markers to a standalone html file
heatmap_html = '5-heatmap_incidents_stations.html'
m.save(heatmap_html)