In [1]:
# Imports and all that good stuff

import ast
import os

import pandas as pd
import numpy as np
import matplotlib as plt
import plotly.plotly as py
from plotly.graph_objs import *
from geopy.geocoders import Nominatim

%matplotlib inline
from matplotlib import style
style.use('seaborn-white')

In [2]:
# Function to handle strange zipcodes

def zipcode(num):
    """Reduce a ZIP code in any common representation to an integer.

    The value is converted to text, its first five characters are kept
    (which drops a ZIP+4 suffix such as ``-1234`` or the ``.0`` of a float)
    and that prefix is parsed as a number.

    Parameters
    ----------
    num : int, float, str or None
        Raw ZIP code, e.g. ``19901``, ``19901.0`` or ``"19901-1234"``.

    Returns
    -------
    int
        The numeric value of the first five characters. Missing values
        (``None`` or NaN) yield ``0``.

    Raises
    ------
    ValueError
        If the first five characters cannot be parsed as a number.
    """
    if num is None:
        return 0
    prefix = float(str(num)[0:5])
    # NaN is the only float that differs from itself; int(NaN) would raise.
    if prefix != prefix:
        return 0
    return int(prefix)

In [3]:
# Function to grab geocoded data of schools

def geo(col):
    """Collect the coordinates of every record in a geocoded column.

    Parameters
    ----------
    col : iterable
        Records that are looked up by the keys ``'latitude'`` and
        ``'longitude'``.

    Returns
    -------
    list
        One entry per record, in input order: a ``(latitude, longitude)``
        tuple of floats, or ``np.nan`` when the coordinates are unavailable.

    Raises
    ------
    ValueError
        If a coordinate is present but is not a valid number.
    """
    coords = []
    for val in col:
        try:
            coords.append((float(val['latitude']), float(val['longitude'])))
        except (KeyError, TypeError):
            # KeyError: the record lacks a coordinate key.
            # TypeError: the record itself is missing (NaN/None, so it cannot
            # be indexed by key) or a coordinate value is None.
            coords.append(np.nan)
    return coords

In [4]:
# Function to split (Latitude, Longitude) tuples into separate columns

def splitLatLon(series):
    """Turn a sequence of coordinate pairs into a two-column DataFrame.

    Parameters
    ----------
    series : iterable
        Items that support indexing; element 0 goes to ``'Latitude'`` and
        element 1 to ``'Longitude'``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``'Latitude'`` and ``'Longitude'``,
        one row per input item.
    """
    pairs = list(series)
    frame = pd.DataFrame()
    frame['Latitude'] = [pair[0] for pair in pairs]
    frame['Longitude'] = [pair[1] for pair in pairs]
    return frame

In [5]:
# Function to handle strange format of toxic geolocation data 

def toxicLatLon(data):
    """Parse location strings into coordinate tuples.

    For every non-missing value the first six characters are dropped
    (presumably a ``'POINT '`` prefix -- confirm against the dataset), the
    remaining spaces are replaced by commas and the result is parsed as a
    Python literal, so ``'POINT (-75.5 39.1)'`` becomes ``(-75.5, 39.1)``.

    Parameters
    ----------
    data : iterable
        Raw location values; anything whose ``str()`` is ``'nan'`` is skipped.

    Returns
    -------
    list
        The parsed values in input order. Skipped entries are not
        represented, so the list can be shorter than ``data``.

    Raises
    ------
    SyntaxError, ValueError
        If a value does not reduce to a plain Python literal.
    """
    parsed = []
    for val in data:
        val = str(val)
        if val == 'nan':
            continue
        # NOTE(security): this used to call eval(), which would execute any
        # expression found in the input text. ast.literal_eval accepts only
        # literals (numbers, tuples, ...) and raises on everything else.
        parsed.append(ast.literal_eval(val[6:].replace(' ', ',')))
    return parsed

In [6]:
# Function to switch the Latitude and Longitude columns of the toxic data
# (they come in switched)

def switchLatLon(series):
    """Split coordinate pairs into columns with the two values exchanged.

    The pairs are first split with ``splitLatLon``; the result is then
    relabelled so that each pair's second element ends up under
    ``'Latitude'`` and its first element under ``'Longitude'``.

    Parameters
    ----------
    series : iterable
        Indexable pairs, passed unchanged to ``splitLatLon``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``'Latitude'`` and ``'Longitude'``.
    """
    relabelled = splitLatLon(series).rename(
        columns={'Latitude': 'Longitude', 'Longitude': 'Latitude'}
    )
    # Restore the conventional column order after the labels were exchanged.
    return relabelled[['Latitude', 'Longitude']]

In [7]:
# Load the toxic-data CSV, restricted to its geocoded facility location columns.
toxic_data_url = "https://data.delaware.gov/resource/u2vy-dniy.csv"
columns = [
    'geocoded_facility_location',
    'geocoded_facility_location_address',
    'geocoded_facility_location_city',
    'geocoded_facility_location_state',
    'geocoded_facility_location_zip',
]
toxic_data = pd.read_csv(toxic_data_url, usecols=columns)

In [8]:
# Download the school records once and keep only the columns used below.
school_data_url = "https://data.delaware.gov/resource/tdwa-q9bt.json"
wanted_columns = ['schoolname', 'schooltype','geocoded_location','street1','street2','city','state','zip']
school_data = pd.read_json(school_data_url)

# Fail early, with a readable message, if a wanted column is not in the data.
missing_columns = [name for name in wanted_columns if name not in school_data.columns]
if missing_columns:
    raise ValueError("school data is missing expected columns: {}".format(missing_columns))

# all_columns holds every column that is NOT wanted, i.e. the ones to drop.
all_columns = [name for name in school_data.columns.values.tolist() if name not in wanted_columns]
school_data = school_data.drop(all_columns, axis=1)

In [9]:
# Missing ZIP codes are replaced by 0 before conversion so that every row ends
# up with an integer ZIP. The result is assigned back to the column explicitly:
# an in-place method called on a selected column does not reliably update the
# parent frame.
toxic_data['geocoded_facility_location_zip'] = (
    toxic_data['geocoded_facility_location_zip']
    .fillna(0)
    .apply(zipcode)  # apply zipcode function to zipcode column
)

In [10]:
# Normalise every school ZIP code with the zipcode() helper.
school_data['zip'] = school_data['zip'].map(zipcode)

In [11]:
# One entry per school, in row order: a (latitude, longitude) tuple, or NaN
# where geo() could not find the coordinates.
schools_geocode = geo(school_data['geocoded_location']) 

In [12]:
# Start the plotting table: school name plus its coordinate tuple (or NaN).
final_school_data = school_data[['schoolname']].assign(LatLon=schools_geocode)

In [13]:
# Discard rows with a missing value (e.g. schools without coordinates).
# Rebinding the name instead of mutating in place keeps the frame built in the
# previous cell intact.
final_school_data = final_school_data.dropna()

In [14]:
# Split the coordinate tuples into separate Latitude / Longitude columns;
# the result is held in dummy_data until it is joined onto final_school_data.
dummy_data = splitLatLon(final_school_data['LatLon'])

In [15]:
# Give both frames a fresh 0..n-1 index so that the column-wise concat that
# follows pairs the rows up by position. Rebinding (rather than inplace=True)
# leaves the previously built objects untouched.
final_school_data = final_school_data.reset_index(drop=True)
dummy_data = dummy_data.reset_index(drop=True)

In [16]:
# Put the name/tuple frame and the split coordinates side by side.
# ignore_index=True replaces the column labels by 0..3, which are then mapped
# to their final names; index=str turns the row labels into strings.
final_school_data = (
    pd.concat([final_school_data, dummy_data], axis=1, ignore_index=True)
    .rename(
        index=str,
        columns={0: 'School Name', 1: 'LatLon', 2: 'Latitude', 3: 'Longitude'},
    )
)

In [17]:
# Keep only the non-missing location strings. From here on toxic_data is a
# Series rather than a DataFrame; the type check lets this cell be re-run
# without a KeyError once that conversion has already happened.
if isinstance(toxic_data, pd.DataFrame):
    toxic_data = toxic_data['geocoded_facility_location']
toxic_data = toxic_data.dropna()

In [18]:
# Parse every remaining location string into a coordinate tuple (a plain list).
final_toxic_data = toxicLatLon(toxic_data)

In [19]:
# Build the Latitude/Longitude frame for the toxic sites; switchLatLon
# exchanges the two values of every parsed pair.
# NOTE(review): this cell rebinds final_toxic_data to a function of itself, so
# it must be run exactly once after the cell that calls toxicLatLon.
final_toxic_data = switchLatLon(final_toxic_data)

In [20]:
# The Mapbox token is taken from the MAPBOX_ACCESS_TOKEN environment variable
# when it is set.
# NOTE(review): the literal below is a credential committed to the notebook.
# It is kept only as a fallback so existing runs keep working -- rotate it and
# remove the fallback.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1Ijoic2V5ZWFkZWthbnllIiwiYSI6ImNqYjB3djM0azF4dXgzM285bWJneHA4eHYifQ.v5rp_rvbjoLRCWz_I3Zv0Q',
)


data = Data([
    # Schools: small, opaque green markers that show the school name on hover.
    Scattermapbox(
        lat=list(final_school_data['Latitude']),
        lon=list(final_school_data['Longitude']),
        mode='markers',
        marker=Marker(
            size=3,
            color='rgb(0, 255, 0)',
            opacity=1
        ),
        text=list(final_school_data['School Name']),
        hoverinfo='text'
    ),
    # Toxic-data locations: larger, half-transparent red markers.
    Scattermapbox(
        lat=list(final_toxic_data['Latitude']),
        lon=list(final_toxic_data['Longitude']),
        mode='markers',
        marker=Marker(
            size=6,
            color='rgb(255, 0, 0)',
            opacity=0.5
        )
    )
     ])

layout = Layout(
    title='Delaware Schools',
    autosize=True,
    hovermode='closest',
    showlegend=False,
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=90,
        # Centre the map on the mean school position. The mean is kept as a
        # float: truncating it to an int moves the centre by up to a whole
        # degree, away from the plotted points.
        center=dict(
            lat=float(final_school_data['Latitude'].mean()),
            lon=float(final_school_data['Longitude'].mean())
        ),
        pitch=0,
        zoom=7,
        style='light'
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Delaware Schools 2')