### Name: Penny Xu

In [1]:
import bqplot 
from bqplot import pyplot as plt
from bqplot import Tooltip
import matplotlib.pyplot as matplt
import numpy as np
import ipywidgets
import pandas as pd
from shapely.geometry import Point
from geopandas import GeoDataFrame
import contextily as ctx

# Code
## Dataset review

In [2]:
# Load the facilities table; the relative path is resolved against the
# notebook's working directory.
facilities = pd.read_csv(filepath_or_buffer='facilities_for_data_store.csv')

In [3]:
# Build one shapely Point per row (x = longitude, y = latitude) and use the
# resulting list as the geometry column of a GeoDataFrame.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(facilities['longitude'], facilities['latitude'])
]
gdf_facilities = GeoDataFrame(facilities, geometry=geometry)
gdf_facilities

Unnamed: 0,facilityid,facilitynumber,frsid,latitude,longitude,lloverridden,gridcode,x,y,latlongsource,...,distancetoreach,hem3id,distancetohem3,llconfirmed,waterreleases,modeledreleases,modchromreleases,Unnamed: 69,Unnamed: 70,geometry
0,89414GTCHL28MIN,53967,1.100020e+11,41.217092,-117.243919,0,14,-2164,2741,FRS,...,1482.634295,24128,58455.346210,,True,True,True,,,POINT (-117.24392 41.21709)
1,89414NWMNT35MIL,53969,1.100010e+11,41.253488,-117.167637,0,14,-2155,2744,FRS,...,334.079218,24128,65988.948700,,True,True,True,,,POINT (-117.16764 41.25349)
2,99737PGMNX38MIL,60924,1.100090e+11,64.449722,-144.939623,0,24,536,2024,FRS,...,,26415,63024.887130,,True,True,True,,,POINT (-144.93962 64.44972)
3,89820BTTLMCOPPE,54102,1.100010e+11,40.531164,-117.131496,1,14,-2173,2646,Manual,...,1469.581631,24128,69687.936350,,True,True,True,,,POINT (-117.13150 40.53116)
4,99707FRTKN1FORA,60918,1.100030e+11,64.999530,-147.358107,0,24,386,2082,FRS,...,,26411,31428.669940,,,True,True,,,POINT (-147.35811 64.99953)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2115,25405SSCTD855CR,12911,1.100300e+11,39.389506,-78.024503,0,14,1883,2424,FRS,...,1693.625551,13734,3747.418908,,,True,,,,POINT (-78.02450 39.38951)
2116,7005WFMTSH364PE,44530,1.100030e+11,29.856830,-90.062450,0,14,707,952,FRS,...,2236.936035,53917,20072.676760,,,True,True,,,POINT (-90.06245 29.85683)
2117,90058GRFFT4900G,54397,1.100120e+11,33.996889,-118.190000,0,14,-2489,1791,FRS,...,231.496872,93134,9844.913903,,,True,,,,POINT (-118.19000 33.99689)
2118,36505MSDVT124HW,21743,1.100310e+11,30.947479,-88.024888,0,14,937,1119,FRS,...,419.942828,13838,35983.198590,,,True,True,,,POINT (-88.02489 30.94748)


## Group the dataset by state

In [4]:
# Summary statistics of `chromhexpercent` per state.
# NOTE: despite its name, `chromhex_city` is indexed by state, not by city.
chromhex_city = (
    gdf_facilities
    .groupby(by='state')['chromhexpercent']
    .describe()
)
chromhex_city

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,9.0,0.234444,0.1409,0.03,0.2,0.2,0.34,0.43
AL,82.0,0.255976,0.115019,0.03,0.19,0.28,0.34,0.51
AR,40.0,0.2775,0.085478,0.04,0.2475,0.34,0.34,0.39
AZ,21.0,0.284762,0.096313,0.09,0.18,0.34,0.34,0.43
CA,53.0,0.262642,0.160877,0.03,0.19,0.25,0.34,0.97
CO,12.0,0.285833,0.127455,0.03,0.1875,0.34,0.39,0.43
CT,30.0,0.256,0.101764,0.03,0.21,0.245,0.34,0.39
DE,4.0,0.2825,0.069462,0.2,0.2375,0.295,0.34,0.34
FL,30.0,0.23,0.107703,0.03,0.18,0.18,0.34,0.5
GA,53.0,0.276415,0.106827,0.03,0.25,0.34,0.34,0.5


## Figure 1: bar plot of percent of chromium in all states

In [5]:
# First try:
# Bar chart of the mean chromhexpercent per state, built with bqplot's object API.
x_sch = bqplot.OrdinalScale()
y_sch = bqplot.LinearScale()

# NOTE(review): the x values are state codes, so this x-axis label is only a
# placeholder; it is left unchanged to keep this first attempt as recorded.
x_axh = bqplot.Axis(scale=x_sch, label='Value of 3rd Axis')
y_axh = bqplot.Axis(scale=y_sch, label='Percent of Chromium', orientation='vertical')

# The mark must share its scales with the axes, otherwise the axes do not
# describe the bars. Building the mark with bqplot.Bars (instead of
# bqplot.pyplot.bar, which creates its own scales and adds the mark to an
# implicit pyplot figure) keeps mark, axes, and figure consistent.
hist = bqplot.Bars(
    x=chromhex_city.index.tolist(),
    y=chromhex_city['mean'],
    scales={'x': x_sch, 'y': y_sch},
    interactions={'click': 'select'},
)

fig1 = bqplot.Figure(marks=[hist], axes=[x_axh, y_axh])
fig1

Figure(axes=[Axis(label='Value of 3rd Axis', scale=OrdinalScale()), Axis(label='Percent of Chromium', orientat…

In [6]:
# Second try (add label for x axis):
# pyplot-style figure with a title, axis labels, and click-to-select bars.

up_fig = plt.figure(title='Percent of Chromium in All States')
up_fig.layout.width = '1000px'
up_fig.layout.height = '580px'

# One bar per state; a clicked bar is highlighted in magenta.
bar_chart = plt.bar(
    chromhex_city.index.tolist(),
    chromhex_city['mean'].tolist(),
    colors=['pink'],
    interactions={'click': 'select'},
    selected_style={'fill': 'magenta'},
)

plt.xlabel('State')
plt.ylabel('Percent of Chromium')

up_fig

Figure(axes=[Axis(label='State', scale=OrdinalScale()), Axis(label='Percent of Chromium', orientation='vertica…

## Figure 2: scatter plot of percent of chromium in one selected state

In [7]:
# Select data of Texas as the example
# (boolean mask on the `state` column keeps only the 'TX' rows).
state_data = gdf_facilities.loc[gdf_facilities['state'].eq('TX')]
state_data

Unnamed: 0,facilityid,facilitynumber,frsid,latitude,longitude,lloverridden,gridcode,x,y,latlongsource,...,distancetoreach,hem3id,distancetohem3,llconfirmed,waterreleases,modeledreleases,modchromreleases,Unnamed: 69,Unnamed: 70,geometry
10,7759WBLNCH2415T,49993,1.100280e+11,29.379167,-94.929167,0,14,128,866,FRS,...,1877.941870,12923,14356.968230,,True,True,True,,,POINT (-94.92917 29.37917)
88,77656PLPPPPOBOX,50070,1.100010e+11,30.338700,-94.066350,0,14,229,999,FRS,...,85.314938,12917,43481.090810,,True,True,,,,POINT (-94.06635 30.33870)
93,75504NTRNTPOBOX,47893,1.100080e+11,33.264400,-94.069639,0,14,221,1401,FRS,...,142.626865,13977,21845.917610,,True,True,,,,POINT (-94.06964 33.26440)
104,75702LGLRL1702E,48054,1.100000e+11,32.362643,-95.280179,0,14,83,1275,FRS,...,25.200812,13972,11482.702300,,True,True,,,,POINT (-95.28018 32.36264)
105,78403CSTLR1300C,50507,1.100010e+11,27.810775,-97.436658,0,14,-175,652,FRS,...,402.717829,12924,8949.324571,,True,True,True,,,POINT (-97.43666 27.81077)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2040,7748WGDMNM191KE,49610,1.100700e+11,30.054880,-95.857450,0,14,17,957,FRS,...,2580.538648,53910,29311.154940,,True,True,True,,,POINT (-95.85745 30.05488)
2054,79417GLDSPPOBOX,50973,1.100010e+11,33.627290,-101.909860,0,14,-672,1469,FRS,...,1085.171699,23042,8959.724784,,True,True,True,,,POINT (-101.90986 33.62729)
2062,77053CSSPR14211,49275,1.100310e+11,29.602311,-95.426238,0,14,69,896,FRS,...,1863.826998,12918,15002.823690,,,,,,,POINT (-95.42624 29.60231)
2065,77041DNLMS565BR,49150,1.100230e+11,29.849190,-95.570260,0,14,51,929,FRS,...,1404.653028,53910,23721.692510,,True,True,True,,,POINT (-95.57026 29.84919)


In [8]:
# First try:
# Plain scatter of the selected state's rows, built with bqplot's object API.

x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

x_ax = bqplot.Axis(label='Longitude', scale=x_sc)
y_ax = bqplot.Axis(label='Latitude', scale=y_sc, orientation='vertical')

# The mark uses the same scale objects as the axes.
scatters = bqplot.Scatter(
    x=state_data['longitude'],
    y=state_data['latitude'],
    scales={'x': x_sc, 'y': y_sc},
)

fig2 = bqplot.Figure(marks=[scatters], axes=[x_ax, y_ax])
fig2.layout.width = '1000px'
fig2.layout.height = '580px'
fig2

Figure(axes=[Axis(label='Longitude', scale=LinearScale()), Axis(label='Latitude', orientation='vertical', scal…

In [9]:
# Second try (add colors for scatters based on the percent of chromium):
# Each point is shaded by its chromhexpercent through a ColorScale using the
# 'RdPu' scheme.
#
# The mark is created with bqplot.Scatter rather than plt.scatter: the pyplot
# call needs a throw-away `plt.figure()` to draw into, which leaves an unused
# figure behind and attaches the same mark to two figures.

x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()
color_sc = bqplot.ColorScale(scheme='RdPu')

x_ax = bqplot.Axis(scale=x_sc, label='Longitude')
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical', label='Latitude')

scatters = bqplot.Scatter(
    x=state_data['longitude'],
    y=state_data['latitude'],
    color=state_data['chromhexpercent'],
    scales={'x': x_sc, 'y': y_sc, 'color': color_sc},
)

fig3 = bqplot.Figure(
    marks=[scatters],
    axes=[x_ax, y_ax],
    title='Percent of Chromium in One Selected State',
)

fig3.layout.height = '580px'
fig3.layout.width = '1000px'
fig3

Figure(axes=[Axis(label='Longitude', scale=LinearScale()), Axis(label='Latitude', orientation='vertical', scal…

In [10]:
# Third try (add tooltip on scatters): 

# Output widget that the hover callback prints into; it is passed to the
# scatter mark as its tooltip.
out = ipywidgets.Output()

def hover_handler(self, content):
    """Print the hovered point's percent of chromium into the `out` widget.

    Parameters
    ----------
    self : bqplot mark
        The mark that fired the hover event (bqplot passes the mark as the
        first argument of an `on_hover` callback).
    content : dict
        Hover event payload; `content['data']['index']` is read as the
        position of the hovered point within the mark's data.
    """
    point_index = content['data']['index']
    out.clear_output()
    with out:
        # Read the value from the mark's own `color` data instead of a
        # notebook-level DataFrame, so the tooltip always matches the data the
        # hovered mark currently holds.
        print('Percent of chromium:', self.color[point_index])

In [11]:
# Scatter of the selected state's rows, shaded by chromhexpercent ('RdPu'
# scheme) with a hover tooltip.
#
# The mark is created with bqplot.Scatter rather than plt.scatter: the pyplot
# call needs a throw-away `plt.figure()` to draw into, which leaves an unused
# figure behind and attaches the same mark to two figures.

x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()
color_sc = bqplot.ColorScale(scheme='RdPu')

x_ax = bqplot.Axis(scale=x_sc, label='Longitude')
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical', label='Latitude')

scatters = bqplot.Scatter(
    x=state_data['longitude'],
    y=state_data['latitude'],
    color=state_data['chromhexpercent'],
    scales={'x': x_sc, 'y': y_sc, 'color': color_sc},
    tooltip=out,  # Output widget filled in by hover_handler
)
scatters.on_hover(hover_handler)

down_fig = bqplot.Figure(
    marks=[scatters],
    axes=[x_ax, y_ax],
    title='Percent of Chromium in One Selected State',
)
down_fig.layout.height = '580px'
down_fig.layout.width = '1000px'
down_fig

Figure(axes=[Axis(label='Longitude', scale=LinearScale()), Axis(label='Latitude', orientation='vertical', scal…

### Now hover the cursor over a point in the scatter plot; its percent of chromium will be shown

## Create dashboard

In [12]:
def on_selected(change):
    """Redraw the scatter plot for the state whose bar was clicked.

    Parameters
    ----------
    change : dict
        Change notification from the observed bar mark; `change['owner']` is
        the mark whose `selected` attribute holds the selected bar positions.

    When exactly one bar is selected, the scatter shows that state's rows.
    Otherwise (nothing selected, or several bars selected) the state filter
    matches no rows and the scatter is emptied.
    """
    selected = change['owner'].selected

    state = ''
    # `selected` may be a NumPy array, so compare with `is not None`:
    # `selected != None` is evaluated element-wise and raises when used as a
    # condition on an array with more than one element.
    if selected is not None and len(selected) == 1:
        state = chromhex_city.index[selected[0]]

    state_rows = gdf_facilities.loc[gdf_facilities['state'] == state]

    # Send x, y, and color to the front end as one update so the plot never
    # renders a mix of old and new arrays. Color is refreshed as well so the
    # shading reflects the newly selected state's values.
    with scatters.hold_sync():
        scatters.x = state_rows['longitude']
        scatters.y = state_rows['latitude']
        scatters.color = state_rows['chromhexpercent']
    

In [13]:
# Call on_selected whenever the bar chart's `selected` attribute changes.
bar_chart.observe(on_selected, names='selected')

# Empty label widget shown at the bottom of the dashboard.
myLabel = ipywidgets.Label()

In [14]:
# Stack the bar chart above the scatter plot, then place the label underneath.
figures = ipywidgets.VBox(children=[up_fig, down_fig])
myDashboard = ipywidgets.VBox(children=[figures, myLabel])
myDashboard

VBox(children=(VBox(children=(Figure(axes=[Axis(label='State', scale=OrdinalScale()), Axis(label='Percent of C…

# Prose
## How to use the dashboard: the dashboard shows the percent of Chromium along multiple dimensions and consists of a bar plot and a scatter plot. Chromium is toxic, and a higher percent means more toxicity in the emissions. The bar plot displays the average percent of Chromium in each US state. The scatter plot displays the percent of Chromium at every location by latitude and longitude. The higher the percent, the darker the point. The scatter plot is interactive: hover the cursor over a point and its specific percent is shown. The two plots are linked: clicking a state's bar in the bar plot displays the scatter plot for that state.
## Contextual dataset: AirNow Air Quality Monitoring Site Data (https://livingatlas-dcdev.opendata.arcgis.com/datasets/EPA::airnow-air-quality-monitoring-site-data-current/explore?location=35.365521%2C-95.690500%2C4.43). It contains the air quality data received from monitoring sites that report to AirNow. It covers a wider range of pollution data and shares the same dimensions as the first dataset: latitude, longitude, and state. The contextual dataset can therefore provide supplementary information to support the conclusions or to make comparisons.