In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import json

from shapely.geometry import shape, Point
from shapely.ops import unary_union

%matplotlib inline
# Show every column when displaying wide frames. Use the canonical option name
# instead of relying on pandas' regex matching of "display.max.columns".
pd.set_option("display.max_columns", None)

## TODOs:
- Rename columns to descriptive titles
- Find a more complete census tract GeoJSON
- Explore census tracts without sensors

In [2]:
def los_angeles_county_sensors(
    sensors_df,
    boundary_url='https://opendata.arcgis.com/datasets/10f1e37c065347e693cf4e8ee753c09b_15.geojson',
    timeout=60,
):
    """Return the purpleair sensors contained within LA County.

    Parameters
    ----------
    sensors_df : pandas.DataFrame
        Sensor table with ``Lat`` and ``Lon`` columns.
    boundary_url : str, optional
        URL of a GeoJSON FeatureCollection whose features together make up
        the county boundary. Defaults to the dataset this notebook uses.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get``.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the rows of ``sensors_df`` whose (Lon, Lat)
        point is contained in the county shape, in their original order.

    Raises
    ------
    requests.HTTPError
        If the boundary download answers with an error status.
    """
    # Get LA County geometries in JSON form; fail loudly on a bad response
    # instead of trying to parse an error page as GeoJSON.
    response = requests.get(boundary_url, timeout=timeout)
    response.raise_for_status()
    boundary = response.json()

    # Union of the individual LA County shapes, so each sensor needs only a
    # single containment test.
    county_shape = unary_union(
        [shape(feature['geometry']) for feature in boundary['features']]
    )

    # One flag per row, in row order. Points are built as (Lon, Lat) — in that
    # order! Rows without coordinates cannot be located, so they are excluded.
    in_county = np.array(
        [
            bool(
                pd.notna(lon)
                and pd.notna(lat)
                and county_shape.contains(Point(lon, lat))
            )
            for lon, lat in zip(sensors_df['Lon'], sensors_df['Lat'])
        ],
        dtype=bool,
    )

    # Positional boolean mask: a repeated index label cannot duplicate rows.
    # Copy so callers can add columns without touching the full sensor table.
    return sensors_df.loc[in_county].copy()

In [3]:
# Use 'experimental' data from purpleair
url = 'https://www.purpleair.com/data.json'
response = requests.get(url, timeout=60)
# Stop here with a clear HTTP error rather than failing later on a
# missing 'data' / 'fields' key or an unparseable body.
response.raise_for_status()
data = response.json()

In [4]:
# Tabulate the raw records under their field names, indexed by sensor ID
sensors_df = pd.DataFrame(data['data'], columns=data['fields']).set_index('ID')
sensors_df

Unnamed: 0_level_0,pm,age,pm_0,pm_1,pm_2,pm_3,pm_4,pm_5,pm_6,conf,pm1,pm_10,p1,p2,p3,p4,p5,p6,Humidity,Temperature,Pressure,Elevation,Type,Label,Lat,Lon,Icon,isOwner,Flags,Voc,Ozone1,Adc,CH
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
20,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,1413.0,0,Oakdale,40.603077,-111.836120,0,0,0,,,0.01,1
47,,2,,,,,,,,96,,,,,,,,,,,,,0,OZONE TEST,40.476204,-111.882600,0,0,0,,1.54,1.54,0
53,7.2,1,7.2,7.3,6.2,4.7,6.4,8.7,10.3,97,5.8,9.0,1835.6,374.9,37.8,4.7,1.9,0.6,,,,,0,Lakeshore,40.246742,-111.704800,0,0,0,,,0.00,1
72,30.2,308,30.2,28.8,30.4,32.1,33.4,34.0,36.1,97,30.9,36.5,17625.6,2091.3,40.7,7.8,5.2,3.6,79.0,69.0,884.24,,0,90.9fm KRCL,40.770190,-111.947075,0,0,0,,,0.05,1
74,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0,Wasatch Commons,40.738262,-111.936250,0,0,0,,,0.05,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72251,0.6,1,0.6,1.1,1.2,1.4,3.8,10.3,3.3,100,0.2,0.9,224.3,65.1,10.7,0.9,0.3,0.0,10.0,84.0,868.26,1391.0,0,Clio | Mohawk Meadows,39.721348,-120.566940,0,0,0,,,0.05,3
72281,0.6,0,0.6,0.7,0.8,1.0,5.8,3.6,0.7,100,0.3,0.9,214.5,62.3,5.4,0.4,0.2,0.2,29.0,79.0,1004.13,197.0,0,BadgerRoad,46.177708,-119.319600,0,0,0,,,0.03,3
72337,32.7,0,32.7,28.5,41.9,48.4,19.3,5.6,0.8,100,20.3,40.8,3615.4,1078.1,263.9,29.0,6.3,2.9,36.0,92.0,996.03,138.0,0,Aliso Woods Canyon,33.541534,-117.723250,0,0,0,,,0.01,3
72559,20.2,1,20.2,22.8,27.4,32.1,33.0,15.8,2.8,100,14.3,21.3,2729.9,769.9,131.3,6.9,1.2,0.5,15.0,103.0,956.87,501.0,0,Chez nous,34.205980,-118.142310,0,0,0,,,0.01,3


In [5]:
# Narrow the full purpleair sensor table down to the sensors in LA County
la_county_sensors_df = los_angeles_county_sensors(sensors_df=sensors_df)

### Data descriptors
##### Determined
- pm = current PM2.5 reading
- pm1 = raw PM1 reading
- pm_10 = raw PM10 reading
- pm_0 = current PM2.5 reading
- pm_1 = 10 minute PM2.5 average
- pm_2 = 30 minute PM2.5 average
- pm_3 = 1 hour PM2.5 average
- pm_4 = 6 hour PM2.5 average
- pm_5 = 24 hour PM2.5 average
- pm_6 = One week PM2.5 average
- p1 = Particles >= 0.3 µm
- p2 = Particles >= 0.5 µm
- p3 = Particles >= 1.0 µm
- p4 = Particles >= 2.5 µm
- p5 = Particles >= 5.0 µm
- p6 = Particles >= 10.0 µm

##### Undetermined (meaning not yet identified)
- age
- conf
- Icon
- isOwner
- Flags
- Voc
- Ozone1
- Adc
- CH

In [78]:
# Census tract geometries (GeoJSON) used to assign each sensor a tract
url = 'https://opendata.arcgis.com/datasets/152f90d3a34a43ef998448281505d45e_0.geojson'
response = requests.get(url, timeout=60)
# Surface a failed download as an HTTP error instead of a JSON parsing error
response.raise_for_status()
shapes = response.json()

In [81]:
# Build every tract polygon once, up front, instead of re-parsing the GeoJSON
# geometry for each sensor inside the loop.
tract_polygons = [
    (shape(feature['geometry']), feature['properties'])
    for feature in shapes['features']
]

# Map each LA County sensor's index to the FIPS code of the tract containing it.
# Sensors that fall in no tract polygon are simply left out of the mapping.
indeces_and_census = {}
for index, row in la_county_sensors_df.iterrows():
    # Build Point object from sensor's Lon and Lat (in that order!) values
    point = Point(row.Lon, row.Lat)

    for tract_polygon, properties in tract_polygons:
        if tract_polygon.contains(point):
            indeces_and_census[index] = properties['FIPS']
            # Assumes tracts do not overlap, so the first hit is the only
            # one and the remaining polygons need not be tested.
            break
        

In [82]:
# Display the sensor index -> census tract FIPS mapping
indeces_and_census

{407: '06037650401',
 417: '06037297300',
 489: '06037294110',
 565: '06037297601',
 567: '06037430724',
 573: '06037650102',
 583: '06037185320',
 1128: '06037199800',
 1266: '06037621102',
 1507: '06037197200',
 1517: '06037212204',
 1648: '06037401704',
 1650: '06037433101',
 1656: '06037408401',
 1680: '06037404901',
 1684: '06037480903',
 1686: '06037460800',
 1748: '06037271600',
 1760: '06037271702',
 1816: '06037577200',
 1846: '06037189902',
 1848: '06037430701',
 1852: '06037408133',
 1858: '06037276400',
 1876: '06037702300',
 1956: '06037650901',
 1970: '06037272202',
 1974: '06037297602',
 1976: '06037297500',
 2014: '06037463601',
 2259: '06037464200',
 2263: '06037464100',
 2297: '06037271600',
 2309: '06037271702',
 2315: '06037271600',
 2352: '06037575902',
 2362: '06037199000',
 2404: '06037271702',
 2414: '06037271600',
 2440: '06037920338',
 2452: '06037432602',
 2462: '06037464100',
 2464: '06037534802',
 2496: '06037432202',
 2504: '06037433200',
 2516: '060374305

In [102]:
# Sensors for which no tract was found: a few Northern County census tracts
# are missing from this census tract geojson.
# TODO: find another geojson or handle nan census tract rows
rows_missing_tracts = [
    sensor_index
    for sensor_index in la_county_sensors_df.index
    if sensor_index not in indeces_and_census
]
rows_missing_tracts

[8290, 8306, 12132, 12136, 17771, 25413, 31869, 31889, 63301]

In [57]:
# Add corresponding census tract to df. `assign` builds a new frame instead of
# writing into the frame produced by an earlier cell, so this cell can be
# re-run safely. Sensors absent from the mapping get NaN.
la_county_sensors_df = la_county_sensors_df.assign(
    **{'Census Tract': la_county_sensors_df.index.map(indeces_and_census)}
)
la_county_sensors_df