In [None]:
import requests
import pandas as pd
import geopandas as gpd
import os
import time
import json

# Read the API read key from the PURPLEAIR_KEY environment variable and keep it in a
# module-level variable that later cells reference. os.environ[...] raises KeyError
# if the variable is not set, so a missing key fails here rather than at request time.
my_api_read_key = os.environ['PURPLEAIR_KEY']

In [2]:
# This function sends one GET request to the PurpleAir sensors API: either the
# sensor listing (optionally narrowed by query parameters such as a bounding box)
# or the history endpoint of a single sensor.
def getSensors(sensor_index='all',my_fields='',other_params=None,timeout=60):
    """Query the PurpleAir v1 sensors API and return the raw response.

    Parameters
    ----------
    sensor_index : 'all' or a sensor index, default 'all'
        'all' requests https://api.purpleair.com/v1/sensors. Any other value is
        converted with str() and used to request
        https://api.purpleair.com/v1/sensors/<sensor_index>/history.
    my_fields : str, default ''
        Comma-separated field names, e.g. 'temperature,pm2.5_atm'. When empty,
        the 'fields' parameter is left out of the query entirely.
    other_params : dict, optional
        Additional query parameters, merged after 'fields' so a 'fields' key in
        this dict takes precedence. See the following for options:
        https://api.purpleair.com/#api-sensors-get-sensor-history
    timeout : float, default 60
        Seconds handed to requests.get as its timeout, so a stalled connection
        raises instead of blocking the kernel indefinitely.

    Returns
    -------
    The object returned by requests.get. The HTTP status is not checked here;
    callers decide how to handle error responses.
    """
    # my_url is assigned the URL we are going to send our request to. The history
    # path needs its own '/' separators around the sensor index.
    my_url = 'https://api.purpleair.com/v1/sensors'
    if sensor_index != 'all':
        my_url = my_url + '/' + str(sensor_index) + '/history'

    # my_headers carries our API read key, read from the module-level variable.
    my_headers = {'X-API-Key':my_api_read_key}

    # Only send 'fields' when the caller actually asked for some; an empty string
    # would otherwise go out as a blank 'fields=' query parameter.
    my_params = {}
    if my_fields:
        my_params['fields'] = my_fields

    # A None default avoids sharing one mutable dict between calls.
    if other_params:
        my_params.update(other_params)

    # Create and send the request, then hand the response back untouched.
    return requests.get(my_url, headers=my_headers, params=my_params, timeout=timeout)

In [3]:
# Collect the sensors that fall inside a bounding box around the area of interest:
# https://community.purpleair.com/t/finding-large-amounts-of-sensor-indexes-bounding-box-api-call/5688
# The two corners MUST be the northwest point followed by the southeast point.
# This is a rough (inexact) box around the Houston metro area, as [lat, lng] pairs.
houmetro_bbox = [[30.818865037486454, -96.7550267322917],[28.579254914574868, -93.89767804917469]]

# Unpack the corners once so each query parameter is named explicitly.
(nw_lat, nw_lng), (se_lat, se_lng) = houmetro_bbox
houmetro_params = dict(
    nwlat=nw_lat,
    nwlng=nw_lng,
    selat=se_lat,
    selng=se_lng,
    max_age=2628000,  # noted by the author as 30 days
)

# Fields requested for every sensor, sent as one comma-separated string.
sensor_fields = ','.join([
    'name',
    'location_type',
    'latitude',
    'longitude',
    'date_created',
    'altitude',
    'position_rating',
])

houmetro_bbox_sensors = getSensors(
    sensor_index='all',
    my_fields=sensor_fields,
    other_params=houmetro_params,
)

In [None]:
# Show the parsed JSON body of the bounding-box response as the cell output, for a
# quick visual check before it is turned into a table.
houmetro_bbox_sensors.json()

In [None]:
# Turn the bounding-box response into a table, export it so it can be mapped and
# checked against the Houston metro area, then keep only the sensors that fall
# inside the metro counties and export age-based subsets.

# Stop with a clear HTTP error if the request was rejected, instead of failing on
# a missing 'data' key further down.
houmetro_bbox_sensors.raise_for_status()

# Parse the response body once and reuse it for both the rows and the column names.
sensors_payload = houmetro_bbox_sensors.json()
houmetro_bbox_sensors_df = pd.DataFrame(sensors_payload['data'], columns=sensors_payload['fields'])
# date_created is interpreted as seconds since the Unix epoch.
houmetro_bbox_sensors_df['date'] = pd.to_datetime(houmetro_bbox_sensors_df['date_created'],unit='s')
houmetro_bbox_sensors_df.to_csv('../GIS/purpleair/houmetro_bbox_sensors.csv',index=False)

# Build point geometries from longitude (x) and latitude (y). The CRS is declared
# up front with an authority string; the legacy {'init': 'epsg:4269'} dict form is
# deprecated. NOTE(review): EPSG:4269 was presumably chosen to match the county
# shapefile used below -- confirm with houmetro_shp.crs.
houmetro_bbox_points = gpd.GeoDataFrame(
    houmetro_bbox_sensors_df,
    geometry=gpd.points_from_xy(houmetro_bbox_sensors_df.longitude,
                                houmetro_bbox_sensors_df.latitude),
    crs='EPSG:4269',
)

#ok looks good but is actually too inclusive, so filter to just Houston Metro
houmetro_cntys = ['48157','48407','48473','48167','48201','48291','48071','48039','48015','48339']
cnty_shp = gpd.read_file('https://www2.census.gov/geo/tiger/TIGER2024/COUNTY/tl_2024_us_county.zip')
# rename() returns a new frame here; renaming the .loc slice in place can trigger
# pandas' SettingWithCopyWarning.
houmetro_shp = (
    cnty_shp.loc[cnty_shp['GEOID'].isin(houmetro_cntys)]
    .rename(columns={'NAME':'cnty_nm','GEOID':'cnty_fips'})
)

# Keep the sensors located within one of the metro counties, tag each with its
# county FIPS code and name, then drop the columns that are only needed for the join.
houmetro_sensors = (
    gpd.sjoin(houmetro_bbox_points,
              houmetro_shp[['cnty_fips','cnty_nm','geometry']],
              predicate='within')
    .drop(columns=['geometry','index_right'])
)

#export all of them
houmetro_sensors.to_csv('../GIS/purpleair/houmetro-pa-sensors.csv')


def _export_created_before(sensors, cutoff, csv_path):
    """Write the rows of `sensors` whose 'date' is before `cutoff` to `csv_path` and return them."""
    older = sensors.loc[sensors['date'] < pd.to_datetime(cutoff)]
    older.to_csv(csv_path)
    return older


# The cutoffs are fixed calendar dates, so "at least N years old" is measured
# relative to 2025-01-01; update them when re-running at a later date.

#export ones that are at least 3 years old
atleast_3yr = _export_created_before(houmetro_sensors, '2022-01-01',
                                     '../GIS/purpleair/houmetro-pa-sensors-atleast_3yr.csv')

#export ones that are at least 2 years old
atleast_2yr = _export_created_before(houmetro_sensors, '2023-01-01',
                                     '../GIS/purpleair/houmetro-pa-sensors-atleast_2yr.csv')

#export ones that are at least 1 year old
atleast_1yr = _export_created_before(houmetro_sensors, '2024-01-01',
                                     '../GIS/purpleair/houmetro-pa-sensors-atleast_1yr.csv')

In [None]:
# Row counts at each stage, one per line: everything in the bounding box, the
# county-filtered set, then the 3-, 2- and 1-year age subsets.
for sensor_table in (houmetro_bbox_sensors_df, houmetro_sensors, atleast_3yr, atleast_2yr, atleast_1yr):
    print(len(sensor_table))

In [None]:
# Preview the first rows of the county-filtered sensor table as the cell output.
houmetro_sensors.head()