In [1]:
import requests
import pandas as pd
import geopandas as gpd
import os
import time
import json

# Here we store our API read key in a string variable that we can reference later.
my_api_read_key = os.environ['PURPLEAIR_KEY']



In [2]:
# This function will be used to gather PurpleAir sensors within a given bounding box.
def getSensors(sensor_index='all',my_fields='',other_params={}):
    
    # my_url is assigned the URL we are going to send our request to.
    if sensor_index != 'all':
        my_url = 'https://api.purpleair.com/v1/sensors/' + str(sensor_index) + 'history'
    else:
        my_url = 'https://api.purpleair.com/v1/sensors'

    # my_headers is assigned the context of our request we want to make. In this case
    # we will pass through our API read key using the variable created above.
    my_headers = {'X-API-Key':my_api_read_key}

    # my_params is assigned a list of fields of data we are requesting. Excluding the
    # fields parameter will collect all available fields.
    # example input structure: 'temperature,pm2.5_atm'
    field_param = {'fields':my_fields}
    
    #other_params should be a dict, see the following for options:
    # https://api.purpleair.com/#api-sensors-get-sensor-history
    my_params = {**field_param,**other_params}

    # This line creates and sends the request and then assigns its response to the
    # variable, r.
    r = requests.get(my_url, headers=my_headers, params=my_params)

    # We then return the response we received.
    return r

In [3]:
#need to get a bunch of sensor IDs based on bounding box
#https://community.purpleair.com/t/finding-large-amounts-of-sensor-indexes-bounding-box-api-call/5688
#The corners used MUST be the northwest and southeast points of the bounding box
#this is an inexact houston metro bounding box
houmetro_bbox = [[30.818865037486454, -96.7550267322917],[28.579254914574868, -93.89767804917469]]

houmetro_params = {'nwlat':houmetro_bbox[0][0],
                   'nwlng':houmetro_bbox[0][1],
                   'selat':houmetro_bbox[1][0],
                   'selng':houmetro_bbox[1][1],
                   'max_age':2628000, #30 days
                   }

houmetro_bbox_sensors = getSensors(sensor_index='all',my_fields='latitude,longitude,date_created',other_params=houmetro_params)

In [4]:
houmetro_bbox_sensors.json()

{'api_version': 'V1.0.14-0.0.58',
 'time_stamp': 1735939999,
 'data_time_stamp': 1735939982,
 'max_age': 2628000,
 'firmware_default_version': '7.02',
 'fields': ['sensor_index', 'date_created', 'latitude', 'longitude'],
 'data': [[2386, 1501532274, 29.532282, -95.07647],
  [3033, 1503509417, 29.955063, -95.73739],
  [3298, 1505341873, 29.58335, -95.12009],
  [3396, 1505411639, 29.571447, -95.12401],
  [3777, 1507317354, 29.762896, -95.70764],
  [4301, 1510798410, 29.853832, -96.52854],
  [6752, 1517342572, 29.71706, -95.30876],
  [25999, 1549304400, 30.053808, -95.494644],
  [27009, 1550602753, 29.73305, -95.585686],
  [27821, 1551480877, 29.77074, -95.370056],
  [27905, 1551713013, 29.790651, -95.113846],
  [28851, 1552580564, 29.73305, -95.585686],
  [30593, 1555513357, 29.557081, -95.74033],
  [31163, 1556727613, 29.78264, -95.37274],
  [34399, 1562872551, 29.762451, -95.46637],
  [34431, 1562872683, 29.762514, -95.46598],
  [46237, 1579132136, 29.70283, -95.443855],
  [47879, 1580

In [None]:
#turn this into a df so i can export and map and make sure it seems to include all Houston metro
houmetro_bbox_sensors_df = pd.DataFrame(houmetro_bbox_sensors.json()['data'], columns=houmetro_bbox_sensors.json()['fields'])
houmetro_bbox_sensors_df['date'] = pd.to_datetime(houmetro_bbox_sensors_df['date_created'],unit='s')
houmetro_bbox_sensors_df.to_csv('../GIS/purpleair/houmetro_bbox_sensors.csv',index=False)
houmetro_bbox_points = gpd.GeoDataFrame(houmetro_bbox_sensors_df, 
                                        geometry=gpd.points_from_xy(houmetro_bbox_sensors_df.longitude,
                                                                    houmetro_bbox_sensors_df.latitude))

#ok looks good but is actually too inclusive, so i'm going to filter to just Houston Metro
houmetro_cntys = ['48157','48407','48473','48167','48201','48291','48071','48039','48015','48339']
cnty_shp = gpd.read_file('https://www2.census.gov/geo/tiger/TIGER2024/COUNTY/tl_2024_us_county.zip')
houmetro_shp = cnty_shp.loc[cnty_shp['GEOID'].isin(houmetro_cntys)]
houmetro_shp.rename(columns={'NAME':'cnty_nm','GEOID':'cnty_fips'},inplace=True)

#print(houmetro_shp.crs)
houmetro_bbox_points.crs = {'init': 'epsg:4269'}

houmetro_sensors = gpd.sjoin(houmetro_bbox_points, houmetro_shp[['cnty_fips','cnty_nm','geometry']], predicate = 'within')

houmetro_sensors.drop(columns=['geometry','index_right'],inplace=True)

#export all of them
houmetro_sensors.to_csv('../GIS/purpleair/houmetro-pa-sensors.csv')

#export ones that are at least 3 years old
atleast_3yr = houmetro_sensors.loc[houmetro_sensors['date']<pd.to_datetime('2022-01-01')]
atleast_3yr.to_csv('../GIS/purpleair/houmetro-pa-sensors-atleast_3yr.csv')

#export ones that are at least 2 years old
atleast_2yr = houmetro_sensors.loc[houmetro_sensors['date']<pd.to_datetime('2023-01-01')]
atleast_2yr.to_csv('../GIS/purpleair/houmetro-pa-sensors-atleast_2yr.csv')

#export ones that are at least 1 years old
atleast_1yr = houmetro_sensors.loc[houmetro_sensors['date']<pd.to_datetime('2024-01-01')]
atleast_1yr.to_csv('../GIS/purpleair/houmetro-pa-sensors-atleast_1yr.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  houmetro_shp.rename(columns={'NAME':'cnty_nm','GEOID':'cnty_fips'},inplace=True)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4269 +type=crs
Right CRS: EPSG:4269

  houmetro_sensors = gpd.sjoin(houmetro_bbox_points, houmetro_shp[['cnty_fips','cnty_nm','geometry']], predicate = 'within')


In [14]:
print(len(houmetro_bbox_sensors_df))
print(len(houmetro_sensors))
print(len(atleast_3yr))
print(len(atleast_2yr))
print(len(atleast_1yr))

103
96
38
59
74


In [11]:
houmetro_sensors.head()

Unnamed: 0,sensor_index,date_created,latitude,longitude,date,cnty_fips,cnty_nm
0,2386,1501532274,29.532282,-95.07647,2017-07-31 20:17:54,48167,Galveston
1,3033,1503509417,29.955063,-95.73739,2017-08-23 17:30:17,48201,Harris
2,3298,1505341873,29.58335,-95.12009,2017-09-13 22:31:13,48201,Harris
3,3396,1505411639,29.571447,-95.12401,2017-09-14 17:53:59,48201,Harris
4,3777,1507317354,29.762896,-95.70764,2017-10-06 19:15:54,48201,Harris
