In [80]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import requests
import time
from time import sleep

In [81]:
# Load the PurpleAir sensor export into a dataframe and preview its first row.
air_df = pd.read_csv(filepath_or_buffer='./data/purpleair.csv')
air_df.head(n=1)

Unnamed: 0,id,lat,lon,name,location_type,pm_2.5,temp_f,temp_c,humidity,pressure,...,flagged,downgraded,age,is_owner,10min_avg,30min_avg,1hour_avg,6hour_avg,1day_avg,1week_avg
0,25999,30.053808,-95.494643,Villages of Bridgestone AQI,outside,1.0,50.0,10.0,62.0,1022.37,...,False,False,0,False,2.47,4.19,8.1,19.66,17.15,15.22


In [94]:
# Function to pull the Census FIPS code for a latitude & longitude
# (the census year is supplied by the caller, e.g. 2010)

def lat_lon_query(lat, lon, cen_yr, timeout=30):
    """Look up the Census FIPS code for a coordinate via the FCC block-find API.

    Parameters
    ----------
    lat, lon : number
        Latitude and longitude, sent as the `latitude` / `longitude` query parameters.
    cen_yr : int or str
        Sent as the `censusYear` query parameter.
    timeout : number, optional
        Seconds to wait for the HTTP request before giving up (default 30).

    Returns
    -------
    The value stored under ``['Block']['FIPS']`` in the JSON response, or ``None``
    when the request fails, the status code is not 200, or the response carries
    no block FIPS. Returning ``None`` (instead of raising) lets a row-wise
    ``DataFrame.apply`` finish and leaves a null that can be dropped afterwards.
    """
    url_fips = 'https://geo.fcc.gov/api/census/block/find'
    query = {
        'latitude': lat,
        'longitude': lon,
        'censusYear': cen_yr,
        'showall': 'true',
        'format': 'json',
    }

    try:
        # Without a timeout a single stalled connection would hang the whole run.
        response = requests.get(url_fips, params=query, timeout=timeout)
    except requests.RequestException as err:
        print(f'Error! Request failed for ({lat}, {lon}): {err}')
        return None

    if response.status_code != 200:
        print(f'Error! Status {response.status_code} for ({lat}, {lon})')
        return None

    # Tolerate a missing or null 'Block' entry rather than raising KeyError/TypeError.
    block = response.json().get('Block') or {}
    return block.get('FIPS')

# API: https://geo.fcc.gov/api/census/#!/block/get_block_find

In [95]:
# Look up a 2010 Census FIPS code for every observation in the purpleair data set by
# running lat_lon_query (defined above) on each row's lat/lon pair.

fips = air_df.apply(
    lambda sensor: lat_lon_query(sensor['lat'], sensor['lon'], 2010),
    axis='columns',
)

In [96]:
# Store the FIPS codes from the row-wise lookup (`fips`) in a new `fips_id` column.
# `fips` was produced by air_df.apply(..., axis=1), so it shares air_df's index;
# the assignment mutates air_df in place.
air_df['fips_id'] = fips

In [97]:
# Persist air_df (now carrying the fips_id column) to a new CSV file.
# The dataframe index is written too (to_csv default), which is why an extra
# "Unnamed: 0.1" column appears when this file is read back in.
air_df.to_csv(path_or_buf='./data/purple_air_fips.csv')

In [104]:
# Read the saved CSV (with fips_id) back into its own dataframe and summarise its columns.
pa_fips_df = pd.read_csv(filepath_or_buffer='./data/purple_air_fips.csv')
pa_fips_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14163 entries, 0 to 14162
Data columns (total 25 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     14163 non-null  int64  
 1   id             14163 non-null  int64  
 2   lat            14163 non-null  float64
 3   lon            14163 non-null  float64
 4   name           14163 non-null  object 
 5   location_type  14135 non-null  object 
 6   pm_2.5         14163 non-null  float64
 7   temp_f         14163 non-null  float64
 8   temp_c         14163 non-null  float64
 9   humidity       14163 non-null  float64
 10  pressure       14163 non-null  float64
 11  last_seen      14163 non-null  object 
 12  model          14163 non-null  object 
 13  hidden         14163 non-null  bool   
 14  flagged        14163 non-null  bool   
 15  downgraded     14163 non-null  bool   
 16  age            14163 non-null  int64  
 17  is_owner       14163 non-null  bool   
 18  10min_

In [107]:
# Drop rows whose fips_id is null.
# inplace=True mutates pa_fips_df directly; the surviving rows keep their original
# index labels because no reset_index is applied here.
pa_fips_df.dropna(subset=['fips_id'], inplace=True)

In [138]:
# Normalise fips_id to a 12-character, zero-padded string (the first 12 digits of the
# 15-digit code), matching the 12-character GEOID10 key built for the walkability data.
# NOTE(review): the 12-digit prefix is presumably the census block-group GEOID - confirm.
block_fips = pa_fips_df['fips_id']
if pd.api.types.is_numeric_dtype(block_fips):
    # The CSV round-trip parsed the codes as numbers, which drops leading zeros, so
    # pad back to 15 digits. int64 is requested explicitly because plain `int` can be
    # a 32-bit integer on some platforms (e.g. Windows with NumPy < 2), too small here.
    block_fips = block_fips.astype('int64').astype(str).str.zfill(15)
# Slicing strings is idempotent: re-running this cell on already-truncated 12-character
# values leaves them unchanged instead of re-padding and corrupting them.
pa_fips_df['fips_id'] = block_fips.astype(str).str[:12]
pa_fips_df['fips_id'].head()

0    482015549031
1    530530703082
2    530530703082
3    421298086003
4    060190050003
Name: fips_id, dtype: object

In [139]:
# Preview the first row to confirm the reformatted fips_id column.
pa_fips_df.head(n=1)

Unnamed: 0.1,Unnamed: 0,id,lat,lon,name,location_type,pm_2.5,temp_f,temp_c,humidity,...,downgraded,age,is_owner,10min_avg,30min_avg,1hour_avg,6hour_avg,1day_avg,1week_avg,fips_id
0,0,25999,30.053808,-95.494643,Villages of Bridgestone AQI,outside,1.0,50.0,10.0,62.0,...,False,0,False,2.47,4.19,8.1,19.66,17.15,15.22,482015549031


In [154]:
# Load the walkability dataset for LA and preview its first row.
walk_la_df = pd.read_csv(filepath_or_buffer='./LA_data/walkability_LA.csv')
walk_la_df.head(n=1)

Unnamed: 0.1,Unnamed: 0,OBJECTID,GEOID10,GEOID20,STATEFP,COUNTYFP,TRACTCE,BLKGRPCE,CSA,CSA_Name,...,D5DRI,D5DE,D5DEI,D2A_Ranked,D2B_Ranked,D3B_Ranked,D4A_Ranked,NatWalkInd,Shape_Length,Shape_Area
0,31470,31471,60371870000.0,60371870000.0,6,37,187200,2,348.0,"Los Angeles-Long Beach, CA",...,0.309765,0.000133,0.17882,15,7,16,13,13.333333,3796.740538,552006.8969


In [155]:
# GEOID10 from float to 64-bit int to string, left-padded with zeros to 12 characters.
# int64 is requested explicitly because plain `int` can map to a 32-bit integer on some
# platforms (e.g. Windows with NumPy < 2), which cannot hold an 11-12 digit GEOID.
walk_la_df['GEOID10'] = walk_la_df['GEOID10'].astype('int64').astype(str).str.zfill(12)

In [156]:
# Give GEOID10 the same name as the PurpleAir key (fips_id) so the two frames can be merged on it.
walk_la_df.rename({'GEOID10': 'fips_id'}, axis='columns', inplace=True)

In [157]:
# Preview the first row to confirm the renamed fips_id column.
walk_la_df.head(n=1)

Unnamed: 0.1,Unnamed: 0,OBJECTID,fips_id,GEOID20,STATEFP,COUNTYFP,TRACTCE,BLKGRPCE,CSA,CSA_Name,...,D5DRI,D5DE,D5DEI,D2A_Ranked,D2B_Ranked,D3B_Ranked,D4A_Ranked,NatWalkInd,Shape_Length,Shape_Area
0,31470,31471,60371872002,60371870000.0,6,37,187200,2,348.0,"Los Angeles-Long Beach, CA",...,0.309765,0.000133,0.17882,15,7,16,13,13.333333,3796.740538,552006.8969


In [160]:
# Join the PurpleAir rows to the LA walkability rows on fips_id
# (inner join, the pandas default), then report the merged shape.
air_walk_la = pa_fips_df.merge(walk_la_df, on='fips_id')
air_walk_la.shape

(612, 142)

In [162]:
# Number of distinct (non-null) fips_id values in the merged frame.
air_walk_la['fips_id'].nunique()

496

In [163]:
# Save the merged air-quality / walkability frame for LA.
# The dataframe index is written as well (to_csv default).
air_walk_la.to_csv(path_or_buf='./LA_data/air_walk_merged_LA.csv')