In [1]:
import pandas as pd

# AQI .csv file

In [34]:
# Load the 2023 daily county-level AQI file and inspect its columns.
df = pd.read_csv('daily_aqi_by_county_2023.csv')
print(df.columns)
# Only the last expression of a cell is displayed, so the state list on the next
# line is computed but not shown; the cell output is the distinct AQI categories.
df['State Name'].drop_duplicates()
df['Category'].drop_duplicates()
# All states plus DC and Mexico included
# Note that 2023 files are only updated to around October

Index(['State Name', 'county Name', 'State Code', 'County Code', 'Date', 'AQI',
       'Category', 'Defining Parameter', 'Defining Site',
       'Number of Sites Reporting'],
      dtype='object')


0                                 Good
46                            Moderate
482     Unhealthy for Sensitive Groups
1004                         Unhealthy
1005                    Very Unhealthy
6794                         Hazardous
Name: Category, dtype: object

In [21]:
# Preview the first five rows of the AQI data.
df.head()

Unnamed: 0,State Name,county Name,State Code,County Code,Date,AQI,Category,Defining Parameter,Defining Site,Number of Sites Reporting
0,Alabama,Baldwin,1,3,2023-01-10,35,Good,PM2.5,01-003-0010,1
1,Alabama,Baldwin,1,3,2023-01-11,28,Good,PM2.5,01-003-0010,1
2,Alabama,Baldwin,1,3,2023-01-12,23,Good,PM2.5,01-003-0010,1
3,Alabama,Baldwin,1,3,2023-01-13,18,Good,PM2.5,01-003-0010,1
4,Alabama,Baldwin,1,3,2023-01-14,20,Good,PM2.5,01-003-0010,1


In [119]:
def aqi_finder(file: str, date: str, state_name: str) -> str:
    '''
    Summarises the air quality index (AQI) recorded in a state on a given day.

    Inputs:
        file: path to a daily AQI-by-county .csv file from aqs.epa.gov (str)
        date: day to look up, formatted as YYYY-MM-DD (str)
        state_name: state name as spelled in the 'State Name' column (str)
    Returns:
        str: a sentence giving the number of county rows found, their mean AQI
        and how many rows fall in each AQI category. If the date, the state, or
        the date/state combination is absent from the file, an apology message
        is returned instead.
    '''
    df = pd.read_csv(file)
    # Boolean masks marking the rows that match the requested date and state
    chosen_date = df['Date'] == date
    chosen_state = df['State Name'] == state_name

    # If date/state not found anywhere in the file
    if not chosen_date.any():
        return f'Sorry, {date} is not recorded in this file.'
    elif not chosen_state.any():
        return f'Sorry, {state_name} is not recorded in this file. Check for any typos!'

    # Keep only the rows for this state on this date
    day_rows = df[chosen_date & chosen_state]

    # The date and the state can each appear in the file without ever appearing
    # on the same row; without this guard the summary would report a NaN mean.
    if day_rows.empty:
        return f'Sorry, {state_name} has no AQI recorded on {date} in this file.'

    # Series.mean() skips missing values, unlike ndarray.mean()
    aqi_mean = day_rows['AQI'].mean()

    # Count of each category recorded in the filtered data, e.g. "10 Good, 1 Moderate"
    condition = day_rows['Category'].value_counts()
    condition_count = ", ".join(
        f"{count} {category}" for category, count in condition.items() if count != 0
    )

    # One row per county per day in this file, so the row count is the county count
    number_counties = len(day_rows)

    # Adjacent f-strings are concatenated; a backslash continuation inside a
    # single literal would embed the next line's indentation in the message.
    return (
        f'The AQI in {state_name} on {date} was recorded for {number_counties} {state_name} counties. '
        f'The average AQI was {aqi_mean:.1f}, so there were {condition_count} counties.'
    )
            
        


In [134]:
# Example inputs for aqi_finder: data file, date (YYYY-MM-DD) and state name
file1 = 'daily_aqi_by_county_2023.csv'
date1 = '2023-01-19'
state1 = 'Alabama'

In [135]:
# Summarise the AQI for the example file, date and state
aqi_finder(file1, date1, state1)

'The AQI in Alabama on 2023-01-19 was recorded for 11 Alabama counties.              The average AQI was 29.7, so there were 10 Good, 1 Moderate counties.'

# Temperature .csv file

In [122]:
# Load the 2023 daily temperature file and inspect its columns.
file2 = 'daily_TEMP_2023.csv'
df2 = pd.read_csv(file2)
print(df2.columns)
# Only the last expression of a cell is displayed; the two checks below are
# evaluated but their results are not shown.
df2['State Name'].drop_duplicates() # Missing 4 states: Delaware, NY, NJ, Vermont
df2['Units of Measure'].drop_duplicates() # All recorded in Fahrenheit
df2['County Name'].head()

Index(['State Code', 'County Code', 'Site Num', 'Parameter Code', 'POC',
       'Latitude', 'Longitude', 'Datum', 'Parameter Name', 'Sample Duration',
       'Pollutant Standard', 'Date Local', 'Units of Measure', 'Event Type',
       'Observation Count', 'Observation Percent', 'Arithmetic Mean',
       '1st Max Value', '1st Max Hour', 'AQI', 'Method Code', 'Method Name',
       'Local Site Name', 'Address', 'State Name', 'County Name', 'City Name',
       'CBSA Name', 'Date of Last Change'],
      dtype='object')


0    Escambia
1    Escambia
2    Escambia
3    Escambia
4    Escambia
Name: County Name, dtype: object

In [130]:
def temp_finder(file: str, date: str, state_name: str) -> str:
    '''
    Summarises the temperature recorded in a state on a given day.

    Inputs:
        file: path to a daily temperature .csv file from aqs.epa.gov (str)
        date: day to look up, formatted as YYYY-MM-DD (str)
        state_name: state name as spelled in the 'State Name' column (str)
    Returns:
        str: a sentence giving the number of distinct counties with a record
        and the mean of the '1st Max Value' column over the matching rows. If
        the date, the state, or the date/state combination is absent from the
        file, an apology message is returned instead.
    '''
    df = pd.read_csv(file)
    # Boolean masks marking the rows that match the requested date and state
    chosen_date = df['Date Local'] == date
    chosen_state = df['State Name'] == state_name

    # If date/state not found anywhere in the file
    if not chosen_date.any():
        return f'Sorry, {date} is not recorded in this file.'
    elif not chosen_state.any():
        return f'Sorry, {state_name} is not recorded in this file. Note that the following have no available data: Delaware, New York, New Jersey, Vermont'

    # Keep only the rows for this state on this date
    day_rows = df[chosen_date & chosen_state]

    # The date and the state can each appear in the file without ever appearing
    # on the same row; without this guard the summary would report a NaN mean.
    if day_rows.empty:
        return f'Sorry, {state_name} has no temperature recorded on {date} in this file.'

    # Mean over every matching row of '1st Max Value' (presumably the daily
    # maximum reading - confirm against the EPA file documentation).
    # Series.mean() skips missing values, unlike ndarray.mean().
    temp_mean = day_rows['1st Max Value'].mean()

    # The file has 'Site Num' and 'POC' columns, so a county can contribute
    # several rows per day; count distinct county names rather than rows.
    number_counties = day_rows['County Name'].nunique()

    # Adjacent f-strings are concatenated; a backslash continuation inside a
    # single literal would embed the next line's indentation in the message.
    return (
        f'The temperature in {state_name} on {date} was recorded for {number_counties} {state_name} counties. '
        f'The average temperature was {temp_mean:.1f} degrees Fahrenheit across {number_counties} counties.'
    )
            

In [131]:
# Summarise the temperature recorded in California on 2023-09-03
temp_finder(file2, '2023-09-03', 'California')

'The temperature in California on 2023-09-03 was recorded for 1 California counties.              The average temperature was 81.0 degrees Farenheit across 1 counties.'