In [54]:
import requests
import pandas as pd
import numpy as np
from io import StringIO
from datetime import datetime, timedelta

In [43]:
# Build every calendar day of a year as a zero-padded 'MMDD' string
# ('0101' ... '1231'), used to construct the per-day download URLs.
#
# A leap year is used as the reference so that '0229' is part of the list.
# Parsing '0101' with '%m%d' alone defaults to the year 1900, which has no
# Feb 29, so leap-day files (e.g. 2020-02-29) were never requested.
# For non-leap years the '0229' entry does not correspond to a real date;
# consumers should expect that request to fail or skip it.
REFERENCE_LEAP_YEAR = 2020

day_var = datetime(REFERENCE_LEAP_YEAR, 1, 1)
end_date = datetime(REFERENCE_LEAP_YEAR, 12, 31)
days = []
while day_var <= end_date:
    days.append(day_var.strftime('%m%d'))
    day_var += timedelta(days=1)

In [None]:
# Download the AirNow daily data file for every day of the listed years, keep
# only the monitors that appear in the AQS -> climate-division lookup table,
# and save the combined result to 'air_data_all.csv'.
DAILY_COLUMNS = ['Date', 'Parameters', 'Measurement', 'AQS_ID']
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled connection hangs the run

# Read the IDs as text so identifiers with leading zeros are not converted to
# integers; they are compared against text IDs from the daily files below.
AQS_to_Division = pd.read_csv('AQS_to_Climate_Division.csv', dtype={'AQS_ID': str})
AQS_to_Division['AQS_ID'] = AQS_to_Division['AQS_ID'].astype(str)

years = ['2019','2020','2021','2022','2023']

# Collect the per-day frames and concatenate once at the end; concatenating
# inside the loop re-copies all accumulated rows on every iteration.
daily_frames = []

with requests.Session() as session:  # reuse one connection for all requests
    for year in years:
        for day in days:
            # Skip combinations that are not real calendar dates
            # (e.g. '0229' in a non-leap year).
            try:
                datetime.strptime(f'{year}{day}', '%Y%m%d')
            except ValueError:
                continue

            url = f'https://files.airnowtech.org/airnow/{year}/{year}{day}/daily_data_v2.dat'
            try:
                response = session.get(url, timeout=REQUEST_TIMEOUT)
                response.raise_for_status()
            except requests.exceptions.HTTPError as e:
                print(f'Error on {year}{day}; Status Code: {e.response.status_code}; Reason: {e.response.reason}')
                continue
            except requests.exceptions.RequestException as e:
                # Connection errors / timeouts: report and move on instead of
                # aborting the whole multi-year download.
                print(f'Error on {year}{day}; {type(e).__name__}: {e}')
                continue

            air_data = pd.read_csv(StringIO(response.text),
                                   sep='|',
                                   header=None,
                                   names=DAILY_COLUMNS,
                                   usecols=[0,3,5,12],
                                   # keep IDs as text so they match the lookup table
                                   dtype={'AQS_ID': str})
            air_data = air_data[air_data['AQS_ID'].isin(AQS_to_Division['AQS_ID'])]
            daily_frames.append(air_data)
            print(f'File {year}{day} successful')

if daily_frames:
    data_all = pd.concat(daily_frames, ignore_index=True)
else:
    # Nothing could be downloaded: keep an empty frame with the expected columns.
    data_all = pd.DataFrame(columns=DAILY_COLUMNS)

data_all.to_csv('air_data_all.csv', index=False, encoding='utf-8')

In [None]:
# Aggregate the station-level readings to one row per (Division, Date) with
# one column per pollutant, holding the mean of all stations in the division.
POLLUTANT_NAMES = {'CO-8hr': 'CO', 'OZONE-8HR': 'OZONE', 'PM10-24hr': 'PM10',
                   'PM2.5-24hr': 'PM2.5', 'SO2-24HR': 'SO2'}
OUTPUT_COLUMNS = ['Date', 'Division', 'CO', 'NO2', 'OZONE', 'PM10', 'PM2.5', 'SO2']

data_all['Date'] = pd.to_datetime(data_all['Date'])
# Make sure the values are numeric before averaging; the column can arrive
# as object dtype depending on how the frames were combined.
data_all['Measurement'] = pd.to_numeric(data_all['Measurement'])
data_all = data_all.merge(AQS_to_Division, on='AQS_ID', how='left')

data_all = data_all.pivot_table(index=['Date', 'Division'],
                                columns='Parameters',
                                values='Measurement',
                                aggfunc='mean')
data_all.columns.name = None

data_all = (
    data_all
    .round(1)
    .reset_index()
    .sort_values(by=['Division', 'Date'])
    .reset_index(drop=True)
    # 1-hour ozone is not kept; ignore it if it is absent from the data
    .drop(columns='OZONE-1HR', errors='ignore')
    .rename(columns=POLLUTANT_NAMES)
    # NO2 is always written as an all-NaN column
    .assign(NO2=np.nan)
    # Fix the column order; a pollutant with no readings becomes an all-NaN
    # column instead of raising KeyError
    .reindex(columns=OUTPUT_COLUMNS)
)
# 'Date' is already datetime64 here (converted before the pivot), so no
# second conversion is needed.


In [None]:
# Append the rows loaded from 'airnow_api_final.csv' to the division-level
# table built from the daily files.
airnow_api_final = pd.read_csv('airnow_api_final.csv')
airnow_data_final = pd.concat([data_all, airnow_api_final], ignore_index=True)

# Reduce the timestamps to plain calendar dates and keep 'Date' as the
# first column of the combined table.
date_only = pd.to_datetime(airnow_data_final['Date']).dt.date
airnow_data_final = airnow_data_final.drop(columns='Date')
airnow_data_final.insert(0, 'Date', date_only)

In [65]:
# Write the combined table to disk as UTF-8 CSV, without the row index.
airnow_data_final.to_csv(
    'AirNow_Data_Final.csv',
    encoding='utf-8',
    index=False,
)