In [1]:
import os
import requests
import csv
import codecs
import calendar
from datetime import datetime, timedelta

In [2]:
def fetch(station_ids, start_date, end_date):
    """Download ASOS observations for the given stations and save them as CSV.

    The request goes to the Iowa State mesonet ``asos.py`` endpoint and the
    response is streamed row by row. Rows whose ``tmpf`` field equals ``'M'``
    and rows whose column count differs from the header's are dropped.

    The output file is ``stations_<start>_<end>_filtered.csv`` in the current
    working directory. If that file already exists nothing is downloaded.
    Rows are first written to ``<output>.part`` and renamed into place only
    after the whole response has been processed, so an interrupted download
    never leaves a truncated file that a later call would mistake for a
    finished one.

    Args:
        station_ids: Sequence of station identifier strings; joined with
            commas into the ``station`` query parameter.
        start_date: Object providing ``year``, ``month``, ``day`` and
            ``strftime`` (e.g. ``datetime``); sent as ``year1/month1/day1``.
        end_date: Same kind of object; sent as ``year2/month2/day2``.

    Returns:
        None. Progress and error messages are printed.

    Raises:
        Any exception raised by ``requests.get`` or while streaming/writing
        is propagated after the partial output file has been removed.
    """
    localfn = f"stations_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_filtered.csv"
    if os.path.isfile(localfn):
        print(f"- File Already Exist: {localfn}")
        return
    print(f"+ Downloading data for stations {', '.join(station_ids)} from {start_date} to {end_date}")

    # Construct the URI to download data
    stations_str = ",".join(station_ids)
    uri = (
        "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
        f"station={stations_str}&"
        "data=tmpf,dwpf,relh,drct,sknt,p01i,alti,mslp,vsby,gust,"
        "skyc1,skyc2,skyc3,skyc4,skyl1,skyl2,skyl3,skyl4,wxcodes,"
        "ice_accretion_1hr,ice_accretion_3hr,ice_accretion_6hr,"
        "peak_wind_gust,peak_wind_drct,peak_wind_time,feel,metar,snowdepth&"
        f"year1={start_date.year}&month1={start_date.month}&day1={start_date.day}&"
        f"year2={end_date.year}&month2={end_date.month}&day2={end_date.day}&"
        "tz=UTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=yes"
    )

    # Rows are staged here and only renamed to localfn once complete.
    partial_fn = f"{localfn}.part"

    # Stream the response so the whole payload is never held in memory.
    with requests.get(uri, stream=True, timeout=300) as res:
        if res.status_code != 200:
            print(f"Failed to download data: {res.status_code}")
            return

        # iter_lines() yields bytes; decode lazily and parse as CSV.
        decoded_stream = codecs.iterdecode(res.iter_lines(), 'utf-8')
        reader = csv.reader(decoded_stream)

        # An empty body would make a bare next() raise StopIteration.
        headers = next(reader, None)
        if headers is None:
            print("Error: empty response; no header row received.")
            return

        # Find the index of 'tmpf' in headers
        try:
            tmpf_index = headers.index('tmpf')
        except ValueError:
            print("Error: 'tmpf' column not found in headers.")
            return

        row_count = 0
        try:
            with open(partial_fn, "w", newline='', encoding="utf-8") as fh:
                writer = csv.writer(fh)
                writer.writerow(headers)

                for row in reader:
                    # Skip malformed rows (wrong column count) and rows
                    # whose temperature is the missing marker 'M'.
                    if len(row) == len(headers) and row[tmpf_index] != 'M':
                        writer.writerow(row)
                        row_count += 1

            # Publish the finished file atomically under its final name.
            os.replace(partial_fn, localfn)
        except BaseException:
            # Also covers KeyboardInterrupt (kernel interrupt): never leave
            # a truncated file behind, then let the error propagate.
            if os.path.exists(partial_fn):
                os.remove(partial_fn)
            raise

        print(f"+ Data saved to {localfn} with {row_count} rows (excluding rows where tmpf='M').")

In [3]:
def main(year=2024, month=1, station_ids=None):
    """Fetch one calendar month of observations for a set of stations.

    Args:
        year: Four-digit year of the month to download (default 2024).
        month: Month number, 1 = January through 12 = December (default 1).
        station_ids: List of station identifier strings handed to ``fetch``.
            ``None`` selects the built-in list of 14 stations below.

    Returns:
        None. All work is delegated to ``fetch``.
    """
    if station_ids is None:
        station_ids = ['ALB', 'BGM', 'BUF', 'ELM', 'HPN', 'IAG', 'ISP', 'ITH', 'JFK', 'LGA', 'PBG', 'ROC', 'SWF', 'SYR']

    # The requested range starts on the first day of the month.
    start_date = datetime(year, month, 1)

    # monthrange() returns (weekday of day 1, number of days); only the
    # length of the month is needed here.
    num_days = calendar.monthrange(year, month)[1]

    # First day + month length = first day of the following month, which
    # also rolls over correctly from December into the next year.
    end_date = start_date + timedelta(days=num_days)

    # Fetch data for the stations
    fetch(station_ids, start_date, end_date)


In [4]:
# Entry-point guard: call main() only when this code runs as the top-level
# program (__name__ == "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()

+ Downloading data for stations ALB, BGM, BUF, ELM, HPN, IAG, ISP, ITH, JFK, LGA, PBG, ROC, SWF, SYR from 2024-01-01 00:00:00 to 2024-02-01 00:00:00
+ Data saved to stations_20240101_20240201_filtered.csv with 14844 rows (excluding rows where tmpf='M').


**Purpose:**

The script downloads and processes weather data for specified stations and a given month and year.

Data Filtering: Rows whose temperature (`tmpf`) is reported as missing (`M`) are excluded, as are rows whose number of columns does not match the header.

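Output: The cleaned data is saved to a CSV file named after the date range (for example, `stations_20240101_20240201_filtered.csv`). If that file already exists, the download is skipped.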

Customization: Change `year`, `month`, and `station_ids` in `main()` to fetch data for different periods and stations.
