In [5]:
import requests
import pandas as pd
import os

In [2]:
# Load the table that pairs each airport with its closest weather station.
input_file = "Airport_with_Nearest_Station.csv"
df = pd.read_csv(filepath_or_buffer=input_file)

# Make sure the download directory exists; an existing one is left as-is.
output_folder = "climate"
os.makedirs(name=output_folder, exist_ok=True)

In [3]:
# Base URL for data download; the placeholders are (year, station_id, year).
base_url = "https://www.ncei.noaa.gov/oa/local-climatological-data/v2/access/{}/LCD_{}_{}.csv"

# Seconds to wait on a single request before giving up. Without a timeout a
# stalled connection blocks the cell until it is interrupted by hand.
REQUEST_TIMEOUT_SECONDS = 30

# Failed downloads, stored as {"Airport": ..., "Year": ...} records.
failed_downloads = []

# Download one file per (year, airport). A single Session reuses the
# connection for the repeated requests to the same host.
with requests.Session() as session:
    for year in range(2018, 2021):
        # Iterate the two needed columns directly: no per-row Series is built
        # and each column keeps its own dtype.
        for station_id, airport_name in zip(df["Closest_Station"], df["AIRPORT"]):
            # Build URL and file name
            url = base_url.format(year, station_id, year)
            filename = os.path.join(output_folder, f"LCD_{airport_name}_{year}.csv")

            # A network-level error (timeout, DNS, connection reset) is recorded
            # as a failure instead of aborting the remaining downloads.
            try:
                response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
            except requests.RequestException as exc:
                print(f"Failed to retrieve data for {airport_name} in {year}. Error: {exc}")
                failed_downloads.append({"Airport": airport_name, "Year": year})
                continue

            # Save the payload on success; otherwise log and record the failure.
            if response.status_code == 200:
                with open(filename, 'wb') as file:
                    file.write(response.content)
            else:
                print(f"Failed to retrieve data for {airport_name} in {year}. Status code: {response.status_code}")
                failed_downloads.append({"Airport": airport_name, "Year": year})

Failed to retrieve data for ACY in 2018. Status code: 404
Failed to retrieve data for ADK in 2018. Status code: 404
Failed to retrieve data for AEX in 2018. Status code: 404
Failed to retrieve data for AKN in 2018. Status code: 404
Failed to retrieve data for BGR in 2018. Status code: 404
Failed to retrieve data for BIS in 2018. Status code: 404
Failed to retrieve data for DAB in 2018. Status code: 404
Failed to retrieve data for DCA in 2018. Status code: 404
Failed to retrieve data for GFK in 2018. Status code: 404
Failed to retrieve data for HNL in 2018. Status code: 404


KeyboardInterrupt: 

In [6]:
# Write the failed (Airport, Year) records to CSV, or report that there were none.
if not failed_downloads:
    print("All files downloaded successfully.")
else:
    failed_df = pd.DataFrame(data=failed_downloads)
    failed_df.to_csv(path_or_buf="failed_downloads.csv", index=False)
    print("Failed downloads saved to failed_downloads.csv")

NameError: name 'failed_downloads' is not defined

In [3]:
# Load the corrected airport / station_id pairs.
data = pd.read_csv(filepath_or_buffer='match.csv')

# Build a lookup keyed by the first column with the second column as value.
airport_dict = {
    key: value
    for key, value in zip(data.iloc[:, 0], data.iloc[:, 1])
}

# Lookup helper used when retrying failed downloads.
def id(airport):
    """Return the value stored for ``airport`` in ``airport_dict``.

    Returns ``None`` when ``airport`` is not a key of ``airport_dict``.

    NOTE(review): this name shadows Python's built-in ``id``. If this cell has
    not been run, calls to ``id(...)`` resolve to the built-in and return an
    unrelated integer instead of raising.
    """
    if airport in airport_dict:
        return airport_dict[airport]
    return None

In [4]:
# Load the failed downloads to retry with the new station ids.
# NOTE(review): the save cell writes "failed_downloads.csv" while this cell
# reads "fails.csv" - confirm which file is the intended input.
fail_file = "fails.csv"
fails = pd.read_csv(filepath_or_buffer=fail_file)

In [9]:
# Seconds to wait on a single request before giving up. Without a timeout a
# stalled connection blocks the cell until it is interrupted by hand.
REQUEST_TIMEOUT_SECONDS = 30

# Retries that failed again, stored as {"Airport": ..., "Year": ...} records.
retry_failed_downloads = []

# Retry every failed (airport, year) with the replacement station id.
# A single Session reuses the connection for the repeated requests.
with requests.Session() as session:
    for airport, year in zip(fails["Airport"], fails["Year"]):
        # Read airport_dict directly instead of calling id(): if the helper's
        # cell was not run, id() is the built-in and returns an unrelated
        # integer that would be formatted into the URL without any error.
        station_id = airport_dict.get(airport)

        # No replacement station (missing key or empty cell): record the
        # failure without sending a request that cannot succeed.
        if pd.isna(station_id):
            print(f"Failed to retry for {airport} in {year}. No station id available")
            retry_failed_downloads.append({"Airport": airport, "Year": year})
            continue

        # Build URL and file name
        url = base_url.format(year, station_id, year)
        filename = os.path.join(output_folder, f"LCD_{airport}_{year}.csv")

        # A network-level error (timeout, DNS, connection reset) is recorded
        # as a failure instead of aborting the remaining retries.
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as exc:
            print(f"Failed to retry for {airport} in {year}. Error: {exc}")
            retry_failed_downloads.append({"Airport": airport, "Year": year})
            continue

        # Save the payload on success; otherwise log and record the failure.
        if response.status_code == 200:
            with open(filename, 'wb') as file:
                file.write(response.content)
        else:
            print(f"Failed to retry for {airport} in {year}. Status code: {response.status_code}")
            retry_failed_downloads.append({"Airport": airport, "Year": year})

Failed to retry for PIH in 2018. Status code: 404
