In [None]:
import requests
import pandas as pd
import os
from time import sleep

In [None]:
# Paths used by the rest of the notebook.
input_file = "Airport_with_Nearest_Station.csv"
output_folder = "climate2024"

# Load the airport table; later cells read its "AIRPORT" and
# "Closest_Station" columns row by row.
df = pd.read_csv(filepath_or_buffer=input_file)

# Make sure the download directory exists (a no-op when it is already there).
os.makedirs(name=output_folder, exist_ok=True)

In [None]:
# Base URL for data download; the placeholders are filled with
# (year, station_id, year) in that order.
base_url = "https://www.ncei.noaa.gov/oa/local-climatological-data/v2/access/{}/LCD_{}_{}.csv"

# List to store any failed downloads (one {"Airport", "Year"} dict per failure)
failed_downloads = []

# Retry / network settings
max_retries = 3
retry_delay_seconds = 2
# (connect timeout, read timeout) in seconds; without a timeout a stalled
# server would block the notebook forever.
request_timeout = (10, 60)

# Download data
for year in range(2024, 2025):
    for index, row in df.iterrows():
        station_id = row["Closest_Station"]
        airport_name = row["AIRPORT"]

        # Build URL and file names
        url = base_url.format(year, station_id, year)
        filename = os.path.join(output_folder, f"LCD_{airport_name}_{year}.csv")
        # Data is streamed into a temporary file and only moved to `filename`
        # once complete, so an interrupted download can never be mistaken for
        # a finished one by the "already exists" check below.
        partial_filename = filename + ".part"

        # Check if the file already exists
        if os.path.exists(filename):
            print(f"File for {airport_name} in {year} already exists. Skipping download.")
            continue  # Skip to the next file if it already exists

        # Retry logic: network-level errors are retried, a non-200 HTTP status
        # is recorded as a failure immediately.
        for attempt in range(max_retries):
            try:
                # The context manager releases the connection on every exit path.
                with requests.get(url, stream=True, timeout=request_timeout) as response:
                    if response.status_code != 200:
                        print(f"Failed to retrieve data for {airport_name} in {year}. Status code: {response.status_code}")
                        failed_downloads.append({"Airport": airport_name, "Year": year})
                        break

                    with open(partial_filename, 'wb') as file:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:  # Filter out keep-alive chunks
                                file.write(chunk)

                # Download finished: publish the file under its final name.
                os.replace(partial_filename, filename)
                print(f"Data for {airport_name} in {year} saved locally as {filename}")
                break  # Break the loop if download succeeds
            except requests.exceptions.RequestException as e:
                # RequestException is the base class of all requests errors
                # (connection failures, timeouts, ChunkedEncodingError, ...).
                # Drop whatever was written so far before retrying.
                if os.path.exists(partial_filename):
                    os.remove(partial_filename)

                print(f"Attempt {attempt + 1} failed for {airport_name} in {year}: {e}")
                if attempt < max_retries - 1:
                    sleep(retry_delay_seconds)  # Wait a bit before retrying
                else:
                    print(f"Failed to download data for {airport_name} in {year} after {max_retries} attempts.")
                    failed_downloads.append({"Airport": airport_name, "Year": year})

In [None]:
# Save the failed download attempts.
# The log is written on every run, even when it is empty, so that a file left
# over from an earlier run cannot be mistaken for this run's result and the
# file always exists for anything that reads it afterwards. The explicit
# column list keeps the header present when there are no rows.
failed_df = pd.DataFrame(failed_downloads, columns=["Airport", "Year"])
failed_df.to_csv("failed_downloads.csv", index=False)

if failed_downloads:
    print("Failed downloads saved to failed_downloads.csv")
else:
    print("All files downloaded successfully.")

In [None]:
# Load the replacement airport/station matches.
data = pd.read_csv('match.csv')

# Build a lookup table from the first two columns: values of column 0 become
# the keys (looked up by airport name below), values of column 1 the results.
lookup_keys = data.iloc[:, 0]
lookup_values = data.iloc[:, 1]
airport_dict = {key: value for key, value in zip(lookup_keys, lookup_values)}

# Lookup helper used by the retry cell.
# NOTE(review): this name shadows Python's builtin `id`; it is kept because
# other cells call it by this name.
def id(airport):
    """Return the entry stored for `airport` in `airport_dict`.

    Returns None when `airport` has no entry.
    """
    matched_station = airport_dict.get(airport, None)
    return matched_station

In [None]:
# Retry the failed downloads with the new station ids: load the failure log.
fail_file = "failed_downloads.csv"
if os.path.exists(fail_file):
    fails = pd.read_csv(fail_file)
else:
    # A missing log is treated as "nothing to retry" instead of crashing the
    # notebook; the empty frame keeps the columns the retry loop reads.
    print(f"{fail_file} not found; nothing to retry.")
    fails = pd.DataFrame(columns=["Airport", "Year"])

In [None]:
# Airport/year pairs that still could not be downloaded after the retry
retry_failed_downloads = []

# Retry each failed download using the replacement station id
for index, row in fails.iterrows():
    airport = row["Airport"]
    year = row["Year"]
    station_id = id(airport)  # notebook-level lookup helper, not the builtin

    # No replacement station is known for this airport. Formatting a missing
    # id into the URL would only request a bogus "LCD_None_..." file, so
    # record the failure and move on.
    if station_id is None or pd.isna(station_id):
        print(f"No replacement station id for {airport} in {year}. Skipping retry.")
        retry_failed_downloads.append({"Airport": airport, "Year": year})
        continue

    # Build URL and file name
    url = base_url.format(year, station_id, year)
    filename = os.path.join(output_folder, f"LCD_{airport}_{year}.csv")

    # Request the data; a network error for one airport must not abort the
    # remaining retries, and the timeout keeps a stalled server from hanging
    # the notebook.
    try:
        response = requests.get(url, timeout=(10, 60))
    except requests.exceptions.RequestException as e:
        print(f"Failed to retry for {airport} in {year}: {e}")
        retry_failed_downloads.append({"Airport": airport, "Year": year})
        continue

    # Save data
    if response.status_code == 200:
        with open(filename, 'wb') as file:
            file.write(response.content)
    else:
        print(f"Failed to retry for {airport} in {year}. Status code: {response.status_code}")
        retry_failed_downloads.append({"Airport": airport, "Year": year})