In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import io
from tqdm import tqdm

# Product sub-directory (leading and trailing slash included) that is appended
# to the base ".../uscrn/products" URL in the default arguments below.
rate = '/subhourly01/'

def extract_stations(y=2024, url='https://www.ncei.noaa.gov/pub/data/uscrn/products'+rate):
    """List the station names that have a ``.txt`` data file for year ``y``.

    Downloads the HTML directory listing at ``url + str(y) + '/'``, collects the
    station part of every linked ``.txt`` file name, writes the sorted names to
    ``stations{y}.csv`` (single ``Station`` column) and returns them.

    Args:
        y: Year whose directory listing is fetched.
        url: Base URL of the product directory; must end with ``/``.

    Returns:
        Alphabetically sorted list of station name strings (same order as the
        CSV that is written).

    Raises:
        requests.HTTPError: If the listing request returns an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Fetch the HTML content from the URL
    url = url+str(y)+'/'
    print(url)
    response = requests.get(url, timeout=60)
    # Fail loudly instead of parsing an error page into an empty station list.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    suffix = '.txt'
    stations = set()
    for link in soup.find_all('a'):
        filename = link.get('href')
        if not filename or not filename.endswith(suffix):
            continue
        # Keep only the file name in case the href carries a path.
        basename = filename.rsplit('/', 1)[-1]
        # Strip just the extension: splitting on the first '.' would truncate
        # any station name that itself contains a dot.
        stem = basename[:-len(suffix)]
        # Data files are named <product>-<version>-<year>-<station> (see the
        # URL template used by read2file); limiting the split to three cuts
        # keeps any '-' that is part of the station name.
        stations.add(stem.split('-', 3)[-1])

    # Save the sorted names and return them in that same deterministic order
    station_list = sorted(stations)
    stations_df = pd.DataFrame(station_list, columns=['Station'])
    stations_df.to_csv(f'stations{y}.csv', index=False)
    return station_list

# Column names assigned to every downloaded station file, in file order.
_CRN_COLUMNS = ['WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
                'LONGITUDE', 'LATITUDE', 'AIR_TEMPERATURE', 'PRECIPITATION',
                'SOLAR_RADIATION', 'SR_FLAG', 'SURFACE_TEMPERATURE', 'ST_TYPE',
                'ST_FLAG', 'RELATIVE_HUMIDITY', 'RH_FLAG', 'SOIL_MOISTURE_5',
                'SOIL_TEMPERATURE_5', 'WETNESS', 'WET_FLAG', 'WIND_1_5', 'WIND_FLAG']

# Seconds to wait on a single download before giving up instead of hanging.
_REQUEST_TIMEOUT = 60


def _parse_station_text(text, station):
    """Parse one station file's text into a DataFrame tagged with its station."""
    # header=None keeps the first line as data. Letting pandas infer a header
    # and then overwriting df.columns (as before) dropped the first observation
    # of every file. sep=r'\s+' replaces the deprecated delim_whitespace=True.
    df = pd.read_csv(io.StringIO(text), sep=r'\s+', header=None)
    # Raises ValueError if the file does not have exactly these columns.
    df.columns = _CRN_COLUMNS
    df['Station'] = station
    # First two characters of the station name
    df['State_Code'] = station[:2]
    return df


def read2file(stations, ys=2002, ye=2023, url_template='https://www.ncei.noaa.gov/pub/data/uscrn/products'+rate+'{}/CRNS0101-05-{}-{}.txt', output_file='aaa_min_2024.csv'):
    """Download every station/year file and append them all to one CSV.

    For each year in ``ys..ye`` (inclusive) and each station, the URL is built
    with ``url_template.format(year, year, station)``. Responses whose status is
    not 200, or whose body is empty, are skipped. Each parsed file gets
    ``Station`` and ``State_Code`` columns and is appended to ``output_file``;
    the CSV header is written once, with the first file.

    Args:
        stations: Sequence of station name strings.
        ys: First year to download.
        ye: Last year to download (inclusive).
        url_template: Format string taking (year, year, station).
        output_file: Path of the combined CSV; overwritten if it exists.

    Raises:
        requests.RequestException: On connection problems or timeout.
        ValueError: If a downloaded file does not have the expected columns.
    """
    years = range(ys, ye+1)

    # newline='' lets pandas control line endings on the text handle (avoids
    # doubled line breaks on Windows). One Session reuses the HTTP connection
    # across the many requests to the same host.
    with open(output_file, 'w', newline='') as f, \
            requests.Session() as session, \
            tqdm(total=len(years) * len(stations)) as pbar:
        headers_written = False

        for year in years:
            for station in stations:
                # Increment progress bar (counts attempts, including skipped files)
                pbar.update(1)

                url = url_template.format(year, year, station)
                response = session.get(url, timeout=_REQUEST_TIMEOUT)

                # Skip unsuccessful responses.
                if response.status_code != 200:
                    continue
                # An empty body would make read_csv raise; nothing to append.
                if not response.text.strip():
                    continue

                df = _parse_station_text(response.text, station)

                # Only the first file written carries the header row
                df.to_csv(f, index=False, header=not headers_written)
                headers_written = True

# Discover which stations have data for 2024 ...
stations = extract_stations(y=2024)

# ... then download each station's 2024 file into one combined CSV.
read2file(
    stations,
    ys=2024,
    ye=2024,
    output_file='aaa_min_2024.csv',
)


https://www.ncei.noaa.gov/pub/data/uscrn/products/subhourly01/2024/


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [23:46<00:00,  9.09s/it]


In [21]:
import pandas as pd
import folium
from datetime import datetime, timedelta
from branca.colormap import LinearColormap

# Define chunk size
chunk_size = 100000

# Values treated as "no observation" in the temperature column. They must be
# removed while the data is still in Celsius: after conversion to Fahrenheit
# they no longer equal +/-9999 and a comparison against those numbers never
# matches.
MISSING_SENTINELS = (-9999, 9999)

# Get the date range for the last 4 days (today and the three days before it)
end_date = datetime.now()
start_date = end_date - timedelta(3)
date_range = pd.date_range(start_date, end_date).strftime('%Y%m%d')

# Per-chunk candidate rows; concatenated once after the loop instead of
# growing a DataFrame on every iteration.
chunk_maxima = []

# Process the CSV file in chunks
for chunk in pd.read_csv('aaa_min_2024.csv', chunksize=chunk_size, parse_dates=['LST_DATE']):
    # Keep rows from the last 4 days that carry a real temperature reading
    in_window = chunk['LST_DATE'].dt.strftime('%Y%m%d').isin(date_range)
    has_reading = chunk['AIR_TEMPERATURE'].notna() & ~chunk['AIR_TEMPERATURE'].isin(MISSING_SENTINELS)
    chunk_filtered = chunk[in_window & has_reading].copy()
    if chunk_filtered.empty:
        continue

    # Convert temperature from Celsius to Fahrenheit
    chunk_filtered['AIR_TEMPERATURE_F'] = chunk_filtered['AIR_TEMPERATURE'] * 9/5 + 32

    # Row holding the maximum temperature for each station in this chunk.
    # The full row is kept, so LATITUDE/LONGITUDE come along without a merge.
    hottest_idx = chunk_filtered.groupby('Station')['AIR_TEMPERATURE_F'].idxmax()
    chunk_maxima.append(chunk_filtered.loc[hottest_idx])

# A station's rows can straddle a chunk boundary, so reduce the per-chunk
# candidates once more to get exactly one row (and one marker) per station.
if chunk_maxima:
    candidates = pd.concat(chunk_maxima, ignore_index=True)
    overall_idx = candidates.groupby('Station')['AIR_TEMPERATURE_F'].idxmax()
    max_temps = candidates.loc[overall_idx].reset_index(drop=True)
else:
    # No data in the window: leave an empty frame so the map is still produced.
    max_temps = pd.DataFrame()

# Create a base map
m = folium.Map(location=[37.8, -96], zoom_start=4)

# Create a color map with specified colors
colormap = LinearColormap(['blue', 'green', 'orange', 'red', 'purple', 'black'],
                          vmin=70, vmax=120,
                          caption='Max Temperature (°F)')
colormap.add_to(m)

# Add one marker per station, colored by its maximum temperature
for idx, row in max_temps.iterrows():
    color = colormap(row['AIR_TEMPERATURE_F'])
    popup_content = f"""
    Station: {row['Station']}<br>
    Temp: {row['AIR_TEMPERATURE_F']} °F<br>
    Date: {row['LST_DATE']}<br>
    Time: {row['LST_TIME']}<br>
    Relative Humidity: {row['RELATIVE_HUMIDITY']}<br>
    Wind Flag: {row['WIND_FLAG']}
    """
    folium.CircleMarker(
        location=(row['LATITUDE'], row['LONGITUDE']),
        radius=5,
        popup=folium.Popup(popup_content, max_width=300),
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
    ).add_to(m)

# Save the map to an HTML file with date range in the name
map_filename = f'max_temperature_map_{start_date.strftime("%Y%m%d")}_{end_date.strftime("%Y%m%d")}.html'
m.save(map_filename)

# Display the map
m
