In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime

# Scrape window: every calendar day from the fixed start date through today.
start_date = datetime.datetime(2023, 5, 1)
today_date = datetime.datetime.today()

# Whole days elapsed since the start; the +1 below keeps today inside the window.
elapsed_days = (today_date - start_date).days

# One datetime per day in the window, each offset from start_date by whole days.
date_range = list(
    map(lambda offset: start_date + datetime.timedelta(days=offset), range(elapsed_days + 1))
)

# The same days rendered as hyphen-delimited 'YYYY-MM-DD' strings.
date_range_hyphen = list(map(lambda day: day.strftime('%Y-%m-%d'), date_range))

# Column labels applied to the scraped observation table, in on-page order.
TABLE_COLUMNS = ['Time', 'Temperature', 'Dew_Point', 'Humidity', 'Wind', 'Speed', 'Gust',
                 'Pressure', 'Precip_Rate', 'Precip_Accum', 'UV', 'Solar']


def _fetch_observation_rows(date):
    """Download one day's station page and return its observation rows.

    Parameters
    ----------
    date : str
        Day to fetch, formatted 'YYYY-MM-DD'.

    Returns
    -------
    list[list[str]]
        Stripped cell text for each data row of the last table on the page.
        Empty when the page has no table or no complete rows.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # The original interpolated the full `today_date` datetime (including a space and
    # time-of-day) into the first path segment; the requested day is used for both.
    # NOTE(review): day/day is the form the site uses for its daily view - confirm.
    url = f'https://www.wunderground.com/dashboard/pws/KNYJACKS2/table/{date}/{date}/daily'
    response = requests.get(url, timeout=30)  # never hang the whole scrape on one request
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    tables = soup.find_all('table')
    if not tables:
        return []

    # The observations live in the last table on the page.
    rows = [
        [cell.text.strip() for cell in row.find_all('td')]
        for row in tables[-1].find_all('tr')
    ]
    # The first two <tr> rows are skipped exactly as before (presumably header rows -
    # confirm against the page). Rows without a full set of cells are dropped so a
    # malformed row cannot break DataFrame construction.
    return [cells for cells in rows[2:] if len(cells) == len(TABLE_COLUMNS)]


def _hourly_means(rows):
    """Average one day's observation rows by hour of day.

    Parameters
    ----------
    rows : list[list[str]]
        Non-empty rows of cell text matching TABLE_COLUMNS.

    Returns
    -------
    pandas.DataFrame
        One row per hour present in the input, with 'Hour' first followed by the
        numeric reading columns ('Time' and 'Wind' are not included).
    """
    observations = pd.DataFrame(rows, columns=TABLE_COLUMNS)
    hour = pd.to_datetime(observations['Time'], format='%I:%M %p').dt.hour

    # Drop 'Time' explicitly: pandas >= 2.0 no longer discards non-numeric columns in
    # groupby().mean(), so leaving it in would add a column to the result.
    readings = observations.drop(columns=['Time', 'Wind'])

    # Keep the leading token of each cell (the value without its unit). Blank or
    # placeholder cells become NaN instead of raising.
    readings = readings.apply(
        lambda column: pd.to_numeric(column.str.split().str[0], errors='coerce')
    )
    readings.insert(0, 'Hour', hour)
    return readings.groupby('Hour', as_index=False).mean()


# Collect each day's hourly averages and concatenate once at the end. This avoids
# re-copying the growing frame on every iteration and does not rely on the first
# date succeeding to initialise `dfw`.
hourly_frames = []
for date in date_range_hyphen:
    rows = _fetch_observation_rows(date)
    if not rows:
        print(f'No observations found for {date}; skipping.')
        continue
    df_hourly = _hourly_means(rows)
    hourly_frames.append(df_hourly)

if hourly_frames:
    dfw = pd.concat(hourly_frames, ignore_index=True)
else:
    # Nothing was scraped: still define `dfw` with the expected columns.
    dfw = pd.DataFrame(
        columns=['Hour'] + [name for name in TABLE_COLUMNS if name not in ('Time', 'Wind')]
    )


In [22]:
# Show the combined hourly-average frame (`dfw`) built by the scraping cell above.
display(dfw)

Unnamed: 0,Hour,Temperature,Dew_Point,Humidity,Speed,Gust,Pressure,Precip_Rate,Precip_Accum,UV,Solar
0,0,59.091667,57.100000,93.000000,0.0,0.0,29.012500,0.000000,0.000000,0.000000,0.000000
1,1,57.741667,55.750000,93.000000,0.0,0.0,29.014167,0.005833,0.006667,0.000000,0.000000
2,2,54.500000,52.333333,92.500000,0.0,0.0,29.060833,0.005000,0.010000,0.000000,0.000000
3,3,51.491667,47.458333,86.250000,0.0,0.0,29.112500,0.000000,0.010000,0.000000,0.000000
4,4,51.291667,43.541667,74.666667,0.0,0.0,29.140833,0.000000,0.010000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
275,11,82.266667,45.900000,28.250000,0.0,0.0,30.078333,0.000000,0.000000,5.083333,825.666667
276,12,83.300000,45.750000,26.916667,0.0,0.0,30.060833,0.000000,0.000000,5.666667,887.500000
277,13,84.616667,45.425000,25.416667,0.0,0.0,30.037500,0.000000,0.000000,5.333333,866.000000
278,14,85.625000,45.216667,24.500000,0.0,0.0,30.015000,0.000000,0.000000,4.333333,727.666667
