# Weather Channel Data Scraping

In [None]:
# Import necessary libraries

import datetime as dt

import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# Get data URL: download the ten-day forecast page and parse its HTML
URL = 'https://weather.com/weather/tenday/l/58ce32964ce60c21632b79f2eb9dc596af190484e5639847e40fc43b45225dbe'
# A timeout keeps the cell from hanging indefinitely if the server never answers
page = requests.get(URL, timeout=30)
# Fail here on an HTTP error status instead of parsing an error page below
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

In [None]:
# All later lookups for today's details are made inside the page's <main> element
table = soup.find('main')
# find() returns None when nothing matches; fail with a clear message here
# rather than with an AttributeError on the first lookup that uses `table`
if table is None:
    raise ValueError('No <main> element found in the downloaded page; '
                     'the page layout may have changed or the request was blocked.')

### Scrape Weather Info

In [None]:
# Daytime temperature: text of the first <span> with the day-temperature class
DayTemp = table.find('span', class_='DailyContent--temp--axgOn').get_text()
print(DayTemp)

74°


In [None]:
# Daily weather summary: text of the first <p> with the narrative class
text = table.find('p', class_='DailyContent--narrative--jqi6P').get_text()
print(text)

Rain likely. High 74F. Winds WNW at 5 to 10 mph. Chance of rain 70%.


In [None]:
# Daytime chance of rain: first <span> matching the daily "value" class
DayRainChance = table.find_all('span', class_='DailyContent--value--Xgh8M')[0].get_text()
print(DayRainChance)

66%


In [None]:
# Daytime wind (direction and speed in one string): second <span> matching
# the daily "value" class
DayWind = table.find_all('span', class_='DailyContent--value--Xgh8M')[1].get_text()
print(DayWind)

WNW 8 mph


In [None]:
# Humidity, UV index, sunrise and sunset are read from the first four
# <span>s matching the details-table value class
detail_values = table.find_all('span', class_='DetailsTable--value--pWEVz')
DayHumidity = detail_values[0].get_text()
UVIndex = detail_values[1].get_text()
Sunrise = detail_values[2].get_text()
Sunset = detail_values[3].get_text()
print(DayHumidity, UVIndex, Sunrise, Sunset)

76% 7 of 11 6:34 am 6:51 pm


In [None]:
# Night temperature: first <span> whose class string is the day-temperature
# class followed by the night modifier class
NightTemp = table.find(
    'span', class_='DailyContent--temp--axgOn DailyContent--tempN--DPsDJ'
).get_text()
# Night rain chance and wind are the third and fourth <span>s matching the
# daily "value" class
value_spans = table.find_all('span', class_='DailyContent--value--Xgh8M')
NightRainChance = value_spans[2].get_text()
NightWind = value_spans[3].get_text()
print(NightTemp, NightRainChance, NightWind)

70° 54% NNW 8 mph


In [None]:
# Night humidity, moonrise and moonset come from positions 4, 6 and 7 of the
# <span>s matching the details-table value class (position 5 is not read here)
night_details = table.find_all('span', class_='DetailsTable--value--pWEVz')
NightHumidity = night_details[4].get_text()
Moonrise = night_details[6].get_text()
Moonset = night_details[7].get_text()

In [None]:
# Moon phase: text of the first <span> with the moon-phrase class
MoonPhase = table.find('span', class_='DetailsTable--moonPhrase--AB7uO').get_text()
print(MoonPhase)

Waning Crescent


In [None]:
# Create df with info: a single row holding every value scraped above
data = dict(
    DayTemp=DayTemp,
    DayRainChance=DayRainChance,
    DayWind=DayWind,
    DayHumidity=DayHumidity,
    UVIndex=UVIndex,
    Sunrise=Sunrise,
    Sunset=Sunset,
    NightTemp=NightTemp,
    NightRainChance=NightRainChance,
    NightWind=NightWind,
    NightHumidity=NightHumidity,
    Moonrise=Moonrise,
    Moonset=Moonset,
    MoonPhase=MoonPhase,
    TextSummary=text,
)
# The values are plain strings (scalars), so an explicit one-element index
# is passed to build a one-row frame
df = pd.DataFrame(data, index=[0])
df.head()

Unnamed: 0,DayTemp,DayRainChance,DayWind,DayHumidity,UVIndex,Sunrise,Sunset,NightTemp,NightRainChance,NightWind,NightHumidity,Moonrise,Moonset,MoonPhase,TextSummary
0,74°,66%,WNW 8 mph,76%,7 of 11,6:34 am,6:51 pm,70°,54%,NNW 8 mph,80%,2:20 am,4:50 pm,Waning Crescent,Rain likely. High 74F. Winds WNW at 5 to 10 mp...


In [None]:
# Clean data for visualization
# NOTE: this cell rewrites `df` and removes the DayWind/NightWind columns, so
# it is meant to run once per scrape; re-create `df` before running it again.

# Remove percentage and degree symbols by keeping only the (optionally signed)
# number. Pattern extraction, unlike chopping the last character, turns
# non-numeric text such as '--' into a missing value instead of a stray '-'
# that would make the numeric conversion below fail.
symbol_cols = ['DayTemp','DayRainChance','DayHumidity','NightTemp','NightRainChance','NightHumidity']
for col in symbol_cols:
  df[col] = df[col].str.extract(r'(-?\d+)', expand=False)

# Clean UVIndex: keep the leading number of a value such as '7 of 11'
df['UVIndex'] = df['UVIndex'].str.extract(r'(\d+)', expand=False)

# Split wind (e.g. 'WNW 8 mph') into direction and speed. Matching the leading
# letters and the first run of digits does not depend on which whitespace
# character separates the parts or on the length of the unit suffix.
df['DayWindDirection'] = df['DayWind'].str.extract(r'^\s*([A-Za-z]+)', expand=False)
df['NightWindDirection'] = df['NightWind'].str.extract(r'^\s*([A-Za-z]+)', expand=False)
df['DayWindSpeed'] = df['DayWind'].str.extract(r'(\d+)', expand=False)
df['NightWindSpeed'] = df['NightWind'].str.extract(r'(\d+)', expand=False)
df = df.drop(columns=['DayWind','NightWind'])

# Change strings into numbers (missing values stay missing)
num_cols = ['DayTemp','DayRainChance','DayHumidity','UVIndex','NightTemp',
            'NightRainChance','NightHumidity','DayWindSpeed','NightWindSpeed']
for col in num_cols:
  df[col] = pd.to_numeric(df[col])

# Change percentages to decimals. Dividing by 100 gives the float closest to
# the intended decimal; multiplying by 0.01 can leave artefacts such as
# 57 * 0.01 == 0.5700000000000001.
percentage_cols = ['DayRainChance','DayHumidity','NightRainChance','NightHumidity']
for col in percentage_cols:
  df[col] = df[col] / 100


df.head()

Unnamed: 0,DayTemp,DayRainChance,DayHumidity,UVIndex,Sunrise,Sunset,NightTemp,NightRainChance,NightHumidity,Moonrise,Moonset,MoonPhase,TextSummary,DayWindDirection,NightWindDirection,DayWindSpeed,NightWindSpeed
0,74,0.66,0.76,7,6:34 am,6:51 pm,70,0.54,0.8,2:20 am,4:50 pm,Waning Crescent,Rain likely. High 74F. Winds WNW at 5 to 10 mp...,WNW,NNW,8,8


In [None]:
# Add 10-day forecast data: one list per field, built from the matching tags
Date = [tag.text for tag in soup.find_all('h2', class_='DetailsSummary--daypartName--CcVUz')]
HighTemp = [tag.text for tag in soup.find_all('span', class_='DetailsSummary--highTempValue--VHKaO')]
LowTemp = [tag.text for tag in soup.find_all('span', class_='DetailsSummary--lowTempValue--ogrzb')]

# Rain chance is the text of the last <span> inside each precip <div>
RainChance = [
  precip.find_all('span')[-1].text
  for precip in soup.find_all('div', class_='DetailsSummary--precip--YXw9t')
]

# Wind is the text of the last two <span>s inside each wind wrapper,
# joined with a single space
Wind = []
for wrapper in soup.find_all('span', class_='Wind--windWrapper--NsCjc undefined'):
  inner_spans = wrapper.find_all('span')
  Wind.append(inner_spans[-2].text + ' ' + inner_spans[-1].text)

In [None]:
# Create df: one column per scraped list (the wind strings go in 'WindSpeed')
df2 = pd.DataFrame(dict(
    Date=Date,
    HighTemp=HighTemp,
    LowTemp=LowTemp,
    RainChance=RainChance,
    WindSpeed=Wind,
))
df2.head()

Unnamed: 0,Date,HighTemp,LowTemp,RainChance,WindSpeed
0,Today,74°,70°,66%,8 mph
1,Thu 18,81°,71°,19%,9 mph
2,Fri 19,77°,70°,9%,11 mph
3,Sat 20,77°,69°,8%,11 mph
4,Sun 21,76°,70°,23%,11 mph


In [None]:
# Remove percentage, degree and unit symbols, keeping only the number.
# Extracting the digits (instead of slicing characters off the end) turns
# non-numeric text such as '--' into a missing value, and re-running the cell
# on already-stripped strings leaves them unchanged rather than chopping digits.
for col in ['HighTemp', 'LowTemp', 'RainChance']:
  df2[col] = df2[col].str.extract(r'(-?\d+)', expand=False)
df2['WindSpeed'] = df2['WindSpeed'].str.extract(r'(\d+)', expand=False)
df2.head()

Unnamed: 0,Date,HighTemp,LowTemp,RainChance,WindSpeed
0,Today,74,70,66,8
1,Thu 18,81,71,19,9
2,Fri 19,77,70,9,11
3,Sat 20,77,69,8,11
4,Sun 21,76,70,23,11


In [None]:
# Change columns to numeric (named explicitly rather than by position)
for col in ['HighTemp', 'LowTemp', 'RainChance', 'WindSpeed']:
  df2[col] = pd.to_numeric(df2[col])
# Convert the percentage to a fraction. Dividing by 100 gives the float closest
# to the intended decimal; multiplying by 0.01 can leave artefacts such as
# 57 * 0.01 == 0.5700000000000001.
df2['RainChance'] = df2['RainChance'] / 100
df2.head()

Unnamed: 0,Date,HighTemp,LowTemp,RainChance,WindSpeed
0,Today,74,70,0.66,8
1,Thu 18,81,71,0.19,9
2,Fri 19,77,70,0.09,11
3,Sat 20,77,69,0.08,11
4,Sun 21,76,70,0.23,11


In [None]:
# Clean dates for visualization: turn labels such as 'Today' / 'Thu 18' into
# real dates. The labels only carry a day-of-month, so month and year are
# inferred by walking forward from today's date.
# NOTE(review): dt.datetime.now() is the kernel's local clock; presumably it
# matches the forecast location's calendar day — confirm.
today = dt.datetime.now()
month = today.month
year = today.year
# Initialize last_day with the day before today to correctly handle the first entry
last_day = today.day - 1

new_dates = []
for date_str in df2['Date']:
    if date_str == 'Today':
        day = today.day
    else:
        # Labels look like 'Thu 18': the second token is the day of the month
        day = int(date_str.split(' ')[1])

    # A day number smaller than the previous one means the forecast crossed
    # into the next month; advance the month (and the year after December) so
    # this row and all later rows are built in the new month
    if day < last_day:
        month += 1
        if month > 12:
            month = 1
            year += 1

    # Create new datetime
    try:
        new_date = dt.datetime(year, month, day)
    except ValueError:
        # Handle cases where the day might belong to previous month at the
        # start: retry with the month/year of the day before today
        prev_month = today - dt.timedelta(days=1)
        new_date = dt.datetime(prev_month.year, prev_month.month, day)
    new_dates.append(new_date)

    last_day = day

# Assign new datetime objects to the column
df2['Date'] = pd.to_datetime(new_dates)

df2.head()

Unnamed: 0,Date,HighTemp,LowTemp,RainChance,WindSpeed
0,2025-09-17,74,70,0.66,8
1,2025-09-18,81,71,0.19,9
2,2025-09-19,77,70,0.09,11
3,2025-09-20,77,69,0.08,11
4,2025-09-21,76,70,0.23,11


In [None]:
# Add date to daily df as the first column
# .iloc[0] takes the first forecast row by position, whatever its index label
df['Date'] = df2['Date'].iloc[0]
# Build the column order without a second 'Date', so re-running this cell
# does not produce a duplicated column
lst = ['Date'] + [col for col in df.columns if col != 'Date']
df = df[lst]
df.head()

Unnamed: 0,Date,DayTemp,DayRainChance,DayHumidity,UVIndex,Sunrise,Sunset,NightTemp,NightRainChance,NightHumidity,Moonrise,Moonset,MoonPhase,TextSummary,DayWindDirection,NightWindDirection,DayWindSpeed,NightWindSpeed
0,2025-09-17,74,0.66,0.76,7,6:34 am,6:51 pm,70,0.54,0.8,2:20 am,4:50 pm,Waning Crescent,Rain likely. High 74F. Winds WNW at 5 to 10 mp...,WNW,NNW,8,8


In [None]:
#Save data to BigQuery
from google.cloud import bigquery
from pandas_gbq import to_gbq
# The client object is created here but is not passed to the to_gbq calls below
client = bigquery.Client()

project_id = 'true-bit-421817'
dest_table1 = 'Weather_Data.Daily_Weather'
dest_table2 = 'Weather_Data.10_Day_Forecast'

#Write to BigQuery: each frame goes to its own table; if_exists='replace'
#overwrites a table that already exists
for frame, destination in ((df, dest_table1), (df2, dest_table2)):
  to_gbq(frame, destination, project_id=project_id, if_exists='replace')

100%|██████████| 1/1 [00:00<00:00, 7503.23it/s]
100%|██████████| 1/1 [00:00<00:00, 8943.08it/s]
