# Scraping Single and Multiple Items from a Single Page

In [11]:
# Big Picture Goal: download weather webpage > turn HTML into beautifulsoup object, find forecast section > extract each daily forecast into lists so data can be saved/analyzed

# Importing Libraries
import requests # requests: download webpage HTML
from bs4 import BeautifulSoup # parses the HTML to use for scraping
import pandas as pd # stores scraped data neatly into tables

In [12]:
# Download the forecast page for the hard-coded latitude/longitude and keep the
# whole HTTP response in `page`.
# requests applies no timeout unless one is given, so set one explicitly to keep
# this cell from hanging forever on a stalled connection.
page = requests.get(
    "https://forecast.weather.gov/MapClick.php?lat=42.085651&lon=-76.049207",
    timeout=30,
)
# Stop here on an HTTP error status (4xx/5xx) instead of parsing an error page
# in the cells that follow.
page.raise_for_status()

In [13]:
# Parse the raw HTML bytes of the response with the "html.parser" backend; the
# resulting BeautifulSoup object lets us search the document like a tree of tags.
soup = BeautifulSoup(markup=page.content, features="html.parser")
# Print the parsed document as nicely formatted HTML, separating each tag and string.
print(soup.prettify())

<!DOCTYPE html>
<html class="no-js">
 <head>
  <!-- Meta -->
  <meta content="width=device-width" name="viewport"/>
  <link href="http://purl.org/dc/elements/1.1/" rel="schema.DC"/>
  <title>
   National Weather Service
  </title>
  <meta content="National Weather Service" name="DC.title"/>
  <meta content="NOAA National Weather Service" name="DC.description"/>
  <meta content="US Department of Commerce, NOAA, National Weather Service" name="DC.creator"/>
  <meta content="2025-12-06T18:15:16+00:00" name="DC.date.created" scheme="ISO8601"/>
  <meta content="EN-US" name="DC.language" scheme="DCTERMS.RFC1766"/>
  <meta content="weather" name="DC.keywords"/>
  <meta content="NOAA's National Weather Service" name="DC.publisher"/>
  <meta content="National Weather Service" name="DC.contributor"/>
  <meta content="/disclaimer.php" name="DC.rights"/>
  <meta content="General" name="rating"/>
  <meta content="index,follow" name="robots"/>
  <!-- Icons -->
  <link href="/build/images/favicon.eab

In [15]:
# Narrowing down: we only want the extended-forecast section of the page.
# find() returns the first <div> whose id attribute equals "seven-day-forecast";
# printing it shows that tag together with all of the content nested inside it.
seven_day = soup.find(name="div", attrs={"id": "seven-day-forecast"})
print(seven_day)

<div class="panel panel-default" id="seven-day-forecast">
<div class="panel-heading">
<b>Extended Forecast for</b>
<h2 class="panel-title">
                Vestal NY    </h2>
</div>
<div class="panel-body" id="seven-day-forecast-body">
<div id="seven-day-forecast-container"><ul class="list-unstyled" id="seven-day-forecast-list"><li class="forecast-tombstone"><div class="tombstone-container"><p class="period-name">This Afternoon</p><p><img alt="This Afternoon: Partly sunny, with a high near 37. Southwest wind around 6 mph. " class="forecast-icon" src="newimages/medium/bkn.png" title="This Afternoon: Partly sunny, with a high near 37. Southwest wind around 6 mph. "/></p><p class="temp temp-high">High: 37 °F</p><p class="short-desc">Partly Sunny</p></div></li><li class="forecast-tombstone"><div class="tombstone-container"><p class="period-name">Tonight</p><p><img alt="Tonight: Mostly cloudy, with a low around 22. Calm wind. " class="forecast-icon" src="newimages/medium/nbkn.png" title="To

In [21]:
# Collect every <li> with class "forecast-tombstone" inside the seven-day
# section. The result is a list of <li> tags, and each <li> is one forecast
# block (one row of data).
forecast_rows = seven_day.find_all(name="li", attrs={"class": "forecast-tombstone"})

In [18]:
# One empty list per output column. The generator builds a fresh list for each
# name, so the five columns never share the same list object.
list_period, list_short_desc, list_temp, list_long_desc, list_image_link = (
    [] for _ in range(5)
)

In [19]:
# Walk through every forecast block (one <li> per row of data) and pull out one
# value per column: period, short description, temperature, long description
# and image link.

# Start from empty columns every time this cell runs, so that re-running it
# does not append the same rows a second time to lists left over in the kernel.
list_period = []
list_short_desc = []
list_temp = []
list_long_desc = []
list_image_link = []

for tag in forecast_rows:
    # Text of the <p class="period-name"> inside this <li>, e.g. "This Afternoon".
    period = tag.find("p", class_="period-name").get_text()
    print(period)
    list_period.append(period)

    # Text of the <p class="short-desc"> inside this <li>, e.g. "Partly Sunny".
    short_desc = tag.find("p", class_="short-desc").get_text()
    print(short_desc)
    list_short_desc.append(short_desc)

    # Text of the <p> carrying the "temp" class, e.g. "High: 37 °F".
    # NOTE(review): presumably some blocks (e.g. hazard banners) have no
    # temperature paragraph - confirm against the live page. Storing None keeps
    # every column the same length instead of raising AttributeError.
    temp_tag = tag.find("p", class_="temp")
    temp = temp_tag.get_text() if temp_tag is not None else None
    print(temp)
    list_temp.append(temp)

    # The <img> is looked up once; both of the values below are read from it.
    icon = tag.find("img")

    # The long description is the value of the image's alt attribute,
    # e.g. "Tonight: Mostly cloudy, with a low around 22. Calm wind. ".
    long_desc = icon.attrs['alt']
    print(long_desc)
    list_long_desc.append(long_desc)

    # In-class activity: the src attribute is a path relative to the site, so
    # prefix it with the site address to get a full image URL.
    image_link = "https://forecast.weather.gov/" + icon.attrs['src']
    print(image_link)
    list_image_link.append(image_link)
    print("*******************End of Row*********************")

# Each list is one column: it holds that field for every forecast row.
print(list_period)  # e.g. ['This Afternoon', 'Tonight', ...]
print(list_short_desc)  # e.g. ['Partly Sunny', 'Mostly Cloudy', ...]
print(list_temp)  # e.g. ['High: 37 °F', 'Low: 22 °F', ...]
print(list_long_desc)  # the full-sentence descriptions
print(list_image_link)  # the image URLs

This Afternoon
Partly Sunny
High: 37 °F
This Afternoon: Partly sunny, with a high near 37. Southwest wind around 6 mph. 
https://forecast.weather.gov/newimages/medium/bkn.png
*******************End of Row*********************
Tonight
Mostly Cloudy
Low: 22 °F
Tonight: Mostly cloudy, with a low around 22. Calm wind. 
https://forecast.weather.gov/newimages/medium/nbkn.png
*******************End of Row*********************
Sunday
Mostly Cloudythen ChanceSnow Showers
High: 36 °F
Sunday: A chance of snow showers, mainly after 5pm.  Mostly cloudy, with a high near 36. Calm wind becoming south around 6 mph in the afternoon.  Chance of precipitation is 30%. Little or no snow accumulation expected. 
https://forecast.weather.gov/DualImage.php?i=bkn&j=sn&jp=30
*******************End of Row*********************
Sunday Night
Chance SnowShowers
Low: 15 °F
Sunday Night: A chance of snow showers, mainly after 7pm.  Mostly cloudy, with a low around 15. Light and variable wind becoming northwest around 6

In [22]:
# Bundle the column lists into a single dictionary: every key is a column name
# and every value is the list holding that column's entries.
data_dict = dict(
    periods=list_period,
    short_desc=list_short_desc,
    temp=list_temp,
    long_desc=list_long_desc,
    image_link=list_image_link,
)

print(data_dict)

{'periods': ['This Afternoon', 'Tonight', 'Sunday', 'Sunday Night', 'Monday', 'Monday Night', 'Tuesday', 'Tuesday Night', 'Wednesday'], 'short_desc': ['Partly Sunny', 'Mostly Cloudy', 'Mostly Cloudythen ChanceSnow Showers', 'Chance SnowShowers', 'Mostly Sunny', 'Partly Cloudy', 'Partly Sunnythen ChanceSnow Showers', 'Chance Snow', 'Rain/Snow'], 'temp': ['High: 37 °F', 'Low: 22 °F', 'High: 36 °F', 'Low: 15 °F', 'High: 23 °F', 'Low: 5 °F', 'High: 30 °F', 'Low: 23 °F', 'High: 40 °F'], 'long_desc': ['This Afternoon: Partly sunny, with a high near 37. Southwest wind around 6 mph. ', 'Tonight: Mostly cloudy, with a low around 22. Calm wind. ', 'Sunday: A chance of snow showers, mainly after 5pm.  Mostly cloudy, with a high near 36. Calm wind becoming south around 6 mph in the afternoon.  Chance of precipitation is 30%. Little or no snow accumulation expected. ', 'Sunday Night: A chance of snow showers, mainly after 7pm.  Mostly cloudy, with a low around 15. Light and variable wind becoming n

In [23]:
# Build a pandas DataFrame from the dictionary of column lists.
weather = pd.DataFrame(data=data_dict)

In [24]:
# Show the table as plain text (the same text print() produces for the frame).
print(repr(weather))

          periods                            short_desc         temp  \
0  This Afternoon                          Partly Sunny  High: 37 °F   
1         Tonight                         Mostly Cloudy   Low: 22 °F   
2          Sunday  Mostly Cloudythen ChanceSnow Showers  High: 36 °F   
3    Sunday Night                    Chance SnowShowers   Low: 15 °F   
4          Monday                          Mostly Sunny  High: 23 °F   
5    Monday Night                         Partly Cloudy    Low: 5 °F   
6         Tuesday   Partly Sunnythen ChanceSnow Showers  High: 30 °F   
7   Tuesday Night                           Chance Snow   Low: 23 °F   
8       Wednesday                             Rain/Snow  High: 40 °F   

                                           long_desc  \
0  This Afternoon: Partly sunny, with a high near...   
1  Tonight: Mostly cloudy, with a low around 22. ...   
2  Sunday: A chance of snow showers, mainly after...   
3  Sunday Night: A chance of snow showers, mainly...   

In [25]:
# Write the scraped table to "dataset.csv" (a relative path, so the file lands
# in the notebook's current working directory).
weather.to_csv(path_or_buf="dataset.csv")