# Web Scraping a 'Weather' webpage

In [1]:
# Import necessary Libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Loading the web page with 'requests'

## The URL of the webpage that needs to be scraped
url = "https://weather.com/en-IN/weather/tenday/l/f246eb7c020bfee7d452818e9f16003e84db16f88ecb2acb98c6097fbca9032d"

# Making a GET request. An explicit timeout (in seconds) is passed because
# requests does not time out on its own, so an unresponsive server would
# otherwise leave this cell hanging indefinitely.
file = requests.get(url, timeout=30)

# Display the HTTP status code to check whether the request succeeded
file.status_code

200

In [3]:
# Parse the raw bytes of the response with the 'html.parser' backend so that
# individual tags can be searched and extracted in the cells that follow.
soup = BeautifulSoup(markup=file.content, features='html.parser')

In [4]:
# Getting the <title> tag: attribute access on the soup returns the whole
# tag object (markup included), not just its text.
soup.title

<title data-react-helmet="true">Global village, KA 10-Day Weather Forecast - The Weather Channel | Weather.com</title>

In [5]:
# Getting only the text inside the <title> tag, without the surrounding markup
soup.title.string

'Global village, KA 10-Day Weather Forecast - The Weather Channel | Weather.com'

In [6]:
# Getting the name of the tag itself
soup.title.name

'title'

In [7]:
# Getting the name of the parent tag, i.e. the tag that directly encloses <title>
soup.title.parent.name

'head'

In [8]:
# 'p' tag: find() returns only the first match in the document, and
# get_text() extracts the text it contains.
soup.find('p').get_text() # text of the first <p> tag

'Generally clear. High 27ºC. Winds E at 15 to 25 km/h.'

In [9]:
# Collect every <h3> heading on the page and keep only the text of each one
date_tags = soup.find_all('h3')
date_forecast = [heading.get_text() for heading in date_tags]
date_forecast

['Special Forecasts',
 'Today',
 'Thu 05 | Day',
 'Thu 05 | Night',
 'Fri 06',
 'Fri 06 | Day',
 'Fri 06 | Night',
 'Sat 07',
 'Sat 07 | Day',
 'Sat 07 | Night',
 'Sun 08',
 'Sun 08 | Day',
 'Sun 08 | Night',
 'Mon 09',
 'Mon 09 | Day',
 'Mon 09 | Night',
 'Tue 10',
 'Tue 10 | Day',
 'Tue 10 | Night',
 'Wed 11',
 'Wed 11 | Day',
 'Wed 11 | Night',
 'Thu 12',
 'Thu 12 | Day',
 'Thu 12 | Night',
 'Fri 13',
 'Fri 13 | Day',
 'Fri 13 | Night',
 'Sat 14',
 'Sat 14 | Day',
 'Sat 14 | Night',
 'Sun 15',
 'Sun 15 | Day',
 'Sun 15 | Night',
 'Mon 16',
 'Mon 16 | Day',
 'Mon 16 | Night',
 'Tue 17',
 'Tue 17 | Day',
 'Tue 17 | Night',
 'Wed 18',
 'Wed 18 | Day',
 'Wed 18 | Night',
 'Thu 19',
 'Thu 19 | Day',
 'Thu 19 | Night']

In [10]:
# Collect the <details> elements that carry the day-part class string below;
# these hold the forecast entries for the 15 days.
forecast = soup.find_all(
    'details',
    class_="DaypartDetails--DayPartDetail--2XOOV Disclosure--themeList--1Dz21",
)

### Extracting the necessary data to create a dataframe

In [11]:
# Pull the day label (e.g. 'Today', 'Fri 06') out of every forecast entry
date = [
    day_entry.find(class_="DetailsSummary--daypartName--kbngc").get_text()
    for day_entry in forecast
]
date

['Today',
 'Fri 06',
 'Sat 07',
 'Sun 08',
 'Mon 09',
 'Tue 10',
 'Wed 11',
 'Thu 12',
 'Fri 13',
 'Sat 14',
 'Sun 15',
 'Mon 16',
 'Tue 17',
 'Wed 18',
 'Thu 19']

### Temperature Forecast

In [12]:
# Day time temperature: the text of the high-temperature <span> in each forecast entry
day_temp = [
    day_entry.find("span", class_="DetailsSummary--highTempValue--3PjlX").get_text()
    for day_entry in forecast
]
# Show the values together with their count
print(" Morning temperature: ", day_temp, "\n", "Length:", len(day_temp))

 Morning temperature:  ['27°', '28°', '28°', '29°', '29°', '28°', '29°', '30°', '30°', '31°', '31°', '31°', '30°', '30°', '29°'] 
 Length: 15


In [13]:
# Extracting the night time temperature: the text of the low-temperature
# <span> in each forecast entry
night_temp = [item.find("span", class_="DetailsSummary--lowTempValue--2tesQ").get_text() for item in forecast]
# Report the length of night_temp itself (not day_temp), so that a missing
# night value would actually show up in this check.
print(" Night temperature: ", night_temp, "\n", "Length:", len(night_temp))

 Night temperature:  ['16°', '16°', '17°', '16°', '14°', '13°', '14°', '14°', '14°', '16°', '16°', '16°', '16°', '16°', '16°'] 
 Length: 15


### Humidity Forecast

In [14]:
# 'data-testid' contains a hyphen, so it cannot be written as a keyword
# argument; it is passed through the attrs dictionary instead.
humidity = soup.find_all(
    'span',
    attrs={"data-testid": "PercentageValue"},
    class_="DetailsTable--value--2YD0-",
)
humidity_values = [span.get_text() for span in humidity]
# The printed list holds the values of both day and night
print(humidity_values, "\n\n", "Length:", len(humidity_values))

['46%', '79%', '59%', '79%', '60%', '83%', '58%', '79%', '54%', '72%', '47%', '74%', '44%', '68%', '39%', '62%', '38%', '60%', '39%', '60%', '42%', '59%', '42%', '63%', '45%', '70%', '48%', '71%', '50%', '68%'] 

 Length: 30


In [15]:
# Split the combined list by position: even indices are taken as the day
# readings and odd indices as the night readings.
humidity_values_day = humidity_values[0::2]
humidity_values_night = humidity_values[1::2]

# Show both halves with their counts
print(
    " Humidity in the day time: ", humidity_values_day, "\n",
    "Length: ", len(humidity_values_day), "\n\n",
    "Humidity in the night time: ", humidity_values_night, "\n",
    "Length: ", len(humidity_values_night),
)

 Humidity in the day time:  ['46%', '59%', '60%', '58%', '54%', '47%', '44%', '39%', '38%', '39%', '42%', '42%', '45%', '48%', '50%'] 
 Length:  15 

 Humidity in the night time:  ['79%', '79%', '83%', '79%', '72%', '74%', '68%', '62%', '60%', '60%', '59%', '63%', '70%', '71%', '68%'] 
 Length:  15


In [16]:
# Assemble the scraped lists into one table, one column per list;
# the dictionary order fixes the column order.
weather_columns = {
    "Date": date,
    "Day_Temp": day_temp,
    "Day_Humidity": humidity_values_day,
    "Night_Temp": night_temp,
    "Night_Humidity": humidity_values_night,
}
df = pd.DataFrame(weather_columns)
df

Unnamed: 0,Date,Day_Temp,Day_Humidity,Night_Temp,Night_Humidity
0,Today,27°,46%,16°,79%
1,Fri 06,28°,59%,16°,79%
2,Sat 07,28°,60%,17°,83%
3,Sun 08,29°,58%,16°,79%
4,Mon 09,29°,54%,14°,72%
5,Tue 10,28°,47%,13°,74%
6,Wed 11,29°,44%,14°,68%
7,Thu 12,30°,39%,14°,62%
8,Fri 13,30°,38%,14°,60%
9,Sat 14,31°,39%,16°,60%


In [17]:
# Write the table to a CSV file; index=False leaves the row index out of the file
df.to_csv(path_or_buf="weather_data.csv", index=False)

                                       *************************************