### Necessary Imports
- json - to work with JSON data (the API response is in JSON format)
- requests - to fetch the webpage (the HTML code of the webpage)
- Beautiful Soup - to parse the webpage (read the HTML code)
- re - regular expression operations


In [55]:
import json

from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup

import pandas as pd
import re

from datetime import datetime

Constructing a location URL for Lucknow


In [56]:
# City whose forecast location we want to look up.
city = "Lucknow"

# Query-string parameters for the BBC locator service, listed in the order
# they appear in the final URL.
# NOTE(review): the API key is hard-coded here; consider loading it from configuration.
locator_params = [
    ('api_key', 'AGbFAKx58hyjQScCXIYrxuEwJh2W2cmv'),
    ('s', city),
    ('stack', 'aws'),
    ('locale', 'en'),
    ('filter', 'international'),
    ('place-types', 'settlement,airport,district'),
    ('order', 'importance'),
    ('a', 'true'),
    ('format', 'json'),
]
location_url = f"https://locator-service.api.bbci.co.uk/locations?{urlencode(locator_params)}"
location_url

'https://locator-service.api.bbci.co.uk/locations?api_key=AGbFAKx58hyjQScCXIYrxuEwJh2W2cmv&s=Lucknow&stack=aws&locale=en&filter=international&place-types=settlement%2Cairport%2Cdistrict&order=importance&a=true&format=json'

Sending an HTTP GET request and parsing the response as JSON.

In [57]:
# Query the locator service. A timeout keeps the notebook from hanging on a stalled
# connection, and raise_for_status() surfaces HTTP errors instead of trying to parse
# an error page as JSON.
location_response = requests.get(location_url, timeout=10)
location_response.raise_for_status()
result = location_response.json()
result

{'response': {'results': {'results': [{'id': '1264733',
     'name': 'Lucknow',
     'container': 'India',
     'containerId': 1269750,
     'language': 'en',
     'timezone': 'Asia/Kolkata',
     'country': 'IN',
     'latitude': 26.83928,
     'longitude': 80.92313,
     'placeType': 'settlement'},
    {'id': '6269318',
     'name': 'Lucknow',
     'container': 'Angus',
     'containerId': 2657306,
     'language': 'en',
     'timezone': 'Europe/London',
     'country': 'GB',
     'latitude': 56.49103,
     'longitude': -2.7804,
     'placeType': 'settlement',
     'topicId': 'c7yjndd57l9t'}],
   'totalResults': 2}}}

Building the weather forecast URL from the location data and fetching the page.

In [58]:
# The BBC weather page for a location is addressed by its id,
# e.g. 'https://www.bbc.com/weather/1264733' for Lucknow (India).
matching_locations = result['response']['results']['results']
if not matching_locations:
    # Fail with a clear message rather than an IndexError on the lookup below.
    raise ValueError('The locator service returned no matching locations.')

# Use the first match returned by the locator service.
url = 'https://www.bbc.com/weather/' + matching_locations[0]['id']

# Bound the request time and stop on HTTP errors so that an error page is not parsed
# as if it were the forecast.
response = requests.get(url, timeout=10)
response.raise_for_status()


Next, we create an instance of BeautifulSoup.

In [30]:
# Parse the downloaded page bytes with Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.content, features='html.parser')

The information we want (the daily high and low temperatures and the daily weather summary) is in specific blocks on the webpage.
We need to find the block type, the type of identifier, and the identifier name (all of these can be found by right-clicking
on the webpage and selecting 'Inspect' in the Chrome browser; other browsers offer a similar tool).

In [31]:
# block-type: span; identifier type: class; class name: wr-day-temperature__high-value
daily_high_values = soup.find_all('span', class_='wr-day-temperature__high-value')
daily_high_values

[<span class="wr-day-temperature__high-value"><span class="wr-value--temperature"><span class="wr-value--temperature--c">39°</span><span class="wr-hide"> </span><span class="wr-value--temperature--f">102°</span></span></span>,
 <span class="wr-day-temperature__high-value"><span class="wr-value--temperature"><span class="wr-value--temperature--c">34°</span><span class="wr-hide"> </span><span class="wr-value--temperature--f">94°</span></span></span>,
 <span class="wr-day-temperature__high-value"><span class="wr-value--temperature"><span class="wr-value--temperature--c">33°</span><span class="wr-hide"> </span><span class="wr-value--temperature--f">91°</span></span></span>,
 <span class="wr-day-temperature__high-value"><span class="wr-value--temperature"><span class="wr-value--temperature--c">35°</span><span class="wr-hide"> </span><span class="wr-value--temperature--f">95°</span></span></span>,
 <span class="wr-day-temperature__high-value"><span class="wr-value--temperature"><span class="

In [32]:
# block-type: span; identifier type: class; class name: wr-day-temperature__low-value
daily_low_values = soup.find_all('span', class_='wr-day-temperature__low-value')
daily_low_values

[<span class="wr-day-temperature__low-value"><span class="wr-value--temperature"><span class="wr-value--temperature--c">28°</span><span class="wr-hide"> </span><span class="wr-value--temperature--f">82°</span></span></span>,
 <span class="wr-day-temperature__low-value"><span class="wr-value--temperature"><span class="wr-value--temperature--c">26°</span><span class="wr-hide"> </span><span class="wr-value--temperature--f">79°</span></span></span>,
 <span class="wr-day-temperature__low-value"><span class="wr-value--temperature"><span class="wr-value--temperature--c">25°</span><span class="wr-hide"> </span><span class="wr-value--temperature--f">77°</span></span></span>,
 <span class="wr-day-temperature__low-value"><span class="wr-value--temperature"><span class="wr-value--temperature--c">26°</span><span class="wr-hide"> </span><span class="wr-value--temperature--f">78°</span></span></span>,
 <span class="wr-day-temperature__low-value"><span class="wr-value--temperature"><span class="wr-val

Obtaining the summary of the weather for each day.

In [68]:
# First <div> carrying the 'wr-day-summary' class; it holds one <span> per forecast day.
daily_summary = soup.find('div', class_='wr-day-summary')
daily_summary

<div class="wr-day-summary"><div class="gel-wrap"><span class="">Sunny and a gentle breeze</span><span class="wr-hide">Thundery showers and a gentle breeze</span><span class="wr-hide">Thundery showers and a gentle breeze</span><span class="wr-hide">Light rain and a gentle breeze</span><span class="wr-hide">Drizzle and light winds</span><span class="wr-hide">Light rain showers and light winds</span><span class="wr-hide">Light rain showers and light winds</span><span class="wr-hide">Thundery showers and light winds</span><span class="wr-hide">Thundery showers and a gentle breeze</span><span class="wr-hide">Thundery showers and a gentle breeze</span><span class="wr-hide">Light rain showers and a gentle breeze</span><span class="wr-hide">Thundery showers and light winds</span><span class="wr-hide">Thundery showers and a gentle breeze</span><span class="wr-hide">Light rain showers and a gentle breeze</span></div></div>

In [69]:
# All day summaries concatenated into a single string (no separator between days).
daily_summary.get_text()

'Sunny and a gentle breezeThundery showers and a gentle breezeThundery showers and a gentle breezeLight rain and a gentle breezeDrizzle and light windsLight rain showers and light windsLight rain showers and light windsThundery showers and light windsThundery showers and a gentle breezeThundery showers and a gentle breezeLight rain showers and a gentle breezeThundery showers and light windsThundery showers and a gentle breezeLight rain showers and a gentle breeze'

Extracting the useful information from daily_high_values and daily_low_values.

In [35]:
# Text of the first high-temperature entry: Celsius and Fahrenheit readings together.
daily_high_values[0].get_text().strip()

'39° 102°'

In [36]:
# Same check for a later entry (index 5).
daily_high_values[5].get_text().strip()

'34° 93°'

In [37]:
# The first whitespace-separated token is the Celsius reading.
daily_high_values[0].get_text().strip().split()[0]

'39°'

In [77]:
# Keep only the Celsius reading (first token) of every daily high.
daily_high_values_list = [high.text.strip().split()[0] for high in daily_high_values]
daily_high_values_list

['39°',
 '34°',
 '33°',
 '35°',
 '35°',
 '34°',
 '32°',
 '32°',
 '32°',
 '32°',
 '32°',
 '32°',
 '33°',
 '32°']

In [39]:
# Keep only the Celsius reading (first token) of every daily low.
daily_low_values_list = [low.text.strip().split()[0] for low in daily_low_values]
daily_low_values_list

['28°',
 '26°',
 '25°',
 '26°',
 '26°',
 '25°',
 '25°',
 '25°',
 '24°',
 '23°',
 '23°',
 '23°',
 '24°',
 '24°']

In [40]:
# The concatenated summaries again, as the starting point for splitting them per day.
daily_summary.get_text()

'Sunny and a gentle breezeThundery showers and a gentle breezeThundery showers and a gentle breezeLight rain and a gentle breezeDrizzle and light windsLight rain showers and light windsLight rain showers and light windsThundery showers and light windsThundery showers and a gentle breezeThundery showers and a gentle breezeLight rain showers and a gentle breezeThundery showers and light windsThundery showers and a gentle breezeLight rain showers and a gentle breeze'

Separating the summary for each day.

In [78]:
# Each day's summary sits in its own <span> inside the summary container, so read the
# spans directly. Splitting the concatenated text on capital letters would cut a
# summary in two whenever it contained a second capital letter.
daily_summary_list = [day_span.get_text().strip() for day_span in daily_summary.find_all('span')]
daily_summary_list

['Sunny and a gentle breeze',
 'Thundery showers and a gentle breeze',
 'Thundery showers and a gentle breeze',
 'Light rain and a gentle breeze',
 'Drizzle and light winds',
 'Light rain showers and light winds',
 'Light rain showers and light winds',
 'Thundery showers and light winds',
 'Thundery showers and a gentle breeze',
 'Thundery showers and a gentle breeze',
 'Light rain showers and a gentle breeze',
 'Thundery showers and light winds',
 'Thundery showers and a gentle breeze',
 'Light rain showers and a gentle breeze']

In [79]:
# One timestamp per forecast entry, one day apart, starting from the current local date/time.
forecast_day_count = len(daily_high_values)
datelist = pd.date_range(start=datetime.today(), periods=forecast_day_count).tolist()
datelist

[Timestamp('2023-06-19 11:38:44.395370', freq='D'),
 Timestamp('2023-06-20 11:38:44.395370', freq='D'),
 Timestamp('2023-06-21 11:38:44.395370', freq='D'),
 Timestamp('2023-06-22 11:38:44.395370', freq='D'),
 Timestamp('2023-06-23 11:38:44.395370', freq='D'),
 Timestamp('2023-06-24 11:38:44.395370', freq='D'),
 Timestamp('2023-06-25 11:38:44.395370', freq='D'),
 Timestamp('2023-06-26 11:38:44.395370', freq='D'),
 Timestamp('2023-06-27 11:38:44.395370', freq='D'),
 Timestamp('2023-06-28 11:38:44.395370', freq='D'),
 Timestamp('2023-06-29 11:38:44.395370', freq='D'),
 Timestamp('2023-06-30 11:38:44.395370', freq='D'),
 Timestamp('2023-07-01 11:38:44.395370', freq='D'),
 Timestamp('2023-07-02 11:38:44.395370', freq='D')]

In [80]:
# Reduce every timestamp to a 'yy-mm-dd' string (two-digit year).
datelist = [stamp.date().strftime('%y-%m-%d') for stamp in datelist]
datelist

['23-06-19',
 '23-06-20',
 '23-06-21',
 '23-06-22',
 '23-06-23',
 '23-06-24',
 '23-06-25',
 '23-06-26',
 '23-06-27',
 '23-06-28',
 '23-06-29',
 '23-06-30',
 '23-07-01',
 '23-07-02']

In [81]:
# Combine the four per-day lists into rows of (date, high, low, summary).
# The rows are materialised as a list because a bare zip() iterator is exhausted after
# a single pass; consuming `zipped` a second time would otherwise yield no rows.
zipped = list(zip(datelist, daily_high_values_list, daily_low_values_list, daily_summary_list))

In [82]:
# Build the forecast table: one row per day, one column per extracted field.
forecast_columns = ['Date', 'High','Low', 'Summary']
df = pd.DataFrame(data=list(zipped), columns=forecast_columns)

In [83]:
# Render the assembled forecast table.
display(df)

Unnamed: 0,Date,High,Low,Summary
0,23-06-19,39°,28°,Sunny and a gentle breeze
1,23-06-20,34°,26°,Thundery showers and a gentle breeze
2,23-06-21,33°,25°,Thundery showers and a gentle breeze
3,23-06-22,35°,26°,Light rain and a gentle breeze
4,23-06-23,35°,26°,Drizzle and light winds
5,23-06-24,34°,25°,Light rain showers and light winds
6,23-06-25,32°,25°,Light rain showers and light winds
7,23-06-26,32°,25°,Thundery showers and light winds
8,23-06-27,32°,24°,Thundery showers and a gentle breeze
9,23-06-28,32°,23°,Thundery showers and a gentle breeze


In [84]:
# Remove the degree character and convert the temperatures to floats.
# '°' is not a regex metacharacter, so the pattern is written without a backslash
# ('\°' is an invalid string escape that newer Python versions warn about).
df['High'] = df['High'].replace('°', '', regex=True).astype(float)
df['Low'] = df['Low'].replace('°', '', regex=True).astype(float)

In [85]:
# Re-display the table to inspect the High/Low columns after the degree-sign cleanup.
display(df)

Unnamed: 0,Date,High,Low,Summary
0,23-06-19,39.0,28.0,Sunny and a gentle breeze
1,23-06-20,34.0,26.0,Thundery showers and a gentle breeze
2,23-06-21,33.0,25.0,Thundery showers and a gentle breeze
3,23-06-22,35.0,26.0,Light rain and a gentle breeze
4,23-06-23,35.0,26.0,Drizzle and light winds
5,23-06-24,34.0,25.0,Light rain showers and light winds
6,23-06-25,32.0,25.0,Light rain showers and light winds
7,23-06-26,32.0,25.0,Thundery showers and light winds
8,23-06-27,32.0,24.0,Thundery showers and a gentle breeze
9,23-06-28,32.0,23.0,Thundery showers and a gentle breeze


Extract the name of the city for which data is gathered.

In [86]:
# The location name is taken from the <h1> element with id 'wr-location-name-id'
# (alternative container: the <div> with class 'wr-c-location').
location = soup.find('h1', id='wr-location-name-id')
location.text.split()



In [87]:
# Save the forecast as a CSV file named after the first word of the location heading;
# the row index is not written.
first_location_word = location.text.split()[0]
filename_csv = f"{first_location_word}.csv"
df.to_csv(filename_csv, index=None)

In [88]:
# Save the same table as an Excel workbook named after the first word of the location heading.
filename_xlsx = f"{location.text.split()[0]}.xlsx"
df.to_excel(filename_xlsx)