In [72]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import os
import json
from datetime import datetime

In [73]:
# Initialize empty list
list = []

# Get html content of the webpage
web = requests.get("https://www.gov.uk/government/publications/list-of-cities/list-of-cities-html")

# Parse content using 'lxml' parser
soup = BeautifulSoup(web.content, "lxml")

# Find the section of the HTML where rewuired UK cities are listed.
# First locate the <h3> tag with id='united-kingdom' (indicating the start of the UK cities list).
# Then find the next <h3> tag with id='crown-dependencies' (indicating the end of the UK cities list).
# Get all <li> elements between these two headers, containing city names.
response = soup.find('h3', id = 'united-kingdom').find_next('h3', id = "crown-dependencies").find_all_previous('li')

# Loop over each element of the found data and add it to initialized list
for li in response:
    list.append(li.text.replace('*', '').strip())

# Slice list and reverse its order
city_list = list[:76][::-1]

print(city_list)

['Bath', 'Birmingham', 'Bradford', 'Brighton & Hove', 'Bristol', 'Cambridge', 'Canterbury', 'Carlisle', 'Chelmsford', 'Chester', 'Chichester', 'Colchester', 'Coventry', 'Derby', 'Doncaster', 'Durham', 'Ely', 'Exeter', 'Gloucester', 'Hereford', 'Kingston-upon-Hull', 'Lancaster', 'Leeds', 'Leicester', 'Lichfield', 'Lincoln', 'Liverpool', 'London', 'Manchester', 'Milton Keynes', 'Newcastle-upon-Tyne', 'Norwich', 'Nottingham', 'Oxford', 'Peterborough', 'Plymouth', 'Portsmouth', 'Preston', 'Ripon', 'Salford', 'Salisbury', 'Sheffield', 'Southampton', 'Southend-on-Sea', 'St Albans', 'Stoke on Trent', 'Sunderland', 'Truro', 'Wakefield', 'Wells', 'Westminster', 'Winchester', 'Wolverhampton', 'Worcester', 'York', 'Armagh', 'Bangor', 'Belfast', 'Lisburn', 'Londonderry', 'Newry', 'Aberdeen', 'Dundee', 'Dunfermline', 'Edinburgh', 'Glasgow', 'Inverness', 'Perth', 'Stirling', 'Bangor', 'Cardiff', 'Newport', 'St Asaph', 'St Davids', 'Swansea', 'Wrexham']


In [74]:
# Call on API key
api_key = "c95b33314e9ad84421bf063c5876cab3"

# Initialize empty dictionary
weather_data = {}

# Loop over each city to get weather information
for city in city_list:
    query = 'q='+ city
    response = requests.get('http://api.openweathermap.org/data/2.5/weather?'+query+'&APPID='+api_key)
    
    # Parse the JSON response and store it in 'data' variable
    data = response.json()
    
    # Excluding Bangor, extract weather data for each city
    # Replace with None if information is missing
    if city != 'Bangor':
        temperature = data.get("main", {}).get("temp", None)
        wind_speed = data.get("wind", {}).get("speed", None)
        rain = data.get("rain", None)
        dt = data.get("dt", None)

      
        # Add the city as a key in the dictionary, with a list of its weather attributes as the value
        weather_data[city] = [temperature, wind_speed, rain, dt]


In [75]:
# Create a DataFrame from the weather_data dictionary where each key becomes index row label
# Each list element (dictionary value) corresponds to a column specified in the "columns" argument
df = pd.DataFrame.from_dict(weather_data, orient = 'index' , columns = ["Temperature", "WindSpeed", "Rain", "CollectionTime"])
print(df)

                 Temperature  WindSpeed  Rain  CollectionTime
Bath                  284.57       4.00  None    1.730973e+09
Birmingham            293.47       2.06  None    1.730972e+09
Bradford              280.95       5.66  None    1.730972e+09
Brighton & Hove       283.89       2.57  None    1.730972e+09
Bristol               287.65       1.34  None    1.730972e+09
...                      ...        ...   ...             ...
Newport               284.71       2.99  None    1.730973e+09
St Asaph              284.59       0.89  None    1.730973e+09
St Davids             285.91      10.60  None    1.730973e+09
Swansea               285.53       3.58  None    1.730972e+09
Wrexham               283.57       5.66  None    1.730973e+09

[74 rows x 4 columns]


In [76]:
# Replace missing values in 'CollectionTime' column with 0
df['CollectionTime'] = df['CollectionTime'].fillna(0)

# Convert each element of 'CollectionTime' column from Unix timestamp to datetime object
df['CollectionTime'] = pd.to_datetime(df['CollectionTime'], origin='unix', unit='s')

# Format datetime objects to required string format
df['CollectionTime'] = df['CollectionTime'].apply(lambda row: row.strftime("%d/%m/%y @ %H:%M:%S"))


In [77]:
print(df)


                 Temperature  WindSpeed  Rain       CollectionTime
Bath                  284.57       4.00  None  07/11/24 @ 09:43:25
Birmingham            293.47       2.06  None  07/11/24 @ 09:41:09
Bradford              280.95       5.66  None  07/11/24 @ 09:41:21
Brighton & Hove       283.89       2.57  None  07/11/24 @ 09:41:36
Bristol               287.65       1.34  None  07/11/24 @ 09:40:23
...                      ...        ...   ...                  ...
Newport               284.71       2.99  None  07/11/24 @ 09:44:11
St Asaph              284.59       0.89  None  07/11/24 @ 09:44:11
St Davids             285.91      10.60  None  07/11/24 @ 09:44:11
Swansea               285.53       3.58  None  07/11/24 @ 09:40:35
Wrexham               283.57       5.66  None  07/11/24 @ 09:42:21

[74 rows x 4 columns]


In [78]:
# Convert elements in 'Temperature' column from Kelvin to Celsius by subtracting 273.15
df['Temperature'] = df['Temperature'] - 273.15

print(df)

                 Temperature  WindSpeed  Rain       CollectionTime
Bath                   11.42       4.00  None  07/11/24 @ 09:43:25
Birmingham             20.32       2.06  None  07/11/24 @ 09:41:09
Bradford                7.80       5.66  None  07/11/24 @ 09:41:21
Brighton & Hove        10.74       2.57  None  07/11/24 @ 09:41:36
Bristol                14.50       1.34  None  07/11/24 @ 09:40:23
...                      ...        ...   ...                  ...
Newport                11.56       2.99  None  07/11/24 @ 09:44:11
St Asaph               11.44       0.89  None  07/11/24 @ 09:44:11
St Davids              12.76      10.60  None  07/11/24 @ 09:44:11
Swansea                12.38       3.58  None  07/11/24 @ 09:40:35
Wrexham                10.42       5.66  None  07/11/24 @ 09:42:21

[74 rows x 4 columns]
