Medway - Local Election Results - 2023
======================================

# Web Scrape



In [21]:
import os
from bs4 import BeautifulSoup
import pandas as pd
import re

In [2]:
# Paths
path = {
    'html_results': 'data/html/results.html',
    'html_wards': 'data/html/wards'
}

# Medway Council - Local Election Results 2023 website
url_results = "https://www.medway.gov.uk/results2023"

In [3]:
# Ward seats
ward_seats = {
    "All Saints": 1,
    "Chatham Central and Brompton": 3,
    "Cuxton, Halling and Riverside": 2,
    "Fort Horsted": 1,
    "Fort Pitt": 3,
    "Gillingham North": 3,
    "Gillingham South": 3,
    "Hempstead and Wigmore": 2,
    "Hoo St Werburgh and High Halstow": 3,
    "Lordswood and Walderslade": 3,
    "Luton": 2,
    "Princes Park": 2,
    "Rainham North": 3,
    "Rainham South East": 3,
    "Rainham South West": 2,
    "Rochester East and Warren Wood": 3,
    "Rochester West and Borstal": 3,
    "St Mary's Island": 1,
    "Strood North and Frindsbury": 3,
    "Strood Rural": 3,
    "Strood West": 3,
    "Twydall": 2,
    "Watling": 3,
    "Wayfield and Weeds Wood": 2
 }
assert sum([x for x in ward_seats.values()]) == 59

In [4]:
# Party map
map_party = {
       "Conservative Party candidate": "Conservative",
       "Labour and Co-operative Party": "Labour",
       "Labour Party": "Labour",
       "Local Conservatives": "Conservative",
}

### 1. Get results data from website or cache

In [6]:
if os.path.exists(path['html_results']):
    print('Results cache exists - loading from disk')
    with open(path['html_results']) as fh:
        html_results = fh.read()

else:
    print('No results cache found - requesting from url and caching')
    html_results = requests.get(url=url_results).content
    with open(path['html_results'], "wb") as fh:
        fh.write(html_results)

print(len(html_results))


Results cache exists - loading from disk
38439


### 2. Get Ward data from website or cache

In [11]:
# Get ward data

soup = BeautifulSoup(html_results)

ward_as = soup.find("h2", string="Ward election results").find_next('p').find_all('a')
for ward_a in ward_as:
    ward = ward_a.text
    ward_url = ward_a.get('href')
    
    print(f'{ward}: ', end='')
    save_path = f"{path['html_wards']}/{ward}.html"

    if os.path.exists(save_path):
        print('Cache found')
    else:
        print('Cache not found - requesting from url and caching')
        html = requests.get(ward_url).text
        with open(save_path, 'w') as fh_out:
            fh_out.write(html)

All Saints: Cache found
Chatham Central and Brompton: Cache found
Cuxton, Halling and Riverside: Cache found
Fort Horsted: Cache found
Fort Pitt: Cache found
Gillingham North: Cache found
Gillingham South: Cache found
Hempstead and Wigmore: Cache found
Hoo St Werburgh and High Halstow: Cache found
Lordswood and Walderslade: Cache found
Luton: Cache found
Princes Park: Cache found
Rainham North: Cache found
Rainham South East: Cache found
Rainham South West: Cache found
Rochester East and Warren Wood: Cache found
Rochester West and Borstal: Cache found
St Mary's Island: Cache found
Strood North and Frindsbury: Cache found
Strood Rural: Cache found
Strood West: Cache found
Twydall: Cache found
Watling: Cache found
Wayfield and Weeds Wood: Cache found


In [16]:
# Load ward property data 
# Ward property data is stuff like the number of electors etc.
ward_objs = []

for ward_a in ward_as:

    ward = ward_a.text
    ward_url = ward_a.get('href')
    print(f'{ward}: Processing')
    
    ward_obj = {
        'ward': ward,
        'url': ward_url
    }
    path_html_ward = f"{path['html_wards']}/{ward}.html"
    with open(path_html_ward) as fh:
        soup = BeautifulSoup(fh.read())
        lis = soup.find("h2", string="Verification statement").find_next('ul').find_all('li')
        
        if len(lis) > 0:
            for li in lis:
                item, val = li.text.split(':')
                item = (
                    item
                    .replace('The total number of', '')
                    .replace('from the polling stations', '')
                    .replace('The', '')
                    .replace('as of today', '')
                    .strip()
                    .lower()
                    .replace(' ', '_')
                )
                ward_obj[item] = val.strip().replace(',','')
        
        # Ward seats
        ward_obj['ward_seats'] = ward_seats.get(ward)

        ward_objs.append(ward_obj)
        
        # raise KeyboardInterrupt

df_wards = (
    pd.DataFrame(ward_objs)
    .astype({
        'verified_ballot_papers': 'int',
        'verified_postal_ballot_papers': 'int',
        'ballot_papers_verified': 'int',
        'electorate':'int'
    })
    .assign(**{
        'turnout': lambda _df:_df['ballot_papers_verified'] / _df['electorate'],
        'postal_ballot_perc': lambda _df: _df['verified_postal_ballot_papers'] / _df['ballot_papers_verified']
    })
)

display(df_wards.head())
display(df_wards.dtypes)


All Saints: Processing
Chatham Central and Brompton: Processing
Cuxton, Halling and Riverside: Processing
Fort Horsted: Processing
Fort Pitt: Processing
Gillingham North: Processing
Gillingham South: Processing
Hempstead and Wigmore: Processing
Hoo St Werburgh and High Halstow: Processing
Lordswood and Walderslade: Processing
Luton: Processing
Princes Park: Processing
Rainham North: Processing
Rainham South East: Processing
Rainham South West: Processing
Rochester East and Warren Wood: Processing
Rochester West and Borstal: Processing
St Mary's Island: Processing
Strood North and Frindsbury: Processing
Strood Rural: Processing
Strood West: Processing
Twydall: Processing
Watling: Processing
Wayfield and Weeds Wood: Processing


Unnamed: 0,ward,url,verified_ballot_papers,verified_postal_ballot_papers,ballot_papers_verified,electorate,turnout,ward_seats,postal_ballot_perc
0,All Saints,https://www.medway.gov.uk/info/200670/local_an...,634,331,965,3495,0.276109,1,0.343005
1,Chatham Central and Brompton,https://www.medway.gov.uk/info/200670/local_an...,1194,1007,2201,10525,0.209121,3,0.457519
2,"Cuxton, Halling and Riverside",https://www.medway.gov.uk/info/200670/local_an...,1368,527,1895,6129,0.309186,2,0.2781
3,Fort Horsted,https://www.medway.gov.uk/info/200670/local_an...,717,373,1090,3530,0.308782,1,0.342202
4,Fort Pitt,https://www.medway.gov.uk/info/200670/local_an...,1904,1190,3094,9170,0.337405,3,0.384615


ward                              object
url                               object
verified_ballot_papers             int64
verified_postal_ballot_papers      int64
ballot_papers_verified             int64
electorate                         int64
turnout                          float64
ward_seats                         int64
postal_ballot_perc               float64
dtype: object

In [22]:
# Load ward result data into DataFrame
dfs = []

for ward_a in ward_as:
    ward = ward_a.text
    ward_url = ward_a.get('href')
    print(f'{ward}: Processing')
    path_html_ward = f"{path['html_wards']}/{ward}.html"
    with open(path_html_ward) as fh:
        soup = BeautifulSoup(fh.read())
        page_tables = soup.find_all('table')
        
        df = (
            pd.read_html(str(page_tables[0]), header=0)[0]
            .rename(columns={'Descriptions': 'Description'})
            .assign(**{
                'ward': ward,
                'ward_result_url': ward_url,
                'Description': lambda _df: _df['Description'].str.replace('party', 'Party'),
                'common_name': lambda _df: _df['Other names'].str.extract(r'(?:also|commonly) known as ([^)]+)\)?',flags=re.IGNORECASE),
                'surname': lambda x: x['Surname'].str.extract(r'(.+?)(?=\s\()'),
                'party': lambda x: x['Description'].map(map_party)
            })
            # Fillnas
            .assign(**{
                'common_name': lambda _df: _df['common_name'].fillna(_df['Other names']),
                'surname': lambda _df: _df['surname'].fillna(_df['Surname']),
                'party': lambda _df: _df['party'].fillna(_df['Description'])
              })
        )
        
        dfs.append(df)

        # display(df.head())
        
df_results = pd.concat(dfs)

df_results.sample(10)
        

All Saints: Processing
Chatham Central and Brompton: Processing
Cuxton, Halling and Riverside: Processing
Fort Horsted: Processing
Fort Pitt: Processing
Gillingham North: Processing
Gillingham South: Processing
Hempstead and Wigmore: Processing
Hoo St Werburgh and High Halstow: Processing
Lordswood and Walderslade: Processing
Luton: Processing
Princes Park: Processing
Rainham North: Processing
Rainham South East: Processing
Rainham South West: Processing
Rochester East and Warren Wood: Processing
Rochester West and Borstal: Processing
St Mary's Island: Processing
Strood North and Frindsbury: Processing
Strood Rural: Processing
Strood West: Processing
Twydall: Processing
Watling: Processing
Wayfield and Weeds Wood: Processing


Unnamed: 0,Surname,Other names,Description,Number of votes,ward,ward_result_url,common_name,surname,party
2,Holloway,Anita Jane,Liberal Democrats,303,Rochester West and Borstal,https://www.medway.gov.uk/info/200670/local_an...,Anita Jane,Holloway,Liberal Democrats
0,Batts,Gareth Wilf,Local Conservatives,1202,Strood North and Frindsbury,https://www.medway.gov.uk/info/200670/local_an...,Gareth Wilf,Batts,Conservative
2,Crack,Herbert John Alfred,Liberal Democrats,108,Wayfield and Weeds Wood,https://www.medway.gov.uk/info/200670/local_an...,Herbert John Alfred,Crack,Liberal Democrats
0,Clarke,Trevor Anthony,Local Conservatives,489,Fort Horsted,https://www.medway.gov.uk/info/200670/local_an...,Trevor Anthony,Clarke,Conservative
3,Nestorov,Marian Angelov,Labour and Co-operative Party,1823,Watling,https://www.medway.gov.uk/info/200670/local_an...,Marian Angelov,Nestorov,Labour
3,Olodo,Temitope Omontanwa (commonly known as Temi Olodo,Labour Party,425,Princes Park,https://www.medway.gov.uk/info/200670/local_an...,Temi Olodo,Olodo,Labour
6,Marchant,Patricia Anne (commonly known as Trish Marchant),Green Party,442,Rainham North,https://www.medway.gov.uk/info/200670/local_an...,Trish Marchant,Marchant,Green Party
2,Gilbourne,James Adrian (commonly known as Jim Gilbourne),Conservative Party candidate,1709,Hempstead and Wigmore,https://www.medway.gov.uk/info/200670/local_an...,Jim Gilbourne,Gilbourne,Conservative
0,Campbell,Smitha,Labour and Co-operative Party,1726,Fort Pitt,https://www.medway.gov.uk/info/200670/local_an...,Smitha,Campbell,Labour
6,Venus-Coppard,Tina Louise,Conservative Party candidate,817,Watling,https://www.medway.gov.uk/info/200670/local_an...,Tina Louise,Venus-Coppard,Conservative


In [23]:
### Save Dataframes
df_wards.to_csv('data/wards.csv')
df_results.to_csv('data/results.csv')