# Scraping UK prime ministers

### Import Python tools and Jupyter configuration

In [3]:
%load_ext lab_black

In [4]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import altair as alt
import numpy as np
import datetime as dt
import re
import time
import random
from tqdm import tqdm

In [5]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

In [6]:
month_year_updated = dt.date.today().strftime("%m_%Y")

---

In [7]:
url = "https://www.gov.uk/government/history/past-prime-ministers"

In [8]:
headers = {
    "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
    "Referer": "https://www.gunviolencearchive.org/",
    "sec-ch-ua-mobile": "?0",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36",
    "sec-ch-ua-platform": '"macOS"',
}

In [9]:
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text)

In [12]:
cards = soup.find_all("div", class_="gem-c-image-card")

In [89]:
dict_list = []

for c in cards:
    # print(c)
    data_dict = {
        "name": c.find("h2").text.replace("\n", "").replace("The Rt Hon ", ""),
        "detail": c.find("li").text.replace("\n", ""),
        "url": "https://www.gov.uk" + c.find("a", href=True)["href"],
        "image": c.find("img", class_="gem-c-image-card__image")["src"],
    }
    dict_list.append(data_dict)

In [90]:
src = pd.DataFrame(dict_list)

In [91]:
src[["party", "start", "drop", "end"]] = (
    src["detail"].str.strip().str.replace(" and ", "/").str.split(" ", expand=True)
)

In [92]:
src.drop(["drop", "detail"], axis=1, inplace=True)

#### Make a copy of the dataframe

In [93]:
df = src[["name", "party", "start", "end", "url", "image"]].copy()

In [94]:
df.head()

Unnamed: 0,name,party,start,end,url,image
0,Theresa May MP,Conservative,2016,2019,https://www.gov.uk/government/history/past-prime-ministers/theresa-may,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/6/s216_PM_portrait_960x640.jpg
1,David Cameron,Conservative,2010,2016,https://www.gov.uk/government/history/past-prime-ministers/david-cameron,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/1/s216_David_Cameron.jpg
2,Gordon Brown,Labour,2007,2010,https://www.gov.uk/government/history/past-prime-ministers/gordon-brown,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/964/s216_Gordon-Brown.jpg
3,Tony Blair,Labour,1997,2007,https://www.gov.uk/government/history/past-prime-ministers/tony-blair,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/965/s216_tony-blair-bw.jpg
4,Sir John Major KG CH,Conservative,1990,1997,https://www.gov.uk/government/history/past-prime-ministers/john-major,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/966/s216_John-Major.jpg


---

## Get bio info

#### List of PM urls

In [95]:
urls = df.url.to_list()

In [128]:
bios_list = []

for u in urls:
    response = requests.get(f"" + u, headers=headers)
    soup = BeautifulSoup(response.text)
    details = soup.find_all("div", class_="govuk-grid-column-two-thirds")
    for d in details[2:3]:
        bios_list.append(d.find_all("p", class_="govuk-body"))

In [144]:
for bio in bios_list[0]:
    print(bio)

<p class="govuk-body">1 October 1956, Eastbourne</p>
<p class="govuk-body">2016 to 2019</p>
<p class="govuk-body">Conservative</p>
<p class="govuk-body">Investigatory Powers Act 2016, European Union (Notification of Withdrawal) Act 2017,
European Union (Withdrawal) Act 2018, Climate Change Act (2050 Target Amendment) Order 2019</p>
<p class="govuk-body">Theresa May was the UK’s second female Prime Minister and the first world leader to serve with Type 1 diabetes.</p>


In [192]:
bios_dict_list = []

for bio in bios_list:
    born = bio[0].find(text=True).strip()
    try:
        died = bio[1].find(text=True).strip()
    except IndexError:
        died = ""    term = bio[1].find(text=True).strip()
    party = bio[2].find(text=True).strip()
    major_acts = bio[3].find(text=True).strip()
    try:
        facts = bio[4].find(text=True).strip()
    except IndexError:
        facts = ""
    bios_dict = {
        "born": born,
        "term": term,
        "party": party,
        "major_acts": major_acts,
        "facts": facts,
    }
    bios_dict_list.append(bios_dict)

In [193]:
bios_df = pd.DataFrame(bios_dict_list)

In [194]:
merged_df = pd.concat([df, bios_df], axis=1)

In [197]:
merged_df

Unnamed: 0,name,party,start,end,url,image,born,term,party.1,major_acts,facts
0,Theresa May MP,Conservative,2016,2019,https://www.gov.uk/government/history/past-prime-ministers/theresa-may,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/6/s216_PM_portrait_960x640.jpg,"1 October 1956, Eastbourne",2016 to 2019,Conservative,"Investigatory Powers Act 2016, European Union (Notification of Withdrawal) Act 2017,\r\nEuropean Union (Withdrawal) Act 2018, Climate Change Act (2050 Target Amendment) Order 2019",Theresa May was the UK’s second female Prime Minister and the first world leader to serve with Type 1 diabetes.
1,David Cameron,Conservative,2010,2016,https://www.gov.uk/government/history/past-prime-ministers/david-cameron,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/1/s216_David_Cameron.jpg,"9 October 1966, London",2010 to 2016,Conservative,Fixed Term Parliaments Act 2010\r\nAcademies Act 2010\r\nEducation Act 2011\r\nMarriage (Same Sex Couples) Act 2013\r\nSuccession to the Crown Act 2013,"David Cameron was the youngest Prime Minister since Lord Liverpool in 1812.\r\n\r\nDavid Cameron is Chairman of Patrons at National Citizen Service, the UK’s flagship youth development programme that he personally founded while leader of the opposition.\r\n\r\nHe is also President of Alzheimer’s Research UK and, together with former US Secretary of State, John Kerry, co-chairs Pew Bertarelli Ocean Ambassadors. He also sits on the Board of the ONE campaign."
2,Gordon Brown,Labour,2007,2010,https://www.gov.uk/government/history/past-prime-ministers/gordon-brown,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/964/s216_Gordon-Brown.jpg,"20 February 1951, Giffnock, Glasgow, Scotland",2007 to 2010,Labour,Climate Change Act 2008: to set a target for the year 2050 for the reduction of targeted greenhouse gas emissions.\r\n\r\nCounter-Terrorism Act 2008: to confer further powers to gather and share information for counter-terrorism and other purposes.,"Britain’s longest-serving modern Chancellor of the Exchequer. He was appointed Chancellor in May 1997, when Tony Blair became Prime Minister, through to June 2007 when he started as Prime Minister himself."
3,Tony Blair,Labour,1997,2007,https://www.gov.uk/government/history/past-prime-ministers/tony-blair,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/965/s216_tony-blair-bw.jpg,"6 May 1953, Edinburgh, Scotland",1997 to 2007,Labour,Civil Partnership Act 2004: allowed legal recognition of civil partnership relationship between two people of the same sex.,"Tony Blair initiated reforms in the House of Commons, modernising the format of Prime Minister's Question Time."
4,Sir John Major KG CH,Conservative,1990,1997,https://www.gov.uk/government/history/past-prime-ministers/john-major,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/966/s216_John-Major.jpg,"29 March 1943, St Helier, Carshalton, Surrey",1990 to 1997,Conservative,National Lottery Act 1993: licensed a body to run a National Lottery.\r\n\r\nCouncil Tax 1992: replaced the highly unpopular Poll Tax.,"In 1994 his government's representatives participated in the negotiation of a cease-fire in Northern Ireland.\r\n\r\nFollowing the death of Diana, Princess of Wales, in 1997, John Major was appointed a special guardian to Princes William and Harry with responsibility for legal and administrative matters.\r\n\r\nA cricket enthusiast, in 2005 he was elected to the Committee of the Marylebone Cricket Club.\r\n\r\nHe is a successful after dinner speaker."
5,Baroness Margaret Thatcher,Conservative,1979,1990,https://www.gov.uk/government/history/past-prime-ministers/margaret-thatcher,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/967/s216_Margaret-Thatcher.jpg,"13 October 1925, Grantham, Lincolnshire","8 April 2013, London",1979 to 1990,Conservative,"Housing Act 1980: gave security of tenure, and the right to buy homes, to tenants of local authorities and other bodies."
6,James Callaghan,Labour,1976,1979,https://www.gov.uk/government/history/past-prime-ministers/james-callaghan,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/968/s216_James-Callaghan.jpg,"27 March 1912 , Copnor area of Portsmouth, Hampshire","26 March 2005, Ringmer, East Sussex",1976 to 1979,Labour,Dangerous Wild Animals Act 1976: regulated the keeping of dangerous wild animals.\r\n\r\nRace Relations Act 1976: serious amendments to the 1968 act to make fresh provision with respect to discrimination on racial grounds and relations between people of different racial groups.
7,Harold Wilson,Labour,1974,1976,https://www.gov.uk/government/history/past-prime-ministers/harold-wilson,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/969/s216_Harold-Wilson.jpg,"11 March 1916, Huddersfield, Yorkshire","23 May 1995, London","1974 to 1976, 1964 to 1970",Labour,"Murder (Abolition of Death Penalty) Act 1965: suspended the death penalty in England, Wales and Scotland.\r\n\r\nSexual Offences Act 1967: decriminalisation of certain homosexual offences."
8,Sir Edward Heath,Conservative,1970,1974,https://www.gov.uk/government/history/past-prime-ministers/edward-heath,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/970/s216_Edward-Heath.jpg,"9 July 1916, Broadstairs, Kent","17 July 2005, Salisbury, Wiltshire",1970 to 1974,Conservative,Industrial relations Act 1971 (repealed 1974): controversial legislation to curb union power.
9,Sir Alec Douglas-Home,Conservative,1963,1964,https://www.gov.uk/government/history/past-prime-ministers/alec-douglas-home,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/person/image/971/s216_Sir-Alec-Douglas-Home.jpg,"2 July 1903, London","9 October 1995, Berwickshire",1963 to 1964,Conservative,


---

In [195]:
df.to_csv("data/processed/prime-ministers-directory.csv", index=False)

In [196]:
merged_df.to_csv("data/processed/prime-ministers-directory_w_bio.csv", index=False)