In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

## For newer cases (since 29 April 2020):

In [50]:
def details_new(soup):
    """Extract the headline details from a new-layout case page.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed page; only its ``stripped_strings`` are used.

    Returns
    -------
    dict
        Keys 'Name', 'Case ID', 'Judgment date', 'Neutral citation' and
        'Justices', all mapped to strings.

    Raises
    ------
    ValueError
        If one of the headings 'Case summary', 'Judgment date',
        'Neutral citation', 'Justices' or 'Judgment (PDF)' is missing.
    """
    strips = list(soup.stripped_strings)
    details = {}

    # Positions of the headings that delimit each field.
    caseid = strips.index('Case summary')
    date = strips.index('Judgment date')
    cit = strips.index('Neutral citation')
    justices = strips.index('Justices')
    pdf = strips.index('Judgment (PDF)')

    # The first string is the page title. The suffix is written both as
    # ' - The Supreme Court' and '- The Supreme Court', so drop it without
    # the leading space and strip what is left.
    details['Name'] = strips[0].replace('- The Supreme Court', '').strip()

    # The case ID is the string immediately before the 'Case summary' heading.
    raw_id = strips[caseid - 1]
    if 'Case ID:' in raw_id:
        # Strip so that 'Case ID: UKSC 2021/0062' does not keep a leading space.
        case_id = raw_id.replace('Case ID:', '').strip()
        if 'UKSC' not in case_id:
            case_id = 'UKSC ' + case_id
        details['Case ID'] = case_id
    else:
        details['Case ID'] = raw_id

    # Everything between two headings belongs to the first heading.
    details['Judgment date'] = ', '.join(strips[date + 1:cit])
    details['Neutral citation'] = ', '.join(strips[cit + 1:pdf])
    details['Justices'] = strips[justices + 1]

    return details

## For older cases (prior to 29 April 2020): 

In [51]:
def details_old(soup):
    """Extract the headline details from an old-layout case page.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed page; only its ``stripped_strings`` are used.

    Returns
    -------
    dict
        Keys 'Name', 'Case ID', 'Judgment date', 'Neutral citation' and
        'Justices', all mapped to strings. Multi-valued fields are joined
        with ', '.

    Raises
    ------
    ValueError
        If a required heading ('Judgment date', 'Case ID', 'Justices', or
        either spelling of the neutral-citation heading) is missing.
    """
    strips = list(soup.stripped_strings)
    details = {}

    # The citation heading appears in singular and '(s)' forms.
    if 'Neutral citation number' in strips:
        cit = strips.index('Neutral citation number')
    else:
        cit = strips.index('Neutral citation number(s)')
    date = strips.index('Judgment date')
    caseid = strips.index('Case ID')
    justices = strips.index('Justices')

    # Old-layout titles are written '...(Appellant)- The Supreme Court' with
    # no space before the hyphen, so match the suffix without one and strip
    # any space that is left behind.
    details['Name'] = strips[0].replace('- The Supreme Court', '').strip()
    # Everything between two headings belongs to the first heading.
    details['Case ID'] = ', '.join(strips[caseid + 1:justices])
    details['Judgment date'] = ', '.join(strips[date + 1:cit])
    details['Neutral citation'] = ', '.join(strips[cit + 1:caseid])
    details['Justices'] = strips[justices + 1]

    return details

## Main: fetch a case landing page and dispatch to the matching parser

In [52]:
def landing_to_dict(url):
    """Download a case landing page and return its headline details.

    The page is parsed with BeautifulSoup and handed to ``details_new`` when
    the string 'Facts' is among its stripped strings, otherwise to
    ``details_old``. The requested URL is added to the result under 'URL'.
    """
    page_soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    # A 'Facts' string is used as the marker for the newer page layout.
    has_facts = 'Facts' in list(page_soup.stripped_strings)
    parse = details_new if has_facts else details_old

    record = parse(page_soup)
    record['URL'] = url
    return record

## Additional details for new cases

In [9]:
# Fetch one case page (UKSC 2020/0066) and dump its stripped strings so the
# order of headings and values can be inspected by eye.
html = requests.get('https://www.supremecourt.uk/cases/uksc-2020-0066.html').content
soup = BeautifulSoup(html, 'html.parser') 

# stripped_strings yields every text node with surrounding whitespace removed.
strips = list(soup.stripped_strings)
print(strips)

['FirstPort Property Services Ltd (Appellant) v Settlers Court RTM Company and others (Respondents) - The Supreme Court', 'Skip to Primary Navigation', 'Skip to Content', 'Go to the home page', 'Go to Latest news', 'Go to the sitemap page', 'Go to search', 'Go to the Decided cases', 'Go to the Current cases', 'Go to the Complaints Policies and Judicial Conduct', 'Go to the terms and conditions page', 'Go to the contact us page', 'Go to the accessibility statement page', 'Skip to main content', 'Home', 'Accessibility', 'Education', 'Press office', 'Contact us', 'FAQs', 'Careers', 'Cymraeg', 'Bookmark', 'Case details', 'Home', 'Current cases', 'Decided cases', 'Court procedures', 'Visiting The Court', 'About The Supreme Court', 'Latest', 'news', 'Current cases', 'FirstPort Property Services Ltd (Appellant) v Settlers Court RTM Company and others (Respondents)', 'Case ID: 2020/0066', 'Case summary', 'Issue', 'Where a company incorporated by leaseholders in a block of flats acquires the ri

In [25]:
def addl_details(strips):
    """Extract the extra case-summary fields from a new-layout case page.

    Parameters
    ----------
    strips : list of str
        The page's stripped strings in document order.

    Returns
    -------
    dict
        Keys 'Case ID', 'Issue', 'Facts', 'Judgment appealed',
        'Hearing start date' and 'Hearing finish date'. 'Judgment appealed'
        is ``None`` when the page has no 'Judgment appealed' heading.

    Raises
    ------
    ValueError
        If a required heading ('Case summary', 'Issue' / 'Issue(s)', 'Facts',
        'Hearing start date', 'Hearing finish date', or 'Parties' when
        'Judgment appealed' is absent) is missing.
    """
    details = {}

    caseid = strips.index('Case summary')
    # The issue heading appears in singular and '(s)' forms.
    if 'Issue' in strips:
        issues = strips.index('Issue')
    else:
        issues = strips.index('Issue(s)')
    facts = strips.index('Facts')

    # The facts run up to 'Judgment appealed' when present, else to 'Parties'.
    has_appealed = 'Judgment appealed' in strips
    facts_end = strips.index('Judgment appealed' if has_appealed else 'Parties')
    start = strips.index('Hearing start date')
    finish = strips.index('Hearing finish date')

    # The case ID is the string immediately before the 'Case summary' heading.
    raw_id = strips[caseid - 1]
    if 'Case ID:' in raw_id:
        case_id = raw_id.replace('Case ID:', '').strip()
        if 'UKSC' not in case_id:
            case_id = 'UKSC ' + case_id
        details['Case ID'] = case_id
    else:
        details['Case ID'] = raw_id

    details['Issue'] = ' '.join(strips[issues + 1:facts])
    details['Facts'] = ' '.join(strips[facts + 1:facts_end])
    # Only read the value when the heading exists; otherwise the entry after
    # 'Parties' would be reported as the judgment appealed.
    details['Judgment appealed'] = strips[facts_end + 1] if has_appealed else None
    details['Hearing start date'] = strips[start + 1]
    details['Hearing finish date'] = strips[finish + 1]

    return details

def addl_main(url):
    """Download a case page and return its additional case-summary details.

    Returns the dict built by ``addl_details`` with the requested URL added
    under 'URL'. When the page's stripped strings do not include 'Facts',
    returns the message string
    'No additional details available for this case' instead.
    """
    response_body = requests.get(url).content
    page_strings = list(BeautifulSoup(response_body, 'html.parser').stripped_strings)

    # Guard clause: without a 'Facts' string there is nothing to extract.
    if 'Facts' not in page_strings:
        return 'No additional details available for this case'

    record = addl_details(page_strings)
    record['URL'] = url
    return record

In [26]:
# Try addl_main on the UKSC 2021/0160 case page.
addl_main('https://www.supremecourt.uk/cases/uksc-2021-0160.html')

{'Case ID': 'UKSC 2021/0160',
 'Issue': 'Did the Supreme Court wrongly decide that Mr Crosland’s disclosure of the result of the Heathrow appeal, in breach of an embargo on the Court’s judgment, constituted a contempt of court? Did the Court then wrongly impose a fine of £5,000 on Mr Crosland, and wrongly order him to pay the Attorney General’s costs in the sum of £15,000?',
 'Facts': 'On 7 and 8 October 2020, the Supreme Court heard an appeal in the case of R (Friends of the Earth Ltd and others) v Heathrow Airport Limited [2020] UKSC 53 ("the Heathrow appeal"). Mr Timothy Crosland, an unregistered barrister, represented the charity Plan B Earth in those proceedings, in his capacity as a director of Plan B Earth. On 9 December 2020, a copy of the Supreme Court’s draft judgment was circulated to the parties’ representatives, to enable them to make suggestions for the correction of any errors, to prepare submissions on consequential matters, and to prepare themselves for the publication

In [20]:
# Try addl_main on the UKSC 2012/0088 case page.
addl_main('https://www.supremecourt.uk/cases/uksc-2012-0088.html')

'No additional details available for this case'

## All together for tests (the parsers are redefined here to take the `strips` list instead of the soup)

In [4]:
def details_new(strips):
    """Extract the headline details from a new-layout case page.

    Parameters
    ----------
    strips : list of str
        The page's stripped strings in document order; ``strips[0]`` is the
        page title.

    Returns
    -------
    dict
        Keys 'Name', 'Case ID', 'Judgment date', 'Neutral citation' and
        'Justices', all mapped to strings.

    Raises
    ------
    ValueError
        If one of the headings 'Case summary', 'Judgment date',
        'Neutral citation', 'Justices' or 'Judgment (PDF)' is missing.
    """
    details = {}

    # Positions of the headings that delimit each field.
    caseid = strips.index('Case summary')
    date = strips.index('Judgment date')
    cit = strips.index('Neutral citation')
    justices = strips.index('Justices')
    pdf = strips.index('Judgment (PDF)')

    # Removing '- The Supreme Court' from 'X - The Supreme Court' leaves
    # 'X ' behind, so strip the result.
    details['Name'] = strips[0].replace('- The Supreme Court', '').strip()

    # The case ID is the string immediately before the 'Case summary' heading.
    raw_id = strips[caseid - 1]
    if 'Case ID:' in raw_id:
        case_id = raw_id.replace('Case ID:', '').strip()
        # Bare IDs such as '2021/0062' get the 'UKSC' prefix added.
        if 'UKSC' not in case_id:
            case_id = 'UKSC ' + case_id
        details['Case ID'] = case_id
    else:
        details['Case ID'] = raw_id

    # Everything between two headings belongs to the first heading.
    details['Judgment date'] = ', '.join(strips[date + 1:cit])
    details['Neutral citation'] = ', '.join(strips[cit + 1:pdf])
    details['Justices'] = strips[justices + 1]

    return details

def details_old(strips):
    """Extract the headline details from an old-layout case page.

    ``strips`` is the page's list of stripped strings, with the page title
    first. Each field is the run of strings between its heading and the next
    heading, joined with ', '. The justices field is the single string after
    the 'Justices' heading.
    """
    # The citation heading appears in singular and '(s)' forms.
    citation_heading = 'Neutral citation number'
    if citation_heading not in strips:
        citation_heading = 'Neutral citation number(s)'

    cit_at = strips.index(citation_heading)
    date_at = strips.index('Judgment date')
    id_at = strips.index('Case ID')
    justices_at = strips.index('Justices')

    return {
        'Name': strips[0].replace('- The Supreme Court', ''),
        'Case ID': ', '.join(strips[id_at + 1:justices_at]),
        'Judgment date': ', '.join(strips[date_at + 1:cit_at]),
        'Neutral citation': ', '.join(strips[cit_at + 1:id_at]),
        'Justices': strips[justices_at + 1],
    }

def landing_to_dict(url, timeout=30):
    """Download a case landing page and return its headline details.

    Parameters
    ----------
    url : str
        Address of the case landing page.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a single stalled request would block the whole
        run when this function is mapped over many URLs.

    Returns
    -------
    dict
        The dict produced by ``details_new`` (when the page's stripped
        strings include 'Facts') or ``details_old`` (otherwise), with the
        requested URL added under 'URL'.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on an HTTP error instead of parsing an error page and
    # hitting an unrelated "is not in list" ValueError later.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    strips = list(soup.stripped_strings)

    # A 'Facts' string is used as the marker for the newer page layout.
    if 'Facts' in strips:
        details = details_new(strips)
    else:
        details = details_old(strips)

    details['URL'] = url
    return details

In [27]:
# One sample case URL per year key, 2009-2022, used to exercise the scraper.
# NOTE(review): the keys look like the judgment year (they match the recorded
# 'Judgment date' outputs), not the year in the case ID - confirm.
test_links = {2022: 'https://www.supremecourt.uk/cases/uksc-2021-0062.html', 
              2021: 'https://www.supremecourt.uk/cases/uksc-2019-0183.html',
              2020: 'https://www.supremecourt.uk/cases/uksc-2018-0100.html', 
              2019: 'https://www.supremecourt.uk/cases/uksc-2017-0175.html', 
              2018: 'https://www.supremecourt.uk/cases/uksc-2015-0022.html', 
              2017: 'https://www.supremecourt.uk/cases/uksc-2015-0057.html', 
              2016: 'https://www.supremecourt.uk/cases/uksc-2014-0247.html', 
              2015: 'https://www.supremecourt.uk/cases/uksc-2013-0280.html', 
              2014: 'https://www.supremecourt.uk/cases/uksc-2012-0088.html', 
              2013: 'https://www.supremecourt.uk/cases/uksc-2012-0060.html', 
              2012: 'https://www.supremecourt.uk/cases/uksc-2011-0180.html', 
              2011: 'https://www.supremecourt.uk/cases/uksc-2009-0177.html', 
              2010: 'https://www.supremecourt.uk/cases/uksc-2009-0018.html', 
              2009: 'https://www.supremecourt.uk/cases/uksc-2009-0085.html'}

In [28]:
# Dict keys become the row index; each URL lands in the single column 0.
test_df = pd.DataFrame.from_dict(test_links, orient='index')
test_df

Unnamed: 0,0
2022,https://www.supremecourt.uk/cases/uksc-2021-00...
2021,https://www.supremecourt.uk/cases/uksc-2019-01...
2020,https://www.supremecourt.uk/cases/uksc-2018-01...
2019,https://www.supremecourt.uk/cases/uksc-2017-01...
2018,https://www.supremecourt.uk/cases/uksc-2015-00...
2017,https://www.supremecourt.uk/cases/uksc-2015-00...
2016,https://www.supremecourt.uk/cases/uksc-2014-02...
2015,https://www.supremecourt.uk/cases/uksc-2013-02...
2014,https://www.supremecourt.uk/cases/uksc-2012-00...
2013,https://www.supremecourt.uk/cases/uksc-2012-00...


In [63]:
# Scrape every test URL (one HTTP request per row). The result is a Series of
# detail dicts that keeps test_df's index.
output = test_df[0].map(landing_to_dict)

In [64]:
# Show the scraped record for the 2022 test link.
print(output[2022])

{'Name': 'R (on the application of O (a minor, by her litigation friend AO)) (Appellant) v Secretary of State for the Home Department (Respondent) ', 'Case ID': 'UKSC 2021/0062', 'Judgment date': '2 February 2022', 'Neutral citation': '[2022] UKSC 3', 'Justices': 'Lord Hodge, Lord Briggs, Lady Arden, Lord Stephens, Lady Rose', 'URL': 'https://www.supremecourt.uk/cases/uksc-2021-0062.html'}


In [65]:
# Show the scraped record for the 2015 test link.
print(output[2015])

{'Name': 'Cavendish Square Holding BV (Appellant) v Talal El Makdessi (Respondent)', 'Case ID': 'UKSC 2013/0280', 'Judgment date': '04 Nov 2015', 'Neutral citation': '[2015] UKSC 67', 'Justices': 'Lord Neuberger, Lord Mance, Lord Clarke, Lord Sumption, Lord Carnwath, Lord Toulson, Lord Hodge', 'URL': 'https://www.supremecourt.uk/cases/uksc-2013-0280.html'}


In [66]:
# Show the scraped record for the 2009 test link.
print(output[2009])

{'Name': 'AM (Somalia) and others (VS (Sri Lanka)) (Appellant) v Entry Clearance Officer (Respondent)', 'Case ID': 'UKSC 2009/0085', 'Judgment date': '16 Dec 2009', 'Neutral citation': '[2009] UKSC 16', 'Justices': 'Lord Hope, Lord Rodger, Lord Brown, Lord Collins, Lord Kerr', 'URL': 'https://www.supremecourt.uk/cases/uksc-2009-0085.html'}


In [67]:
# Show the scraped record for the 2010 test link.
print(output[2010])

{'Name': "R (on the application of Hani El Sayed Sabaei Youssef) (Respondent) v Her Majesty's Treasury (Appellants)", 'Case ID': 'UKSC 2009/0018', 'Judgment date': '27 Jan 2010', 'Neutral citation': '[2010] UKSC 1, [2010] UKSC 2, [2010] UKSC 5', 'Justices': 'Lord Phillips, Lord Hope, Lord Rodger, Lord Walker, Lady Hale, Lord Brown, Lord Mance', 'URL': 'https://www.supremecourt.uk/cases/uksc-2009-0018.html'}


In [68]:
# Show the scraped record for the 2012 test link.
print(output[2012])

{'Name': 'R (on the application of Halligen) (Appellant) v Secretary of State for the Home Department (Respondent)', 'Case ID': 'UKSC 2011/0180', 'Judgment date': '23 May 2012', 'Neutral citation': '[2012] UKSC 20', 'Justices': 'Lord Phillips, Lady Hale, Lord Mance, Lord Kerr, Lord Wilson', 'URL': 'https://www.supremecourt.uk/cases/uksc-2011-0180.html'}


In [69]:
# Show the scraped record for the 2019 test link.
print(output[2019])

{'Name': 'Lachaux (Respondent) v Independent Print Ltd and another (Appellants)', 'Case ID': 'UKSC 2017/0175', 'Judgment date': '12 Jun 2019', 'Neutral citation': '[2019] UKSC 27', 'Justices': 'Lord Kerr, Lord Wilson, Lord Sumption, Lord Hodge, Lord Briggs', 'URL': 'https://www.supremecourt.uk/cases/uksc-2017-0175.html'}


In [70]:
# Show the scraped record for the 2021 test link.
print(output[2021])

{'Name': "Tinkler (Respondent) v Commissioners for Her Majesty's Revenue and Customs (Appellant)", 'Case ID': 'UKSC 2019/0183', 'Judgment date': '30 July 2021', 'Neutral citation': '[2021] UKSC 39', 'Justices': 'Lord Hodge, Lord Briggs, Lady Arden, Lord Burrows, Lady Rose', 'URL': 'https://www.supremecourt.uk/cases/uksc-2019-0183.html'}


In [7]:
# One-off check of landing_to_dict on the UKSC 2011/0110 case page.
landing_to_dict('https://www.supremecourt.uk/cases/uksc-2011-0110.html')

{'Name': 'Societe Generale, London Branch (Respondent) v Geys (Appellant)',
 'Case ID': 'UKSC 2011/0110',
 'Judgment date': '19 Dec 2012',
 'Neutral citation': '[2012] UKSC 63',
 'Justices': 'Lord Hope, Lady Hale, Lord Wilson, Lord Sumption, Lord Carnwath',
 'URL': 'https://www.supremecourt.uk/cases/uksc-2011-0110.html'}

In [6]:
# Fetch the UKSC 2011/0110 case page and dump its stripped strings so the
# order of headings and values can be inspected by eye.
html = requests.get('https://www.supremecourt.uk/cases/uksc-2011-0110.html').content
soup = BeautifulSoup(html, 'html.parser') 

# stripped_strings yields every text node with surrounding whitespace removed.
strips = list(soup.stripped_strings)
print(strips)

['Societe Generale, London Branch (Respondent) v Geys (Appellant)- The Supreme Court', 'Skip to Primary Navigation', 'Skip to Content', 'Go to the home page', 'Go to Latest news', 'Go to the sitemap page', 'Go to search', 'Go to the Decided cases', 'Go to the Current cases', 'Go to the Complaints Policies and Judicial Conduct', 'Go to the terms and conditions page', 'Go to the contact us page', 'Go to the accessibility statement page', 'Skip to main content', 'Home', 'Accessibility', 'Education', 'Press office', 'Contact us', 'FAQs', 'Careers', 'Cymraeg', 'Bookmark', 'Case details', 'Home', 'Decided cases', 'Decided cases', 'Court procedures', 'Visiting The Court', 'About The Supreme Court', 'Latest', 'news', 'Current cases', 'Societe Generale, London Branch (Respondent) v Geys (Appellant)', 'Judgment date', '19 Dec 2012', 'Neutral citation number', '[2012] UKSC 63', 'Case ID', 'UKSC 2011/0110', 'Justices', 'Lord Hope, Lady Hale, Lord Wilson, Lord Sumption, Lord Carnwath', 'Judgment de