# Analysis of PLC financials listed on the London Stock Exchange



This pipeline aims to scrape key company financials off the London Stock Exchange website. The aim is to create a large database of PLCs and their financial data including historical stock prices to conduct in-depth ML/DL analysis into what creates a good long term stock pick.


Importing libraries

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import html
import json

## Example of scraping data - STAN

Scraping company financials from the LSE website. This first example collects data on Standard Chartered PLC (STAN)

In [2]:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}
url = 'https://www.londonstockexchange.com/stock/{}/{}'

stock = 'STAN/standard-chartered-plc'
info = 'fundamentals'

full_url = url.format(stock, info)

print(full_url)

r = requests.get(full_url)
soup = BeautifulSoup(r.text, 'lxml')
data = json.loads(soup.select_one('#ng-lseg-state').string.replace('&q;','"'))
#print(len(data['sortedComponents']['content']))
#print(data['sortedComponents']['content'][1]['status']['childComponents'][1]['content'].keys())
fundamentals = data['sortedComponents']['content'][1]['status']['childComponents'][1]['content']['fundamentals']

company_data = {}

for key1, value1 in fundamentals.items():
    #print(key1)
    for key2,value2 in value1.items():
           
            for item in value2:
                
                #print(item)
                for key3, value3 in item.items():
                    if type(value3) == dict:
                        if key3 in company_data:
                            company_data[key3].append(value3['value'])
                        else:
                            company_data.setdefault(key3, [value3['value']])
            
#print(company_data)                             
columns = company_data.keys()



df = pd.DataFrame.from_dict(company_data, orient='index')
df = df.transpose()
df = df.drop_duplicates(subset='dateyearend', keep='first', inplace=False)
df.insert(0, 'Company', stock)
df

https://www.londonstockexchange.com/stock/STAN/standard-chartered-plc/fundamentals


Unnamed: 0,Company,dateyearend,currency,totaldividend,taxes,revenue,provisionsforlosses,profitforperiod,pretaxprofit,posttaxprofit,...,epsgrowthpercentual,dividendcover,revenuepershare,pretaxprofitpershare,operatingmarging,returnoncapitalemployed,dividendyield,dividendpersharegrowth,navpershare,netgearing
0,STAN/standard-chartered-plc,2016-12-31,USD,-,-,19679000000,-,-191000000,446000000,-154000000,...,-,-,5.9633,0.1352,-,-,-,-,13.307,-
1,STAN/standard-chartered-plc,2017-12-31,USD,0.11,-,21377000000,-,1268000000,2147000000,1000000000,...,-,2.1364,6.4916,0.652,-,-,1.0609,-,14.11,-
2,STAN/standard-chartered-plc,2018-12-31,USD,0.21,-,24038000000,-,1109000000,2307000000,868000000,...,-20.4255,0.8905,7.271,0.6978,-,-,2.6926,90.9091,13.6201,-
3,STAN/standard-chartered-plc,2019-12-31,USD,0.22,-,25188000000,-,2340000000,3413000000,2040000000,...,204.8128,2.5909,7.7359,1.0482,-,-,2.3395,4.7619,14.1208,-
4,STAN/standard-chartered-plc,2020-12-31,USD,0.09,-,21050000000,-,751000000,2401000000,1539000000,...,-81.8042,1.1556,6.6432,0.7577,-,-,1.4204,-59.2027,13.9202,-


As seen, this set of code creates a dataframe containing 52 columns, data found in the fundamentals section of the STAN listing on the LSE. We shall now create a function to do this for any company. First, we need to create a main dataframe that will contain all data for every company that we scrape.

In [7]:
columns = company_data.keys()
empty = []
main_df = pd.DataFrame(columns=columns)
main_df.insert(0, 'Company', value = empty)

In [8]:
main_df

Unnamed: 0,Company,dateyearend,currency,totaldividend,taxes,revenue,provisionsforlosses,profitforperiod,pretaxprofit,posttaxprofit,...,epsgrowthpercentual,dividendcover,revenuepershare,pretaxprofitpershare,operatingmarging,returnoncapitalemployed,dividendyield,dividendpersharegrowth,navpershare,netgearing


## Creating a function

As seen, this set of code creates a dataframe containing 52 columns, data found in the fundamentals section of the STAN listing on the LSE. We shall now create a function to do this for any company. `main_df` is the dataframe that will contain all our data


In [9]:
def my_scraper(stock):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}
    url = 'https://www.londonstockexchange.com/stock/{}/{}'

    
    info = 'fundamentals'

    full_url = url.format(stock, info)

    print(full_url)

    r = requests.get(full_url)
    soup = BeautifulSoup(r.text, 'lxml')
    data = json.loads(soup.select_one('#ng-lseg-state').string.replace('&q;','"'))
    #print(len(data['sortedComponents']['content']))
    #print(data['sortedComponents']['content'][1]['status']['childComponents'][1]['content'].keys())
    fundamentals = data['sortedComponents']['content'][1]['status']['childComponents'][1]['content']['fundamentals']

    company_data = {}

    for key1, value1 in fundamentals.items():
        #print(key1)
        for key2,value2 in value1.items():
           
            for item in value2:
                
                    #print(item)
                    for key3, value3 in item.items():
                            if type(value3) == dict:
                                if key3 in company_data:
                                    company_data[key3].append(value3['value'])
                                else:
                                    company_data.setdefault(key3, [value3['value']])
            
    #print(company_data)                             
    columns = company_data.keys()



    df = pd.DataFrame.from_dict(company_data, orient='index')
    df = df.transpose()
    df = df.drop_duplicates(subset='dateyearend', keep='first', inplace=False)
    df.insert(0, 'Company', stock)
    return df


In [20]:
re = my_scraper('EXPN/EXPERIAN-PLC')
main_df = main_df.append(re)
main_df

https://www.londonstockexchange.com/stock/EXPN/EXPERIAN-PLC/fundamentals


Unnamed: 0,Company,dateyearend,currency,totaldividend,taxes,revenue,provisionsforlosses,profitforperiod,pretaxprofit,posttaxprofit,...,epsgrowthpercentual,dividendcover,revenuepershare,pretaxprofitpershare,operatingmarging,returnoncapitalemployed,dividendyield,dividendpersharegrowth,navpershare,netgearing
0,EXPN/EXPERIAN-PLC,2015-03-31,USD,0.3925,-,4810000000,-,772000000,1001000000,746000000,...,2.8646,1.9592,4.9232,1.0246,24.0541,17.3724,2.3742,8.0325,-3.2748,112.8859
1,EXPN/EXPERIAN-PLC,2016-03-31,USD,0.4,-,4550000000,-,752000000,1023000000,760000000,...,-0.7782,1.9975,4.7365,1.0649,23.3187,18.2086,2.2311,3.0997,-3.3391,122.2772
2,EXPN/EXPERIAN-PLC,2017-03-31,USD,0.415,-,4335000000,-,865000000,1067000000,808000000,...,17.4966,2.0843,4.6117,1.1351,24.7982,19.1503,2.0393,2.9302,-3.2516,112.2016
3,EXPN/EXPERIAN-PLC,2018-03-31,USD,0.4475,-,4662000000,-,815000000,986000000,837000000,...,-3.4745,2.0581,5.084,1.0752,23.4878,18.5373,2.0642,3.871,-3.7224,128.3149
4,EXPN/EXPERIAN-PLC,2019-03-31,USD,0.465,-,4861000000,-,701000000,954000000,698000000,...,-13.4983,1.6538,5.3772,1.0553,23.8017,19.3884,1.7074,7.8509,-3.6461,128.0242


## Creating a list of companies to analyse

In [10]:
def my_lister(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}
    

    full_url = url

    #print(full_url)

    r = requests.get(full_url)
    soup = BeautifulSoup(r.text, 'lxml')
    data = json.loads(soup.select_one('#ng-lseg-state').string.replace('&q;','"'))
    #print(len(data['sortedComponents']['content']))
    bruh  = data['sortedComponents']
    companies_list = bruh['content'][2]['status']['childComponents'][0]['content']['ftseIndexInstruments']['value']
    #[1]['ftseIndexInstruments']
    companies = {}

    for i in companies_list:
        x = i['tidm']['value']
        y = i['issuername']['value']
        if '&a;' in y: 
            y = y.replace("&a;","&")
        y = y.replace(" ", "-")
        companies[x] = y
    return companies

In [27]:
numOfPages = [1,2,3,4,5,6]
full_companies_list = {}
url = 'https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page={}'
for i in numOfPages:
    num = str(i)
    full_url = url.format(num)
    companies = my_lister(full_url)
    full_companies_list.update(companies)
    
#full_companies_list
full_companies_list.pop("ENT")
full_companies_list.pop("RKT")
full_companies_list

{'JET': 'JUST-EAT-TAKEAWAY.COM-N.V.',
 'IAG': 'INTERNATIONAL-CONSOLIDATED-AIRLINES-GROUP-S.A.',
 'OCDO': 'OCADO-GROUP-PLC',
 'AVV': 'AVEVA-GROUP-PLC',
 'SKG': 'SMURFIT-KAPPA-GROUP-PLC',
 'CPG': 'COMPASS-GROUP-PLC',
 'INF': 'INFORMA-PLC',
 'RSW': 'RENISHAW-PLC',
 'EXPN': 'EXPERIAN-PLC',
 'HSBA': 'HSBC-HOLDINGS-PLC',
 'AVST': 'AVAST-PLC',
 'SMDS': 'SMITH-(DS)-PLC',
 'WEIR': 'WEIR-GROUP-PLC',
 'IHG': 'INTERCONTINENTAL-HOTELS-GROUP-PLC',
 'RMV': 'RIGHTMOVE-PLC',
 'AUTO': 'AUTO-TRADER-GROUP-PLC',
 'LLOY': 'LLOYDS-BANKING-GROUP-PLC',
 'ITRK': 'INTERTEK-GROUP-PLC',
 'ADM': 'ADMIRAL-GROUP-PLC',
 'SMT': 'SCOTTISH-MORTGAGE-INVESTMENT-TRUST-PLC',
 'SSE': 'SSE-PLC',
 'FLTR': 'FLUTTER-ENTERTAINMENT-PLC',
 'JD.': 'JD-SPORTS-FASHION-PLC',
 'PHNX': 'PHOENIX-GROUP-HOLDINGS-PLC',
 'BARC': 'BARCLAYS-PLC',
 'STAN': 'STANDARD-CHARTERED-PLC',
 'ICP': 'INTERMEDIATE-CAPITAL-GROUP-PLC',
 'PRU': 'PRUDENTIAL-PLC',
 'HLMA': 'HALMA-PLC',
 'ULVR': 'UNILEVER-PLC',
 'HL.': 'HARGREAVES-LANSDOWN-PLC',
 'SDR': 'SCHRODER

In [29]:
for key, value in full_companies_list.items():
    stock = '{}/{}'
    stock = stock.format(key, value)
    print(stock)
    data = my_scraper(stock)
    main_df = main_df.append(data, ignore_index=True)
    #print(main_df)
    
main_df

JET/JUST-EAT-TAKEAWAY.COM-N.V.
https://www.londonstockexchange.com/stock/JET/JUST-EAT-TAKEAWAY.COM-N.V./fundamentals
IAG/INTERNATIONAL-CONSOLIDATED-AIRLINES-GROUP-S.A.
https://www.londonstockexchange.com/stock/IAG/INTERNATIONAL-CONSOLIDATED-AIRLINES-GROUP-S.A./fundamentals
OCDO/OCADO-GROUP-PLC
https://www.londonstockexchange.com/stock/OCDO/OCADO-GROUP-PLC/fundamentals
AVV/AVEVA-GROUP-PLC
https://www.londonstockexchange.com/stock/AVV/AVEVA-GROUP-PLC/fundamentals
SKG/SMURFIT-KAPPA-GROUP-PLC
https://www.londonstockexchange.com/stock/SKG/SMURFIT-KAPPA-GROUP-PLC/fundamentals
CPG/COMPASS-GROUP-PLC
https://www.londonstockexchange.com/stock/CPG/COMPASS-GROUP-PLC/fundamentals
INF/INFORMA-PLC
https://www.londonstockexchange.com/stock/INF/INFORMA-PLC/fundamentals
RSW/RENISHAW-PLC
https://www.londonstockexchange.com/stock/RSW/RENISHAW-PLC/fundamentals
EXPN/EXPERIAN-PLC
https://www.londonstockexchange.com/stock/EXPN/EXPERIAN-PLC/fundamentals
HSBA/HSBC-HOLDINGS-PLC
https://www.londonstockexchange.co

LGEN/LEGAL-&-GENERAL-GROUP-PLC
https://www.londonstockexchange.com/stock/LGEN/LEGAL-&-GENERAL-GROUP-PLC/fundamentals
AV./AVIVA-PLC
https://www.londonstockexchange.com/stock/AV./AVIVA-PLC/fundamentals
BP./BP-PLC
https://www.londonstockexchange.com/stock/BP./BP-PLC/fundamentals
BRBY/BURBERRY-GROUP-PLC
https://www.londonstockexchange.com/stock/BRBY/BURBERRY-GROUP-PLC/fundamentals
ABF/ASSOCIATED-BRITISH-FOODS-PLC
https://www.londonstockexchange.com/stock/ABF/ASSOCIATED-BRITISH-FOODS-PLC/fundamentals
ANTO/ANTOFAGASTA-PLC
https://www.londonstockexchange.com/stock/ANTO/ANTOFAGASTA-PLC/fundamentals
AZN/ASTRAZENECA-PLC
https://www.londonstockexchange.com/stock/AZN/ASTRAZENECA-PLC/fundamentals
AHT/ASHTEAD-GROUP-PLC
https://www.londonstockexchange.com/stock/AHT/ASHTEAD-GROUP-PLC/fundamentals
RIO/RIO-TINTO-PLC
https://www.londonstockexchange.com/stock/RIO/RIO-TINTO-PLC/fundamentals
SMIN/SMITHS-GROUP-PLC
https://www.londonstockexchange.com/stock/SMIN/SMITHS-GROUP-PLC/fundamentals
VOD/VODAFONE-GROUP

Unnamed: 0,Company,dateyearend,currency,totaldividend,taxes,revenue,provisionsforlosses,profitforperiod,pretaxprofit,posttaxprofit,...,epsgrowthpercentual,dividendcover,revenuepershare,pretaxprofitpershare,operatingmarging,returnoncapitalemployed,dividendyield,dividendpersharegrowth,navpershare,netgearing
0,EXPN/EXPERIAN-PLC,2015-03-31,USD,0.3925,-,4810000000,-,772000000,1001000000,746000000,...,2.8646,1.9592,4.9232,1.0246,24.0541,17.3724,2.3742,8.0325,-3.2748,112.8859
1,EXPN/EXPERIAN-PLC,2016-03-31,USD,0.4,-,4550000000,-,752000000,1023000000,760000000,...,-0.7782,1.9975,4.7365,1.0649,23.3187,18.2086,2.2311,3.0997,-3.3391,122.2772
2,EXPN/EXPERIAN-PLC,2017-03-31,USD,0.415,-,4335000000,-,865000000,1067000000,808000000,...,17.4966,2.0843,4.6117,1.1351,24.7982,19.1503,2.0393,2.9302,-3.2516,112.2016
3,EXPN/EXPERIAN-PLC,2018-03-31,USD,0.4475,-,4662000000,-,815000000,986000000,837000000,...,-3.4745,2.0581,5.084,1.0752,23.4878,18.5373,2.0642,3.871,-3.7224,128.3149
4,EXPN/EXPERIAN-PLC,2019-03-31,USD,0.465,-,4861000000,-,701000000,954000000,698000000,...,-13.4983,1.6538,5.3772,1.0553,23.8017,19.3884,1.7074,7.8509,-3.6461,128.0242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,AAL/ANGLO-AMERICAN-PLC,2016-12-31,USD,-,-,21378000000,-,1926000000,2346000000,1648000000,...,-,-,16.5525,1.8165,7.7931,4.9803,-,-,11.267,37.4316
1374,AAL/ANGLO-AMERICAN-PLC,2017-12-31,USD,1.02,-,26243000000,-,4059000000,4938000000,3492000000,...,100.5479,2.4314,20.5827,3.8729,21.0685,16.473,4.9509,-,13.9889,18.1569
1375,AAL/ANGLO-AMERICAN-PLC,2018-12-31,USD,1,-,27610000000,-,4373000000,5461000000,3645000000,...,12.9032,2.8,21.7573,4.3034,21.9812,17.979,4.4699,114.2857,14.5994,10.1873
1376,AAL/ANGLO-AMERICAN-PLC,2019-12-31,USD,1.09,-,29870000000,-,4582000000,5757000000,4193000000,...,0.3571,2.578,23.65,4.5582,20.6763,18.1386,3.8001,10.4167,15.8314,17.7011


Unnamed: 0,Company,dateyearend,currency,totaldividend,taxes,revenue,provisionsforlosses,profitforperiod,pretaxprofit,posttaxprofit,...,epsgrowthpercentual,dividendcover,revenuepershare,pretaxprofitpershare,operatingmarging,returnoncapitalemployed,dividendyield,dividendpersharegrowth,navpershare,netgearing
