In [1]:
from bs4 import BeautifulSoup
# User-Agent header passed to the Yahoo! Finance requests made in this notebook
headers = {"User-Agent": "Chrome/98.0.4758.102"}
# Usage: r = requests.get(url, headers=headers)
import pandas as pd
import requests
import ipywidgets as widgets
from ipywidgets import interact
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


# Rebind `requests` to a single shared Session so that every later
# `requests.get(...)` call goes through the retrying adapter mounted below.
requests = requests.Session()
# Retry connection-related errors up to 3 times, with a 0.5 backoff factor.
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
for url_prefix in ('http://', 'https://'):
    requests.mount(url_prefix, adapter)


In [2]:
try:
    # Request the 3M sustainability page and keep the response body as text.
    # The timeout keeps the cell from hanging forever on an unresponsive server.
    web_data = requests.get('https://finance.yahoo.com/quote/MMM/sustainability',
                            headers=headers, timeout=10).text
    print('Success')
except Exception:
    # `except Exception` (not a bare `except`) lets a kernel interrupt through.
    # Re-raise after reporting: the following cells need `web_data`, and carrying
    # on would either hit a NameError or silently reuse a stale value left over
    # from an earlier run.
    print('Could not get URL')
    raise

Success


In [3]:
# Parse the downloaded HTML with Python's built-in parser
soup = BeautifulSoup(markup=web_data, features="html.parser")

In [4]:
# Overall ESG score: the first <div> whose class attribute is this exact string
esg_score = soup.find(name='div', attrs={'class': 'Fz(36px) Fw(600) D(ib) Mend(5px)'})
print(esg_score)

<div class="Fz(36px) Fw(600) D(ib) Mend(5px)">35</div>


In [5]:
# Keep only the text inside the tag (the score itself, as a string)
data_point = esg_score.get_text()
print(data_point)

35


In [6]:
# Component ESG scores: the text of every <div> carrying this class string
scores = soup.find_all(name='div', attrs={'class': 'D(ib) Fz(23px) smartphone_Fz(22px) Fw(600)'})
elements = [tag.text for tag in scores]

In [7]:
# Show the component scores scraped from the page (still strings at this point)
print(elements)

['13', '14', '8']


In [8]:
# Controversy level: the first <div> whose class attribute is this exact string
controversy_score = soup.find(name='div', attrs={'class': 'D(ib) Fz(36px) Fw(500)'})
ctr_data = controversy_score.get_text()
print(ctr_data)

3


In [9]:
# One-row table holding the scraped values, indexed by the ticker they belong to
df = pd.DataFrame(
    data=[[data_point, elements[0], elements[1], elements[2], ctr_data]],
    columns=['Total ESG Score', 'Environment', 'Social', 'Governance', 'Controversy Score'],
    index=['MMM'],
)
df

Unnamed: 0,Total ESG Score,Environment,Social,Governance,Controversy Score
MMM,35,13,14,8,3


In [10]:
# Load the list of firms; the relative path means S_PFirms.csv must sit in the
# notebook's working directory
S_PFirms = pd.read_csv('S_PFirms.csv')
S_PFirms

Unnamed: 0,Ticker,Securities,Sector,Industry,Headquarter
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota"
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin"
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois"
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois"
4,ABMD,Abiomed,Health Care,Health Care Equipment,"Danvers, Massachusetts"
...,...,...,...,...,...
500,YUM,Yum! Brands,Consumer Discretionary,Restaurants,"Louisville, Kentucky"
501,ZBRA,Zebra,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois"
502,ZBH,Zimmer Biomet,Health Care,Health Care Equipment,"Warsaw, Indiana"
503,ZION,Zions Bancorp,Financials,Regional Banks,"Salt Lake City, Utah"


In [11]:
# Ticker symbols to scrape, taken from the 'Ticker' column
tickers = S_PFirms['Ticker']
# To get only the rest of the data, slice the series from the row to resume at,
# e.g. tickers = S_PFirms['Ticker'].iloc[198:]

In [12]:
# Sanity check: selecting a single column yields a pandas Series
type(tickers)


pandas.core.series.Series

In [13]:
# Preview the first few ticker symbols
tickers.head()

0     MMM
1     AOS
2     ABT
3    ABBV
4    ABMD
Name: Ticker, dtype: object

In [14]:
def yahoo_finance_esg(ticker, timeout=10):
    '''
    Scrape the ESG scores for one company from Yahoo! Finance.

    Builds the Yahoo! Finance Sustainability URL for ``ticker``, downloads the
    page through the notebook-level ``requests`` session and ``headers``, and
    extracts the scores from the HTML with BeautifulSoup.

    Parameters
    ----------
    ticker : str
        The firm's ticker symbol (e.g. ``'MMM'``). It is used unchanged as the
        index label of the returned row.
    timeout : float, optional
        Seconds to wait for the server before giving up on the request.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame indexed by ``ticker`` with the columns
        'Total ESG Score', 'Environment', 'Social', 'Governance' and
        'Controversy Score' (values are the scraped strings, or None for a
        field that is absent from the page). Returns None when the request
        fails or the page carries no overall ESG score.
    '''
    # Imported here because the notebook rebinds the name `requests` to a
    # Session object, which has no `exceptions` attribute.
    from requests.exceptions import RequestException

    # Yahoo! Finance writes share classes with a hyphen (BRK-B) whereas the
    # ticker list uses a dot (BRK.B); symbols without a dot are unaffected.
    symbol = ticker.replace('.', '-')
    url = 'https://finance.yahoo.com/quote/' + symbol + '/sustainability?p=' + symbol

    try:
        web_data = requests.get(url, headers=headers, timeout=timeout).text
    except RequestException as exc:
        # A single network failure should not abort a scrape of hundreds of
        # tickers; report it and let the caller treat the ticker as "no data".
        print("Request failed for " + ticker + ": " + str(exc))
        return None

    soup = BeautifulSoup(web_data, "html.parser")

    esg_score = soup.find('div', {'class': 'Fz(36px) Fw(600) D(ib) Mend(5px)'})
    if esg_score is None:
        # No overall score on the page: this firm has no ESG data.
        return None
    data_point = esg_score.text

    # The controversy block can be missing even when the ESG score is present.
    controversy_score = soup.find('div', {'class': 'D(ib) Fz(36px) Fw(500)'})
    ctr_data = controversy_score.text if controversy_score is not None else None

    scores = soup.find_all('div', {'class': 'D(ib) Fz(23px) smartphone_Fz(22px) Fw(600)'})
    elements = [score.text for score in scores]
    # Pad to three entries so a page with fewer component scores yields None
    # values instead of an IndexError.
    elements += [None] * (3 - len(elements))

    df_esg = pd.DataFrame({'Total ESG Score': data_point,
                           'Environment': elements[0],
                           'Social': elements[1],
                           'Governance': elements[2],
                           'Controversy Score': ctr_data},
                          index=[ticker])
    return df_esg

In [15]:
# Run yahoo_finance_esg() for each company in the tickers series and gather the
# one-row frames in a list. They are concatenated once at the end, because
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row
# copies it on every iteration.
records = []
for i in tickers:
    individual_record = yahoo_finance_esg(i)
    if individual_record is not None:
        print(" retrieved" + i) # Progress checker
        records.append(individual_record)
    else:
        # Nothing could be scraped for this ticker; report it and move on.
        print("No data for  " +  i) # Progress checker

if records:
    full_data = pd.concat(records)
else:
    # Nothing was retrieved: fall back to an empty table with the same columns.
    full_data = pd.DataFrame({'Total ESG Score': [],
                              'Environment': [],
                              'Social': [],
                              'Governance': [],
                              'Controversy Score': []})
print(full_data)

 retrievedMMM
 retrievedAOS
 retrievedABT
 retrievedABBV
No data for ABMD
 retrievedACN
 retrievedATVI
 retrievedADM
 retrievedADBE
 retrievedADP
 retrievedAAP
 retrievedAES
 retrievedAFL
 retrievedA
 retrievedAIG
 retrievedAPD
 retrievedAKAM
No data for ALK
 retrievedALB
 retrievedARE
No data for ALGN
 retrievedALLE
 retrievedLNT
 retrievedALL
 retrievedGOOGL
No data for GOOG
 retrievedMO
 retrievedAMZN
No data for AMCR
No data for AMD
 retrievedAEE
 retrievedAAL
 retrievedAEP
 retrievedAXP
 retrievedAMT
 retrievedAWK
 retrievedAMP
 retrievedABC
 retrievedAME
 retrievedAMGN
 retrievedAPH
 retrievedADI
 retrievedANSS
 retrievedANTM
No data for AON
No data for APA
 retrievedAAPL
 retrievedAMAT
 retrievedAPTV
 retrievedANET
 retrievedAIZ
 retrievedT
 retrievedATO
 retrievedADSK
 retrievedAZO
 retrievedAVB
 retrievedAVY
No data for BKR
 retrievedBLL
 retrievedBAC
 retrievedBBWI
 retrievedBAX
 retrievedBDX
 retrievedWRB
No data for BRK.B
 retrievedBBY
 retrievedBIO
No data for TECH
 retrie

In [16]:
# Save the consolidated ratings to esg_ratings.csv in the working directory
full_data.to_csv('esg_ratings.csv')