# Imports

In [1]:
# All necessary imports
from bs4 import BeautifulSoup
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time
import requests
import pandas as pd
import re
import pickle

# Getting Companies

In [2]:
# Load in pickled list of companies and corresponding tickers
with open('./generated_data/companies_n_tickers.pickle','rb') as f:
    companies_n_tickers = pickle.load(f)

# Scraping Ratings

## Sustainalytics

In [10]:
def get_sustainalytics_ratings(companies):
    
    # Creating dataframe for ratings data
    sustainalytics_ratings = pd.DataFrame(columns = ['company','sustainalytics_risk_rating'])
    
    # Set path to chromedriver
    PATH = "/Users/MichaelWirtz/Desktop/pathfile/chromedriver_2"
    # Define options 
    options = Options()
    # Remove pop up window
    options.add_argument("--headless")
    # Define driver
    driver = webdriver.Chrome(PATH, options=options)
    # # Define driver
    # driver = webdriver.Chrome(PATH)
    # Define url
    url= "https://www.sustainalytics.com/esg-ratings/"
    # Maximize window
    driver.maximize_window()
    # Get website
    driver.get(url)
    
    for company,ticker in companies:
        try:
            # Find search bar
            search_bar = driver.find_element_by_id("searchInput")
            # Sleep for 1 seconds
            time.sleep(1)
            # Clear search bar
            search_bar.clear()
            # Enter ticker into search bar
            search_bar.send_keys(ticker)
            # Search ticker
            search_bar.send_keys(Keys.ENTER)
            # Sleep for 3 seconds
            time.sleep(3)
            # Find company in options list
            search_popup = driver.find_element_by_class_name("companyName")
            # Sleep for 3 seconds
            time.sleep(3)
            # Click company for ratings
            search_popup.click()

            # Getting page content
            content = driver.page_source.encode('utf-8').strip()
            # Getting page content in html
            soup = BeautifulSoup(content,"html.parser")
            # Getting company details
            company_details = soup.find(class_="row company-details d-flex")
            # Getting risk ratings
            risk_rating = company_details.find(class_="col-xs-6 risk-rating-score").text
            # Getting risk category
            risk_category = company_details.find(class_="col-xs-6 risk-rating-assessment").text
            # Getting industry group position
            industry_group_position = (int(company_details.find(class_="industry-group-position").text) / int(company_details.find(class_="industry-group-positions-total").text)) * 100
            # Getting overall rank
            overall_rank = (int(company_details.find(class_="universe-position").text) / int(company_details.find(class_="universe-positions-total").text)) * 100
            # Appending new ratings to dataframe
            sustainalytics_ratings = sustainalytics_ratings.append({'company': company,
                                                                    'sustainalytics_risk_rating': risk_rating}, ignore_index=True)
            # Sleep for 1 seconds
            time.sleep(1)
            # Go back to previous page
            driver.back()

            # Sleep for 2 seconds
            time.sleep(2)

        except:
            sustainalytics_ratings = sustainalytics_ratings.append({'company': company,
                                                                    'sustainalytics_risk_rating': 0}, ignore_index=True)
    
    return sustainalytics_ratings

In [11]:
sustainalytics_ratings = get_sustainalytics_ratings(companies_n_tickers)

In [12]:
sustainalytics_ratings

Unnamed: 0,company,sustainalytics_risk_rating
0,3M Company,35.2
1,Abbott Laboratories,29.8
2,AbbVie Inc.,29.1
3,Abiomed,32.6
4,Accenture,11.3
...,...,...
490,Yum! Brands Inc,0
491,Zebra Technologies,0
492,Zimmer Biomet,0
493,Zions Bancorp,0


# Scraping MSCI Ratings Data

In [7]:
# Creating dataframe for ratings data
msci_ratings = pd.DataFrame(columns = ['company','msci_risk_rating'])

In [8]:
# Set path to chromedriver
PATH = "/Users/MichaelWirtz/Desktop/pathfile/chromedriver_2"
# Define options 
options = Options()
# Remove pop up window
options.add_argument("--headless")
# Set window size
options.add_argument('window-size=1200x600')
# Define driver
driver = webdriver.Chrome(PATH,options=options)
# Define url
url= "https://www.msci.com/our-solutions/esg-investing/esg-ratings/esg-ratings-corporate-search-tool"
# Get website
driver.get(url)

In [9]:
for company,ticker in companies:
    try:
        # Find search bar
        search_bar = driver.find_element_by_id("_esgratingsprofile_keywords")
        # Clear search bar
        search_bar.clear()
        # Enter ticker into search bar
        search_bar.send_keys(ticker)
        # Sleep for 2 seconds
        time.sleep(2)
        # Search ticker
        search_bar.send_keys(Keys.ENTER)
        # Sleep for 3 seconds
        time.sleep(3)
        # Find company in options list
        search_popup = driver.find_element_by_id("ui-id-1")
        # Click company for ratings
        search_popup.click()
        # Sleep for 4 seconds
        time.sleep(4)

        # Getting page content
        content = driver.page_source.encode('utf-8').strip()
        # Getting page content in html
        soup = BeautifulSoup(content,"html.parser")
        # Getting company details
        company_details = soup.find(class_="ratingdata-outercircle esgratings-profile-header-yellow")
        # Getting risk rating
        risk_rating = ((str(company_details)).split('-'))[-1][:4].replace('"','')
        risk_rating = re.sub(r"[^a-z]+", '', risk_rating)

        # Appending new ratings to dataframe
        msci_ratings = msci_ratings.append({'company': company,
                                            'msci_risk_rating': risk_rating},
                                             ignore_index=True)
        # Sleep for 1 seconds
        time.sleep(1)
        # Go back to previous page
        driver.back()

        # Sleep for 2 seconds
        time.sleep(2)
    
    except:
        # Appending new ratings to dataframe
        msci_ratings = msci_ratings.append({'company': company,
                                            'msci_risk_rating': 'NaN'},
                                             ignore_index=True)

In [10]:
msci_ratings

Unnamed: 0,company,msci_risk_rating
0,"Aaron’s, Inc.",
1,"Applied Optoelectronics, Inc.",
2,"AAON, Inc.",
3,"American Assets Trust, Inc.",
4,"Atlas Air Worldwide Holdings, Inc.",
...,...,...
1994,Zix Corp.,
1995,"Zumiez, Inc.",
1996,"Zuora, Inc.",
1997,"Zynerba Pharmaceuticals, Inc.",


# Merging Sustainalytics and MSCI Data

In [11]:
# Merging dataframes
ratings = sustainalytics_ratings.merge(msci_ratings, how='outer', on='company')

In [12]:
# Checking dataframe
ratings

Unnamed: 0,company,sustainalytics_risk_rating,risk_category,industry_group_position,overall_rank,msci_risk_rating
0,"Aaron’s, Inc.",45.3,Severe Risk,34.0278,92.0433,
1,"Applied Optoelectronics, Inc.",,,,,
2,"AAON, Inc.",,,,,
3,"American Assets Trust, Inc.",12.4,Low Risk,2.01005,2.51305,
4,"Atlas Air Worldwide Holdings, Inc.",,,,,
...,...,...,...,...,...,...
2006,Zix Corp.,,,,,
2007,"Zumiez, Inc.",,,,,
2008,"Zuora, Inc.",,,,,
2009,"Zynerba Pharmaceuticals, Inc.",,,,,


In [13]:
# Saving comprehensive dataframe
ratings.to_csv('esg_ratings_data.csv')