In [1]:
#pip install py-msci-esg

from msci_esg.ratefinder import ESGRateFinder
import requests
from selenium import webdriver 
from selenium.webdriver.support.select import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException 
import warnings 
from selenium.webdriver.chrome.options import Options
warnings.filterwarnings("ignore")

options=webdriver.ChromeOptions()
options.add_argument('headless')#창 없이 실행
options.add_argument('window-size=1920x1080')##창없으나 노트북 모니터 해상도처럼 크롬이 행동
options.add_argument('disable-gpu')##GPU 가속 제어
#User 탐지 떴을 때
# #options.add_argument("user-agent=Mozilla/5.0 (Macintosh; Intel Window 64 X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")


In [2]:
# To get ESG Rating, parse HTML for 
# https://www.msci.com/our-solutions/esg-investing/esg-ratings/esg-ratings-corporate-search-tool?p_p_id=esgratingsprofile&p_p_lifecycle=20&p_p_state=normal&p_p_mode=view&p_p_resource_id=searchEsgRatingsProfiles&p_p_cacheability=cacheLevelPage&_esgratingsprofile_keywords={SYMBOL}/issuer/{encodedTitle}/{issuerID}

class ESGRateFinder:
    def __init__(self,debug=False):
        self.debug = debug 

        self.MSCI_GET_STOCK_ID_URL = (
            "https://www.msci.com/our-solutions/esg-investing/"
            "esg-ratings/esg-ratings-corporate-search-tool?"
            "p_p_id=esgratingsprofile&p_p_lifecycle=2&"
            "p_p_state=normal&p_p_mode=view&p_p_resource_id="
            "searchEsgRatingsProfiles&p_p_cacheability=cacheLevelPage"
            "&_esgratingsprofile_keywords={}" 
        ) # Format with symbol
        self.MSCI_ESG_URL = (
            "https://www.msci.com/our-solutions/esg-investing/"
            "esg-ratings/esg-ratings-corporate-search-tool/"
            "issuer/{}/{}"
        )
         # Format with encodedTitle, IssuerID (pulled from GET_STOCK_ID_URL json response) 
    def get_stock_msci_properties(self, symbol=None):
        response = None 
        if symbol: ##symbol 값 대입시 
            try:
                url = self.MSCI_GET_STOCK_ID_URL.format(symbol)
                response = requests.get(url)
                if self.debug:
                    print(response.content)
                    print("Getting props as JSON...")
                response = response.json()[0]
                if self.debug:
                    print(response)
                # returns: {encodedTitle, title, url}(url is issuerid)
            except Exception as e:
                print(e)
                response = {
                    'error': str(e)
                }
        return response 
        
    def get_esg_category(self, rating=None): 
        """ Get the category of an ESG risk rating """ 
        response = None 
        if rating: 
            rating_map = {
                'ccc': 'laggard',
                'b': 'laggard',
                'bb': 'average',
                'bbb': 'average',
                'a': 'average',
                'aa': 'leader',
                'aaa': 'leader'
            }
            response = rating_map[rating]
        return response 

    def get_esg_rating(self, symbol=None, js_timeout=1):
        """ Function to get ESG rating information for a given stock
        Params: 
        symbol : string : the symbol for which you want ESG rating information 
        js_timeout : int : how long should web driver wait for JS to build the page before retrieving content? (seconds)
        Returns :
        dict : dictionary of ESG rating information pulled from HTML parsing of MSCI corporate search page 
        """
        # Initialize Response Dictionary 
        response = {}
        try:
            # First get the stock MSCI properties 
            props = self.get_stock_msci_properties(symbol=symbol) 
            if self.debug:
                print(f'Props are: {props}') 
            # Build URL with properties and symbol 
            url = self.MSCI_ESG_URL.format(props['encodedTitle'],props['url'])
            if self.debug: 
                print(f'URL built: {url}')
            
            
            # Build Selenium web driver 
            #driver = webdriver.PhantomJS(executable_path = "C:/Users/jaehyoulee/Desktop/phantomjs-2.1.1-windows/phantomjs-2.1.1-windows/bin")

            from webdriver_manager.chrome import ChromeDriverManager
            #webdriver_options = webdriver.ChromeOptions()
            #webdriver_options .add_argument('headless')##chrome 창 없이 크롤링 하는 옵션 
            #driver = webdriver.Chrome(chrome_driver,options=chrome_options)            
            #driver = webdriver.Chrome(ChromeDriverManager().install())
            driver = webdriver.Chrome(ChromeDriverManager().install(),chrome_options=options)

            if self.debug:
                print(f"Built PhantomJS driver {driver}")
            driver.get(url)  
            if self.debug:
                print(f"Got URL") 
            try:
                if self.debug:
                    print(f'Waiting for JS to build page to get content...')
                data = [div for div in wait(driver, js_timeout).until(
                        EC.presence_of_element_located((By.XPATH, '//div[@class="esg-rating-paragraph-distr"]'))
                        )]
                if self.debug:
                    print(f'Got content successfully!')
            except TimeoutException: 
                if self.debug:
                    print('Timeout reached for WebDriver Wait')

            rating_paragraph = driver.find_element_by_class_name(
                name="esg-rating-paragraph-distr"
            ).text
            if self.debug:
                print(f'Rating paragraph: {rating_paragraph}')
            response['rating-paragraph'] = rating_paragraph

            rating_history_paragraph = driver.find_element_by_class_name(
                name="esg-rating-paragraph-hist"
            ).text 
            response['rating-history-paragraph'] = rating_history_paragraph
            if self.debug: 
                print(f'Rating history paragraph: {rating_history_paragraph}')

            rating_icon = driver.find_element_by_class_name(
                name="ratingdata-company-rating"
            )
            # get its other class name, that will tell the rating
            other_class = rating_icon.get_attribute("class")
            # class that tells rating formatted as esg-rating-circle-<RATING>
            # Build a map of ratings to categories (laggard is bad, leader is good)
            rating_map = {
                'ccc': 'laggard',
                'b': 'laggard',
                'bb': 'average',
                'bbb': 'average',
                'a': 'average',
                'aa': 'leader',
                'aaa': 'leader'
            }
            rating = other_class.split("esg-rating-circle-")[-1].lower()
            response['current'] = {}
            response['current']['esg_rating'] = rating
            response['current']['esg_category'] = rating_map[rating] 
            if self.debug: 
                print(f'ESG rating and category for {symbol}: {rating}/{rating_map[rating]}')

            # build history 
            history = {}
            # Get the history graph
            history_graph = driver.find_element_by_id(
                id_="_esgratingsprofile_esg-rating-history"
            )
            if self.debug:
                print(f"Got history graph HTML element")
            date_labels = history_graph.find_element_by_class_name(
                name="highcharts-xaxis-labels"
            ).find_elements_by_xpath(".//*") # these are the historical rating
            # dates formatted as Month-Year
            if self.debug:
                print(f"Got date labels for rating history!")
    
            # Get the rating history (the rating values for the respective dates)
            rating_labels = history_graph.find_element_by_class_name(
                name="highcharts-data-labels"
            ).find_elements_by_class_name("highcharts-label")  
            if self.debug:
                print(f"Got rating labels for rating history")

            for i in range(len(rating_labels)):  
                history[date_labels[i].text.lower()] = \
                    rating_labels[i].text.lower()
            if self.debug:
                print(f"History: {history}")

            response['history'] = history 

            if self.debug:
                print(f"Full response: {response}")
        except NoSuchElementException:
            print(
                f"MSCI ESG Ratings Corporate Search Tool may not have data for the stock {symbol}. "
                f" To verify this, open https://www.msci.com/our-solutions/esg-investing/esg-ratings/"
                f"esg-ratings-corporate-search-tool and search for your stock to see if the resulting "
                f"page contains data."
                )
        except KeyError:
            print(
                f"MSCI ESG Ratings Corporate Search Tool may not have data for the stock {symbol}. "
                f" To verify this, open https://www.msci.com/our-solutions/esg-investing/esg-ratings/"
                f"esg-ratings-corporate-search-tool and search for your stock to see if the resulting "
                f"page contains data."
            )
        return response
        
        




In [1]:
import pandas as pd

In [2]:
corp_list=pd.read_csv('C:/Users/jaehyoulee/Desktop/ESG/해외/02. sustainability_scores_ab_일자별_0110.csv')

In [11]:
len(corp_list['code'].unique())

52

In [14]:
corp_list['timestamp']

0       2014-09-01
1       2014-10-01
2       2014-11-01
3       2014-12-01
4       2015-01-01
           ...    
4227    2021-01-01
4228    2021-02-01
4229    2021-03-01
4230    2021-04-01
4231    2021-05-01
Name: timestamp, Length: 4232, dtype: object

In [12]:
check=corp_list['기업명']
check

0          신한지주
1        한국금융지주
2        하나금융지주
3        KB금융지주
4       메리츠금융지주
         ...   
100    아이비케이캐피탈
101       현대캐피탈
102     미래에셋캐피탈
103       롯데캐피탈
104      메리츠캐피탈
Name: 기업명, Length: 105, dtype: object

In [12]:
df=[]

In [13]:
import time 

In [14]:
test=check[5:10]
test

5     KB금융지주
6    메리츠금융지주
7    BNK금융지주
8    DGB금융지주
9     JB금융지주
Name: 기업명, dtype: object

In [15]:
for item in test:## test -> check로 실제 작업 시 변경 
    rate_finder = ESGRateFinder()
    rating_info = rate_finder.get_esg_rating(symbol=item)
    df.append(rating_info)
    time.sleep(15)


list index out of range
MSCI ESG Ratings Corporate Search Tool may not have data for the stock KB금융지주.  To verify this, open https://www.msci.com/our-solutions/esg-investing/esg-ratings/esg-ratings-corporate-search-tool and search for your stock to see if the resulting page contains data.
list index out of range
MSCI ESG Ratings Corporate Search Tool may not have data for the stock 메리츠금융지주.  To verify this, open https://www.msci.com/our-solutions/esg-investing/esg-ratings/esg-ratings-corporate-search-tool and search for your stock to see if the resulting page contains data.
list index out of range
MSCI ESG Ratings Corporate Search Tool may not have data for the stock BNK금융지주.  To verify this, open https://www.msci.com/our-solutions/esg-investing/esg-ratings/esg-ratings-corporate-search-tool and search for your stock to see if the resulting page contains data.
list index out of range
MSCI ESG Ratings Corporate Search Tool may not have data for the stock DGB금융지주.  To verify this, open ht

In [16]:
df

[{}, {}, {}, {}, {}]

In [60]:
df2=pd.DataFrame.from_dict(df)


In [61]:
df2

Unnamed: 0,rating-paragraph,rating-history-paragraph,current,history
0,ICBC is average among 189 companies in the ban...,"ICBC was upgraded in August, 2021.","{'esg_rating': 'bbb', 'esg_category': 'average'}","{'oct-17': 'bb', 'oct-18': 'bb', 'jul-19': 'bb..."
1,JPM is average among 189 companies in the bank...,"JPM was upgraded in October, 2021.","{'esg_rating': 'a', 'esg_category': 'average'}","{'sep-18': 'bb', 'jul-19': 'bb', 'jul-20': 'bb..."
2,Berkshire Hathaway is average among 46 compani...,Berkshire Hathaway's rating remains unchanged ...,"{'esg_rating': 'bb', 'esg_category': 'average'}","{'feb-17': 'bb', 'mar-18': 'bb', 'apr-19': 'bb..."
3,CCB is average among 189 companies in the bank...,"CCB's rating remains unchanged since December,...","{'esg_rating': 'a', 'esg_category': 'average'}","{'oct-17': 'bb', 'oct-18': 'bb', 'dec-19': 'bb..."
4,Aramco is average among 28 companies in the in...,Aramco's rating remains unchanged since Decemb...,"{'esg_rating': 'bb', 'esg_category': 'average'}","{'apr-19': 'bb', 'dec-19': 'bb', 'jul-20': 'bb..."


In [151]:
df2.to_csv('ESG_test.csv')