# Code for extracting ESG data for the S&P 500 from Yahoo Finance

In [2]:
# import yfinance, pandas and os
import yfinance as yf
import pandas as pd
from pathlib import Path
import yahoo_fin.stock_info as si
import os

In [3]:
# Single-ticker example: pull Coca-Cola's sustainability table to show which ESG factors are available
cola = "KO"
cola_y = yf.Ticker(cola)
# The factors come back as rows; transpose so that each factor becomes a column
esg_data = cola_y.sustainability.transpose()
# Tag the row with its ticker symbol
esg_data = esg_data.assign(company_ticker=str(cola_y.ticker))
esg_data.head()

2022-2,palmOil,controversialWeapons,gambling,socialScore,nuclear,furLeather,alcoholic,gmo,catholic,socialPercentile,...,coal,pesticides,adult,percentile,peerGroup,smallArms,environmentScore,governancePercentile,militaryContract,company_ticker
Value,False,False,False,9.72,False,False,False,False,False,,...,False,False,False,32.62,Food Products,False,7.88,,False,KO


In [4]:
# Single-ticker example: price history for Pfizer over a fixed date window
pfizer = yf.Ticker("PFE")
history_window = {"start": "2018-01-01", "end": "2022-2-01"}
old = pfizer.history(**history_window)
old.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-02,29.484828,29.598294,29.371362,29.533457,17059833,0.0,0.0
2018-01-03,29.541552,29.800901,29.363249,29.752275,14183151,0.0,0.0
2018-01-04,29.752277,29.922472,29.663124,29.817114,13046517,0.0,0.0
2018-01-05,29.841428,29.890056,29.671229,29.873848,13167517,0.0,0.0
2018-01-08,29.760385,29.825222,29.444303,29.541557,19964341,0.0,0.0


In [5]:
# Ask yahoo_fin for the list of S&P 500 ticker symbols
sp500_ticker_list = si.tickers_sp500()
# Preview the first ten symbols
sp500_ticker_list[:10]

['A', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABMD', 'ABT', 'ACN', 'ADBE']

In [6]:
# Bulk-download price history for every S&P 500 ticker over one date window
sp500_hist_data = yf.download(
    sp500_ticker_list,
    start="2020-01-01",
    end="2020-04-30",
)
sp500_hist_data.head()

[*********************100%***********************]  506 of 506 completed

2 Failed downloads:
- OGN: Data doesn't exist for startDate = 1577865600, endDate = 1588230000
- CEG: Data doesn't exist for startDate = 1577865600, endDate = 1588230000


Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,A,AAL,AAP,AAPL,ABBV,ABC,ABMD,ABT,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-02,84.799667,28.982893,155.755661,73.894318,80.137344,82.064133,168.809998,84.001167,203.776093,334.429993,...,3006500.0,2915400.0,12456400.0,1556600.0,869500.0,1369900.0,1052400.0,387800.0,1528700.0,1576700.0
2020-01-03,83.438148,27.548195,155.765427,73.175926,79.376678,81.032074,166.820007,82.977112,203.436737,331.809998,...,1760500.0,2422500.0,17386900.0,910000.0,795100.0,1145500.0,963500.0,305300.0,1215800.0,1274000.0
2020-01-06,83.684792,27.21941,153.20549,73.758995,80.003113,82.218468,179.039993,83.411858,202.108292,333.709991,...,1357900.0,2217100.0,20081900.0,751000.0,817300.0,1454100.0,944100.0,322600.0,1217500.0,2334100.0
2020-01-07,83.941322,27.119778,151.388123,73.412125,79.546715,81.630104,180.350006,82.948135,197.744766,333.390015,...,1722100.0,1861100.0,17387700.0,792600.0,662200.0,1388600.0,907300.0,236300.0,1450900.0,1224500.0
2020-01-08,84.770073,27.737495,149.648941,74.59304,80.110497,82.421028,178.690002,83.28627,198.132629,337.869995,...,1176200.0,4828900.0,15137700.0,1001500.0,1083200.0,1521500.0,908800.0,689600.0,2396000.0,1766700.0


In [7]:
# Retrieve yfinance sustainability (ESG) scores for every S&P 500 ticker.
# Per-ticker frames are collected in a list and concatenated once, because
# DataFrame.append was removed in pandas 2.0 and re-copies the whole frame on
# every call. esg_data is rebuilt from scratch here, so re-running the cell does
# not duplicate rows and KO (already fetched in the single-ticker example) is
# not included twice.
esg_frames = []
esg_failed_tickers = []  # tickers whose sustainability lookup raised IndexError
for symbol in sp500_ticker_list:
    ticker_obj = yf.Ticker(symbol)
    try:
        sustainability = ticker_obj.sustainability  # read once and reuse
        if sustainability is None:
            # Nothing returned for this ticker; skip it
            continue
        # Factors come back as rows; transpose so each ticker becomes one row
        ticker_esg = sustainability.transpose()
        ticker_esg['company_ticker'] = str(ticker_obj.ticker)
        esg_frames.append(ticker_esg)
    except IndexError:
        # Best-effort download: remember the ticker instead of silently dropping it
        esg_failed_tickers.append(symbol)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
esg_data = pd.concat(esg_frames) if esg_frames else pd.DataFrame()


In [25]:
# Download price history (start_date="01/01/2021") for each S&P 500 ticker,
# stored in a dictionary keyed by ticker symbol.
# Iterates sp500_ticker_list, the list returned by si.tickers_sp500(); the name
# previously used here, sp_ticker_list, is never defined in this notebook and
# raises NameError on a fresh kernel.
sp500_historical = {
    ticker: si.get_data(ticker, start_date="01/01/2021")
    for ticker in sp500_ticker_list
}



In [None]:
# Save the combined ESG data frame as CSV for later use.
# String literals must use plain ASCII quotes; typographic quotes are a SyntaxError.
esg_data.to_csv('sp500_sustainability_scores.csv', encoding='utf-8')