<a href="https://colab.research.google.com/github/ssood/ESG/blob/main/esg_index_scorer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

```
Author: Yash Roongta
The code in this file is for the blog written here: https://tradewithpython.com/equity-index-score-esg-with-python

Please read the blog for detailed usage instructions. A couple of high-level points:
1. You will have to provide the Yahoo Finance Tickers to the code.
2. Fetching the ESG data and stock data will take time because a random 2-8 second delay is built into the fetch loops.
3. The output will be printed in your console.
    
```

In [None]:
import pandas as pd
import yfinance as yf
import time
from random import randint
from google.colab import files
import io

In [None]:
!pip install yfinance

In [None]:
'''Please upload the .csv for tickers
FTSE 100: https://docs.google.com/spreadsheets/d/10SD0xGSdOqNZuINJocNoE0Gd8JClXmas5cPj9tmaw28/edit?usp=sharing
NIFTY 50: https://docs.google.com/spreadsheets/d/1Gm1v9-qOJ5ZXhgSEHLYsBPnhd3-zcaGnlmI-eeu8Gdw/edit?usp=sharing
'''

# Ask the user to upload the ticker CSV through google.colab's `files` helper.
# The next cell indexes the result by uploaded file name and wraps the value
# in io.BytesIO, i.e. it is used as a mapping of file name -> file bytes.
data_to_load = files.upload()

Saving ftse100_tickers - Sheet1.csv to ftse100_tickers - Sheet1.csv


In [None]:
# Read the ticker list from whichever CSV was uploaded in the previous cell.
# Looking the file name up dynamically (instead of hard-coding
# 'ftse100_tickers - Sheet1.csv') lets the same cell work for the NIFTY 50
# sheet or for a re-uploaded file that was given a different name.
if not data_to_load:
    raise ValueError('No file was uploaded - re-run the upload cell and choose a .csv file')
uploaded_name = next(iter(data_to_load)) #first (normally the only) uploaded file
df = pd.read_csv(io.BytesIO(data_to_load[uploaded_name]))
if 'Symbol' not in df.columns:
    raise KeyError(f"'{uploaded_name}' has no 'Symbol' column; found {list(df.columns)}")
index_tickers = df['Symbol'].dropna().tolist() #all tickers as a list, blank rows skipped
#print(index_tickers)

esg_data = pd.DataFrame() #empty df for attaching all ticker's data response

In [None]:
# Fetch the ESG (sustainability) table for every ticker and stack the results
# into esg_data, with a 'symbol' column identifying the ticker of each row.
esg_frames = [] #per-ticker frames; concatenated once after the loop

for ticker in index_tickers:
    print(ticker) #just FYI to know your code is running
    ticker_name = yf.Ticker(ticker)
    try:
        sustainability = ticker_name.sustainability #read once and reuse below
        if sustainability is not None: #if no response from Yahoo received, move on to next ticker
            ticker_df = sustainability.T #response dataframe, transposed
            ticker_df['symbol'] = ticker #adding new column 'symbol' in response df
            esg_frames.append(ticker_df)
            time.sleep(randint(2,8)) #delaying the fetch of data for 2-8 seconds
    except (IndexError, ValueError): #in case yfinance API misbehaves
        print(f'{ticker} did not run') #FYI

# DataFrame.append was removed in pandas 2.0, and growing a frame inside the
# loop re-copies it on every iteration. Building esg_data in one concat also
# means re-running this cell no longer stacks duplicate rows onto old results.
esg_data = pd.concat(esg_frames) if esg_frames else pd.DataFrame()

AAL.L
ABF.L
ADM.L
AHT.L
ANTO.L
AUTO.L
AV.L
AVST.L
AVV.L
AZN.L
BA.L
BARC.L
BATS.L
BDEV.L
BHP.L
BKG.L
BLND.L
BME.L
BNZL.L
BP.L
BRBY.L
BT-A.L
CCH.L
CPG.L
CRDA.L
CRH.L
DCC.L
DGE.L
ENT.L
EVR.L
EXPN.L
FERG.L
FLTR.L
FRES.L
GLEN.L
GSK.L
HIK.L
HL.L
HLMA.L
HSBA.L
IAG.L
ICP.L
IHG.L
III.L
IMB.L
INF.L
ITRK.L
JD.L
JET.L
JMAT.L
KGF.L
LAND.L
LGEN.L
LLOY.L
LSEG.L
MNDI.L
MNG.L
MRO.L
MRW.L
NG.L
NWG.L
NXT.L
OCDO.L
PHNX.L
PNN.L
POLY.L
PRU.L
PSH.L
PSN.L
PSON.L
RB.L
RDSA.L
RDSB.L
REL.L
RIO.L
RMV.L
RR.L
RSA.L
RTO.L
SBRY.L
SDR.L
SGE.L
SGRO.L
SKG.L
SLA.L
SMDS.L
SMIN.L
SMT.L
SMT.L did not run
SN.L
SPX.L
SSE.L
STAN.L
STJ.L
SVT.L
TSCO.L
TW.L
ULVR.L
UU.L
VOD.L
WPP.L
WTB.L


In [None]:
# Tickers from the uploaded list for which no ESG data was collected.
esg_tickers = esg_data['symbol']
#set removes duplicates, and the difference leaves the tickers that are in our
#original ticker list but not in esg_data. Sorting gives the same printed
#order on every run (plain set iteration order for strings can change between
#kernel restarts); key=str keeps the sort safe if a non-string value slips in.
no_esg_data = sorted(set(index_tickers) - set(esg_tickers), key=str)
print(no_esg_data)

In [None]:
# Narrow esg_data down to the ticker symbol plus the four score columns that
# the rest of the notebook works with.
esg_score_columns = ['symbol', 'socialScore', 'governanceScore',
                     'totalEsg', 'environmentScore']
new_esg_df = esg_data[esg_score_columns]
new_esg_df.head() #preview; head() shows the first 5 rows by default

2021-2,symbol,socialScore,governanceScore,totalEsg,environmentScore
Value,AAL.L,9.52,3.41,23.63,10.71
Value,ABF.L,15.4,4.94,30.12,9.78
Value,ADM.L,6.94,11.4,20.06,1.71
Value,AHT.L,5.57,6.83,19.41,7.01
Value,ANTO.L,9.22,5.61,28.99,14.16


In [None]:
# Fetch the Yahoo Finance `info` dictionary of every ticker and build main_df
# with one row per ticker and one column per info field.
info_records = [] #one info dict per ticker; converted to a DataFrame once after the loop

for ticker in index_tickers:
    ticker_name = yf.Ticker(ticker)
    try:
        ticker_info = ticker_name.info
        if not ticker_info: #nothing came back for this ticker
            print(f'{ticker} + Data Not Found')
            continue
        info_records.append(dict(ticker_info))
        time.sleep(randint(2,8)) #delaying the fetch of data for 2-8 seconds
        print(f'{ticker} + Complete')
    except (IndexError, ValueError):
        print(f'{ticker} + Data Not Found')

# Building the frame straight from the list of dicts replaces the earlier
# transpose / promote-first-row-to-header steps and DataFrame.append (removed
# in pandas 2.0). Each dict key becomes a column, pandas infers the column
# dtypes, and the rows get a unique 0..n-1 index instead of all sharing the
# label 1.
main_df = pd.DataFrame(info_records)

AAL.L + Complete
ABF.L + Complete
ADM.L + Complete
AHT.L + Complete
ANTO.L + Complete
AUTO.L + Complete
AV.L + Complete
AVST.L + Complete
AVV.L + Complete
AZN.L + Complete
BA.L + Complete
BARC.L + Complete
BATS.L + Complete
BDEV.L + Complete
BHP.L + Complete
BKG.L + Complete
BLND.L + Complete
BME.L + Complete
BNZL.L + Complete
BP.L + Complete
BRBY.L + Complete
BT-A.L + Complete
CCH.L + Complete
CPG.L + Complete
CRDA.L + Complete
CRH.L + Complete
DCC.L + Complete
DGE.L + Complete
ENT.L + Complete
EVR.L + Complete
EXPN.L + Complete
FERG.L + Complete
FLTR.L + Complete
FRES.L + Complete
GLEN.L + Complete
GSK.L + Complete
HIK.L + Complete
HL.L + Complete
HLMA.L + Complete
HSBA.L + Complete
IAG.L + Complete
ICP.L + Complete
IHG.L + Complete
III.L + Complete
IMB.L + Complete
INF.L + Complete
ITRK.L + Complete
JD.L + Complete
JET.L + Complete
JMAT.L + Complete
KGF.L + Complete
LAND.L + Complete
LGEN.L + Complete
LLOY.L + Complete
LSEG.L + Complete
MNDI.L + Complete
MNG.L + Complete
MRO.L + Com

In [None]:
# Keep only the columns needed to compute market-cap weights. The explicit
# .copy() makes filtered_df an independent frame, so the next cell can add
# columns to it without pandas raising SettingWithCopyWarning.
filtered_df = main_df[['symbol', 'sector', 'previousClose', 'sharesOutstanding']].copy()
filtered_df.head(5) #checking how first 5 rows look like

Unnamed: 0,symbol,sector,previousClose,sharesOutstanding
1,AAL.L,Basic Materials,2953.5,1363120000
1,ABF.L,Consumer Defensive,2421.0,791673984
1,ADM.L,Financial Services,3107.0,296692000
1,AHT.L,Industrials,3925.0,449310016
1,ANTO.L,Basic Materials,1845.5,985857024


In [None]:
# Market cap = previous close x shares outstanding; each stock's weight is its
# share of the summed market cap, expressed in percent.
market_cap = filtered_df['previousClose'] * filtered_df['sharesOutstanding']
total_index_mcap = market_cap.sum()

# .assign returns a new frame rather than writing into what pandas may regard
# as a slice of main_df, which is what produced SettingWithCopyWarning here.
filtered_df = filtered_df.assign(
    newMarketCap=market_cap,
    marketWeight=(market_cap / total_index_mcap) * 100,
)

filtered_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,symbol,sector,previousClose,sharesOutstanding,newMarketCap,marketWeight
1,AAL.L,Basic Materials,2953.5,1363120000,4025970000000.0,2.13558
1,ABF.L,Consumer Defensive,2421.0,791673984,1916642715264.0,1.01668
1,ADM.L,Financial Services,3107.0,296692000,921822044000.0,0.488981
1,AHT.L,Industrials,3925.0,449310016,1763541812800.0,0.935471
1,ANTO.L,Basic Materials,1845.5,985857024,1819400000000.0,0.9651


In [None]:
# Left join on 'symbol': every row of filtered_df is kept and the ESG score
# columns are attached wherever new_esg_df has a row for the same symbol.
#for more info on .merge visit https://bit.ly/3pFlYIm
final_df = pd.merge(filtered_df, new_esg_df, how='left', on='symbol')

final_df.head()

Unnamed: 0,symbol,sector,previousClose,sharesOutstanding,newMarketCap,marketWeight,socialScore,governanceScore,totalEsg,environmentScore
0,AAL.L,Basic Materials,2953.5,1363120000,4025970000000.0,2.13558,9.52,3.41,23.63,10.71
1,ABF.L,Consumer Defensive,2421.0,791673984,1916642715264.0,1.01668,15.4,4.94,30.12,9.78
2,ADM.L,Financial Services,3107.0,296692000,921822044000.0,0.488981,6.94,11.4,20.06,1.71
3,AHT.L,Industrials,3925.0,449310016,1763541812800.0,0.935471,5.57,6.83,19.41,7.01
4,ANTO.L,Basic Materials,1845.5,985857024,1819400000000.0,0.9651,9.22,5.61,28.99,14.16


In [None]:
# Distinct sectors present in the index, in order of first appearance.
sector_column = final_df['sector']
sector_list = sector_column.unique().tolist()

# Start from an empty frame; the per-sector results are collected into it.
final_esg_df = pd.DataFrame()

In [None]:
#looping over each sector and filling missing scores with the sector's rounded average
score_columns = ['socialScore', 'governanceScore', 'totalEsg', 'environmentScore']
sector_frames = [] #one imputed frame per sector, combined once after the loop

for sector in sector_list:
    # .copy() gives an independent frame. Calling fillna(inplace=True) on a
    # column pulled out of a filtered slice raises SettingWithCopyWarning and
    # is not guaranteed to write back into sector_df, so the filled column is
    # assigned explicitly instead.
    sector_df = final_df[final_df['sector'] == sector].copy()
    for column in score_columns:
        sector_mean = round(sector_df[column].mean(), 2)
        sector_df[column] = sector_df[column].fillna(sector_mean)
    sector_frames.append(sector_df)

# Rebuilding final_esg_df in one concat replaces DataFrame.append (removed in
# pandas 2.0) and makes this cell safe to re-run without duplicating rows.
# NOTE(review): rows whose 'sector' is missing match no sector above and are
# therefore left out of final_esg_df, as before - confirm this is intended.
final_esg_df = pd.concat(sector_frames) if sector_frames else pd.DataFrame()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


In [None]:
# Preview the first five rows (head() defaults to 5).
final_esg_df.head()

Unnamed: 0,symbol,sector,previousClose,sharesOutstanding,newMarketCap,marketWeight,socialScore,governanceScore,totalEsg,environmentScore
0,AAL.L,Basic Materials,2953.5,1363120000,4025970000000.0,2.13558,9.52,3.41,23.63,10.71
4,ANTO.L,Basic Materials,1845.5,985857024,1819400000000.0,0.9651,9.22,5.61,28.99,14.16
14,BHP.L,Basic Materials,2350.0,2112000000,4963200000000.0,2.63273,9.72,4.85,31.97,17.4
24,CRDA.L,Basic Materials,6240.0,139519008,870598609920.0,0.461809,6.66,6.48,25.14,12.01
25,CRH.L,Basic Materials,3190.0,785067008,2504363755520.0,1.32844,2.81,5.32,21.34,13.21


In [None]:
#add a market-weighted version of each score to final_esg_df:
#marketWeight (a percentage) x score / 100
weighted_score_sources = {
    'mktweightedEsg': 'totalEsg',
    'mktweightedEnvScore': 'environmentScore',
    'mktweightedSocScore': 'socialScore',
    'mktweightedGovScore': 'governanceScore',
}
for weighted_column, score_column in weighted_score_sources.items():
    final_esg_df[weighted_column] = (final_esg_df['marketWeight'] * final_esg_df[score_column])/100

In [None]:
# Preview the first five rows, now including the mktweighted* columns.
final_esg_df.head()

Unnamed: 0,symbol,sector,previousClose,sharesOutstanding,newMarketCap,marketWeight,socialScore,governanceScore,totalEsg,environmentScore,mktweightedEsg,mktweightedEnvScore,mktweightedSocScore,mktweightedGovScore
0,AAL.L,Basic Materials,2953.5,1363120000,4025970000000.0,2.13558,9.52,3.41,23.63,10.71,0.504637,0.22872,0.203307,0.0728232
4,ANTO.L,Basic Materials,1845.5,985857024,1819400000000.0,0.9651,9.22,5.61,28.99,14.16,0.279783,0.136658,0.0889823,0.0541421
14,BHP.L,Basic Materials,2350.0,2112000000,4963200000000.0,2.63273,9.72,4.85,31.97,17.4,0.841684,0.458095,0.255901,0.127687
24,CRDA.L,Basic Materials,6240.0,139519008,870598609920.0,0.461809,6.66,6.48,25.14,12.01,0.116099,0.0554633,0.0307565,0.0299252
25,CRH.L,Basic Materials,3190.0,785067008,2504363755520.0,1.32844,2.81,5.32,21.34,13.21,0.283489,0.175487,0.0373292,0.070673


In [None]:
# Index-level scores: sum each market-weighted column and print it rounded to
# two decimals, in the order Environment, Social, Governance, total ESG.
score_report = [
    ('Total Environment Score: {}', 'mktweightedEnvScore'),
    ('Total Social Score: {}', 'mktweightedSocScore'),
    ('Total Governance Score: {}', 'mktweightedGovScore'),
    ('Total ESG Score: {}', 'mktweightedEsg'),
]
for label_template, weighted_column in score_report:
    index_score = round(final_esg_df[weighted_column].sum(), 2)
    print(label_template.format(index_score))

Total Environment Score: 6.73
Total Social Score: 9.97
Total Governance Score: 7.23
Total ESG Score: 23.93
