In [2]:
# Imports
import datetime
import os
import time

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import refinitiv.dataplatform.eikon as ek

### Introduction

This notebook explores company clustering opportunities based on ESG performance, using Thomson Reuters datapoints as a baseline.  
We begin by selecting SFDR-related metrics. 

##### Setup

In [18]:
# Utils
def get_ric(symbol):
    """
    Find the best possible RIC match for a company symbol.

    symbol: the symbol (ticker) of the company in your portfolio.
    returns: the first RIC of the first mapped symbol in the 'SymbologySearch'
             response, or `symbol` unchanged when the response holds no usable match.

    Errors raised by the request itself are not caught here; only the parsing of
    the response is best-effort.
    """
    possible_rics = ek.json_requests.send_json_request('SymbologySearch',{'symbols': [symbol], 'from': 'ticker', 'to': ['RIC'], 'bestMatchOnly': False})
    try:
        return possible_rics.json().get("mappedSymbols")[0].get("RICs")[0]
    except Exception:
        # Missing keys, empty lists or an unparsable body all mean "no match".
        # `Exception` (instead of a bare except) lets KeyboardInterrupt and
        # SystemExit through, so a long lookup loop can still be interrupted.
        return symbol
    
def get_isin_from_ric(ric):
    """
    Find the ISIN match for your input RIC.

    ric: the RIC to convert.
    returns: the ISIN found in the first row of the symbology result, or "N/A"
             when the result has no "ISIN" column or no rows.
    """
    isin = ek.get_symbology(ric, from_symbol_type="RIC", to_symbol_type="ISIN")
    try:
        # `.iloc[0]` reads the first row by position, whatever the result is
        # indexed by; `ISIN[0]` is a label lookup and only works on a
        # non-integer index through pandas' deprecated positional fallback.
        return isin.ISIN.iloc[0]
    except Exception:
        # No "ISIN" column or an empty result: report the ISIN as unavailable.
        return "N/A"
    

def get_isin_from_sedol(sedol):
    """
    Find the ISIN match for your input sedol.

    sedol: the SEDOL code to convert.
    returns: the ISIN found in the first row of the symbology result, or "N/A"
             when the result has no "ISIN" column or no rows.
    """
    isin = ek.get_symbology(sedol, from_symbol_type="SEDOL", to_symbol_type="ISIN")
    try:
        # Positional access: `ISIN[0]` would be a label lookup and relies on a
        # deprecated pandas fallback when the result is not indexed by integers.
        return isin.ISIN.iloc[0]
    except Exception:
        # No "ISIN" column or an empty result: report the ISIN as unavailable.
        return "N/A"

def prepare_universe(input_path="data/", filename="univers.xls"):
    """
    Load the raw universe Excel export and return it as a clean table.

    The layout is taken from the slicing below: the header sits on row 10
    (0-based) of the sheet as read by pandas, data starts on row 11, and the
    last six rows are discarded. The first column and every column after
    "SEDOL" are dropped as well.

    input_path: folder prefix, concatenated as-is with `filename`.
    filename: name of the Excel file to read.
    returns: the cleaned universe DataFrame (columns up to and including "SEDOL").
    """
    universe = pd.read_excel(input_path+filename)
    column_names = universe.iloc[10,:]
    universe_df = universe.iloc[11:,:].reset_index(drop=True)
    universe_df.columns = column_names
    # Drop the six trailing rows and the first column.
    universe_df = universe_df.iloc[:-6,1:]
    universe_df = universe_df.loc[:,:"SEDOL"]
    # Return the cleaned frame; the raw sheet was returned before, which
    # silently discarded all of the cleaning above.
    return universe_df

def format_df(esg_df, esg_df_bis, year):
    """
    Attach one year of environmental pillar data to `esg_df`.

    esg_df: frame with an "Instrument" column, used as the left side of the join.
    esg_df_bis: frame with a "year" column and exactly three other columns,
                which are relabelled as instrument, grade and score.
    year: the year to select from `esg_df_bis`.
    returns: `esg_df` left-joined on "Instrument" with the year-suffixed columns.
    """
    year_label = str(year)
    is_selected_year = esg_df_bis.year == year
    yearly = esg_df_bis.loc[is_selected_year].copy().drop(columns=["year"])
    yearly.columns = [
        "Instrument",
        "Environmental Pillar Score Grade " + year_label,
        "Environmental Pillar Score " + year_label,
    ]
    return esg_df.merge(yearly, how="left", left_on="Instrument", right_on="Instrument")

def get_weighted_average(df, metric):
    """
    Return the weighted sum of `metric`, i.e. sum(df[metric] * df.Weight).

    df: frame with a "Weight" column and a `metric` column. Each of the two may
        be numeric already or hold comma-decimal strings such as "12,5".
    metric: name of the column to weight.
    returns: the weighted sum as a scalar.

    The two columns are converted independently and `df` is left untouched.
    Before, both conversions shared one try block, so a numeric "Weight" stopped
    a string metric from ever being converted, and the caller's frame was
    overwritten as a side effect.
    """
    def _to_float(values):
        # Parse comma-decimal strings; columns without a string accessor are
        # taken to be numeric already and are used as they are.
        try:
            return values.str.replace(",", ".", regex=False).astype(float)
        except AttributeError:
            return values

    return (_to_float(df[metric]) * _to_float(df["Weight"])).sum()
    
def add_isins(universe_df, output_path="notebooks/output/",write_to_csv=False):
    """
    Look up the ISIN of every SEDOL and store it in an "Instrument" column.

    universe_df: frame with a "SEDOL" column; the new column is added in place.
    output_path: folder prefix used when `write_to_csv` is set.
    write_to_csv: when true, save the result as output_path + "universe_df.csv".
    returns: the same `universe_df`, now carrying the "Instrument" column.
    """
    isins = [get_isin_from_sedol(str(sedol)) for sedol in universe_df.SEDOL]
    # Assign the plain list so values are placed row by row. Wrapping it in
    # pd.Series would align on a fresh 0..n-1 index and yield NaN or shifted
    # ISINs whenever universe_df is not indexed 0..n-1 (e.g. after filtering).
    universe_df["Instrument"] = isins

    if write_to_csv:
        universe_df.to_csv(output_path+"universe_df.csv")
    return universe_df

In [19]:
# The Eikon app key is a credential and must not be committed with the notebook.
# Export EIKON_APP_KEY before starting the kernel. The keys that were hard-coded
# here have been exposed and should be revoked.
app_key = os.environ.get("EIKON_APP_KEY")
if not app_key:
    raise RuntimeError("Set the EIKON_APP_KEY environment variable to your Eikon app key.")
ek.set_app_key(app_key)

Download the data. We will extract information for our reference universe, based on its ISIN codes.  
We will first cluster our data using only ESG performance and each company's field of activity (sector).

In [20]:
# Folder prefixes (kept with their trailing slash because paths are built by
# plain string concatenation) and the name of the raw universe workbook.
input_path, output_path = "data/", "output/"
filename = "univers.xls"

In [108]:
# Reload the saved universe and discard the "Unnamed: 0" column that comes back from the CSV.
universe_df = pd.read_csv(f"{output_path}universe_df.csv").drop("Unnamed: 0", axis=1)

In [109]:
# First rows of the universe.
universe_df.head(5)

Unnamed: 0,Name,Symbol,Country,Market Capitalization (bil) [USD],Industry Name - GICS Sub-Industry,SEDOL,ISINS
0,APPLE INC,AAPL,USA,2259.699951,"Technology Hardware, Storage & Peripherals",2046251,US0378331005
1,MICROSOFT CORP,MSFT,USA,1970.640015,Systems Software,2588173,US5949181045
2,AMAZON.COM INC,AMZN,USA,1680.459961,Internet & Direct Marketing Retail,2000019,US0231351067
3,ALPHABET INC,GOOGL,USA,1552.959961,Interactive Media & Services,BYVY8G0,US02079K3059
4,FACEBOOK INC,FB,USA,857.919983,Interactive Media & Services,B7TL820,US30303M1027


In [110]:
# Last rows of the universe.
universe_df.tail(5)

Unnamed: 0,Name,Symbol,Country,Market Capitalization (bil) [USD],Industry Name - GICS Sub-Industry,SEDOL,ISINS
2901,REVANCE THERAPEUTICS INC,RVNC,USA,2.0,Pharmaceuticals,BJFSR99,US7613301099
2902,WORKSPACE GROUP PLC,WKP,GBR,2.0,Office REITs,B67G5X0,GB00B67G5X01
2903,SILK ROAD MEDICAL INC,SILK,USA,2.0,Health Care Supplies,BGHVXJ7,US82710M1009
2904,CREDITO EMILIANO SPA,EMBI,ITA,2.0,Diversified Banks,7135251,IT0003121677
2905,EMPIRE STATE REALTY TR INC,ESRT,USA,2.0,Diversified REITs,BF321D7,US2921041065


In [111]:
# Report how many rows the universe holds.
print(f"There are {universe_df.shape[0]} companies in our universe.")

There are 2906 companies in our universe.


In [112]:
# Maps each Refinitiv field (TR.*) requested for the SFDR analysis to the theme
# label it is filed under. The keys, in this order, are the first fields of the
# data request built from this dict.
sfdr_metrics = {
    'TR.GICSSector': 'GIC_Sector',
    'TR.NACEClassification': 'NACE_Sector',
    'TR.CO2EmissionTotal': "GHG Emissions",
    'TR.CO2DirectScope1': "GHG Emissions",
    'TR.CO2IndirectScope2': "GHG Emissions",
    'TR.CO2IndirectScope3': "GHG Emissions",
  #  'carbon_footprint': "GHG Emissions",
    'TR.AnalyticCO2': "GHG Emissions",
   # 'TR.EnergyUseTotal':"Energy Efficiency",
    'TR.AnalyticTotalRenewableEnergy':"Energy Efficiency", # use 1 - this value
    'TR.AnalyticEnergyUse':'Energy Efficiency', # globally and by NACE sector, GJ/M$
    'TR.BiodiversityImpactReduction':"Biodiversity", # does the company monitor its impact
    'TR.AnalyticDischargeWaterSystem':"Water", # ton emissions / $M
    'TR.HazardousWaste': "Waste",
    'TR.WasteTotal':'Waste', # to get non recycled waste
    'TR.WasteRecycledTotal':'Waste', 
    'TR.ILOFundamentalHumanRights': 'Social and Employee Matters',
    'TR.GenderPayGapPercentage':'Social and Employee Matters', # women to men
    'TR.AnalyticSalaryGap':'Social and Employee Matters', # to average, should be median
    'TR.AnalyticBoardFemale': 'Social and Employee Matters', 
    'TR.WhistleBlowerProtection': 'Social and Employee Matters',
    'TR.AccidentsTotal': 'Social and Employee Matters', # proxy for accidents
    'TR.AnalyticHumanRightsPolicy': 'Social and Employee Matters',
    'TR.CriticalCountry1': 'Social and Employee Matters', # as a proxy for operations at risk of child or forced labour
    'TR.CriticalCountry2': 'Social and Employee Matters', # as a proxy for operations at risk of child or forced labour
    'TR.CriticalCountry3': 'Social and Employee Matters', # as a proxy for operations at risk of child or forced labour
    'TR.CriticalCountry4': 'Social and Employee Matters', # as a proxy for operations at risk of child or forced labour
    'TR.AntiPersonnelLandmines':'Social and Employee Matters', # anti personnel landmines
    'TR.PolicyBriberyCorruption': 'Anti-corruption and Anti-bribery',
    'TR.AnalyticBriberyFraudControv':'Anti-corruption and Anti-bribery',
}

We are missing data on:  
- Biodiversity. Red List species / adjacent to sites. 
- Deforestation
- Water stress, untreated discharged waste water
- Due diligence on human rights, human trafficking  
- Number of convictions for anti-corruption

In [113]:
# TODO: derive fossil fuel sector exposure from the GICS classification

In [114]:
# Fields to request: every SFDR field first, then the ESG grade and score fields.
metrics = list(sfdr_metrics) + [
    'TR.TRESGScoreGrade',
    'TR.EnvironmentPillarScoreGrade',
    'TR.SocialPillarScoreGrade',
    'TR.GovernancePillarScoreGrade',
    'TR.AnalyticCO2ScoreGrade',
    'TR.TRESGInnovationScoreGrade',
    'TR.TRESGCSRStrategyScoreGrade',
    'TR.TRESGScore',
    'TR.EnvironmentPillarScore',
    'TR.SocialPillarScore',
    'TR.GovernancePillarScore',
    'TR.AnalyticCO2Score',
    'TR.TRESGInnovationScore',
    'TR.TRESGCSRStrategyScore',
]

In [123]:
# Turn every ISIN into a string, element by element.
universe_df["ISINS"] = universe_df["ISINS"].map(str)

In [129]:
# Instruments to request, in universe order.
isins = list(universe_df["ISINS"])
len(isins)

2906

In [168]:
# Fetch all requested fields for the whole universe in one call.
# NOTE(review): `err` is never inspected afterwards — check it before relying on esg_df.
esg_df, err = ek.get_data(instruments=isins, fields=metrics)

In [170]:
# Rewrite the "nan" placeholder (missing ISINs after the str conversion) as "NAN".
# NOTE(review): presumably this matches how the instrument comes back in esg_df — confirm.
# The bare `value_counts()` call that used to precede this line was removed: it was
# not the last expression of the cell, so its result was computed and thrown away.
universe_df.ISINS = universe_df.ISINS.replace("nan", "NAN")

In [173]:
# Number of rows where the returned instrument equals the requested ISIN at the
# same position. The slice length follows the universe size instead of the
# hard-coded 2906, so the check keeps working when the universe changes.
(universe_df.ISINS == esg_df.Instrument.iloc[:len(universe_df)]).sum()

842

In [178]:
# Give the identifier column the same name as in universe_df so both frames share a join key.
isin_column = {"Instrument": "ISINS"}
esg_df = esg_df.rename(columns=isin_column).copy()

In [179]:
# Keep every universe row and attach the ESG fields that share its ISIN.
out = universe_df.merge(esg_df, how="left", on="ISINS")

In [192]:
# Keep the first row for each company name.
out = out.drop_duplicates(subset="Name")

In [196]:
# (rows, columns) of the merged, de-duplicated table.
out.shape

(2821, 46)

In [195]:
# Save the enriched universe (the index is written as the first, unnamed column).
output_path = "output/"
out.to_csv(f"{output_path}universe_df_esg.csv")