# **Part 1 | Constructing ESG Factor**

### Importing Packages

In [1]:
import pandas as pd
import os
import numpy as np

#Inspecting Factor
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

# **Part I: Creating ESG Factor**

### Importing & Preparing Data

#### Stock Returns

In [2]:
#Monthly total returns, one column per stock (sheet "ReturnTotal")
stock_returns = pd.read_excel(
    io="__data/Stock_Return_Data_Wide_Format.xlsx",
    sheet_name="ReturnTotal",
)

In [3]:
#Only prepare the table while "Date" is still a column. Once it has become the
#index, every remaining column is a return series, so re-running this cell
#must not divide the returns by 100 a second time.
if "Date" in stock_returns.columns:
    #Parse the Date column, keep the calendar date only and use it as index
    stock_returns = (
        stock_returns
        .assign(Date=pd.to_datetime(stock_returns["Date"]).dt.date)
        .set_index("Date")
    )

    #Divides Stock Return values by 100 to get decimal values
    stock_returns = stock_returns / 100

In [4]:
#Preview of the first five months of returns
stock_returns.head(n=5)

Unnamed: 0_level_0,A.N,AA.N,AAL.OQ,AAON.OQ,AAP.N,AAPL.OQ,AAT.N,ABBV.N,ABCB.N,ABG.N,...,YETI.N,YOU.N,YUM.N,ZBH.N,ZBRA.OQ,ZD.OQ,ZI.OQ,ZION.OQ,ZTS.N,ZWS.N
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-01-31,0.15675,,,-0.132184,-0.02734,0.056005,,,-0.353586,-0.21663,...,,,-0.085414,-0.099456,-0.169299,-0.022954,,-0.391269,,
2009-02-28,-0.232854,,,-0.143488,0.168653,-0.009098,,,-0.351175,-0.203911,...,,,-0.081761,-0.037912,0.043969,-0.043412,,-0.370291,,
2009-03-31,0.108147,,,0.167526,0.075598,0.177024,,,-0.052314,0.512281,...,,,0.045662,0.042262,0.082527,0.168713,,0.049093,,
2009-04-30,0.188029,,,0.075055,0.064995,0.197013,,,0.390658,1.243619,...,,,0.221622,0.205205,0.117245,0.095934,,0.111902,,
2009-05-31,-0.001643,,,0.067762,-0.026514,0.079313,,,-0.042748,-0.01758,...,,,0.038381,0.01273,0.027294,-0.070446,,0.254319,,


#### Stock MCap

In [5]:
#Market capitalisations, one column per stock (sheet "MCAP" of the same workbook)
stock_mcap = pd.read_excel(
    io="__data/Stock_Return_Data_Wide_Format.xlsx",
    sheet_name="MCAP",
)

In [6]:
#Parse the Date column, keep the calendar date only and use it as index.
#The guard keeps the cell re-runnable: after the first run "Date" is the index
#and no longer a column, so there is nothing left to convert.
if "Date" in stock_mcap.columns:
    stock_mcap = (
        stock_mcap
        .assign(Date=pd.to_datetime(stock_mcap["Date"]).dt.date)
        .set_index("Date")
    )

In [7]:
#Preview of the first seven rows of market capitalisations
stock_mcap.head(n=7)

Unnamed: 0_level_0,A.N,AA.N,AAL.OQ,AAON.OQ,AAP.N,AAPL.OQ,AAT.N,ABBV.N,ABCB.N,ABG.N,...,YETI.N,YOU.N,YUM.N,ZBH.N,ZBRA.OQ,ZD.OQ,ZI.OQ,ZION.OQ,ZTS.N,ZWS.N
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-07-31,,,,,,,,,,,...,,,,,,,,,,
2007-10-31,,,,,,,,,,,...,,,,,,,,,,
2007-12-31,,,,,,,,,,,...,,,,,,,,,,
2008-10-31,,,,,,,,,,,...,,,,,,,,,,
2008-11-30,,,,,,,,,,,...,,,,,,,,,,
2009-01-31,6364005000.0,,1655819000.0,311589100.0,3099246000.0,80265610000.0,,,103675000.0,114387000.0,...,,,13136580000.0,8174367000.0,1024301000.0,859274600.0,,1720945000.0,,
2009-02-28,4788705000.0,,1141133000.0,266707500.0,3621941000.0,79535350000.0,,,67460000.0,91062250.0,...,,,12087000000.0,7803839000.0,1064224000.0,823108200.0,,1080714000.0,,


#### ESG Scores

In [8]:
#ESG scores, one column per stock (sheet "ESG")
esg_scores = pd.read_excel(
    io="__data/Stock_ESG_Data_Wide_Format.xlsx",
    sheet_name="ESG",
)

In [9]:
#Set Date column as date and use it as Index.
#The guard keeps the cell re-runnable: after the first run "Date" is the index
#and no longer a column.
if "Date" in esg_scores.columns:
    esg_scores = (
        esg_scores
        .assign(Date=pd.to_datetime(esg_scores["Date"]).dt.date)
        .set_index("Date")
    )

#Keep observations from 2009-01-01 onwards only
esg_scores = esg_scores[esg_scores.index >= pd.to_datetime("2009-01-01").date()]

#Fill missing rows with previous values for esg score.
#.ffill() replaces the deprecated fillna(method='ffill'); assigning the result
#instead of filling in place avoids mutating the filtered frame.
esg_scores = esg_scores.ffill()

  esg_scores.fillna(method='ffill', inplace=True)


In [10]:
#Returns and ESG data do not cover exactly the same stocks,
#so only the stocks present in both tables are kept
common_columns_returns = esg_scores.columns.intersection(stock_returns.columns)
esg_scores = esg_scores.loc[:, common_columns_returns]

In [11]:
#Preview of the first five months of ESG scores
esg_scores.head(n=5)

Unnamed: 0_level_0,A.N,AA.N,AAL.OQ,AAON.OQ,AAP.N,AAPL.OQ,AAT.N,ABBV.N,ABCB.N,ABG.N,...,YETI.N,YOU.N,YUM.N,ZBH.N,ZBRA.OQ,ZD.OQ,ZI.OQ,ZION.OQ,ZTS.N,ZWS.N
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-01-31,,,,,29.302437,,,,,,...,,,,,,,,,,
2009-02-28,,,,,29.302437,,,,,,...,,,,,,,,,,
2009-03-31,,,,,29.302437,,,,,,...,,,,,,,,,,
2009-04-30,,,,,29.302437,,,,,,...,,,,,,,,,,
2009-05-31,,,,,29.302437,,,,,,...,,,,,,,,,,


#### Constituents & ESG Portfolio of Constituents

In [12]:
#Loads the table that lists, per index, the stocks belonging to it
constituents = pd.read_csv(filepath_or_buffer="__data/constituents.csv")

In [13]:
#One list of stock identifiers per index column
constituents_spx, constituents_spcy, constituents_sp400 = (
    constituents[index_column].to_list()
    for index_column in ("0#.SPX", "0#.SPCY", "0#.SP400")
)

In [14]:
#Keeps only the constituents for which an ESG score column exists
constituents_spx, constituents_spcy, constituents_sp400 = (
    [ticker for ticker in members if ticker in esg_scores.columns]
    for members in (constituents_spx, constituents_spcy, constituents_sp400)
)

In [15]:
#ESG scores restricted to the constituents of each index
esg_scores_spx = esg_scores.loc[:, constituents_spx]
esg_scores_spcy = esg_scores.loc[:, constituents_spcy]
esg_scores_sp400 = esg_scores.loc[:, constituents_sp400]

In [16]:
#Number of stocks (columns) available per index
print(f"There are {len(esg_scores_spx.columns)} stocks in the SPX!")
print(f"There are {len(esg_scores_spcy.columns)} stocks in the SPCY!")
print(f"There are {len(esg_scores_sp400.columns)} stocks in the SP-400!")

There are 500 stocks in the SPX!
There are 587 stocks in the SPCY!
There are 398 stocks in the SP-400!


### Calculating Portfolio

#### Functions

In [27]:
def getESGScorePercentile(esg_score_data, mcap_data, percentile, high = True):
    """Select, for every date, the stocks in the top or bottom ESG-score percentile.

    Parameters
    ----------
    esg_score_data : pd.DataFrame
        ESG scores with one row per date (index) and one column per stock.
        Stocks without a score (NaN) on a date are ignored for that date.
    mcap_data : pd.DataFrame
        Market capitalisations with dates as index and stocks as columns.
        A date or stock that is absent from this frame is treated like a
        missing (NaN) value, i.e. it adds nothing to the portfolio total.
    percentile : float
        Share of the scored stocks, in percent, to select on each date.
        The number of stocks is ``int(n_scored * percentile / 100)``.
    high : bool, default True
        True selects the highest scores, False the lowest.

    Returns
    -------
    pd.DataFrame
        Indexed by "Date" with the columns "Stock_List" (selected stocks,
        ordered by score), "Stock_Count" (number of selected stocks) and
        "Stock_MCap" (sum of their market caps divided by 1,000,000,000).
    """
    stock_list = []

    #Loops over each date
    for date in esg_score_data.index:
        #Scores of the current date without stocks that have no score
        scores_on_date = esg_score_data.loc[date].dropna()

        #Creates Portfolio
        portfolio_size = int(len(scores_on_date) * percentile / 100)
        if high:
            percentile_stocks = scores_on_date.nlargest(portfolio_size).index.tolist()
        else:
            percentile_stocks = scores_on_date.nsmallest(portfolio_size).index.tolist()

        #reindex() yields NaN for stocks without an mcap column (and for dates
        #without an mcap row) instead of raising KeyError; sum() skips NaN
        if date in mcap_data.index:
            mcap_on_date = mcap_data.loc[date]
        else:
            mcap_on_date = pd.Series(dtype=float)
        total_mcap = mcap_on_date.reindex(percentile_stocks).sum() / 1000000000

        #Appends date, stocks, stock count and total mcap to list
        stock_list.append((date, percentile_stocks, len(percentile_stocks), total_mcap))

    #Creates new dataframe
    stock_list_dataframe = pd.DataFrame(stock_list, columns=["Date", "Stock_List", "Stock_Count", "Stock_MCap"])

    #Returns dataframe indexed by date
    return stock_list_dataframe.set_index("Date")

In [28]:
#Looks up the returns of a list of stocks on a specific date
def getPortfolioReturn(return_data, date, list_stocks):
    """Return the row of ``return_data`` at ``date``, restricted to ``list_stocks``.

    ``return_data`` has dates as index and stocks as columns. The result keeps
    the order of ``list_stocks``.
    """
    return return_data.loc[date][list_stocks]

#Builds the equal-weighted return history of a portfolio
def calculateReturnHistory(return_data, portfolio_data, column_name_return, column_name_count, column_name_mcap):
    """Compute the equal-weighted portfolio return for every date in ``portfolio_data``.

    ``portfolio_data`` is indexed by date and provides the columns "Stock_List",
    "Stock_Count" and "Stock_MCap". For each date the returns of the listed
    stocks are read from ``return_data`` and averaged; a portfolio without
    stocks gets NaN. The result has the columns "Date", ``column_name_return``,
    ``column_name_count`` and ``column_name_mcap``.
    """
    output_columns = ["Date", column_name_return, column_name_count, column_name_mcap]
    history_rows = []

    for date in portfolio_data.index:
        #Portfolio composition on this date
        holdings = portfolio_data.loc[date]
        members = holdings["Stock_List"]
        member_count = holdings["Stock_Count"]
        member_mcap = holdings["Stock_MCap"]

        #Returns of the portfolio members on this date
        member_returns = getPortfolioReturn(return_data, date, members)

        #Equal weighting: plain mean over the members, NaN for an empty portfolio
        mean_return = member_returns.mean() if len(member_returns) > 0 else float('nan')

        history_rows.append((date, mean_return, member_count, member_mcap))

    return pd.DataFrame(history_rows, columns=output_columns)

In [29]:
def getESGFactor(esg_score_data, percentile, min_stocks, mcap_data = stock_mcap):
    """Build the ESG factor: return of the low-ESG portfolio minus the high-ESG portfolio.

    For every date in ``esg_score_data`` the top and bottom ``percentile``
    percent of stocks by ESG score form two portfolios. Their equal-weighted
    returns are taken from the global ``stock_returns`` table, and
    ``ESG_Factor`` is ``Average_Return_Low - Average_Return_High``.

    Only dates whose high-ESG portfolio holds strictly more than
    ``min_stocks`` stocks are returned. ``mcap_data`` defaults to the global
    ``stock_mcap`` as it existed when this function was defined.

    Returns a dataframe with a fresh integer index and the columns "Date",
    "Average_Return_High", "Count_High", "MCap_High", "Average_Return_Low",
    "Count_Low", "MCap_Low" and "ESG_Factor".
    """
    #ESG portfolios for each date
    portfolios_high = getESGScorePercentile(esg_score_data, mcap_data, percentile, high = True)
    portfolios_low = getESGScorePercentile(esg_score_data, mcap_data, percentile, high = False)

    #Average return of each portfolio at each date
    returns_high = calculateReturnHistory(stock_returns, portfolios_high, "Average_Return_High", "Count_High", "MCap_High")
    returns_low = calculateReturnHistory(stock_returns, portfolios_low, "Average_Return_Low", "Count_Low", "MCap_Low")

    #Both histories side by side in one dataframe
    combined = returns_high.merge(returns_low, on='Date', how='outer')

    #Factor for each date
    combined["ESG_Factor"] = combined["Average_Return_Low"] - combined["Average_Return_High"]

    #Only look at diversified portfolios
    diversified = combined.loc[combined["Count_High"] > min_stocks].copy()

    return diversified.reset_index(drop = True)

#### Calculating Portfolios

I calculate the ESG factor separately for three sub-portfolios (large-, mid- and small-cap stocks) and then average the three factors to reduce the influence of small-cap stocks on the results.

In [30]:
#For each index: calculates the factor, keeps Date and factor only,
#and renames the factor column as prep for merging
esg_factor_percentile_spx = (
    getESGFactor(esg_scores_spx, percentile = 25, min_stocks = 50)
    .loc[:, ["Date", "ESG_Factor"]]
    .rename(columns={"ESG_Factor": "ESG_Factor_SPX"})
)
esg_factor_percentile_spcy = (
    getESGFactor(esg_scores_spcy, percentile = 25, min_stocks = 50)
    .loc[:, ["Date", "ESG_Factor"]]
    .rename(columns={"ESG_Factor": "ESG_Factor_SPCY"})
)
esg_factor_percentile_sp400 = (
    getESGFactor(esg_scores_sp400, percentile = 25, min_stocks = 50)
    .loc[:, ["Date", "ESG_Factor"]]
    .rename(columns={"ESG_Factor": "ESG_Factor_SP400"})
)

In [31]:
#Joins the three factor series on Date (only dates present in all three remain)
esg_factor_index_average = (
    esg_factor_percentile_spx
    .merge(esg_factor_percentile_spcy, on = "Date")
    .merge(esg_factor_percentile_sp400, on = "Date")
)

In [32]:
#Averages the three index factors per date and keeps Date + average only
esg_factor_index_average = (
    esg_factor_index_average
    .assign(
        ESG_Factor_Index_Average=lambda merged: merged[
            ["ESG_Factor_SPX", "ESG_Factor_SPCY", "ESG_Factor_SP400"]
        ].mean(axis = 1)
    )
    .loc[:, ["Date", "ESG_Factor_Index_Average"]]
)

In [33]:
#Preview of the first five dates of the averaged factor
esg_factor_index_average.head(n=5)

Unnamed: 0,Date,ESG_Factor_Index_Average
0,2015-12-31,-0.002146
1,2016-01-31,-0.012111
2,2016-02-29,-0.007503
3,2016-03-31,-0.009521
4,2016-04-30,0.00724


In [34]:
#Factor over the whole stock universe ...
esg_factor_percentile = getESGFactor(esg_scores, percentile = 25, min_stocks = 50)

#... combined with the index-average factor, keeping the dates of both
esg_factor = esg_factor_percentile.merge(esg_factor_index_average, how = "outer", on = "Date")

In [35]:
#Composition of the high- and low-ESG portfolios over the whole stock universe
highest_stocks = getESGScorePercentile(
    esg_score_data = esg_scores, mcap_data = stock_mcap, percentile = 25, high = True
)
lowest_stocks = getESGScorePercentile(
    esg_score_data = esg_scores, mcap_data = stock_mcap, percentile = 25, high = False
)

#### Exporting Data

In [36]:
#Writes the factor (without row index) to CSV
esg_factor.to_csv(path_or_buf="__data/esg_factor.csv", index=False)

#Writes both portfolio compositions to CSV, keeping the Date index
highest_stocks.to_csv(path_or_buf="__data/high_esg_portfolio.csv", index=True)
lowest_stocks.to_csv(path_or_buf="__data/low_esg_portfolio.csv", index=True)