# **Part I: Creating ESG Factor**

### **Importing Packages**

In [1]:
import pandas as pd
import os
import numpy as np

#Inspecting Factor
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

#Edit Data
from pandas.tseries.offsets import MonthEnd
from functools import reduce #Get Monthly Data

#Regression
import statsmodels.api as sm #Regression

## **1. Importing & Preparing Data**

### **1.1 Stock Returns**

In [5]:
# Total-return sheet of the wide-format workbook (one column per ticker)
stock_returns = pd.read_excel(
    "__data/Stock_Return_Data_Wide_Format.xlsx",
    sheet_name="ReturnTotal",
)

In [6]:
# Index the table by plain calendar dates (datetime.date), the same way the
# market-cap and ESG tables in this notebook are indexed.
stock_returns["Date"] = pd.to_datetime(stock_returns["Date"]).dt.date

# Convert percent to decimal returns in one vectorised step. Doing this after
# the index is set means a re-run of this cell fails on the line above (the
# "Date" column no longer exists) before any value is divided a second time.
stock_returns = stock_returns.set_index("Date") / 100

In [7]:
# Preview: returns should now be decimals, indexed by date
stock_returns.head()

Unnamed: 0_level_0,A.N,AA.N,AAL.OQ,AAON.OQ,AAP.N,AAPL.OQ,AAT.N,ABBV.N,ABCB.N,ABG.N,...,YETI.N,YOU.N,YUM.N,ZBH.N,ZBRA.OQ,ZD.OQ,ZI.OQ,ZION.OQ,ZTS.N,ZWS.N
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-01-31,0.15675,,,-0.132184,-0.02734,0.056005,,,-0.353586,-0.21663,...,,,-0.085414,-0.099456,-0.169299,-0.022954,,-0.391269,,
2009-02-28,-0.232854,,,-0.143488,0.168653,-0.009098,,,-0.351175,-0.203911,...,,,-0.081761,-0.037912,0.043969,-0.043412,,-0.370291,,
2009-03-31,0.108147,,,0.167526,0.075598,0.177024,,,-0.052314,0.512281,...,,,0.045662,0.042262,0.082527,0.168713,,0.049093,,
2009-04-30,0.188029,,,0.075055,0.064995,0.197013,,,0.390658,1.243619,...,,,0.221622,0.205205,0.117245,0.095934,,0.111902,,
2009-05-31,-0.001643,,,0.067762,-0.026514,0.079313,,,-0.042748,-0.01758,...,,,0.038381,0.01273,0.027294,-0.070446,,0.254319,,


### **1.2 Stock MCap**

In [8]:
# Market-cap sheet of the same workbook the returns are read from
stock_mcap = pd.read_excel(
    "__data/Stock_Return_Data_Wide_Format.xlsx",
    sheet_name="MCAP",
)

In [9]:
# Parse the dates, keep only the calendar-date part, and index the table by it
stock_mcap["Date"] = pd.to_datetime(stock_mcap["Date"]).dt.date
stock_mcap = stock_mcap.set_index("Date")

In [10]:
# Show seven rows instead of the default five: the earliest dates displayed
# below carry no values in the visible columns
stock_mcap.head(7)

Unnamed: 0_level_0,A.N,AA.N,AAL.OQ,AAON.OQ,AAP.N,AAPL.OQ,AAT.N,ABBV.N,ABCB.N,ABG.N,...,YETI.N,YOU.N,YUM.N,ZBH.N,ZBRA.OQ,ZD.OQ,ZI.OQ,ZION.OQ,ZTS.N,ZWS.N
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-07-31,,,,,,,,,,,...,,,,,,,,,,
2007-10-31,,,,,,,,,,,...,,,,,,,,,,
2007-12-31,,,,,,,,,,,...,,,,,,,,,,
2008-10-31,,,,,,,,,,,...,,,,,,,,,,
2008-11-30,,,,,,,,,,,...,,,,,,,,,,
2009-01-31,6364005000.0,,1655819000.0,311589100.0,3099246000.0,80265610000.0,,,103675000.0,114387000.0,...,,,13136580000.0,8174367000.0,1024301000.0,859274600.0,,1720945000.0,,
2009-02-28,4788705000.0,,1141133000.0,266707500.0,3621941000.0,79535350000.0,,,67460000.0,91062250.0,...,,,12087000000.0,7803839000.0,1064224000.0,823108200.0,,1080714000.0,,


### **1.3 ESG Scores**

In [11]:
# ESG score sheet of the wide-format ESG workbook (one column per ticker)
esg_scores = pd.read_excel(
    "__data/Stock_ESG_Data_Wide_Format.xlsx",
    sheet_name="ESG",
)

In [12]:
# Parse the dates and index the table by plain calendar dates (datetime.date)
esg_scores["Date"] = pd.to_datetime(esg_scores["Date"]).dt.date
esg_scores = esg_scores.set_index("Date")

# Keep observations from 2009-01-01 onwards, then carry each stock's last
# available score forward over missing rows.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') and returns
# a new frame, so nothing is modified in place on the filtered selection.
esg_scores = esg_scores[esg_scores.index >= pd.to_datetime("2009-01-01").date()].ffill()

  esg_scores.fillna(method='ffill', inplace=True)


In [13]:
# The ESG and return tables do not cover exactly the same tickers, so the ESG
# table is restricted to the tickers present in both.
# NOTE(review): stock_mcap is not checked here although getESGScorePercentile
# looks the selected tickers up in it - confirm both sheets share their tickers.
common_columns_returns = esg_scores.columns.intersection(stock_returns.columns)
esg_scores = esg_scores.loc[:, common_columns_returns]

### **1.4 Fama-French Data**

In [14]:
# Load the Fama-French five-factor file; its first three lines are skipped
ff5 = pd.read_csv("__data/F-F_Research_Data_5_Factors_2x3.csv", skiprows=3, index_col=0)

# YYYYMM labels -> month-end timestamps; header names without stray whitespace
ff5.index = pd.to_datetime(ff5.index, format='%Y%m') + MonthEnd(0)
ff5.columns = ff5.columns.str.strip()

# Coerce every column to numbers (unparseable entries become NaN) and rescale
# from percent to decimal returns
ff5 = ff5.apply(pd.to_numeric, errors='coerce') / 100

# Keep 2009-01-01 onwards, expose the calendar date as a regular "Date" column,
# fix the column order and replace the datetime index with a fresh 0..n-1 index
ff5 = (
    ff5.loc[ff5.index >= '2009-01-01']
    .assign(Date=lambda frame: frame.index.date)
    .loc[:, ['Date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']]
    .reset_index(drop=True)
)

# Display the first few rows of the resulting dataframe
ff5.head()

Unnamed: 0,Date,Mkt-RF,SMB,HML,RMW,CMA,RF
0,2009-01-31,-0.0812,-0.0214,-0.1129,0.0018,-0.0116,0.0
1,2009-02-28,-0.101,-0.0133,-0.0695,0.012,-0.0102,0.0001
2,2009-03-31,0.0895,0.0067,0.0347,-0.0252,-0.0225,0.0002
3,2009-04-30,0.1018,0.0713,0.0536,0.0131,0.0012,0.0001
4,2009-05-31,0.0521,-0.0232,0.0028,-0.0078,-0.0216,0.0


### **1.5 Momentum Data**

In [15]:
# Load the momentum factor file
mom = pd.read_csv("__data/F-F_Momentum_Factor.CSV", index_col = 0)

# YYYYMM labels -> month-end timestamps; header names without stray whitespace
mom.index = pd.to_datetime(mom.index, format='%Y%m') + MonthEnd(0)
mom.columns = mom.columns.str.strip()

# Coerce every column to numbers (unparseable entries become NaN) and rescale
# from percent to decimal returns
mom = mom.apply(pd.to_numeric, errors='coerce') / 100

# Keep 2009-01-01 onwards, expose the calendar date as a regular "Date" column,
# publish the factor under the upper-case name "MOM" and reset the index
mom = (
    mom.loc[mom.index >= '2009-01-01']
    .assign(Date=lambda frame: frame.index.date, MOM=lambda frame: frame["Mom"])
    .loc[:, ["Date", "MOM"]]
    .reset_index(drop=True)
)

# Display the first few rows of the resulting dataframe
mom.head()

Unnamed: 0,Date,MOM
0,2009-01-31,-0.0218
1,2009-02-28,0.0441
2,2009-03-31,-0.1187
3,2009-04-30,-0.343
4,2009-05-31,-0.1249


## **2. Calculating ESG Factors**

### **2.1 Functions**

In [16]:
def getESGScorePercentile(esg_score_data, mcap_data, percentile, high = True):
    """Select, for every date, the stocks in the top or bottom ESG-score percentile.

    Parameters
    ----------
    esg_score_data : pandas.DataFrame
        One row per date (dates in the index) and one numeric score column per
        stock. Stocks without a score on a date are ignored for that date.
    mcap_data : pandas.DataFrame
        Market capitalisations with the same layout. Every date of
        `esg_score_data` and every selected stock is looked up with `.loc`,
        so a missing label raises a KeyError.
    percentile : int or float
        Share of the scored stocks to select, in percent (25 selects a
        quarter). The resulting stock count is rounded down.
    high : bool, default True
        True selects the highest scores, False the lowest.

    Returns
    -------
    pandas.DataFrame
        Indexed by "Date" with the columns "Stock_List" (list of selected
        column labels), "Stock_Count", "Stock_MCap_Total" and
        "Stock_MCap_Average"; both market-cap columns are divided by 1e9.
    """
    portfolio_rows = []

    #Loops over each date
    for date in esg_score_data.index:
        #Scores available on this date (a Series labelled by stock); the dates
        #live in the index, so there is no "Date" entry to remove from it
        scores_on_date = esg_score_data.loc[date].dropna()

        #Portfolio size: the requested share of the scored stocks, rounded down
        n_selected = int(len(scores_on_date) * percentile / 100)

        #Creates Portfolio
        if high:
            selected_scores = scores_on_date.nlargest(n_selected)
        else:
            selected_scores = scores_on_date.nsmallest(n_selected)
        percentile_stocks = selected_scores.index.tolist()

        #Market cap of the selected stocks, rescaled by 1e9
        mcap = mcap_data.loc[date, percentile_stocks]
        total_mcap = mcap.sum() / 1000000000
        average_mcap = mcap.mean() / 1000000000

        #Appends to list (third entry is the number of stocks in the portfolio)
        portfolio_rows.append((date, percentile_stocks, len(percentile_stocks), total_mcap, average_mcap))

    #Creates new dataframe
    stock_list_dataframe = pd.DataFrame(
        portfolio_rows,
        columns=["Date", "Stock_List", "Stock_Count", "Stock_MCap_Total", "Stock_MCap_Average"],
    )
    stock_list_dataframe.set_index("Date", inplace = True)

    #Returns dataframe
    return stock_list_dataframe

In [17]:
#Selects the returns of a list of stocks on a specific date
def getPortfolioReturn(return_data, date, list_stocks):
    """Return the entries of `return_data` at row `date` for the labels in `list_stocks`.

    `return_data` is looked up by row label first and by stock label second;
    the individual returns are handed back without any aggregation.
    """
    returns_on_date = return_data.loc[date]
    return returns_on_date[list_stocks]

#Builds the return history of a portfolio, one row per date
def calculateReturnHistory(return_data, portfolio_data, column_name_return, column_name_count, column_name_mcap_total, column_name_mcap_average):
    """Compute the equal-weighted return of a portfolio for every date.

    Parameters
    ----------
    return_data : pandas.DataFrame
        Returns, passed on to getPortfolioReturn together with the date and
        the stock list of that date.
    portfolio_data : pandas.DataFrame
        One row per date with the columns "Stock_List", "Stock_Count",
        "Stock_MCap_Total" and "Stock_MCap_Average".
    column_name_return, column_name_count, column_name_mcap_total, column_name_mcap_average : str
        Names of the corresponding output columns.

    Returns
    -------
    pandas.DataFrame
        Columns "Date" plus the four names above, with a default integer index.
    """
    history_rows = []

    for date in portfolio_data.index:
        #Portfolio description (constituents, count, market caps) for this date
        holdings = portfolio_data.loc[date]

        #Returns of the constituents at the current date
        member_returns = getPortfolioReturn(return_data, date, holdings["Stock_List"])

        #Equal-weighted: plain mean of the member returns, NaN for an empty portfolio
        average_portfolio_return = member_returns.mean() if len(member_returns) > 0 else float('nan')

        history_rows.append((
            date,
            average_portfolio_return,
            holdings["Stock_Count"],
            holdings["Stock_MCap_Total"],
            holdings["Stock_MCap_Average"],
        ))

    output_columns = ["Date", column_name_return, column_name_count, column_name_mcap_total, column_name_mcap_average]
    return pd.DataFrame(history_rows, columns=output_columns)

In [18]:
def getESGFactor(esg_score_data, percentile, min_stocks, mcap_data = None, return_data = None):
    """Build the low-minus-high ESG factor return series.

    For every date of `esg_score_data` a high-score and a low-score portfolio
    are formed with getESGScorePercentile, their equal-weighted returns are
    computed with calculateReturnHistory, and the factor is the low-portfolio
    return minus the high-portfolio return.

    Parameters
    ----------
    esg_score_data : pandas.DataFrame
        ESG scores, passed on to getESGScorePercentile.
    percentile : int or float
        Share of stocks (in percent) in each of the two portfolios.
    min_stocks : int
        Only dates whose high-score portfolio holds strictly more than this
        many stocks are kept.
    mcap_data : pandas.DataFrame, optional
        Market capitalisations. Defaults to the notebook-level `stock_mcap`,
        resolved when the function is called rather than when it is defined,
        so a reloaded `stock_mcap` is picked up.
    return_data : pandas.DataFrame, optional
        Stock returns. Defaults to the notebook-level `stock_returns`.

    Returns
    -------
    pandas.DataFrame
        One row per retained date with "Date", the return, count and
        market-cap columns of both portfolios (suffixes "_High" / "_Low")
        and "ESG_Factor"; the index is reset to 0..n-1.
    """
    #Fall back to the notebook-level frames at call time
    if mcap_data is None:
        mcap_data = stock_mcap
    if return_data is None:
        return_data = stock_returns

    #Portfolio constituents for each date
    highest_stocks = getESGScorePercentile(esg_score_data, mcap_data, percentile, high = True)
    lowest_stocks = getESGScorePercentile(esg_score_data, mcap_data, percentile, high = False)

    #Calculates the Average Return for each Portfolio at each Date
    highest_stocks_average_return = calculateReturnHistory(return_data, highest_stocks, "Average_Return_High", "Count_High", "MCap_Total_High", "MCap_Average_High")
    lowest_stocks_average_return = calculateReturnHistory(return_data, lowest_stocks, "Average_Return_Low", "Count_Low", "MCap_Total_Low", "MCap_Average_Low")

    #Merges Data together to have the data in one dataframe
    return_history = pd.merge(highest_stocks_average_return, lowest_stocks_average_return, on='Date', how='outer')

    #Calculates Factor for each Date (low-score portfolio minus high-score portfolio)
    return_history["ESG_Factor"] = return_history["Average_Return_Low"] - return_history["Average_Return_High"]

    #Only look at diversified portfolios: strictly more than min_stocks stocks
    return_history_used = return_history[return_history["Count_High"] > min_stocks].copy()

    return return_history_used.reset_index(drop = True)

### **2.2 Calculating ESG Factor**

In [19]:
# Low-minus-high ESG factor from the top and bottom 25% of ESG scores; dates
# where the high-score portfolio holds 50 stocks or fewer are dropped
esg_factor = getESGFactor(esg_scores, percentile = 25, min_stocks = 50)

In [20]:
# Preview the factor series with the size and market cap of both portfolios
esg_factor.head()

Unnamed: 0,Date,Average_Return_High,Count_High,MCap_Total_High,MCap_Average_High,Average_Return_Low,Count_Low,MCap_Total_Low,MCap_Average_Low,ESG_Factor
0,2009-12-31,0.038894,154,6110.540896,39.678837,0.057903,154,908.712776,6.017965,0.019009
1,2010-01-31,-0.033412,156,5988.273673,38.38637,-0.030241,156,912.673663,5.965187,0.003171
2,2010-02-28,0.04341,156,6176.713433,39.594317,0.054302,156,963.517698,6.297501,0.010892
3,2010-03-31,0.069097,156,6533.051561,41.878536,0.070676,156,1005.762128,6.573609,0.001579
4,2010-04-30,0.02575,156,6633.206082,42.520552,0.045017,156,1022.315223,6.681799,0.019267


In [21]:
# Constituents of the high- and low-score portfolios at the same 25% cut-off
# used for esg_factor; these two tables are written out in the export step
percentile_settings = dict(esg_score_data = esg_scores, mcap_data = stock_mcap, percentile = 25)
highest_stocks = getESGScorePercentile(high = True, **percentile_settings)
lowest_stocks = getESGScorePercentile(high = False, **percentile_settings)

### **2.3 Calculating Residual ESG Factor**

#### **Merge Data**

In [22]:
# Only the factor itself is needed for the regression
esg_factor_subsetted = esg_factor[["Date", "ESG_Factor"]]

# Inner-join the five factors, momentum and the ESG factor on their shared dates
data = ff5.merge(mom, on = "Date").merge(esg_factor_subsetted, on = "Date")

In [23]:
# Check the last dates that survived the merge
data.tail()

Unnamed: 0,Date,Mkt-RF,SMB,HML,RMW,CMA,RF,MOM,ESG_Factor
169,2024-01-31,0.007,-0.0568,-0.0247,0.0066,-0.0102,0.0047,0.0508,-0.018336
170,2024-02-29,0.0507,-0.0076,-0.0352,-0.0198,-0.0216,0.0042,0.0498,0.013514
171,2024-03-31,0.0283,-0.0118,0.0421,0.0147,0.0119,0.0043,-0.004,-0.017662
172,2024-04-30,-0.0467,-0.0256,-0.0052,0.0148,-0.003,0.0047,-0.0042,-0.019939
173,2024-05-31,0.0434,0.0076,-0.0166,0.0298,-0.0307,0.0044,-0.0002,0.022154


#### **Run Residual Regression**

In [24]:
# Regress the ESG factor on the five Fama-French factors plus momentum,
# with an intercept
factor_columns = ["Mkt-RF", "SMB", "HML", "RMW", "CMA", "MOM"]
Y = data["ESG_Factor"]
X = sm.add_constant(data[factor_columns])

model = sm.OLS(Y, X).fit()

# Estimated intercept and factor loadings
coefficients = model.params
coefficients.head(10)

Unnamed: 0,0
const,0.004696
Mkt-RF,0.019642
SMB,0.443913
HML,-0.053231
RMW,-0.127206
CMA,-0.187565
MOM,0.066217


In [25]:
# Residual factor: the part of the ESG factor the fitted model does not explain
fitted_esg_factor = model.predict(X)
data["ESG_Factor_Residual"] = Y.sub(fitted_esg_factor)

In [26]:
# Check the last rows again, now including the residual column
data.tail()

Unnamed: 0,Date,Mkt-RF,SMB,HML,RMW,CMA,RF,MOM,ESG_Factor,ESG_Factor_Residual
169,2024-01-31,0.007,-0.0568,-0.0247,0.0066,-0.0102,0.0047,0.0508,-0.018336,-0.003708
170,2024-02-29,0.0507,-0.0076,-0.0352,-0.0198,-0.0216,0.0042,0.0498,0.013514,-0.000546
171,2024-03-31,0.0283,-0.0118,0.0421,0.0147,0.0119,0.0043,-0.004,-0.017662,-0.011068
172,2024-04-30,-0.0467,-0.0256,-0.0052,0.0148,-0.003,0.0047,-0.0042,-0.019939,-0.011032
173,2024-05-31,0.0434,0.0076,-0.0166,0.0298,-0.0307,0.0044,-0.0002,0.022154,0.010394


#### **Merge Data back to esg_factor**

In [27]:
# Keep only the join key and the residual for merging back
data_subsetted = data.loc[:, ["Date", "ESG_Factor_Residual"]]

In [28]:
# Attach the residual to the full factor table (inner join on the date)
esg_factor_final = esg_factor.merge(data_subsetted, on = "Date")

In [29]:
# Preview the final table: raw and residual ESG factor side by side
esg_factor_final.head()

Unnamed: 0,Date,Average_Return_High,Count_High,MCap_Total_High,MCap_Average_High,Average_Return_Low,Count_Low,MCap_Total_Low,MCap_Average_Low,ESG_Factor,ESG_Factor_Residual
0,2009-12-31,0.038894,154,6110.540896,39.678837,0.057903,154,908.712776,6.017965,0.019009,-0.014877
1,2010-01-31,-0.033412,156,5988.273673,38.38637,-0.030241,156,912.673663,5.965187,0.003171,0.000677
2,2010-02-28,0.04341,156,6176.713433,39.594317,0.054302,156,963.517698,6.297501,0.010892,0.000383
3,2010-03-31,0.069097,156,6533.051561,41.878536,0.070676,156,1005.762128,6.573609,0.001579,-0.011539
4,2010-04-30,0.02575,156,6633.206082,42.520552,0.045017,156,1022.315223,6.681799,0.019267,-0.004379


## **3. Exporting Data**

In [30]:
#Exports the factor table as CSV (the integer row index carries no information)
esg_factor_final.to_csv("__data/esg_factor.csv", index=False)

#Exports both portfolio tables with their date index.
#The "Stock_List" column holds Python lists, so each list is written as text.
for portfolio_table, csv_path in (
    (highest_stocks, "__data/high_esg_portfolio.csv"),
    (lowest_stocks, "__data/low_esg_portfolio.csv"),
):
    portfolio_table.to_csv(csv_path, index = True)