In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from scipy.optimize import minimize
import prettytable
import plotly.graph_objects as go
import os
import math
import plotly.express as px

random.seed(0)  # Seeds only Python's built-in `random` module; NumPy's generator is not seeded by this call
%matplotlib inline


### Sustainable and Entrepreneurial Finance

### Assignment 2 - Portfolio decarbonization

#### Group 8 - Energy Firms With Available Scope 1 to 3 emissions (TRUCOST)

- Charlotte Ahrens
- David Campbell
- Guillaume Rico
- Per Christian Wessel


### 0 Importing and preparing datasets

In [None]:
# Base URL of the course repository; every dataset below is fetched from it
github_path = 'https://github.com/percw/Sustainable_and_Entrepreneurial_Finance/blob/master'

# URLs of the scope 1-3 emission-intensity workbooks
path_scope1_intensity = f'{github_path}/Data_Excel/Trucost_CO2emissions/scope1intensity.xlsx?raw=true'
path_scope2_intensity = f'{github_path}/Data_Excel/Trucost_CO2emissions/scope2intensity.xlsx?raw=true'
path_scope3_intensity = f'{github_path}/Data_Excel/Trucost_CO2emissions/scope3intensity.xlsx?raw=true'

# URLs of the two Homework 1 outputs (Q2 first df, and the Q2 value-/equally-weighted comparison df)
path_all_energy_monthly_yearly_returns_cap = f'{github_path}/output/all_energy_monthly_yearly_returns_cap.csv?raw=true'
path_all_energy_monthly_yearly_returns_cap_vw = f'{github_path}/output/df_all_energy_monthly_yearly_returns_cap_vw.csv?raw=true'

# Read the three intensity workbooks, in scope order
df_scope1_intensity, df_scope2_intensity, df_scope3_intensity = (
    pd.read_excel(workbook_url)
    for workbook_url in (path_scope1_intensity, path_scope2_intensity, path_scope3_intensity)
)

# Read the Homework 1 dataframes
df_all_energy_monthly_yearly_returns_cap = pd.read_csv(path_all_energy_monthly_yearly_returns_cap)
df_all_energy_monthly_yearly_returns_cap_vw = pd.read_csv(path_all_energy_monthly_yearly_returns_cap_vw)


A little cleaning: the CSV contains a redundant index column, `Unnamed: 0`, which we remove.

In [None]:
# Remove the redundant `Unnamed: 0` column (presumably the index saved to CSV in Homework 1).
# Reassigning with errors='ignore' keeps the cell idempotent: re-running it after the column
# is already gone no longer raises a KeyError, unlike an in-place drop.
df_all_energy_monthly_yearly_returns_cap = df_all_energy_monthly_yearly_returns_cap.drop(
    columns=['Unnamed: 0'], errors='ignore'
)
display(df_all_energy_monthly_yearly_returns_cap.head())

Checking out the intensity data and removing the x in the column name

In [None]:
# A bare expression in the middle of a cell is evaluated and discarded, so the
# summary statistics must be passed to display() to actually appear in the output.
display(df_scope1_intensity.describe())

# Strip only the leading 'x' of the year columns (e.g. 'x2005' -> '2005').
# The anchored pattern leaves any other column name containing an 'x' untouched
# and makes re-running the cell a no-op.
df_scope1_intensity.columns = df_scope1_intensity.columns.str.replace(r'^x(?=\d)', '', regex=True)
display(df_scope1_intensity.head())
display(df_scope1_intensity.shape)

# Number of missing values per column (shown as the cell's result)
df_scope1_intensity.isna().sum()

We can see that up until 2005 we have only NaNs.

Let's create a function that takes a dataframe and melts the separate year columns into a single column named `year`, so that each ISIN/company has one observation (row) per year.

In [None]:
def transform_emission_dataframe(df: pd.DataFrame, id_vars: list, var_name: str, value_name: str) -> pd.DataFrame:
    '''
    Reshape a wide emissions table (one column per year) into long format.

    Parameters
    ----------
    df : pd.DataFrame
        Wide table whose non-identifier column labels are years convertible to int.
        It is not modified.
    id_vars : list
        Identifier columns to keep as-is (e.g. ['ISIN', 'NAME']).
    var_name : str
        Name of the new column holding the former column labels (the years).
    value_name : str
        Name of the new column holding the cell values.

    Returns
    -------
    pd.DataFrame
        Long table with columns id_vars + [var_name, value_name], without any row
        containing a missing value, sorted by var_name (then by 'ISIN' when that
        column exists) and carrying a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If a melted column label cannot be converted to an integer.
    '''
    # Melt the dataframe to convert it from wide to long format
    df_long = df.melt(id_vars=id_vars, var_name=var_name, value_name=value_name)

    # Convert the year column to integer data type. Use `var_name` rather than a
    # hard-coded 'year' so the function also works when the caller picks another name.
    df_long[var_name] = df_long[var_name].astype(int)

    # Drop rows with a missing value in any column
    df_long = df_long.dropna()

    # Sort by year first, then by ISIN within each year (when an ISIN column is present)
    sort_keys = [var_name]
    if 'ISIN' in df_long.columns and var_name != 'ISIN':
        sort_keys.append('ISIN')
    df_long = df_long.sort_values(sort_keys)

    # Reset the index so it runs 0..n-1 after dropping and sorting
    df_long = df_long.reset_index(drop=True)

    return df_long


In [None]:
# Reshape the Scope 1 intensities from wide (one column per year) to long format
transformed_scope1 = transform_emission_dataframe(
    df_scope1_intensity,
    id_vars=['ISIN', 'NAME'],
    var_name='year',
    value_name='Scope_1',
)

# Preview the first rows, then the (rows, columns) shape
display(transformed_scope1.head(), transformed_scope1.shape)

This looks good. Let's do the same with the Scope 2 and Scope 3 emission intensities as well. First we need to remember to remove the x before all the years.

In [None]:
# Strip only the leading 'x' of the year columns (e.g. 'x2005' -> '2005').
# The anchored pattern leaves any other column name containing an 'x' untouched
# and makes re-running the cell a no-op.
df_scope2_intensity.columns = df_scope2_intensity.columns.str.replace(r'^x(?=\d)', '', regex=True)
df_scope3_intensity.columns = df_scope3_intensity.columns.str.replace(r'^x(?=\d)', '', regex=True)

# Transforming data from wide (one column per year) to long format
transformed_scope2 = transform_emission_dataframe(df_scope2_intensity, id_vars=['ISIN', 'NAME'], var_name='year', value_name='Scope_2')
transformed_scope3 = transform_emission_dataframe(df_scope3_intensity, id_vars=['ISIN', 'NAME'], var_name='year', value_name='Scope_3')

# Displaying a preview and the shape of each transformed frame
display(transformed_scope2.head())
display(transformed_scope2.shape)
display(transformed_scope3.head())
display(transformed_scope3.shape)

Great, now we need to make sure that we only extract the energy companies with sector 'GICSIG'.
This we can do by creating a list of all the companies in the dataframe `df_all_energy_monthly_yearly_returns_cap`.

In [None]:
# Distinct ISINs present in the Homework 1 dataframe, in order of first appearance
energy_isin = pd.unique(df_all_energy_monthly_yearly_returns_cap['ISIN']).tolist()
len(energy_isin)

We have 185 different companies, which is consistent with Homework 1. Now we want to extract the Scope 1-3 data only for the companies whose `ISIN` matches one of the elements in our `energy_isin` list.

In [None]:
# Keep only the rows whose ISIN is in `energy_isin`; .copy() gives independent
# frames so later edits do not touch the unfiltered data
transformed_scope1_nrg = transformed_scope1.loc[lambda frame: frame['ISIN'].isin(energy_isin)].copy()
transformed_scope2_nrg = transformed_scope2.loc[lambda frame: frame['ISIN'].isin(energy_isin)].copy()
transformed_scope3_nrg = transformed_scope3.loc[lambda frame: frame['ISIN'].isin(energy_isin)].copy()
transformed_scope1_nrg.head()

In [None]:
# Count of non-missing ISIN rows per year, for each scope
observation_scope1 = transformed_scope1_nrg.groupby(['year'])[['ISIN']].count()
observation_scope2 = transformed_scope2_nrg.groupby(['year'])[['ISIN']].count()
observation_scope3 = transformed_scope3_nrg.groupby(['year'])[['ISIN']].count()

# Prints True only when the three per-year count tables are identical
print(observation_scope1.equals(observation_scope2) and observation_scope2.equals(observation_scope3))

Great, we have the same number of observations for each year. That's promising.

Now we can merge the data into our dataset created in Homework 1

In [None]:
# Removing NAME column.
# Reassigning with errors='ignore' keeps the cell idempotent: re-running it after the
# column is already gone no longer raises a KeyError, unlike an in-place drop.
transformed_scope1_nrg = transformed_scope1_nrg.drop(columns=['NAME'], errors='ignore')
transformed_scope2_nrg = transformed_scope2_nrg.drop(columns=['NAME'], errors='ignore')
transformed_scope3_nrg = transformed_scope3_nrg.drop(columns=['NAME'], errors='ignore')

In [None]:
# Attach the three scope intensities to a copy of the Homework 1 frame.
# Left joins on (ISIN, year) keep every Homework 1 row, with NaN where no intensity matches.
df_nrg_intensity = (
    df_all_energy_monthly_yearly_returns_cap.copy()
    .merge(transformed_scope1_nrg, on=['ISIN', 'year'], how='left')
    .merge(transformed_scope2_nrg, on=['ISIN', 'year'], how='left')
    .merge(transformed_scope3_nrg, on=['ISIN', 'year'], how='left')
)
df_nrg_intensity.head()

### Q1

Report summary statistics (mean, median, min, max, standard deviation) on the cross-sectional distribution of your group’s variable of interest (i.e. environmental score for groups 1, 9 and 11; social score for groups 2 and 10; governance score for group 3; carbon intensity for groups 4 to 8). Draw the histogram of the cross-sectional distribution of the variable of interest and comment on the summary statistics and the histogram. (10 points)

In [None]:
# Histogram of the Scope 1 intensity column, drawn on an explicit Axes.
# NOTE(review): the frame name suggests one row per firm-month, so each yearly
# intensity may be counted several times here; confirm this is intended.
fig, ax = plt.subplots()
sns.histplot(data=df_nrg_intensity, x='Scope_1', ax=ax)
ax.set_title('Energy Companies Emission Intensity\nScope 1')
ax.set_xlabel('Tons Co2eq/ Million $')
plt.show()


In [None]:
# Histogram of the Scope 2 intensity column, drawn on an explicit Axes.
# NOTE(review): the frame name suggests one row per firm-month, so each yearly
# intensity may be counted several times here; confirm this is intended.
fig, ax = plt.subplots()
sns.histplot(data=df_nrg_intensity, x='Scope_2', ax=ax)
ax.set_title('Energy Companies Emission Intensity\nScope 2')
ax.set_xlabel('Tons Co2eq/ Million $')
plt.show()


In [None]:
# Histogram of the Scope 3 intensity column, drawn on an explicit Axes.
# NOTE(review): the frame name suggests one row per firm-month, so each yearly
# intensity may be counted several times here; confirm this is intended.
fig, ax = plt.subplots()
sns.histplot(data=df_nrg_intensity, x='Scope_3', ax=ax)
ax.set_title('Energy Companies Emission Intensity\nScope 3')
ax.set_xlabel('Tons Co2eq/ Million $')
plt.show()


### Q2

In Question 4 of Homework 1, you calculated efficient portfolios with various target returns. Take these portfolios, calculate and report the weighted-average E/S/G score or weighted-average carbon intensity of these portfolios (you can take the average score/carbon intensity for each firm over time). Comment on the E/S/G score or carbon intensity of the portfolios. Which firms (e.g. top 10; report firm names along with ISIN) are driving the E/S/G score down or driving the carbon intensity up? Plot on the volatility-E/S/G score (carbon intensity) space the various portfolios (i.e., make a plot similar to the efficient frontier except that E/S/G score or carbon intensity replaces the return on the y-axis). (15 points)



### Q3

This question is a follow-up of Question 7 of Homework 1. First, take the same 100 selected firms. Then, create a minimum variance portfolio with monthly rebalancing with an additional constraint: you exclude the worst firms in terms of E/S/G score/most polluting (high carbon intensity) firms. Specifically, exclude the bottom tercile of the distribution in month t − 1 for ESG scores or exclude the top tercile of the distribution in month t − 1 for the carbon intensity. Report summary statistics on the performance (return, risk, Sharpe ratio) of this portfolio as well as its E/S/G score or carbon intensity. How do the performance measures (return, risk, Sharpe ratio) compare with the minimum variance portfolio from Question 3 of Homework 1. (20 points)

### Q4 

For each month, sort firms based on your group’s variable of interest (E/S/G scores or carbon intensity) into quintiles. Create equally-weighted and value-weighted portfolios for each time period and each score or carbon intensity quintile. Report the average returns for each quintile portfolio as well as a portfolio that goes long in the highest quintile and short the lowest quintile. Comment on your results. What can explain the relationship between the return of your portfolios and firms’ ESG score or carbon emissions? (25 points)

### Q5

Take the minimum variance portfolio from Question 3 of Homework 1 and calculate its E/S/G score or carbon intensity. Reallocate its composition in order to improve the E/S/G score by 20%/reduce carbon intensity by 50% (see optimization problem below). Comment on the changes it took in order to improve the ESG score/carbon intensity (e.g., how many and which firms (firm names) had to be removed in the most recent year of your sample in order to achieve these objectives). (30 points)

$$\min_{\alpha} \quad \alpha' \Sigma \alpha$$
$$\text{s.t.} \quad \alpha' e = 1$$
$$\quad \quad \alpha' \text{CI} \leq 0.5 \times \left( \alpha' \text{CI} \right)_{\text{Q3-Homework1}}$$