### Imports

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from numpy import mean
from numpy import std
import statsmodels.api as sm
import math
%matplotlib inline

In [2]:
df_original = pd.read_csv("predicting_carbon_emissions.csv")
df = df_original.copy()

### Cleaning

In [3]:
pd.set_option('display.max_columns', 999)

In [4]:
#change column names to not have brackets
df.columns = df.columns.str.strip().str.replace('(', '').str.replace(')', '')

In [5]:
#drop columns
columns_to_drop = ['Ticker', 'ISIN','Account ID']
df.drop(columns_to_drop, axis=1, inplace=True)

#### Creating Reported Scope Columns (can be changed into classes)

In [6]:
#change values in Reported Scope 1 column to not have , or -, then change to floats. replace null values with 0
df["Reported Scope 1 tCO2e"] = df["Reported Scope 1 tCO2e"].str.strip().str.replace(',', '').str.replace('-','0').astype(float)
df["Reported Scope 1 tCO2e"].fillna(0,inplace=True)

In [7]:
#change values in Estimated Scope 1 column to not have , or -, then change to floats. replace null values with 0
df["Estimated Scope 1 tCO2e"] = df["Estimated Scope 1 tCO2e"].astype(str).str.strip().str.replace(',', '').str.replace('-','0').astype(float)
df["Estimated Scope 1 tCO2e"].fillna(0,inplace=True)

In [8]:
#create column for scope
df['Scope 1 tCO2e'] = df["Reported Scope 1 tCO2e"]+df["Estimated Scope 1 tCO2e"]

In [9]:
#change values in Reported Scope 1 column to not have , or -, then change to floats. replace null values with 0
df["Reported location-based Scope 2 tCO2e"] = df["Reported location-based Scope 2 tCO2e"].str.strip().str.replace(',', '').str.replace('-','0').astype(float)
df["Reported location-based Scope 2 tCO2e"].fillna(0,inplace=True)

In [10]:
#change values in Estimated Scope 1 column to not have , or -, then change to floats. replace null values with 0
df["Estimated location-based Scope 2 tCO2e"] = df["Estimated location-based Scope 2 tCO2e"].astype(str).str.strip().str.replace(',', '').str.replace('-','0').astype(float)
df["Estimated location-based Scope 2 tCO2e"].fillna(0,inplace=True)

In [11]:
#create column for scope
df['Scope 2 tCO2e'] = df["Reported location-based Scope 2 tCO2e"]+df["Estimated location-based Scope 2 tCO2e"]

In [12]:
df['Scope 1 and 2 tCO2e'] = df["Scope 1 tCO2e"]+df["Scope 2 tCO2e"]

In [13]:
df.head(2)

Unnamed: 0,Company Name,Listed,HQ Country,GICS Sector,Accounting Year End,Reported Scope 1 tCO2e,Estimated Scope 1 tCO2e,Scope 1 source,Scope 1 quality flag,Reported market-based Scope 2 tCO2e,Note,Reported location-based Scope 2 tCO2e,Estimated location-based Scope 2 tCO2e,Location-based Scope 2 source,Location-based Scope 2 quality flag,Scope 2 company comments,GHG revenue intensity tCO2e/mmUSD,Scope 2 figure used for intensity,amplification.wordsindex,DeAmplification.wordsindex,HE_negativeindex,HE_positiveindex,angerindex,anticipationindex,disgustindex,fearindex,joyindex,sadnessindex,surpriseindex,trustindex,LMnegativeindex,LMpositiveindex,LMuncertaintyindex,BasicNegativeWordsindex,PositivePolarisedindex,NegativePolarisedindex,PowerWordsindex,PrudentialNegativeindex,PrudentialPositiveindex,PrudentialTopNegativeindex,Strategyindex,Financial.termsindex,Regulationindex,Technologyindex,Healthcareindex,Environmentindex,Socialindex,Governanceindex,ESGindex,LMuncertaintyindex.1,Regulationindex.1,Environmentindex.1,Socialindex.1,Governanceindex.1,ESGindex.1,ISIN lower,2016vol,2017vol,2018vol,2019vol,VolFactor,Carbon Emission tCO2e/mmUSD,Scope 1 tCO2e,Scope 2 tCO2e,Scope 1 and 2 tCO2e
0,Agencia de Proteccion Ambiental,Yes,,,29/09/2016,0.65,0.0,Reported: Company Filings,,,,0.0,0.0,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,,,,,,0.0,539.281077,0.65,0.0,0.65
1,Sociedad Comercial del Plata SA,Yes,Argentina,Energy,31/12/2016,0.0,1400.0,Estimated: Gamma GLM,2.0,,,0.0,3000.0,Estimated: IEA National Grid Emissions Factor ...,3.0,,26.03,Location-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,arp290071462,0.023033,0.023079,0.033899,0.0217,0.178446,26.030882,1400.0,3000.0,4400.0


#### Creating Dummy Columns with Binary of GICS Sectors

In [14]:
df = pd.concat([df,pd.get_dummies(df['GICS Sector'])], axis=1)
df.head(2)

Unnamed: 0,Company Name,Listed,HQ Country,GICS Sector,Accounting Year End,Reported Scope 1 tCO2e,Estimated Scope 1 tCO2e,Scope 1 source,Scope 1 quality flag,Reported market-based Scope 2 tCO2e,Note,Reported location-based Scope 2 tCO2e,Estimated location-based Scope 2 tCO2e,Location-based Scope 2 source,Location-based Scope 2 quality flag,Scope 2 company comments,GHG revenue intensity tCO2e/mmUSD,Scope 2 figure used for intensity,amplification.wordsindex,DeAmplification.wordsindex,HE_negativeindex,HE_positiveindex,angerindex,anticipationindex,disgustindex,fearindex,joyindex,sadnessindex,surpriseindex,trustindex,LMnegativeindex,LMpositiveindex,LMuncertaintyindex,BasicNegativeWordsindex,PositivePolarisedindex,NegativePolarisedindex,PowerWordsindex,PrudentialNegativeindex,PrudentialPositiveindex,PrudentialTopNegativeindex,Strategyindex,Financial.termsindex,Regulationindex,Technologyindex,Healthcareindex,Environmentindex,Socialindex,Governanceindex,ESGindex,LMuncertaintyindex.1,Regulationindex.1,Environmentindex.1,Socialindex.1,Governanceindex.1,ESGindex.1,ISIN lower,2016vol,2017vol,2018vol,2019vol,VolFactor,Carbon Emission tCO2e/mmUSD,Scope 1 tCO2e,Scope 2 tCO2e,Scope 1 and 2 tCO2e,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology,Materials,Real Estate,Telecommunication Services,Utilities
0,Agencia de Proteccion Ambiental,Yes,,,29/09/2016,0.65,0.0,Reported: Company Filings,,,,0.0,0.0,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,,,,,,0.0,539.281077,0.65,0.0,0.65,0,0,0,0,0,0,0,0,0,0,0
1,Sociedad Comercial del Plata SA,Yes,Argentina,Energy,31/12/2016,0.0,1400.0,Estimated: Gamma GLM,2.0,,,0.0,3000.0,Estimated: IEA National Grid Emissions Factor ...,3.0,,26.03,Location-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,arp290071462,0.023033,0.023079,0.033899,0.0217,0.178446,26.030882,1400.0,3000.0,4400.0,0,0,1,0,0,0,0,0,0,0,0


#### Removing Agencia de Proteccion Ambiental due to Nulls

In [15]:
df.drop(df.index[0],inplace=True)

In [19]:
df.head(2)

Unnamed: 0,Company Name,Listed,HQ Country,GICS Sector,Accounting Year End,Reported Scope 1 tCO2e,Estimated Scope 1 tCO2e,Scope 1 source,Scope 1 quality flag,Reported market-based Scope 2 tCO2e,Note,Reported location-based Scope 2 tCO2e,Estimated location-based Scope 2 tCO2e,Location-based Scope 2 source,Location-based Scope 2 quality flag,Scope 2 company comments,GHG revenue intensity tCO2e/mmUSD,Scope 2 figure used for intensity,amplification.wordsindex,DeAmplification.wordsindex,HE_negativeindex,HE_positiveindex,angerindex,anticipationindex,disgustindex,fearindex,joyindex,sadnessindex,surpriseindex,trustindex,LMnegativeindex,LMpositiveindex,LMuncertaintyindex,BasicNegativeWordsindex,PositivePolarisedindex,NegativePolarisedindex,PowerWordsindex,PrudentialNegativeindex,PrudentialPositiveindex,PrudentialTopNegativeindex,Strategyindex,Financial.termsindex,Regulationindex,Technologyindex,Healthcareindex,Environmentindex,Socialindex,Governanceindex,ESGindex,LMuncertaintyindex.1,Regulationindex.1,Environmentindex.1,Socialindex.1,Governanceindex.1,ESGindex.1,ISIN lower,2016vol,2017vol,2018vol,2019vol,VolFactor,Carbon Emission tCO2e/mmUSD,Scope 1 tCO2e,Scope 2 tCO2e,Scope 1 and 2 tCO2e,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology,Materials,Real Estate,Telecommunication Services,Utilities
1,Sociedad Comercial del Plata SA,Yes,Argentina,Energy,31/12/2016,0.0,1400.0,Estimated: Gamma GLM,2.0,,,0.0,3000.0,Estimated: IEA National Grid Emissions Factor ...,3.0,,26.03,Location-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,arp290071462,0.023033,0.023079,0.033899,0.0217,0.178446,26.030882,1400.0,3000.0,4400.0,0,0,1,0,0,0,0,0,0,0,0
2,Banco de Galicia y Buenos Aires S.A.,Yes,Argentina,Financials,31/12/2016,933.37,0.0,Reported: CDP (Not Reviewed),,,,27626.0,0.0,Reported: CDP (Not Reviewed),,,7.97,Location-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,,,,,,0.0,7.968841,933.37,27626.0,28559.37,0,0,0,1,0,0,0,0,0,0,0


In [32]:
Nans = df[df["Scope 2 figure used for intensity"]!="Location-based"]
Nans_2 = Nans[Nans["Scope 2 figure used for intensity"]!="Market-based"]
len(Nans_2)

46

In [43]:
len(df)

3986

In [44]:
df = df.dropna(subset=['Scope 2 figure used for intensity'], how='any')

In [45]:
len(df)

3940

In [50]:
df.to_csv("predicting_carbon_emissions_cleaned.csv")

In [49]:
df.head(30)

Unnamed: 0,Company Name,Listed,HQ Country,GICS Sector,Accounting Year End,Reported Scope 1 tCO2e,Estimated Scope 1 tCO2e,Scope 1 source,Scope 1 quality flag,Reported market-based Scope 2 tCO2e,Note,Reported location-based Scope 2 tCO2e,Estimated location-based Scope 2 tCO2e,Location-based Scope 2 source,Location-based Scope 2 quality flag,Scope 2 company comments,GHG revenue intensity tCO2e/mmUSD,Scope 2 figure used for intensity,amplification.wordsindex,DeAmplification.wordsindex,HE_negativeindex,HE_positiveindex,angerindex,anticipationindex,disgustindex,fearindex,joyindex,sadnessindex,surpriseindex,trustindex,LMnegativeindex,LMpositiveindex,LMuncertaintyindex,BasicNegativeWordsindex,PositivePolarisedindex,NegativePolarisedindex,PowerWordsindex,PrudentialNegativeindex,PrudentialPositiveindex,PrudentialTopNegativeindex,Strategyindex,Financial.termsindex,Regulationindex,Technologyindex,Healthcareindex,Environmentindex,Socialindex,Governanceindex,ESGindex,LMuncertaintyindex.1,Regulationindex.1,Environmentindex.1,Socialindex.1,Governanceindex.1,ESGindex.1,ISIN lower,2016vol,2017vol,2018vol,2019vol,VolFactor,Carbon Emission tCO2e/mmUSD,Scope 1 tCO2e,Scope 2 tCO2e,Scope 1 and 2 tCO2e,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology,Materials,Real Estate,Telecommunication Services,Utilities
1,Sociedad Comercial del Plata SA,Yes,Argentina,Energy,31/12/2016,0.0,1400.0,Estimated: Gamma GLM,2.0,,,0.0,3000.0,Estimated: IEA National Grid Emissions Factor ...,3.0,,26.03,Location-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,arp290071462,0.023033,0.023079,0.033899,0.0217,0.178446,26.030882,1400.0,3000.0,4400.0,0,0,1,0,0,0,0,0,0,0,0
2,Banco de Galicia y Buenos Aires S.A.,Yes,Argentina,Financials,31/12/2016,933.37,0.0,Reported: CDP (Not Reviewed),,,,27626.0,0.0,Reported: CDP (Not Reviewed),,,7.97,Location-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,,,,,,0.0,7.968841,933.37,27626.0,28559.37,0,0,0,1,0,0,0,0,0,0,0
3,YPF SA,Yes,Argentina,Energy,31/12/2016,0.0,12000000.0,"Estimated: Oil & Gas Extraction, O&G Refining",6.0,,,0.0,1200000.0,"Estimated: Oil & Gas Extraction, O&G Refining",6.0,,926.11,Location-based,48,84,165,110,102,123,221,87,69,86,102,72,89,62,75,98,73,98,94,55,89,170,0,98,111,14,0,87,26,130,90,0.23,0.12,0.23,0.07,0.13,0.27,arp9897x1319,0.024063,0.021263,0.03065,0.029235,0.064662,926.11172,12000000.0,1200000.0,13200000.0,0,0,1,0,0,0,0,0,0,0,0
4,Australia Post,Yes,Australia,Industrials,30/06/2016,119338.0,0.0,Reported: CDP (Not Reviewed),,,,169447.0,0.0,Reported: CDP (Not Reviewed),,Total scope 2 emissions have been calculated u...,59.91,Location-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,0,,,,,0.0,59.908367,119338.0,169447.0,288785.0,0,0,0,0,0,1,0,0,0,0,0
5,AGL Energy,Yes,Australia,Utilities,30/06/2016,43258798.0,0.0,Reported: CDP (Reviewed),7.0,526294.7,,526700.0,0.0,Reported: CDP (Reviewed),7.0,Market-based scope 2 emissions were calculated...,5390.04,Market-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,au000000agl7,0.013876,0.012527,0.013894,0.013356,0.492732,5390.042347,43258798.0,526700.0,43785498.0,0,0,0,0,0,0,0,0,0,0,1
6,Aristocrat Leisure,Yes,Australia,Consumer Discretionary,29/09/2016,0.0,21000.0,Estimated: Gamma GLM,3.0,,,0.0,54000.0,Estimated: IEA National Grid Emissions Factor ...,2.0,,47.82,Location-based,114,35,20,143,135,98,76,99,112,64,89,94,73,119,70,71,113,71,87,86,94,0,316,99,75,266,0,65,1375,70,451,0.26,0.24,0.0,0.15,0.01,0.14,au000000all7,0.019772,0.013992,0.018362,0.019044,0.254386,47.820958,21000.0,54000.0,75000.0,1,0,0,0,0,0,0,0,0,0,0
7,ALS,Yes,Australia,Industrials,30/03/2017,0.0,12000.0,Estimated: Gamma GLM,4.0,,,0.0,3700.0,Estimated: IEA National Grid Emissions Factor ...,5.0,,16.39,Location-based,123,190,71,78,115,78,145,87,83,64,86,79,106,93,113,84,103,84,88,98,153,364,31,90,36,10,75,186,0,40,80,0.08,0.19,0.25,0.21,0.15,0.11,,,,,,0.0,16.392587,12000.0,3700.0,15700.0,0,0,0,0,0,1,0,0,0,0,0
8,Amcor,Yes,Australia,Materials,30/06/2016,357318.0,0.0,Reported: CDP (Reviewed),7.0,1221815.0,,1222218.0,0.0,Reported: CDP (Reviewed),7.0,Amcor is currently purchasing green electricit...,167.61,Market-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,au000000amc4,0.017259,0.010541,0.011098,0.008158,0.659649,167.613068,357318.0,1222218.0,1579536.0,0,0,0,0,0,0,0,1,0,0,0
9,AMP,Yes,Australia,Financials,31/12/2016,69.0,0.0,Reported: CDP (Reviewed),7.0,,,11789.0,0.0,Reported: CDP (Reviewed),7.0,,1.08,Location-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,au000000amp6,0.01741,0.011279,0.023377,0.026657,0.085965,1.078998,69.0,11789.0,11858.0,0,0,0,1,0,0,0,0,0,0,0
10,Ansell,Yes,Australia,Health Care,01/06/2017,141634.0,0.0,Reported: CDP (Not Reviewed),,111773.0,,111512.0,0.0,Reported: CDP (Not Reviewed),,,184.36,Market-based,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.32,0.32,0.25,0.28,0.31,0.32,,,,,,0.0,184.363041,141634.0,111512.0,253146.0,0,0,0,0,1,0,0,0,0,0,0


In [None]:
#drop columns
columns_to_drop = ['Reported Scope 1 tCO2e', 'Estimated Scope 1 tCO2e','Account ID']
df.drop(columns_to_drop, axis=1, inplace=True)

In [308]:
df.groupby(["GICS Sector"]).mean()
# df.columns = [col[0] for col in df]
# df.reset_index(inplace=True)
# df

Unnamed: 0_level_0,Reported Scope 1 tCO2e,Estimated Scope 1 tCO2e,Scope 1 quality flag,Location-based Scope 2 quality flag,GHG revenue intensity tCO2e/mmUSD,amplification.wordsindex,DeAmplification.wordsindex,HE_negativeindex,HE_positiveindex,angerindex,anticipationindex,disgustindex,fearindex,joyindex,sadnessindex,surpriseindex,trustindex,LMnegativeindex,LMpositiveindex,LMuncertaintyindex,BasicNegativeWordsindex,PositivePolarisedindex,NegativePolarisedindex,PowerWordsindex,PrudentialNegativeindex,PrudentialPositiveindex,Financial.termsindex,Regulationindex,Technologyindex,Governanceindex,ESGindex,LMuncertaintyindex.1,Regulationindex.1,Environmentindex.1,Socialindex.1,Governanceindex.1,ESGindex.1,2016vol,2017vol,2018vol,2019vol,VolFactor,Carbon Emission tCO2e/mmUSD,Scope 1 tCO2e
GICS Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
Consumer Discretionary,126671.0,33328.71,5.278689,5.423497,159.973818,29.209651,28.831947,26.249584,30.905158,25.933444,30.143095,27.442596,25.036606,30.53411,27.935108,30.18802,27.833611,26.396007,31.31614,24.417637,25.279534,30.55574,25.279534,27.219634,22.923461,27.8802,26.259567,24.585691,31.81198,20.883527,17.85025,0.282729,0.279551,0.225058,0.251448,0.2802,0.27995,0.021697,0.0167,0.021436,0.021313,0.169823,165.653798,159999.7
Consumer Staples,395150.8,96521.25,5.82684,5.822511,131.156504,28.339943,34.382436,31.512748,33.665722,25.215297,29.623229,28.084986,26.011331,32.220963,28.535411,29.730878,28.311615,26.433428,32.98017,23.76204,26.206799,31.33711,26.206799,29.0,22.385269,28.767705,27.461756,24.98017,16.263456,18.172805,23.050992,0.284221,0.277592,0.220907,0.252748,0.273286,0.281586,0.018344,0.014756,0.018089,0.018006,0.226121,135.781283,491672.1
Energy,3741973.0,5297289.0,6.012245,6.016327,1371.028621,35.337121,33.261364,38.75,34.155303,46.07197,40.840909,47.609848,38.655303,38.291667,36.253788,35.075758,37.768939,36.306818,35.871212,38.208333,38.193182,37.731061,38.193182,37.643939,33.67803,40.333333,39.518939,26.80303,21.80303,32.401515,54.564394,0.253636,0.256136,0.217841,0.234053,0.237045,0.25928,0.033453,0.021789,0.02525,0.025582,0.178217,1361.57702,9039262.0
Financials,8679.879,141646.3,5.899761,5.904077,18.74467,37.629237,41.069915,47.233051,41.53178,34.461864,38.684322,31.349576,40.03178,37.815678,42.364407,38.294492,42.625,42.870763,38.408898,41.09322,40.65678,38.082627,40.65678,40.275424,58.307203,28.021186,46.779661,59.555085,33.449153,49.341102,33.722458,0.243517,0.249216,0.209767,0.216589,0.252076,0.216356,0.0201,0.015187,0.016814,0.015657,0.354114,22.05336,150326.1
Health Care,63945.95,32816.07,5.805882,5.841176,191.4775,42.881517,40.772512,32.952607,43.454976,37.966825,41.436019,44.748815,42.7109,39.052133,43.630332,42.099526,40.729858,40.151659,42.327014,41.682464,36.393365,44.07109,36.393365,41.042654,31.35545,45.815166,35.374408,41.545024,38.454976,39.725118,41.654028,0.245498,0.262796,0.197109,0.222038,0.246493,0.269384,0.019774,0.015579,0.019308,0.018165,0.240341,196.422317,96762.03
Industrials,1233384.0,496525.8,5.792079,5.76808,274.62627,26.450644,33.048641,27.844063,31.333333,29.653791,28.855508,27.653791,27.959943,28.426323,29.755365,29.246066,28.509299,27.188841,29.334764,25.266094,26.519313,29.080114,26.519313,29.350501,24.871245,27.616595,27.909871,25.100143,22.905579,19.493562,28.573677,0.279127,0.275637,0.223104,0.252389,0.268011,0.276109,0.020997,0.016315,0.020054,0.019444,0.205954,278.412743,1729909.0
Information Technology,125088.3,12642.43,5.906667,5.986667,83.22995,33.538084,31.678133,27.911548,36.683047,33.17199,34.624079,29.965602,30.304668,34.609337,30.717445,33.751843,33.773956,31.186732,35.058968,32.29484,30.80344,35.479115,30.80344,34.658477,24.872236,34.130221,29.68059,28.051597,86.766585,25.437346,25.240786,0.265799,0.273268,0.220344,0.242924,0.269951,0.280221,0.020679,0.016273,0.021076,0.021002,0.164131,93.314674,137730.7
Materials,3240336.0,1948045.0,6.00597,6.017964,1189.339353,27.305732,30.254777,28.110403,28.537155,31.713376,30.021231,26.082803,28.142251,27.121019,26.40552,28.893843,28.193206,29.484076,27.751592,27.953291,29.078556,28.609342,29.078556,31.152866,21.687898,39.169851,27.205945,21.515924,12.997877,18.269639,34.464968,0.271019,0.267346,0.220064,0.250446,0.257219,0.284098,0.025317,0.018124,0.021163,0.020543,0.189581,1179.678055,5188381.0
Real Estate,104465.2,15659.06,5.593333,5.503311,137.834427,29.359375,28.130208,23.854167,26.088542,24.385417,28.411458,27.583333,25.307292,30.005208,25.447917,29.046875,27.729167,23.791667,26.052083,30.5,28.369792,26.963542,28.369792,26.234375,29.583333,25.036458,27.677083,19.583333,11.760417,22.885417,18.307292,0.265417,0.272031,0.22526,0.246563,0.27724,0.265417,0.01765,0.012676,0.015073,0.013605,0.354053,137.834535,120124.3
Telecommunication Services,58807.99,15132.83,6.119048,6.012048,63.160667,35.886792,30.726415,37.075472,40.018868,39.443396,45.084906,26.339623,36.95283,35.575472,28.792453,39.858491,38.849057,41.264151,40.0,35.188679,39.858491,41.311321,39.858491,39.603774,28.849057,79.603774,34.424528,36.584906,111.216981,43.622642,29.849057,0.268774,0.250849,0.216981,0.215472,0.254811,0.272547,0.019734,0.01589,0.019334,0.017905,0.291852,67.652725,73940.82


In [317]:
fig = px.bar(df,                                 
             x = 'GICS Sector',                 
             y = 'Carbon Emission  tCO2e/mmUSD',
             hover_data = ""
             )
fig.show()

In [311]:
x1 = df['Scope 1 tCO2e']
y1 = df['Carbon Emission  tCO2e/mmUSD']
X = sm.add_constant(x1)
model_sm = sm.OLS(y1,X)
results = model_sm.fit()
results.summary()

0,1,2,3
Dep. Variable:,Carbon Emission tCO2e/mmUSD,R-squared:,0.316
Model:,OLS,Adj. R-squared:,0.316
Method:,Least Squares,F-statistic:,1842.0
Date:,"Tue, 11 Feb 2020",Prob (F-statistic):,0.0
Time:,14:56:40,Log-Likelihood:,-35448.0
No. Observations:,3987,AIC:,70900.0
Df Residuals:,3985,BIC:,70910.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,316.7207,28.328,11.181,0.000,261.182,372.259
Scope 1 tCO2e,7.854e-05,1.83e-06,42.914,0.000,7.5e-05,8.21e-05

0,1,2,3
Omnibus:,6593.133,Durbin-Watson:,1.955
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8265145.709
Skew:,10.74,Prob(JB):,0.0
Kurtosis:,225.016,Cond. No.,15700000.0
