# Sprint 6 - Predicting future environmental intensity

In this notebook, we will predict future environmental intensity for all the companies in the 'Excel data'. 

First, we will create the following columns:

1. **Industry Indicator**
   - 1 if above the industry average in 2020
   - 0 if at the industry average in 2020
   - -1 if below the industry average in 2020
2. **Environmental Intensity Growth**: ((Environmental Intensity in Current Year / Environmental Intensity in Previous Year) - 1) * 100

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import missingno as msno
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the company-level sample.
# NOTE(review): this is an absolute path on the author's machine; point it at
# your local copy of the CSV before running.
df = pd.read_csv('/Users/maralinetorres/Documents/GitHub/Predicting-Environmental-and-Social-Actions/Datasets/Final-Sample-External-with-ISINs.csv')
# Remove every space from the header names so columns can be referenced
# without embedded blanks (e.g. 'Company Name' -> 'CompanyName').
column_list = [column.replace(' ', '') for column in df.columns]
df.columns = column_list
print(f'The dataset has {df.shape[0]} rows and {df.shape[1]} columns')
df.head(3)

The dataset has 14515 rows and 34 columns


Unnamed: 0,ISIN,Year,CompanyName,Country,Industry(Exiobase),EnvironmentalIntensity(Sales),EnvironmentalIntensity(OpInc),TotalEnvironmentalCost,WorkingCapacity,FishProductionCapacity,...,SDG6,SDG12.2,SDG14.1,SDG14.2,SDG14.3,SDG14.c,SDG15.1,SDG15.2,SDG15.5,%Imputed
0,GB00BMX64W89,2019,Saga plc,United Kingdom,Activities auxiliary to financial intermediati...,-2.89%,-13.03%,-31842309,-31150754,-7184,...,-170776,-1059,-5,-1,-3585,-6,71,71,-1297,1%
1,MYL1818OO003,2019,BURSA MALAYSIA BHD,Malaysia,Activities auxiliary to financial intermediati...,-1.68%,-3.47%,-1968379,-1924910,-451,...,-11502,-168,-1,-1,-222,-2,10,10,-79,4%
2,GB0031638363,2019,INTERTEK GROUP PLC,United Kingdom,Activities auxiliary to financial intermediati...,-1.53%,-9.49%,-60599272,-59281663,-13774,...,-324960,-3804,-17,-4,-6861,-20,254,254,-2470,1%


In [3]:
# Keep only the fields used in the rest of the notebook. Columns are selected
# by name rather than by position so the cell still picks the right fields if
# the CSV column order changes, and so re-running the cell is harmless (a
# positional slice would trim the frame further on every re-execution).
# .copy() gives an independent frame that later cells can add columns to.
df = df[['Year', 'CompanyName', 'Country', 'Industry(Exiobase)', 'EnvironmentalIntensity(Sales)']].copy()
df.head()

Unnamed: 0,Year,CompanyName,Country,Industry(Exiobase),EnvironmentalIntensity(Sales)
0,2019,Saga plc,United Kingdom,Activities auxiliary to financial intermediati...,-2.89%
1,2019,BURSA MALAYSIA BHD,Malaysia,Activities auxiliary to financial intermediati...,-1.68%
2,2019,INTERTEK GROUP PLC,United Kingdom,Activities auxiliary to financial intermediati...,-1.53%
3,2019,JSE LIMITED,South Africa,Activities auxiliary to financial intermediati...,-1.46%
4,2019,BUREAU VERITAS SA,France,Activities auxiliary to financial intermediati...,-0.70%


In [4]:
def percent_to_float(s):
    """Convert a percentage such as ``'-2.89%'`` into a fraction (``-0.0289``).

    Parameters
    ----------
    s : str, float, int or None
        A percentage string, optionally padded with whitespace and optionally
        using ``,`` as a thousands separator (``'1,234.5%'``). A missing value
        (``None`` or a float NaN, which is how pandas represents an empty CSV
        cell) is passed through as NaN. Any other number is read as
        percentage points.

    Returns
    -------
    float
        The value divided by 100, or NaN for missing/blank input.

    Raises
    ------
    ValueError
        If the text left after removing ``%`` and ``,`` is not a number.
    """
    # Missing cell: None, or NaN (the only float that is not equal to itself).
    if s is None or (isinstance(s, float) and s != s):
        return float('nan')
    if isinstance(s, str):
        cleaned = s.strip().strip('%').replace(',', '').strip()
        if not cleaned:
            # A blank string carries no value; treat it like a missing cell.
            return float('nan')
        return float(cleaned) / 100.0
    # Already numeric: interpret as percentage points.
    return float(s) / 100.0

# Characters removed before parsing: accounting parentheses, spaces and
# thousands separators.
replace_dict = {'(':'',')':'', ' ' : '', ',' : ''}
def paranthesis_to_minus(value):
    """Parse an accounting-style integer string, e.g. ``'(1,234)'`` -> ``-1234``.

    A value wrapped in parentheses is treated as negative. A value without
    parentheses keeps its own sign, so ``'1,234'`` -> ``1234`` and
    ``'-56'`` -> ``-56``.

    Parameters
    ----------
    value : str
        Integer text that may contain parentheses, spaces and ``,`` separators.

    Returns
    -------
    int
        The parsed integer.

    Raises
    ------
    ValueError
        If the text left after removing the characters in ``replace_dict``
        is not an integer.
    """
    text = value.strip()
    # Decide the sign from the parentheses before they are stripped out.
    is_parenthesised = text.startswith('(') and text.endswith(')')
    for old, new in replace_dict.items():
        text = text.replace(old, new)
    number = int(text)
    # Parentheses mean "negative magnitude" regardless of any inner sign.
    return -abs(number) if is_parenthesised else number

# Turn the percentage strings into numeric fractions, element by element
# (e.g. '-2.89%' becomes -0.0289).
df['Env_intensity'] = df['EnvironmentalIntensity(Sales)'].map(percent_to_float)

In [5]:
# Preview the first rows to compare Env_intensity with the raw percentage strings.
df.head()

Unnamed: 0,Year,CompanyName,Country,Industry(Exiobase),EnvironmentalIntensity(Sales),Env_intensity
0,2019,Saga plc,United Kingdom,Activities auxiliary to financial intermediati...,-2.89%,-0.0289
1,2019,BURSA MALAYSIA BHD,Malaysia,Activities auxiliary to financial intermediati...,-1.68%,-0.0168
2,2019,INTERTEK GROUP PLC,United Kingdom,Activities auxiliary to financial intermediati...,-1.53%,-0.0153
3,2019,JSE LIMITED,South Africa,Activities auxiliary to financial intermediati...,-1.46%,-0.0146
4,2019,BUREAU VERITAS SA,France,Activities auxiliary to financial intermediati...,-0.70%,-0.007


### Creating industry indicator

In [6]:
# Lookup table: one row per industry with its mean intensity over all rows.
# NOTE(review): this pools every year, while the notebook intro describes the
# indicator relative to the 2020 industry average -- confirm which is intended.
industry_avg = (
    df[['Industry(Exiobase)', 'Env_intensity']]
    .groupby('Industry(Exiobase)')
    .mean()
    .reset_index()
)
# Same mean, broadcast back onto every row of its industry.
df['industry_avg'] = df.groupby('Industry(Exiobase)')['Env_intensity'].transform('mean')

In [7]:
def create_ind(df):
    """Compare one row's intensity with its industry average.

    Parameters
    ----------
    df : mapping-like (e.g. a DataFrame row)
        Must provide 'Env_intensity' and 'industry_avg'.

    Returns
    -------
    int or None
        1 if the intensity is above the average, 0 if equal, -1 if below.
        None when none of the comparisons holds (e.g. a NaN on either side).
    """
    intensity = df['Env_intensity']
    benchmark = df['industry_avg']
    if intensity < benchmark:
        return -1
    if intensity == benchmark:
        return 0
    if intensity > benchmark:
        return 1
    # No ordering could be established between the two values.
    return None

# Label every row against its industry's average: 1 above, 0 equal, -1 below.
df = df.assign(Industry_indicator=df.apply(create_ind, axis=1))
df.head()

Unnamed: 0,Year,CompanyName,Country,Industry(Exiobase),EnvironmentalIntensity(Sales),Env_intensity,industry_avg,Industry_indicator
0,2019,Saga plc,United Kingdom,Activities auxiliary to financial intermediati...,-2.89%,-0.0289,-0.004417,-1
1,2019,BURSA MALAYSIA BHD,Malaysia,Activities auxiliary to financial intermediati...,-1.68%,-0.0168,-0.004417,-1
2,2019,INTERTEK GROUP PLC,United Kingdom,Activities auxiliary to financial intermediati...,-1.53%,-0.0153,-0.004417,-1
3,2019,JSE LIMITED,South Africa,Activities auxiliary to financial intermediati...,-1.46%,-0.0146,-0.004417,-1
4,2019,BUREAU VERITAS SA,France,Activities auxiliary to financial intermediati...,-0.70%,-0.007,-0.004417,-1


In [8]:
def create_ind_year(df):
    """Compare one row's intensity with its industry's average for that year.

    Parameters
    ----------
    df : mapping-like (e.g. a DataFrame row)
        Must provide 'Env_intensity' and 'industry_avg_year'.

    Returns
    -------
    int or None
        1 if the intensity is above the average, 0 if equal, -1 if below.
        None when none of the comparisons holds (e.g. a NaN on either side).
    """
    intensity = df['Env_intensity']
    yearly_benchmark = df['industry_avg_year']
    if intensity < yearly_benchmark:
        return -1
    if intensity == yearly_benchmark:
        return 0
    if intensity > yearly_benchmark:
        return 1
    # No ordering could be established between the two values.
    return None

# Mean intensity of each (industry, year) group, broadcast back to every row.
# The column is selected *before* transform so that only the numeric intensity
# is averaged (text columns such as CompanyName cannot be averaged and make
# recent pandas versions raise), and so the result is a Series aligned with
# df's index rather than a one-column DataFrame.
df['industry_avg_year'] = df.groupby(['Industry(Exiobase)', 'Year'])['Env_intensity'].transform('mean')

# Label every row against its industry-year average: 1 above, 0 equal, -1 below.
df['Industry_indicator_year'] = df.apply(create_ind_year, axis = 1)
df.head()

Unnamed: 0,Year,CompanyName,Country,Industry(Exiobase),EnvironmentalIntensity(Sales),Env_intensity,industry_avg,Industry_indicator,industry_avg_year,Industry_indicator_year
0,2019,Saga plc,United Kingdom,Activities auxiliary to financial intermediati...,-2.89%,-0.0289,-0.004417,-1,0.002943,-1
1,2019,BURSA MALAYSIA BHD,Malaysia,Activities auxiliary to financial intermediati...,-1.68%,-0.0168,-0.004417,-1,0.002943,-1
2,2019,INTERTEK GROUP PLC,United Kingdom,Activities auxiliary to financial intermediati...,-1.53%,-0.0153,-0.004417,-1,0.002943,-1
3,2019,JSE LIMITED,South Africa,Activities auxiliary to financial intermediati...,-1.46%,-0.0146,-0.004417,-1,0.002943,-1
4,2019,BUREAU VERITAS SA,France,Activities auxiliary to financial intermediati...,-0.70%,-0.007,-0.004417,-1,0.002943,-1


In [9]:
# Inspect a single industry in chronological order to sanity-check both indicators.
df[df['Industry(Exiobase)'] == 'Activities auxiliary to financial intermediation (67)'].sort_values('Year')

Unnamed: 0,Year,CompanyName,Country,Industry(Exiobase),EnvironmentalIntensity(Sales),Env_intensity,industry_avg,Industry_indicator,industry_avg_year,Industry_indicator_year
5153,2016,BALOISE HOLDING AG,Switzerland,Activities auxiliary to financial intermediati...,-0.05%,-0.0005,-0.004417,1,-0.007880,1
5123,2016,SAGA PLC,United Kingdom,Activities auxiliary to financial intermediati...,-2.82%,-0.0282,-0.004417,-1,-0.007880,-1
5124,2016,BURSA MALAYSIA BHD,Malaysia,Activities auxiliary to financial intermediati...,-2.35%,-0.0235,-0.004417,-1,-0.007880,-1
5125,2016,INTERTEK GROUP PLC,United Kingdom,Activities auxiliary to financial intermediati...,-1.96%,-0.0196,-0.004417,-1,-0.007880,-1
5126,2016,APPLUS SERVICES SA,Spain,Activities auxiliary to financial intermediati...,-1.71%,-0.0171,-0.004417,-1,-0.007880,-1
...,...,...,...,...,...,...,...,...,...,...
18,2019,AXA SA,France,Activities auxiliary to financial intermediati...,-0.02%,-0.0002,-0.004417,1,0.002943,-1
19,2019,DEUTSCHE BOERSE AG,Germany,Activities auxiliary to financial intermediati...,1.88%,0.0188,-0.004417,1,0.002943,1
20,2019,CAPITA PLC,United Kingdom,Activities auxiliary to financial intermediati...,15.96%,0.1596,-0.004417,1,0.002943,1
10,2019,EXPERIAN PLC,United Kingdom,Activities auxiliary to financial intermediati...,-0.20%,-0.0020,-0.004417,1,0.002943,-1


### Creating Environmental growth

Environmental Intensity Growth = ((Environmental Intensity in Current Year / Environmental Intensity in Previous Year) - 1) * 100, computed separately for each company.

In [10]:
# Order rows by company and, within each company, by year (both ascending) so
# that consecutive rows of a company are consecutive observations in time.
df = df.sort_values(['CompanyName', 'Year'])
df.head()

Unnamed: 0,Year,CompanyName,Country,Industry(Exiobase),EnvironmentalIntensity(Sales),Env_intensity,industry_avg,Industry_indicator,industry_avg_year,Industry_indicator_year
6369,2016,1&1 DRILLISCH AG,Germany,Post and telecommunications (64),-0.07%,-0.0007,-0.018382,1,-0.01164,1
13777,2010,3I GROUP PLC,United Kingdom,"Financial intermediation, except insurance an...",-0.12%,-0.0012,-0.020072,1,-0.006402,1
12690,2011,3I GROUP PLC,United Kingdom,"Financial intermediation, except insurance an...",-0.16%,-0.0016,-0.020072,1,-0.009838,1
11504,2012,3I GROUP PLC,United Kingdom,"Financial intermediation, except insurance an...",-0.15%,-0.0015,-0.020072,1,-0.024437,1
13501,2010,3M COMPANY,United States,Activities of membership organisation n.e.c. ...,-7.90%,-0.079,-0.117561,1,-0.084583,1


In [11]:
# Percentage change of each company's intensity versus its previous observation:
# (current / previous - 1) * 100. The first observation of a company has no
# predecessor and is left as NaN.
# The frame is sorted by company and year right here so the result does not
# depend on an earlier cell having been run, and the growth values are written
# back through index alignment instead of by position, so every value lands on
# the row it was computed from.
# NOTE(review): "previous observation" is the previous available row for the
# company; if a company has gaps between years the change spans the gap.
df['Environmental_Growth'] = (
    df.sort_values(['CompanyName', 'Year'])
    .groupby('CompanyName')['Env_intensity']
    .pct_change()
    * 100
)
df.head()

Unnamed: 0,Year,CompanyName,Country,Industry(Exiobase),EnvironmentalIntensity(Sales),Env_intensity,industry_avg,Industry_indicator,industry_avg_year,Industry_indicator_year,Environmental_Growth
6369,2016,1&1 DRILLISCH AG,Germany,Post and telecommunications (64),-0.07%,-0.0007,-0.018382,1,-0.01164,1,
13777,2010,3I GROUP PLC,United Kingdom,"Financial intermediation, except insurance an...",-0.12%,-0.0012,-0.020072,1,-0.006402,1,
12690,2011,3I GROUP PLC,United Kingdom,"Financial intermediation, except insurance an...",-0.16%,-0.0016,-0.020072,1,-0.009838,1,33.333333
11504,2012,3I GROUP PLC,United Kingdom,"Financial intermediation, except insurance an...",-0.15%,-0.0015,-0.020072,1,-0.024437,1,-6.25
13501,2010,3M COMPANY,United States,Activities of membership organisation n.e.c. ...,-7.90%,-0.079,-0.117561,1,-0.084583,1,


In [12]:
# Write the frame to a CSV in the current working directory; index=False leaves the row index out of the file.
df.to_csv('Environmental_Impact_dataset.csv', index=False)