In [None]:
# standard libs
import json
import os
import re
import zipfile
from functools import reduce

# third-party libs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.cluster import KMeans

# plotting libs
import seaborn as sns

# Corporation Participation as KPI

1. Industry Breakdown (for participating Corporations)
Over half of the corporations that participated in the CDP survey in 2020 belong to the manufacturing and services industries. Considering the abundance of consumer products, these corporations should be sampled frequently for climate change awareness.

In [None]:
## Load the CDP survey exports used in this notebook.
# Root of the dataset bundle; every file path below is derived from it so the
# location only has to be changed in one place.
DATA_DIR = '../input/cdp-unlocking-climate-solutions'
CORP_RESPONSES_DIR = DATA_DIR + '/Corporations/Corporations Responses'
CORP_DISCLOSING_DIR = DATA_DIR + '/Corporations/Corporations Disclosing/Climate Change'

## full questionnaire responses (corporations 2019-2020, cities 2020)
corporate_2020_climate_change = pd.read_csv(CORP_RESPONSES_DIR + '/Climate Change/2020_Full_Climate_Change_Dataset.csv')
corporate_2019_climate_change = pd.read_csv(CORP_RESPONSES_DIR + '/Climate Change/2019_Full_Climate_Change_Dataset.csv')
corporate_2020_water = pd.read_csv(CORP_RESPONSES_DIR + '/Water Security/2020_Full_Water_Security_Dataset.csv')
city_2020_responses = pd.read_csv(DATA_DIR + '/Cities/Cities Responses/2020_Full_Cities_Dataset.csv')

## lists of corporations disclosing on climate change, one file per survey year
corp_2020_disclosure = pd.read_csv(CORP_DISCLOSING_DIR + '/2020_Corporates_Disclosing_to_CDP_Climate_Change.csv')
corp_2019_disclosure = pd.read_csv(CORP_DISCLOSING_DIR + '/2019_Corporates_Disclosing_to_CDP_Climate_Change.csv')
corp_2018_disclosure = pd.read_csv(CORP_DISCLOSING_DIR + '/2018_Corporates_Disclosing_to_CDP_Climate_Change.csv')


In [None]:
## Pie chart: share of 2020 climate-change disclosers per primary industry.
# Count once and reuse the result for both the wedges and the legend entries.
industry_counts = corp_2020_disclosure['primary_industry'].value_counts()

industry_counts.plot.pie(
    labels=None,  # wedges are identified through the legend instead
    autopct='%.2f%%',
    pctdistance=0.7,
    figsize=(6,6),
)
plt.title(
    "Corporations Disclosing Climate Change Surveys by Industry",
    ha='left',
    fontsize=17,
)
plt.legend(
    labels=industry_counts.index,
    bbox_to_anchor=(1, 0.25, 0.5, 0.5),
    loc="best",
)
plt.show()

2. Corporation awareness of climate change risks and opportunities over time: What % of corporations recognize risks and opportunities (in 2019 vs. 2020)?

In [None]:
## 2019 answers to C2.3 (climate-related risks identified?) and
## C2.4 (climate-related opportunities identified?), shown as two pie charts.
corp_2019_risks = corporate_2019_climate_change[
    (corporate_2019_climate_change['question_number'] == "C2.3")
    & (corporate_2019_climate_change['row_number'] == 0)
]
corp_2019_opp = corporate_2019_climate_change[
    (corporate_2019_climate_change['question_number'] == "C2.4")
    & (corporate_2019_climate_change['row_number'] == 0)
]

# One count per distinct answer; the same Series drives wedges and legend so
# the two can never get out of step.
risk_counts_2019 = corp_2019_risks['response_value'].value_counts()
opp_counts_2019 = corp_2019_opp['response_value'].value_counts()

fig = plt.figure(figsize=(15,9))
plt.suptitle("Corporate Sentiment to Climate Change 2019", fontsize=18)
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)

# labels=None: the row index of the filtered frame has one entry per
# respondent, not one per answer category, so it cannot label the wedges.
# The legend built from the counted categories identifies them instead.
risk_counts_2019.plot.pie(pctdistance=0.7, autopct='%.2f%%', labels=None, ax=ax1)
ax1.title.set_text("Have you identified any inherent climate-related risks...?")
ax1.legend(labels=risk_counts_2019.index, loc="best", bbox_to_anchor=(1, 0.25, 0.5, 0.5))

opp_counts_2019.plot.pie(pctdistance=0.7, autopct='%.2f%%', labels=None, ax=ax2)
ax2.title.set_text("Have you identified any climate-related opportunities...?")
ax2.legend(labels=opp_counts_2019.index, loc="best", bbox_to_anchor=(1, 0.25, 0.5, 0.5))

plt.show()

In [None]:
## 2020 answers to C2.3 (climate-related risks identified?) and
## C2.4 (climate-related opportunities identified?), shown as two pie charts.
corp_2020_risks = corporate_2020_climate_change[
    (corporate_2020_climate_change['question_number'] == "C2.3")
    & (corporate_2020_climate_change['row_number'] == 0)
]
corp_2020_opp = corporate_2020_climate_change[
    (corporate_2020_climate_change['question_number'] == "C2.4")
    & (corporate_2020_climate_change['row_number'] == 0)
]

# Wedges and legend must come from the SAME 2020 counts: value_counts() orders
# categories by frequency, so borrowing the 2019 ordering for the legend can
# attach the wrong answer to a wedge.
risk_counts_2020 = corp_2020_risks['response_value'].value_counts()
opp_counts_2020 = corp_2020_opp['response_value'].value_counts()

fig = plt.figure(figsize=(15,9))
plt.suptitle("Corporate Sentiment to Climate Change 2020", fontsize=18)
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)

# labels=None: wedges are identified through the legend only (a frame's row
# index has one entry per respondent, not one per answer category).
risk_counts_2020.plot.pie(pctdistance=0.7, autopct='%.2f%%', labels=None, ax=ax1)
ax1.title.set_text("Have you identified any inherent climate-related risks...?")
ax1.legend(labels=risk_counts_2020.index, loc="best", bbox_to_anchor=(1, 0.25, 0.5, 0.5))

opp_counts_2020.plot.pie(pctdistance=0.7, autopct='%.2f%%', labels=None, ax=ax2)
ax2.title.set_text("Have you identified any climate-related opportunities...?")
ax2.legend(labels=opp_counts_2020.index, loc="best", bbox_to_anchor=(1, 0.25, 0.5, 0.5))

plt.show()

3. Participation: How many corporations participated in the survey each year?

In [None]:
## Participation tables built from the yearly disclosure lists:
##   participation_rates - of the 2020 disclosers, how many also appear in 2019 / 2018
##   dropout_rates       - of the 2018 disclosers, how many still appear in 2019 / 2020
corp_2020 = corp_2020_disclosure[['survey_year','account_number']]
corp_2019 = corp_2019_disclosure[['survey_year','account_number']]
corp_2018 = corp_2018_disclosure[['survey_year','account_number']]

# Left joins keep every account of the anchor year; an account missing from
# another year gets NaN in that year's survey_year column, which count() skips.
# Column names after the two merges: survey_year_x (anchor year),
# survey_year_y (second frame) and survey_year (third frame).
corp_participants = (
    corp_2020
    .merge(corp_2019, how='left', on='account_number')
    .merge(corp_2018, how='left', on='account_number')
    .rename(columns={"survey_year_x": "2020", "survey_year_y": "2019", "survey_year": "2018"})
)
corp_dropouts = (
    corp_2018
    .merge(corp_2019, how='left', on='account_number')
    .merge(corp_2020, how='left', on='account_number')
    .rename(columns={"survey_year_x": "2018", "survey_year_y": "2019", "survey_year": "2020"})
)

# Select the year columns with a list: selecting several groupby columns with
# a bare tuple (['2018','2019','2020'] without the inner brackets) raises on
# pandas >= 2.0.
dropout_rates = corp_dropouts.groupby(['2018'])[['2018','2019','2020']].count()
participation_rates = corp_participants.groupby(['2020'])[['2018','2019','2020']].count()

print(participation_rates)

4. Dropout rate: How many corporations have dropped out since 2018?

In [None]:
# dropout_rates is built in the participation cell above (run that cell first):
# for the accounts in the 2018 disclosure list, it counts how many have a
# matching entry in the 2019 and 2020 disclosure lists.
print(dropout_rates)

**Regression Analysis**

How have corporations responded to the question "Did you have emissions reduction initiatives that were active within the reporting year? Note that this can include those in the planning and/or implementation phases." 

The first contributing factors I looked at are their estimated cost savings for both 2019 and 2020.

In [None]:
## Regression of the C4.3 flag ("emissions reduction initiatives active in the
## reporting year?") on the C4.3a answers of 2020 and 2019.
## The model is OLS on a 0/1 outcome, i.e. a linear probability model.


def _total_by_account(answers, total_name):
    """Collapse C4.3a answers to one numeric total per account.

    Parameters
    ----------
    answers : DataFrame with columns 'account_number' and 'response_value'.
    total_name : name given to the summed column in the result.

    Returns
    -------
    DataFrame with one row per account: 'account_number' and ``total_name``.

    Answers that cannot be parsed as numbers become NaN and are skipped by the
    sum, so a stray text answer does not abort the cell.
    NOTE(review): every C4.3a cell of an account is summed together; if the
    question mixes different quantities across its columns, filter to the
    intended column before calling this - confirm against the questionnaire.
    """
    numeric = answers.assign(
        response_value=pd.to_numeric(answers['response_value'], errors='coerce')
    )
    totals = numeric.groupby('account_number', as_index=False)['response_value'].sum()
    return totals.rename(columns={'response_value': total_name})


# Outcome: 1 when the account answered "Yes" to C4.3, else 0.
corp_active_2020 = corporate_2020_climate_change[
    (corporate_2020_climate_change['question_number'] == "C4.3")
    & (corporate_2020_climate_change['row_number'] == 0)
]
corp_active_2020 = corp_active_2020[['account_number','response_value']].dropna()
# assign() returns a new frame, avoiding a write into a filtered slice.
corp_active_2020 = corp_active_2020.assign(
    response_value=np.where(corp_active_2020['response_value'] == "Yes", 1, 0)
)

corp_2019est_savings = corporate_2019_climate_change[corporate_2019_climate_change['question_number'] == "C4.3a"]
corp_2019est = corp_2019est_savings[['account_number','response_value']]

corp_2020est_savings = corporate_2020_climate_change[corporate_2020_climate_change['question_number'] == "C4.3a"]
corp_2020est = corp_2020est_savings[['account_number','response_value']]

# Aggregate BEFORE merging: both frames hold several rows per account, so
# joining them row-by-row would pair every 2020 row with every 2019 row of the
# same account and replicate accounts in the regression sample.
corp_est_savings = pd.merge(
    _total_by_account(corp_2020est, "2020_est_savings"),
    _total_by_account(corp_2019est, "2019_est_savings"),
    how="left",
    on="account_number",
).fillna(0)

# Accounts without any C4.3a answer get 0 for both years.
df = pd.merge(corp_active_2020, corp_est_savings, how="left", on="account_number").fillna(0)

# statsmodels does not add an intercept on its own; without one the fit is
# forced through the origin.
X = sm.add_constant(df[['2020_est_savings','2019_est_savings']])
y = df['response_value']

# Note the argument order: response first, then the design matrix.
est = sm.OLS(y, X).fit()
predictions = est.predict(X) # make the predictions by the model

# Print out the statistics
est.summary()


Using only these two variables produces significance but isn't particularly helpful. It may be useful to add more features to the model. We will add the industry of corporations, whether they have identified risks and opportunities regarding climate change, and whether or not they participated in the survey, in order to estimate whether these contribute to the corporation's willingness to participate.

In [None]:
## Collect per-account features (risks / opportunities identified, presence in
## the yearly disclosure lists) and regress the C4.3 "active initiatives" flag
## (corp_active_2020, built in the previous regression cell) on them.

# Participation history per account: number of disclosure-list rows per year.
corp_active_stats = corporate_2020_climate_change[
    (corporate_2020_climate_change['question_number'] == "C4.3")
    & (corporate_2020_climate_change['row_number'] == 0)
]
corp_active = pd.merge(corp_active_stats, corp_2020_disclosure, how="left", on="account_number")
# List selection: the bare-tuple form ['2018','2019','2020'] raises on pandas >= 2.0.
corp_participants_acct = corp_participants.groupby(['account_number'])[['2018','2019','2020']].count()
corp_active = pd.merge(corp_active, corp_participants_acct, how="left", on="account_number")
# Only the key and the year counts are used downstream; one row per account so
# the merge into df5 cannot replicate accounts.
corp_active = corp_active[['account_number','2018','2019','2020']].drop_duplicates('account_number')
# corp_active= pd.get_dummies(corp_active, columns=['primary_industry'])

# Answers to C2.3 / C2.4 (corp_2020_risks and corp_2020_opp come from the 2020 sentiment cell).
corp_2020_risk_values = (
    corp_2020_risks[['account_number','response_value']]
    .dropna()
    .rename(columns={"response_value": "Corp has ID'd risks"})
)
corp_2020_opp_values = (
    corp_2020_opp[['account_number','response_value']]
    .dropna()
    .rename(columns={"response_value": "Corp has ID'd opportunities"})
)
# drop_first=True drops one reference level per question: a full set of dummies
# sums to 1 in every row and is perfectly collinear with the intercept and with
# the other full set. dtype=float keeps the design matrix numeric (newer pandas
# returns bool dummies by default). `columns=` restricts encoding to the answer
# column so the account key is never encoded.
corp_2020_risk_values = pd.get_dummies(
    corp_2020_risk_values, columns=["Corp has ID'd risks"], drop_first=True, dtype=float
)
corp_2020_opp_values = pd.get_dummies(
    corp_2020_opp_values, columns=["Corp has ID'd opportunities"], drop_first=True, dtype=float
)

corp_responses = pd.merge(corp_2020_risk_values, corp_2020_opp_values, how="left", on="account_number")

df4 = pd.merge(corp_active_2020, corp_responses, how="left", on="account_number").dropna()
# Drop accounts without participation history: the model cannot be fitted on NaNs.
df5 = pd.merge(df4, corp_active, how="left", on="account_number").dropna()

# Select features by name instead of by column position; everything except the
# key and the outcome is a feature.
# add_constant leaves the matrix unchanged when a constant column is already
# present (e.g. if every remaining account has the same '2020' count).
X = sm.add_constant(df5.drop(columns=['account_number','response_value']))
y = df5['response_value']

# Note the argument order: response first, then the design matrix.
est = sm.OLS(y, X).fit()
predictions = est.predict(X) # make the predictions by the model

# Print out the statistics
est.summary()


This model implies that there are too many issues with multicollinearity. Trying another model on this set of data would not prove very helpful.

Some future steps we could take would be to try some of the other questions from the survey, which could help us better understand what compels corporations to set climate change risks.

We should also incorporate city data to better understand the opportunity for corporations to work within a city's emission target. Moreover, we could utilize corporation initiatives to employ workers within a city for climate change/water work.