In [None]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

import  seaborn as sns
import matplotlib.pyplot as plt
import plotly_express as px

from nltk.corpus import stopwords
stop = stopwords.words('english') + stopwords.words('spanish') 

PATH = '../input/cdp-unlocking-climate-solutions/Cities/'
PATH2 = '../input/cdp-unlocking-climate-solutions/Corporations/'

This notebook provides an overview of the corporate responses to the Climate Change and Water Security Questionnaires in the form of visualizations. These visualizations attempt to present the most significant or noteworthy attributes of the answers to the selected questions.

The 2020 Questionnaires seem to contain data only from organizations headquartered in the USA and Canada. As such, several of the location-based visualizations presented show only these two countries. Wherever data from operations conducted or facilities owned by these organizations in other parts of the world is available, the visualizations will reflect it. I have tried to make the best use of the space, but if any of the visualizations seem too small to read the text labels, I request you to zoom in. Thanks, and enjoy!

In [None]:
# Disclosing corporations (climate change): read the 2018-2020 files, then stack newest first
cd_cc2018, cd_cc2019, cd_cc2020 = (
    pd.read_csv(PATH2 + csv_file)
    for csv_file in (
        'Corporations Disclosing/Climate Change/2018_Corporates_Disclosing_to_CDP_Climate_Change.csv',
        'Corporations Disclosing/Climate Change/2019_Corporates_Disclosing_to_CDP_Climate_Change.csv',
        'Corporations Disclosing/Climate Change/2020_Corporates_Disclosing_to_CDP_Climate_Change.csv',
    )
)
cd_cc = pd.concat([cd_cc2020, cd_cc2019, cd_cc2018])

# Questionnaire responses (climate change): same pattern as above
cr_cc2018, cr_cc2019, cr_cc2020 = (
    pd.read_csv(PATH2 + csv_file)
    for csv_file in (
        'Corporations Responses/Climate Change/2018_Full_Climate_Change_Dataset.csv',
        'Corporations Responses/Climate Change/2019_Full_Climate_Change_Dataset.csv',
        'Corporations Responses/Climate Change/2020_Full_Climate_Change_Dataset.csv',
    )
)
cr_cc = pd.concat([cr_cc2020, cr_cc2019, cr_cc2018])

# cd_cc.columns, cr_cc.columns

In [None]:
# Inner-join the disclosure records with the responses on the keys both frames share
climate = pd.merge(
    left=cd_cc,
    right=cr_cc,
    how='inner',
    on=['survey_year', 'account_number', 'organization', 'response_received_date'],
)

# climate.groupby('survey_year')['account_number'].nunique()

In [None]:
# disclosing
# Disclosing corporations (water security): read the 2018-2020 files, then stack newest first
cd_ws2018, cd_ws2019, cd_ws2020 = (
    pd.read_csv(PATH2 + csv_file)
    for csv_file in (
        'Corporations Disclosing/Water Security/2018_Corporates_Disclosing_to_CDP_Water_Security.csv',
        'Corporations Disclosing/Water Security/2019_Corporates_Disclosing_to_CDP_Water_Security.csv',
        'Corporations Disclosing/Water Security/2020_Corporates_Disclosing_to_CDP_Water_Security.csv',
    )
)
cd_ws = pd.concat([cd_ws2020, cd_ws2019, cd_ws2018])

# Questionnaire responses (water security): same pattern as above
cr_ws2018, cr_ws2019, cr_ws2020 = (
    pd.read_csv(PATH2 + csv_file)
    for csv_file in (
        'Corporations Responses/Water Security/2018_Full_Water_Security_Dataset.csv',
        'Corporations Responses/Water Security/2019_Full_Water_Security_Dataset.csv',
        'Corporations Responses/Water Security/2020_Full_Water_Security_Dataset.csv',
    )
)
cr_ws = pd.concat([cr_ws2020, cr_ws2019, cr_ws2018])

# cd_ws.columns, cr_ws.columns

In [None]:
# Inner-join the disclosure records with the responses on the keys both frames share
water = pd.merge(
    left=cd_ws,
    right=cr_ws,
    how='inner',
    on=['survey_year', 'account_number', 'organization', 'response_received_date'],
)

# water.groupby('survey_year')['account_number'].nunique()

In [None]:
def get_climate(q, by='row_number', year=2020, cols=['organization']):
    """Return the non-null climate-change responses to one question in one survey year.

    Reads the module-level ``climate`` frame.

    Parameters
    ----------
    q : str
        Value to match against ``question_number`` (e.g. ``'C2.3a'``).
    by : str
        Extra column to keep alongside ``cols``; the callers in this notebook
        pass ``'row_number'`` or ``'row_name'``.
    year : int
        Value to match against ``survey_year``.
    cols : sequence of str
        Identifying columns to keep. The sequence is copied, never mutated.

    Returns
    -------
    pandas.DataFrame
        Columns ``cols + [by, 'column_name', 'response_value']``; rows whose
        ``response_value`` is missing are dropped.

    Side effects
    ------------
    When at least one row is returned, prints the question number, its
    ``question_unique_reference`` and the number of returned rows. The count
    now describes the rows actually returned for ``year`` (matching
    ``get_water``) instead of every row for the question across all years.
    """
    is_question = climate['question_number'] == q
    wanted_columns = list(cols) + [by, 'column_name', 'response_value']

    # Build the filtered frame once and reuse it for both the count and the return value.
    answers = (
        climate.loc[(climate['survey_year'] == year) & is_question, wanted_columns]
        .dropna(subset=['response_value'])
    )

    n_answers = answers.shape[0]
    if n_answers > 0:
        # A non-empty result guarantees at least one row matches the question.
        reference = climate.loc[is_question, 'question_unique_reference'].iloc[0]
        print(q, reference, f'({n_answers})')
    return answers

In [None]:
def get_water(q, by='row_number', year=2020, cols=['organization']):
    """Return the non-null water-security responses to one question in one survey year.

    Reads the module-level ``water`` frame, keeps the rows whose
    ``question_number`` equals ``q`` and whose ``survey_year`` equals ``year``,
    selects ``cols + [by, 'column_name', 'response_value']`` and drops rows
    with a missing ``response_value``.

    When the result is non-empty, prints the question number, the first
    ``question_unique_reference`` found for the question and the row count.
    """
    matches_question = water['question_number'] == q
    wanted_columns = cols + [by, 'column_name', 'response_value']

    answers = water[(water['survey_year'] == year) & matches_question][wanted_columns]
    answers = answers.dropna(subset=['response_value'])

    n_answers = answers.shape[0]
    if n_answers > 0:
        references = water[matches_question]['question_unique_reference']
        print(q, references.iloc[0], f'({n_answers})')
    return answers

# Climate Change

The 2020 Climate Change questionnaire is organized into the following sections. From these we extract a list of relevant questions that can help us understand what would make good KPIs.

1. Governance
1. Risks and opportunities
1. Business strategy
1. Targets and performance
1. Emissions methodology
1. Emissions data
1. Emissions breakdown
1. Energy
1. Additional metrics
1. Verification
1. Carbon pricing
1. Engagement

Below, we have picked a set of key questions from these sections in the questionnaire. We believe these might be useful in building the KPIs for corporations with respect to their influence on climate change and the potential risk certain climate hazards pose to their operations.

In [None]:
# no data (2020): ['C3.1b', 'C3.1d', 'C4.2a', 'C4.3d', 'C5.1', 'C7.9a',
#           'C9.1', 'C11.1d', 'C11.2a', 'C11.3a', 'C12.3c',
#           'C12.3e', 'C12.3f', 'C-FI', 'SC0.2']

# Curated list of key climate-change questions explored in the cells below.
qs_cc = ['C2.2', 'C2.2a', 'C2.3a', 'C2.4a', 'C4.1a', 'C4.1b', 'C4.2b',
         'C4.3a', 'C4.3b', 'C4.5a', 'C6.1', 'C6.3', 'C6.5',  'C6.10', 
         'C7.1a', 'C7.2', 'C7.3a', 'C7.3b', 'C7.3c', 'C7.5', 'C7.6a', 
         'C7.6c','C7.6b', 'C8.1', 'C8.2', 'C8.2a', 'C8.2b', 'C8.2c',
         'C8.2d', 'C8.2e', 'C12.1a', 'C12.1b', 'C12.3a']

# Exploration helper, kept disabled so it no longer overwrites the curated list
# (mirrors the equivalent line in the water-security cell):
# qs_cc = sorted(climate[climate['survey_year']==2019]['question_number'].unique())

# Print the reference text and response count for each selected question (2020 survey).
for q in qs_cc:
    get_climate(q, year=2020)

In [None]:
cc2_2a = get_climate('C2.2a', by='row_name')
# Count how often each row_name occurs among the C2.2a responses and plot the counts.
# Title typo fixed: "corporated" -> "corporations".
(
    cc2_2a.row_name
    .value_counts()
    .plot(kind='barh', title='Risk areas considered by corporations (2020)')
);
# cc2_2a.pivot(index=['organization', 'row_name'], columns='column_name')

In [None]:
cc2_3a = get_climate('C2.3a')
# One row per (organization, row_number), one column per questionnaire column
piv2_3a = cc2_3a.pivot(index=['organization', 'row_number'], columns='column_name')['response_value']

# Count magnitude-of-impact answers per risk type/driver and stack the 60 largest counts
(
    piv2_3a[['C2.3a_C3Risk type & Primary climate-related risk driver', 'C2.3a_C9Magnitude of impact']]
    .groupby('C2.3a_C3Risk type & Primary climate-related risk driver')['C2.3a_C9Magnitude of impact']
    .value_counts()
    .sort_values(ascending=False)[:60]
    .unstack()
    .plot(kind='barh', stacked=True, title='Types of risks with their magnitude of impact on businesses (2020)')
);

In [None]:
# Count magnitude-of-impact answers per primary potential financial impact (48 largest counts)
(
    piv2_3a[['C2.3a_C4Primary potential financial impact', 'C2.3a_C9Magnitude of impact']]
    .groupby('C2.3a_C4Primary potential financial impact')['C2.3a_C9Magnitude of impact']
    .value_counts()
    .sort_values(ascending=False)[:48]
    .unstack()
    .plot(kind='barh', stacked=True, title='Kind of financial impacts on business and their magnitudes (2020)')
);

In [None]:
cc2_4a = get_climate('C2.4a')
# One row per (organization, row_number), one column per questionnaire column
ccpiv2_4a = cc2_4a.pivot(index=['organization', 'row_number'], columns='column_name')['response_value']

# Count time-horizon answers per (opportunity type, driver) and stack the 60 largest counts
(
    ccpiv2_4a[['C2.4a_C3Opportunity type','C2.4a_C4Primary climate-related opportunity driver', 'C2.4a_C7Time horizon']]
    .groupby(['C2.4a_C3Opportunity type', 'C2.4a_C4Primary climate-related opportunity driver'])['C2.4a_C7Time horizon']
    .value_counts()
    .sort_values(ascending=False)[:60]
    .unstack()
    .plot(
        kind='barh',
        figsize=(10, 8),
        stacked=True,
        title='Opportunity type and drivers, and time horizons for their potential impacts on business (2020)',
    )
);

In [None]:
# Count magnitude-of-impact answers per primary potential financial impact (33 largest counts)
(
    ccpiv2_4a[['C2.4a_C5Primary potential financial impact', 'C2.4a_C9Magnitude of impact']]
    .groupby('C2.4a_C5Primary potential financial impact')['C2.4a_C9Magnitude of impact']
    .value_counts()
    .sort_values(ascending=False)[:33]
    .unstack()
    .plot(kind='barh', stacked=True, title='Financial impacts of identified opportunities and the magnitudes (2020)')
);

In [None]:
cc4_1a = get_climate('C4.1a')

# Keep the base-year, target-year and percentage columns and convert them all to float64
ccpiv4_1a = (
    cc4_1a.pivot(index=['organization', 'row_number'], columns='column_name')['response_value'][[
        'C4.1a_C5Base year',
        'C4.1a_C6Covered emissions in base year (metric tons CO2e)',
        'C4.1a_C8Target year',
        'C4.1a_C9Targeted reduction from base year (%)',
        'C4.1a_C12% of target achieved [auto-calculated]',
    ]]
    .astype('float64')
)

# Target: base-year emissions minus the targeted percentage reduction.
# Reporting: base-year emissions minus the achieved share of the (base - Target) gap.
# The two percentage columns are only inputs, so they are dropped afterwards.
ccpiv4_1a = (
    ccpiv4_1a
    .assign(
        Target=lambda df: df['C4.1a_C6Covered emissions in base year (metric tons CO2e)']
        - (df['C4.1a_C6Covered emissions in base year (metric tons CO2e)'] * df['C4.1a_C9Targeted reduction from base year (%)']/100),
        Reporting=lambda df: df['C4.1a_C6Covered emissions in base year (metric tons CO2e)']
        - ((df['C4.1a_C12% of target achieved [auto-calculated]']/100)
           * (df['C4.1a_C6Covered emissions in base year (metric tons CO2e)'] - df['Target'])),
    )
    .drop(['C4.1a_C9Targeted reduction from base year (%)', 'C4.1a_C12% of target achieved [auto-calculated]'], axis=1)
)

In [None]:
# pd.concat([ccpiv4_1a.groupby(['C4.1a_C5Base year'])['C4.1a_C6Covered emissions in base year (metric tons CO2e)'].sum(), ccpiv4_1a.groupby(['C4.1a_C8Target year'])['Target'].sum()])

In [None]:
# ccpiv4_1a.groupby(['C4.1a_C8Target year'])['Target'].sum()

In [None]:
cc6_1 = get_climate('C6.1', by='row_name')
# Pivot to one column per questionnaire column and make the Scope 1 figure numeric
ccpiv6_1 = (
    cc6_1.pivot(index=['organization', 'row_name'], columns='column_name')['response_value']
    .astype({'C6.1_C1Gross global Scope 1 emissions (metric tons CO2e)': 'float32'})
)

# Plot the 120 largest (organization, row_name) values, one bar group per organization, on a log axis
(
    ccpiv6_1['C6.1_C1Gross global Scope 1 emissions (metric tons CO2e)']
    .sort_values(ascending=False)[:120]
    .unstack()
    .plot(kind='barh', figsize=(8, 14), logx=True, width=0.9, title='Organization yearly Scope 1 emissions in metric tonnes (reported 2020)')
);

In [None]:
cc6_3 = get_climate("C6.3", by='row_name')
# Pivot to one column per questionnaire column and make both Scope 2 figures numeric
ccpiv6_3 = (
    cc6_3.pivot(index=['organization', 'row_name'], columns='column_name')['response_value']
    .astype({
        'C6.3_C1Scope 2, location-based': 'float32',
        'C6.3_C2Scope 2, market-based (if applicable)': 'float32',
    })
)
# 'sum' is only a sort key; it is NaN whenever either figure is missing
ccpiv6_3['sum'] = ccpiv6_3['C6.3_C1Scope 2, location-based'] + ccpiv6_3['C6.3_C2Scope 2, market-based (if applicable)']

(
    ccpiv6_3.sort_values(by='sum', ascending=False)[:80]
    .drop('sum', axis=1)
    .plot(kind='barh', figsize=(10, 14), logx=True, width=0.9, title='Organization location and market based Scope 2 emissions in metric tonnes (reported 2020)')
);

In [None]:
cc6_5 = get_climate("C6.5", by='row_name')
# Series of CO2e figures indexed by (organization, row_name), converted to float32
ccpiv6_5 = (
    cc6_5.pivot(index=['organization', 'row_name'], columns='column_name')['response_value']['C6.5_C2Metric tonnes CO2e']
    .astype('float32')
)

# Total per row_name across organizations, smallest first
(
    ccpiv6_5.groupby('row_name')
    .sum()
    .sort_values()
    .plot(kind='barh', title='Global Scope 3 emissions by activity/source (2020)')
);

In [None]:
# Rebuild the wide table from cc6_5 instead of unstacking ccpiv6_5 onto itself:
# the previous self-assignment made this cell fail or change shape when run twice.
# The resulting ccpiv6_5 is the same frame the original cell produced on its first run.
ccpiv6_5 = (
    cc6_5.pivot(index=['organization', 'row_name'], columns='column_name')['response_value']['C6.5_C2Metric tonnes CO2e']
    .astype('float32')
    .unstack()
)
# Row total, used as the sort key and then left out of the plot
ccpiv6_5['sum'] = ccpiv6_5.sum(axis=1)
(
    ccpiv6_5.sort_values(by='sum', ascending=False)[:60]
    .drop('sum', axis=1)
    .plot(kind='barh', figsize=(12, 12), width=0.91, stacked=True, cmap='tab20', title='Organization Scope 3 emissions by activity/source (2020)')
    .legend(bbox_to_anchor=(1, 0.6))
);

In [None]:
cc6_10 = get_climate("C6.10").pivot(index=['organization', 'row_number'], columns='column_name')['response_value']
# Keep only intensity figures whose metric denominator is 'unit total revenue'
cc6_10 = cc6_10.loc[
    cc6_10['C6.10_C3Metric denominator'] == 'unit total revenue',
    'C6.10_C1Intensity figure',
].astype('float32')

# Sum per organization and plot the 60 largest on a log axis
(
    cc6_10.groupby(['organization'])
    .sum()
    .sort_values(ascending=False)[:60]
    .plot(kind='barh', figsize=(10, 12), logx=True, title='Top 60 organizations by intensity of combined global Scope 1 and 2 emissions per unit total revenue (2020)')
);

In [None]:
cc7_1a = get_climate('C7.1a')
# Flatten the pivot back to ordinary columns and drop the columns not used below
ccpiv7_1a = (
    cc7_1a.pivot(index=['organization', 'row_number'], columns='column_name')['response_value']
    .reset_index()
    .drop(['row_number', 'C7.1a_C3GWP Reference'], axis=1)
)
ccpiv7_1a['C7.1a_C2Scope 1 emissions (metric tons of CO2e)'] = ccpiv7_1a['C7.1a_C2Scope 1 emissions (metric tons of CO2e)'].astype('float32')

# Sum Scope 1 emissions per organization and greenhouse gas.
# aggfunc is given as the string 'sum': passing the builtin `sum` triggers a pandas
# FutureWarning, and the string keeps the current (groupby-sum) behaviour.
ccpiv7_1a = ccpiv7_1a.pivot_table(
    index=['organization'],
    columns='C7.1a_C1Greenhouse gas',
    aggfunc='sum',
)['C7.1a_C2Scope 1 emissions (metric tons of CO2e)'][
    ['CO2', 'CO2: Refrigerants', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6', 'NF3']
]

# Row total, used as the sort key and then left out of the plot
ccpiv7_1a['sum'] = ccpiv7_1a.sum(axis=1)
(
    ccpiv7_1a.sort_values('sum', ascending=False)[:60]
    .drop('sum', axis=1)
    .plot(kind='barh', stacked=True, figsize=(10,10), title='Breakdown of Scope 1 emissions by Greenhouse Gas of top emitters (2020)')
);

In [None]:
cc7_2 = get_climate('C7.2')
# Pivot to one column per questionnaire column and make the Scope 1 figure numeric
ccpiv7_2 = (
    cc7_2.pivot(index=['organization', 'row_number'], columns='column_name')['response_value']
    .astype({'C7.2_C2Scope 1 emissions (metric tons CO2e)': 'float32'})
)

# Total per country/region, 30 largest, log axis
(
    ccpiv7_2.groupby(['C7.2_C1Country/Region'])['C7.2_C2Scope 1 emissions (metric tons CO2e)']
    .sum()
    .sort_values(ascending=False)[:30]
    .plot(kind='barh', figsize=(8,8), logx=True, title='Total Scope 1 emissions by country/region (2020)')
    .legend(bbox_to_anchor=(0.2, 1))
);

In [None]:
# Mean per country/region, 80 largest, log axis
(
    ccpiv7_2.groupby(['C7.2_C1Country/Region'])['C7.2_C2Scope 1 emissions (metric tons CO2e)']
    .mean()
    .sort_values(ascending=False)[:80]
    .plot(kind='barh', figsize=(10,14), logx=True, title='Mean Scope 1 emissions by country/region (2020)')
);

In [None]:
cc7_5 = get_climate('C7.5')
# Pivot to one column per questionnaire column and make both Scope 2 figures numeric
ccpiv7_5 = (
    cc7_5.pivot(index=['organization', 'row_number'], columns='column_name')['response_value']
    .astype({
        'C7.5_C2Scope 2, location-based (metric tons CO2e)': 'float32',
        'C7.5_C3Scope 2, market-based (metric tons CO2e)': 'float32',
    })
)
# NOTE(review): this adds the location-based and market-based figures together;
# confirm that summing the two is the intended "total".
ccpiv7_5['total CO2e'] = ccpiv7_5['C7.5_C2Scope 2, location-based (metric tons CO2e)'] + ccpiv7_5['C7.5_C3Scope 2, market-based (metric tons CO2e)']

# Total per country/region, 75 largest, log axis
(
    ccpiv7_5.groupby(['C7.5_C1Country/Region'])['total CO2e']
    .sum()
    .sort_values(ascending=False)[:75]
    .plot(kind='barh', logx=True, figsize=(10,12), title='Total Scope 2 emissions by region (2020)')
);

In [None]:
# Mean per country/region, 75 largest, log axis
(
    ccpiv7_5.groupby(['C7.5_C1Country/Region'])['total CO2e']
    .mean()
    .sort_values(ascending=False)[:75]
    .plot(kind='barh', figsize=(10,12), logx=True, title='Mean Scope 2 emissions by region (2020)')
);

In [None]:
# Frequency of each distinct C8.1 response value
(
    get_climate('C8.1')['response_value']
    .value_counts()
    .plot(kind='barh', title='Organizational spend on energy (2020)', figsize=(10, 6))
);

In [None]:
# Count each response value per row_name and plot the counts side by side
(
    get_climate('C8.2', by='row_name')
    .groupby('row_name')['response_value']
    .value_counts()
    .unstack()
    .plot(kind='barh', title='Energy related activities undertaken by organizations (2020)', figsize=(10, 6))
);

In [None]:
cc8_2a = get_climate('C8.2a', by='row_name')
# Sum the renewable and non-renewable MWh columns per row_name
(
    cc8_2a.pivot(index=['organization', 'row_name'], columns='column_name')['response_value'][
        ['C8.2a_C2MWh from renewable sources', 'C8.2a_C3MWh from non-renewable sources']
    ]
    .astype('float32')
    .groupby(['row_name'])
    .sum()
    .plot(kind='barh', figsize=(8, 6), title='Global energy consumption by organizations by source (2020)')
);

In [None]:
# Count each response value per row_name and plot the counts side by side
(
    get_climate('C8.2b', by='row_name')
    .groupby('row_name')['response_value']
    .value_counts()
    .unstack()
    .plot(kind='barh', title='Applications of fuel use by organization (2020)', figsize=(10, 6))
);

In [None]:
cc8_2c = get_climate('C8.2c')
# Pivot to one column per questionnaire column and make the fuel MWh figure numeric
ccpiv8_2c = (
    cc8_2c.pivot(index=['organization', 'row_number'], columns='column_name')['response_value']
    .astype({'C8.2c_C3Total fuel MWh consumed by the organization': 'float64'})
)

# Sum MWh per (organization, fuel), keep the 120 largest, one stacked bar per organization
(
    ccpiv8_2c.groupby(['organization', 'C8.2c_C1Fuels (excluding feedstocks)'])['C8.2c_C3Total fuel MWh consumed by the organization']
    .sum()
    .sort_values(ascending=False)[:120]
    .unstack()
    .plot(kind='barh', stacked=True, figsize=(10,14), cmap='tab20', title='Breakdown of fuels consumption in MWh by organizations (2020)')
);

In [None]:
cc8_2d = get_climate('C8.2d', by='row_name')
# Convert the pivoted responses to float32, keep total and renewable generation, sum per row_name
ccpiv8_2d = (
    cc8_2d.pivot(index=['organization', 'row_name'], columns='column_name')['response_value']
    .astype('float32')[
        ['C8.2d_C1Total Gross generation (MWh)', 'C8.2d_C3Gross generation from renewable sources (MWh)']
    ]
    .groupby('row_name')
    .sum()
)
# Non-renewable generation is derived as total minus renewable
ccpiv8_2d['Gross generation from non-renewable sources'] = ccpiv8_2d['C8.2d_C1Total Gross generation (MWh)'] - ccpiv8_2d['C8.2d_C3Gross generation from renewable sources (MWh)']

# Title typo fixed: "Comparisions" -> "Comparison"
(
    ccpiv8_2d.drop('C8.2d_C1Total Gross generation (MWh)', axis=1)
    .plot(kind='barh', title='Comparison between generation of electricity, heat, steam, cooling from renewable and non-renewable sources (2020)', figsize=(10, 8))
    .legend(bbox_to_anchor=(1,0.1))
);

In [None]:
cc12_1a = get_climate('C12.1a')
# Plot the 14 most frequent 'Details of engagement' answers.
# Title typo fixed: "engangement" -> "engagement".
(
    cc12_1a.pivot(index=['organization', 'row_number'], columns='column_name')['response_value']['C12.1a_C2Details of engagement']
    .value_counts()[:14]
    .plot(kind='barh', figsize=(8, 8), title='Climate related supplier engagement strategies (2020)')
);

In [None]:
cc12_1b = get_climate('C12.1b')
# Display the 5 most frequent 'Details of engagement' answers
(
    cc12_1b.pivot(index=['organization', 'row_number'], columns='column_name')['response_value']['C12.1b_C2Details of engagement']
    .value_counts()[:5]
)

In [None]:
# Count corporate positions per focus of legislation (rows: focus, columns: position)
cc12_3a = (
    get_climate('C12.3a')
    .pivot(index=['organization', 'row_number'], columns='column_name')['response_value']
    .groupby(['C12.3a_C1Focus of legislation'])['C12.3a_C2Corporate position']
    .value_counts()
    .unstack()
)
# Row total, used as the sort key and then left out of the plot
cc12_3a['sum'] = cc12_3a.sum(axis=1)

(
    cc12_3a.sort_values('sum', ascending=False)
    .drop('sum', axis=1)[:8]
    .plot(kind='barh', stacked=True, title='Engagement with policy makers by issues and position (2020)')
);

# Water Security

The Sections as per the 2020 Water Security Questionnaire for Corporations are as follows.

1. Current state
1. Business impacts
1. Procedures
1. Risks and opportunities
1. Facility-level water accounting
1. Governance
1. Business strategy
1. Targets
1. Verification

Once more, we pick a set of the most important questions, which are relevant to gaining insights for building KPIs for corporations with respect to their water security and risk.

In [None]:
# no data (2020): ['W1.2d', 'W1.4c', 'W1.4d', 'W2.1a', 'W2.2a', 'W2.2b',
#           'W4.1a', 'W4.3b', 'W6.1a', 'W6.2a', 'W6.2b', 'W6.3', 'W6.4a', 
#           'W6.5a', 'W6.6', 'W7.1', 'W7.2', 'W7.3b', 'W9.1a']

# Curated list of water-security questions explored in the cells below
qs_ws = [
    'W1.2', 'W1.2b', 'W1.2h', 'W1.2i', 'W1.4a', 'W1.4b',
    'W3.3a', 'W3.3b', 'W3.3c', 'W3.3d',
    'W4.1b', 'W4.1c', 'W4.2', 'W4.2a', 'W4.3a',
    'W5.1', 'W5.1a',
    'W8.1', 'W8.1b',
]
# qs_ws = sorted(water[water['survey_year']==2019]['question_number'].unique())

# get_water prints a one-line summary for each question that has 2020 responses
for q in qs_ws:
    get_water(q, year=2020)

In [None]:
# Series of '% of sites/facilities/operations' answers indexed by (organization, row_name)
w1_2 = (
    get_water('W1.2', by='row_name')
    .pivot(index=['organization', 'row_name'], columns='column_name')['response_value']['W1.2_C1% of sites/facilities/operations']
)
# Count each answer per row_name and stack the counts
(
    w1_2.groupby('row_name')
    .value_counts()
    .unstack()
    .plot(kind='barh', stacked=True, figsize=(10, 8), title='Proportion of water related aspects that are regularly measured and monitored across organization operations (2020)')
    .legend(bbox_to_anchor=(1,1))
);

In [None]:
# Volumes per organization (rows) and row_name (columns), converted to float32
w1_2b = (
    get_water('W1.2b', by='row_name')
    .pivot(index=['organization', 'row_name'], columns='column_name')['response_value']['W1.2b_C1Volume (megaliters/year)']
    .unstack()
    .astype('float32')
)
# Row total, used as the sort key and then left out of the plot
w1_2b['sum'] = w1_2b.sum(axis=1)
# Title fixed: the year read "(202)" instead of "(2020)"
(
    w1_2b.sort_values(by='sum', ascending=False)
    .drop('sum', axis=1)[:60]
    .plot(kind='barh', logx=True, figsize=(10, 14), width=0.9, title='Water volume consumed, discharged, withdrawn by organization (2020)')
);

In [None]:
# Series of volumes indexed by (organization, row_name), converted to float32
w1_2h = (
    get_water('W1.2h', by='row_name')
    .pivot(index=['organization', 'row_name'], columns='column_name')['response_value']['W1.2h_C2Volume (megaliters/year)']
    .astype('float32')
)
# 75 largest (organization, row_name) totals, one bar group per organization, log axis
(
    w1_2h.groupby(['organization', 'row_name'])
    .sum()
    .sort_values(ascending=False)[:75]
    .unstack()
    .plot(kind='barh', logx=True, width=0.9, figsize=(10, 12), title='Volume of water withdrawn by organizations by source (2020)')
);

In [None]:
# Series of volumes indexed by (organization, row_name), converted to float32
w1_2i = (
    get_water('W1.2i', by='row_name')
    .pivot(index=['organization', 'row_name'], columns='column_name')['response_value']['W1.2i_C2Volume (megaliters/year)']
    .astype('float32')
)
# 75 largest (organization, row_name) totals, one bar group per organization, log axis
(
    w1_2i.groupby(['organization', 'row_name'])
    .sum()
    .sort_values(ascending=False)[:75]
    .unstack()
    .plot(kind='barh', logx=True, width=0.9, figsize=(10, 12), title='Volume of water discharged by organizations by destination (2020)')
);

In [None]:
# 2019 responses to W1.2j, indexed by (country, organization, row_name)
w1_2j = (
    get_water('W1.2j', by='row_name', cols=['country', 'organization'], year=2019)
    .pivot(index=['country', 'organization', 'row_name'], columns='column_name')['response_value']
)
# Count each '% recycled and reused' answer per country and stack the counts
(
    w1_2j.groupby('country')['W1.2j_C1% recycled and reused']
    .value_counts()
    .unstack(-1)
    .plot(kind='barh', figsize=(8, 6), stacked=True, title='Water recycled by country (US and Canada 2019)')
);

In [None]:
# Histogram (log-scaled counts) of the number of facilities exposed to water risk
(
    get_water('W4.1b')
    .pivot(index=['organization', 'row_number'], columns='column_name')['response_value']['W4.1b_C1Total number of facilities exposed to water risk']
    .astype('float32')
    .plot(kind='hist', title='Number of facilities exposed to water risk for organizations (2020)', logy=True)
);

In [None]:
w4_1c = get_water('W4.1c').pivot(index=['organization', 'row_number'], columns='column_name')['response_value']
w4_1c['W4.1c_C2Number of facilities exposed to water risk'] = w4_1c['W4.1c_C2Number of facilities exposed to water risk'].astype('float32')

# Sum exposed facilities per (location, % revenue affected), keep the 50 largest sums.
# The second group key is written exactly as the column is named in the loaded data.
# Title fixed: "to  organizationrevenue" -> "to organization revenue".
(
    w4_1c.groupby(['W4.1c_C1Country/Area & River basin_G', 'W4.1c_C7% companyâ€™s total global revenue that could be affected'])['W4.1c_C2Number of facilities exposed to water risk']
    .sum()
    .sort_values(ascending=False)[:50]
    .unstack()
    .plot(kind='barh', stacked=True, figsize=(14, 8), title='No. of facilities exposed to water risk by location and % impact to organization revenue (2020)')
    .legend(bbox_to_anchor=(1, 0.5))
);

In [None]:
w4_2 = get_water('W4.2').pivot(index=['organization', 'row_number'], columns='column_name')['response_value']

plt.subplots(figsize=(10, 8))
# Heatmap of the 40 largest (risk type, risk driver, potential impact) counts, impacts as rows
sns.heatmap(
    w4_2.groupby(['W4.2_C2Type of risk & Primary risk driver_G', 'W4.2_C2Type of risk & Primary risk driver'])['W4.2_C3Primary potential impact']
    .value_counts()
    .sort_values(ascending=False)[:40]
    .unstack()
    .T,
    annot=True,
);

plt.xticks(rotation=65, horizontalalignment='right')
plt.title('Type of risks and risk drivers affecting direct operations with their potential impacts (2020)')
plt.show()

In [None]:
plt.subplots(figsize=(10, 8))
# Heatmap of the 40 largest (risk type, risk driver, primary response) counts, responses as rows
sns.heatmap(
    w4_2.groupby(['W4.2_C2Type of risk & Primary risk driver_G', 'W4.2_C2Type of risk & Primary risk driver'])['W4.2_C13Primary response to risk']
    .value_counts()
    .sort_values(ascending=False)[:40]
    .unstack()
    .T,
    annot=True,
);

plt.xticks(rotation=65, horizontalalignment='right')
plt.title('Type of risks and risk drivers affecting direct operations by primary response to the risks (2020)')
plt.show()

In [None]:
w4_2a = get_water('W4.2a').pivot(index=['organization', 'row_number'], columns='column_name')['response_value']

plt.subplots(figsize=(10, 8))
# Heatmap of all (risk type, risk driver, potential impact) counts, impacts as columns
sns.heatmap(
    w4_2a.groupby(['W4.2a_C3Type of risk & Primary risk driver_G', 'W4.2a_C3Type of risk & Primary risk driver'])['W4.2a_C4Primary potential impact']
    .value_counts()
    .sort_values(ascending=False)
    .unstack(),
    annot=True,
);

plt.xticks(rotation=65, horizontalalignment='right')
plt.title('Type of risks and risk drivers beyond direct operations and their primary impacts (2020)')
plt.show()

In [None]:
w4_3a = get_water('W4.3a').pivot(index=['organization', 'row_number'], columns='column_name')['response_value']
# Counts are sorted ascending and the first 51 (smallest) entries are skipped.
# NOTE(review): the offset 51 is hard-coded, so the number of bars depends on the data size.
(
    w4_3a.groupby(['W4.3a_C1Type of opportunity', 'W4.3a_C2Primary water-related opportunity'])['W4.3a_C5Magnitude of potential financial impact']
    .value_counts()
    .sort_values()[51:]
    .unstack()
    .plot(kind='barh', stacked=True, figsize=(12, 7), title='Opportunities identified as potentially having financial impacts on organizations - by magnitude of impact (2020)')
);

In [None]:
# Silence pandas' chained-assignment warning for the rest of the notebook
pd.options.mode.chained_assignment = None

# Short aliases for the three long facility-level volume columns
w = 'W5.1_C9Total water withdrawals at this facility (megaliters/year)'
d = 'W5.1_C17Total water discharges at this facility (megaliters/year)'
c = 'W5.1_C23Total water consumption at this facility (megaliters/year)'

# Keep the location, volume and coordinate columns; drop facilities without coordinates
w5_1 = (
    get_water('W5.1')
    .pivot(index=['organization', 'row_number'], columns='column_name')['response_value'][[
        'W5.1_C3Country/Area & River basin_G', 'W5.1_C3Country/Area & River basin', w, c, d,
        'W5.1_C4Latitude', 'W5.1_C5Longitude'
    ]]
    .dropna(subset=['W5.1_C4Latitude', 'W5.1_C5Longitude'])
)

# Everything after the two river-basin text columns is converted to float32
w5_1 = w5_1.astype({name: 'float32' for name in w5_1.columns[2:]})

In [None]:
# Facilities with positive withdrawals.
# 'withdrawn' is log10(withdrawals) shifted by its minimum so the smallest value becomes 0.
# 'log consumed' is log10(consumption).
# NOTE(review): consumption is not filtered to > 0 here, so log10 may yield -inf/NaN - confirm.
w5_1w = (
    w5_1[w5_1[w] > 0]
    .assign(withdrawn=lambda df: np.log10(df[w]))
    .assign(withdrawn=lambda df: df['withdrawn'] - df['withdrawn'].min())
    .assign(**{'log consumed': lambda df: np.log10(df[c])})
)

px.scatter_geo(
    w5_1w,
    lat='W5.1_C4Latitude',
    lon='W5.1_C5Longitude',
    color='log consumed',
    hover_data=[w, c, 'W5.1_C3Country/Area & River basin_G', 'W5.1_C3Country/Area & River basin'],
    projection="natural earth",
    size='withdrawn',
    size_max=15,
    color_continuous_scale=px.colors.sequential.Viridis,
    title='Water withdrawn (size) and consumed (color) by facility location (2020)',
)

In [None]:
# Facilities with positive discharges.
# 'size' is log10(discharges) shifted by its minimum so the smallest value becomes 0.
w5_1d = (
    w5_1[w5_1[d] > 0]
    .assign(size=lambda df: np.log10(df[d]))
    .assign(size=lambda df: df['size'] - df['size'].min())
)

px.scatter_geo(
    w5_1d,
    lat='W5.1_C4Latitude',
    lon='W5.1_C5Longitude',
    color='size',
    hover_data=[d, 'W5.1_C3Country/Area & River basin_G', 'W5.1_C3Country/Area & River basin'],
    projection="natural earth",
    size='size',
    size_max=15,
    title='Discharge (size and color) of water by facility location (2020)',
)

In [None]:
# Display how often each '% verified' answer occurs
(
    get_water('W5.1a', cols=['country', 'organization'])
    .pivot(index=['country', 'organization', 'row_number'], columns='column_name')['response_value']['W5.1a_C1% verified']
    .value_counts()
)

In [None]:
# 2019 responses to W5.1c, indexed by (country, organization, row_number)
w5_1c = (
    get_water('W5.1c', year=2019, cols=['country', 'organization'])
    .pivot(index=['country', 'organization', 'row_number'], columns='column_name')['response_value']
)
# Count each '% recycled or reused' answer per country; countries become the stacked columns
(
    w5_1c.groupby('country')['W5.1c_C3% recycled or reused']
    .value_counts()
    .unstack(0)
    .plot(kind='barh', stacked=True, figsize=(8, 6), title='Proportion of water recycled/reused at a facility by home country of organization (2019)')
);

In [None]:
w8_1 = (
    get_water('W8.1', cols=['organization', 'country'])
    .pivot(index=['country', 'organization', 'row_number'], columns='column_name')['response_value']
)
# Split the ';'-separated answers into one row each, count them per country,
# keep the 26 largest counts and stack by country
(
    w8_1['W8.1_C1Levels for targets and/or goals']
    .str.split(';')
    .explode()
    .to_frame()
    .groupby('country')['W8.1_C1Levels for targets and/or goals']
    .value_counts()
    .sort_values(ascending=False)[:26]
    .unstack(0)
    .plot(kind='barh', figsize=(8, 6), stacked=True, title='Organization approach to setting and monitoring water targets/goals by home country of organization (2020)')
);

In [None]:
w8_1a = get_water('W8.1a').pivot(index=['organization', 'row_number'], columns='column_name')['response_value']

plt.subplots(figsize=(10, 8))
# Heatmap of the 50 largest (category of target, primary motivation) counts
sns.heatmap(
    w8_1a.groupby('W8.1a_C2Category of target')['W8.1a_C4Primary motivation']
    .value_counts()
    .sort_values(ascending=False)[:50]
    .unstack(),
    annot=True,
);
plt.xticks(rotation=65, horizontalalignment='right')
plt.title('Category of organization water targets by their primary motivation (2020)')
plt.show()

In [None]:
w8_1b = (
    get_water('W8.1b', cols=['country', 'organization'])
    .pivot(index=['country', 'organization', 'row_number'], columns='column_name')['response_value']
)
# Count goals per country, keep the 90 largest counts, then the first 12 goals after unstacking
(
    w8_1b.groupby(['country'])['W8.1b_C1Goal']
    .value_counts()
    .sort_values(ascending=False)[:90]
    .unstack(0)[:12]
    .plot(kind='barh', stacked=True, title='Organization water goals by home country of organization (2020)')
);

In [None]:
# 2019 responses to W9.1a, indexed by (country, organization, row_number)
w9_1a = (
    get_water('W9.1a', cols=['country', 'organization'], year=2019)
    .pivot(index=['country', 'organization', 'row_number'], columns='column_name')['response_value']
)
# Count linkage/tradeoff types, keep the 20 largest counts, stack by 'Linkage or tradeoff'
(
    w9_1a.groupby('W9.1a_C1Linkage or tradeoff')['W9.1a_C2Type of linkage/tradeoff']
    .value_counts()
    .sort_values(ascending=False)[:20]
    .unstack(0)
    .plot(kind='barh', stacked=True, figsize=(10, 7), title='Linkages and tradeoffs related to other environmental issues (2019)')
);