<h3>The purpose of this visualisation is to capture how metrics move between years. Below we showcase the average CO2 emissions and the average MWh consumption for a selected number of industries for 2019 and 2020.</h3>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pandas_profiling import ProfileReport # quick EDA
import matplotlib.pyplot as plt # plotting
import seaborn as sns # plotting

import matplotlib as mpl # for plotting
from matplotlib import cm # for colours
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import matplotlib.image as mpimg
import matplotlib.ticker as ticker

# Notebook display settings: lift pandas' limits on how many columns are shown
# and on how many characters of each cell are shown.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)  # None = no limit

# Registry of the CDP corporate CSV paths.
# hierarchy: ('2020', '2019', '2018') -> ('climate', 'water') -> ('data', 'dictionary', 'corp_meta')
# 3 X 2 X 3 : 18 entries; only 'data' and 'corp_meta' paths vary with the year.
_CORP_ROOT = '../input/cdp-unlocking-climate-solutions/Corporations'
_RESPONSES_DIR = _CORP_ROOT + '/Corporations Responses'
_DISCLOSING_DIR = _CORP_ROOT + '/Corporations Disclosing'

# Insertion order of the inner keys matters: the loading code unpacks .values()
# positionally as (data, dictionary, corp_meta).
corp_dict = {
    year: {
        'climate': {
            'data': f'{_RESPONSES_DIR}/Climate Change/{year}_Full_Climate_Change_Dataset.csv',
            'dictionary': f'{_RESPONSES_DIR}/Climate Change/Full_Corporations_Response_Data_Dictionary copy.csv',
            'corp_meta': f'{_DISCLOSING_DIR}/Climate Change/{year}_Corporates_Disclosing_to_CDP_Climate_Change.csv',
        },
        'water': {
            'data': f'{_RESPONSES_DIR}/Water Security/{year}_Full_Water_Security_Dataset.csv',
            # Unlike the climate entry, the water dictionary path points into "Corporations Disclosing".
            'dictionary': f'{_DISCLOSING_DIR}/Water Security/Corporations_Disclosing_to_CDP_Data_Dictionary.csv',
            'corp_meta': f'{_DISCLOSING_DIR}/Water Security/{year}_Corporates_Disclosing_to_CDP_Water_Security.csv',
        },
    }
    for year in ('2020', '2019', '2018')
}

# Read Data: load every CSV registered under a year's 'climate' entry and unpack
# the frames in the entry's key order (data, dictionary, corp_meta).
df_climate_2020, df_climate_data_dict, df_climate_corp_meta_2020 = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in corp_dict['2020']['climate'].values()
)
# df_climate_data_dict is assigned again here, replacing the frame loaded above.
df_climate_2019, df_climate_data_dict, df_climate_corp_meta_2019 = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in corp_dict['2019']['climate'].values()
)

# Stack the 2020 responses on top of the 2019 ones (each frame keeps its own row labels).
df_climate = pd.concat([df_climate_2020, df_climate_2019])

# Candidate questions: the exact `data_point_name` values used to filter the responses.
# NOTE(review): the "â€™" sequence looks like a mis-decoded apostrophe; it is kept
# verbatim because these strings must match the dataset's text exactly -- confirm
# against the CSV.

# Gross global Scope 1 emissions.
q1 = (
    'C6.1_C1_What were your organizationâ€™s gross global Scope 1 emissions in metric tons CO2e?'
    ' - Gross global Scope 1 emissions (metric tons CO2e)'
)

# Scope 1 breakdown by greenhouse gas -- candidates that are currently not used.
# q2_1 = 'C7.1a_C1_Break down your total gross global Scope 1 emissions by greenhouse gas type and provide the source of each used greenhouse warming potential (GWP). - Greenhouse gas'
# q2_2 = 'C7.1a_C2_Break down your total gross global Scope 1 emissions by greenhouse gas type and provide the source of each used greenhouse warming potential (GWP). - Scope 1 emissions (metric tons of CO2e)'

# Energy consumption in MWh from non-renewable sources.
q3 = (
    'C8.2a_C3_Report your organizationâ€™s energy consumption totals (excluding feedstocks) in MWh.'
    ' - MWh from non-renewable sources'
)


# Narrow the responses down to the two questions of interest, keeping only rows
# whose row_number is 1 and only the columns used by the later steps.
wanted_columns = ['account_number', 'organization', 'survey_year', 'data_point_name', 'response_value']
is_wanted_question = df_climate['data_point_name'].isin([q1, q3])
is_first_row = df_climate['row_number'] == 1

df_climate_reduced = (
    df_climate
    .loc[is_wanted_question & is_first_row, wanted_columns]
    .sort_values(['account_number', 'survey_year', 'data_point_name'])
    .copy()
)


# Reshape to one row per (company, survey year) with one column per question.
df_climate_reduced_dropna = (
    df_climate_reduced
    .set_index(['account_number', 'organization', 'data_point_name', 'survey_year'])
    .unstack(['survey_year'])     # years side by side as columns
    .dropna()                     # drop company/question rows that lack a value for any year
    .stack()                      # move the years back into the row index
    .unstack('data_point_name')   # questions side by side as columns
    .reset_index()
)
# Positional rename: the two question columns are labelled in the order they come
# out of the unstack above (q1 -> emissions, q3 -> consumption).
df_climate_reduced_dropna.columns = ['account_number', 'organization', 'survey_year', 'emissions', 'consumption']

# Attach each company's industry classification from the 2020 disclosure metadata.
# Inner join: companies without a matching account_number in that metadata are dropped.
industry_columns = ['account_number', 'primary_industry', 'primary_questionnaire_sector']
df_climate_reduced_merge = df_climate_reduced_dropna.merge(
    df_climate_corp_meta_2020[industry_columns],
    on='account_number',
    how='inner',
)

# Make sure both metrics are numeric before they are averaged.
for metric in ('emissions', 'consumption'):
    df_climate_reduced_merge[metric] = pd.to_numeric(df_climate_reduced_merge[metric])

# Mean of each metric per industry and survey year; industry/year groups where
# either mean is missing are discarded.
df_climate_reduced_merge_industry = (
    df_climate_reduced_merge
    .groupby(['primary_industry', 'survey_year'])[['emissions', 'consumption']]
    .mean()
    .dropna()
    .reset_index()
)

# Colour (hex) used for each plotted industry's marker and arrow.
colours = {
    'Power generation': '#4868E4',
    'Transportation services': '#F39D2A',
    'Infrastructure': '#290D30',
    'Fossil Fuels': '#22A166',
    'Materials': '#A4162C',
    'Food, beverage & agriculture': '#1E3EFE',
}

<h2> This visualisation portrays Energy consumption and Emissions averages for 2020 and 2019 by industry.</h2>

Lower CO2 emissions are shown across the board for 2020. In the case of Power Generation and Infrastructure, however, a clear reduction in energy consumed in 2020 is displayed with respect to 2019 (possibly linked to Covid lock-down rules), meaning that the lower emissions are not necessarily linked to sources that are more carbon efficient.

Transportation services and Food, beverage & agriculture, by contrast, show stable levels of energy consumption alongside lower carbon emissions, suggesting a switch to "greener" energy sources. Materials shows an even more promising trend: more energy was consumed in 2020, but fewer emissions were generated.

In [None]:
# Draw one arrow per industry, from its earliest-year averages to its
# latest-year averages (x = mean emissions, y = mean consumption).
fig, ax = plt.subplots(figsize=(16, 9))

# Industries to draw, in drawing order.
plotted_industries = ['Power generation', 'Transportation services', 'Infrastructure',
                      'Fossil Fuels', 'Materials', 'Food, beverage & agriculture']

# Trailing newlines appended to some labels. With va='center' they shift the
# visible text upwards -- presumably to keep it clear of the arrow tail.
label_padding = {'Transportation services': '\n\n', 'Fossil Fuels': '\n\n',
                 'Materials': '\n\n', 'Food, beverage & agriculture': '\n'}

for industry in plotted_industries:
    # Order this industry's rows by year explicitly rather than relying on the
    # row order of the aggregated frame.
    industry_rows = (
        df_climate_reduced_merge_industry
        .loc[df_climate_reduced_merge_industry['primary_industry'] == industry]
        .sort_values('survey_year')
    )
    if len(industry_rows) < 2:
        # An arrow needs both a start and an end year. Unpacking a one-row (or
        # empty) selection used to fail with an opaque ValueError.
        print(f'Skipping {industry!r}: averages are not available for both years')
        continue

    points = industry_rows[['emissions', 'consumption']].to_numpy()
    (x, y), (x_end, y_end) = points[0], points[-1]

    # Annotations do not take part in autoscaling, so a tiny white marker is
    # placed at the arrow tail to keep the tail inside the axes limits.
    ax.scatter(x=x, y=y, c='w', s=1)
    ax.scatter(x=x_end, y=y_end, c=colours[industry], s=200)

    # Label sits at the tail (xytext); the arrow points to the latest year (xy).
    ax.annotate(industry + label_padding.get(industry, ''), size=14,
                xy=(x_end, y_end), xytext=(x, y),
                arrowprops={'arrowstyle': '-|> , head_width=0.5', 'lw': 2.5, 'color': colours[industry]},
                va='center')

ax.set_ylabel('Average Consumption in MWh', size=15, fontweight='bold')
ax.set_xlabel('Average Emissions in CO2', size=15, fontweight='bold')
ax.set_title('Industry movement from 2019 to 2020', size=20)

ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)

# Anchor both axes at zero; the upper limits stay automatic.
ax.set_xlim(left=0)
ax.set_ylim(bottom=0)

# ax.set_xlim(xmax=1_000_000)
# ax.set_ylim(ymax=5_000_000)

# Engineering notation (k, M, ...) keeps the large tick values readable.
ax.xaxis.set_major_formatter(ticker.EngFormatter())
ax.yaxis.set_major_formatter(ticker.EngFormatter())

plt.show()