# Analysis of climate change response data and suggestions for cities and corporations


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Every input folder lives under the same Kaggle dataset root; each path keeps
# its trailing slash so that file names can be appended directly.
_input_root = "/kaggle/input/cdp-unlocking-climate-solutions/"
path_cities = _input_root + "Cities/"
path_corp = _input_root + "Corporations/"
path_suppl = _input_root + "Supplementary Data/"
# Output folder (not referenced by the cells visible in this notebook).
path_out = "/kaggle/output/"

# EDA on corporations data(2020)
The latest available climate change data (2020) are used.

In [None]:
# Read the 2020 list of disclosing corporations and the full 2020 climate-change
# responses, printing each frame's column index right after it is loaded.
disclose_cc_csv = path_corp + "Corporations Disclosing/Climate Change/2020_Corporates_Disclosing_to_CDP_Climate_Change.csv"
disclose_cc = pd.read_csv(disclose_cc_csv)
print("columns of disclose_cc: ", disclose_cc.columns)

response_cc_csv = path_corp + "Corporations Responses/Climate Change/2020_Full_Climate_Change_Dataset.csv"
response_cc = pd.read_csv(response_cc_csv)
print("columns of response_cc: ", response_cc.columns)

In [None]:
# Size of the disclosure table and the number of distinct organizations in it.
print("shape of disclose_cc: ", disclose_cc.shape)
organization_names = disclose_cc["organization"]
print("number of organizations: ", organization_names.nunique())
# Preview the first three rows as the cell output.
disclose_cc.head(n=3)

## analyze corporations and their types.
Primary_sector and industries are the best candidates for grouping corporations.\
I chose primary_sector in the end because its categories are relatively evenly populated and describe each corporation's features well.

In [None]:
def count_unique_value(df, columns):
    """Print a dict mapping each name in ``columns`` to its distinct-value count in ``df``.

    Counts come from ``Series.nunique()`` with its default arguments.
    The function only prints; it returns nothing.
    """
    unique_counts = {name: df[name].nunique() for name in columns}
    print(unique_counts)
        
# Cardinality of every candidate grouping column, followed by the frequency
# tables of the two columns considered for grouping.
candidate_columns = ["account_number", "activities", "primary_activity", "sectors", "primary_sector", "industries"]
count_unique_value(disclose_cc, candidate_columns)
primary_sector_counts = disclose_cc["primary_sector"].value_counts()
print(primary_sector_counts)
print("##############################")
industries_counts = disclose_cc["industries"].value_counts()
print(industries_counts)

In [None]:
# Lookup table: organization name -> primary sector.
# If a name occurs more than once, the last occurrence wins.
organization = disclose_cc["organization"].tolist()
primary_sector = disclose_cc["primary_sector"].tolist()
organization_map = {org: sector for org, sector in zip(organization, primary_sector)}

## analyze response data from corporations (climate change)

In [None]:
# Overview of the response table.
print("shape of response_cc: ", response_cc.shape)
print("columns:", response_cc.columns.tolist())
print("number of organizations: ", response_cc["organization"].nunique())
# add column "primary_sector": look each organization name up in organization_map
# (names without an entry in the map become NaN).
sector_of_each_row = response_cc["organization"].map(organization_map)
response_cc["primary_sector"] = sector_of_each_row
response_cc.head(n=3)

Analyze emission data.\
Scope 1 emission data are listed in question C6.1.\
Scope 1 emission:
emissions from operations that are owned or controlled by the
reporting company.

In [None]:
# Keep only the answers to question C6.1.
is_c6_1 = response_cc["question_number"] == "C6.1"
response_c6_1 = response_cc[is_c6_1]
# take Iron Mountain Inc. as an example
is_iron_mountain = response_c6_1["organization"] == "Iron Mountain Inc."
response_c6_1_IronMountain = response_c6_1[is_iron_mountain]
def sort_df_along_row_and_column_number(df):
    """Return ``df`` ordered by ``column_number``, then by ``row_number`` within each column.

    The previous implementation chained two single-key sorts. The second sort
    uses pandas' default (non-stable) algorithm, so it was free to discard the
    row order produced by the first one. Sorting on both keys in a single call
    keeps ``column_number`` as the primary key and makes the ordering of rows
    inside a column deterministic.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``row_number`` and ``column_number`` columns.

    Returns
    -------
    pandas.DataFrame
        A sorted copy; ``df`` itself is left untouched.
    """
    return df.sort_values(["column_number", "row_number"])
def get_scope1_emission(df):
    """Select the rows of ``df`` that hold the gross global Scope 1 emission figure.

    A row is kept when its ``table_columns_unique_reference`` equals the
    reference string of column 1 of question C6.1.
    """
    scope1_reference = "C6.1_c1-Gross global Scope 1 emissions (metric tons CO2e)"
    is_scope1 = df["table_columns_unique_reference"] == scope1_reference
    return df[is_scope1]
# Display the example organization's complete C6.1 answer in sorted order.
print("Iron Mountain Inc.'s response to the question C6.1'")
iron_mountain_c6_1_sorted = sort_df_along_row_and_column_number(response_c6_1_IronMountain)
iron_mountain_c6_1_sorted

In [None]:
# focus on emission figures: keep only the Scope 1 rows of the example, then sort them
iron_mountain_scope1 = get_scope1_emission(response_c6_1_IronMountain)
sort_df_along_row_and_column_number(iron_mountain_scope1)

## emission difference among sectors
### scope1 emission in 2019
Some sectors emit much more gas than others.

In [None]:
# All Scope 1 rows of question C6.1.
response_c6_1_4years = get_scope1_emission(response_c6_1)
# each organization's scope1 emission in 2019 (2019/01/01 - 2019/12/31):
# the "Reporting year" row, with the answer converted to float for plotting
is_reporting_year = response_c6_1_4years["row_name"] == "Reporting year"
response_c6_1_2019 = response_c6_1_4years[is_reporting_year].astype({"response_value": float})

# One wide axes: a box per sector (outliers hidden) with every data point drawn on top.
fig, ax = plt.subplots(figsize=(30, 10))
scope1_plot_args = dict(x="primary_sector", y="response_value", data=response_c6_1_2019, ax=ax)
sns.boxplot(showfliers=False, **scope1_plot_args)
sns.stripplot(jitter=True, color="black", **scope1_plot_args)
plt.xticks(rotation=90)
plt.title("scope1 emission")
plt.show()
# response_c6_1_2019[["primary_sector","response_value"]].groupby("primary_sector").describe()

another look at the sector difference with a pie chart

In [None]:
# Total Scope 1 emission per sector, largest first.
# The value column is selected before summing so the text columns of the
# response table are never aggregated (newer pandas no longer drops them silently).
response_c6_1_2019_sectorsum = (
    response_c6_1_2019
    .groupby("primary_sector")[["response_value"]]
    .sum()
    .sort_values("response_value", ascending=False)
)

# convert 45 small-emission sectors to "others"
n_others = 45
scope1_others = pd.Series([response_c6_1_2019_sectorsum.tail(n_others)["response_value"].sum()],
                   index=response_c6_1_2019_sectorsum.columns, name='others')
# DataFrame.append was removed in pandas 2.0; pd.concat adds the single
# "others" row (the Series turned into a one-row frame) instead.
response_c6_1_2019_sectorsum = pd.concat(
    [response_c6_1_2019_sectorsum.iloc[:-n_others, :], scope1_others.to_frame().T]
)
print("Now the number of sectors is", len(response_c6_1_2019_sectorsum))
total_scope1_emission = round(response_c6_1_2019_sectorsum["response_value"].sum())
print("Total scope1 emission: ", total_scope1_emission, "t")

# plot pie chart
def plot_piechart(pandas_series, title):
    """Draw ``pandas_series`` as a pie chart with the given title and show it.

    The index supplies the wedge labels and the values the wedge sizes.
    Wedges start at 90 degrees, run clockwise, and are annotated with their
    share as a percentage with one decimal place.
    """
    fig, ax = plt.subplots()
    wedge_labels = list(pandas_series.index)
    ax.pie(
        pandas_series.values,
        labels=wedge_labels,
        autopct="%1.1f%%",
        startangle=90,
        counterclock=False,
    )
    ax.set_title(title)
    plt.show()
plot_piechart(response_c6_1_2019_sectorsum["response_value"], "scope1 emission")
# Drop the Scope 1 intermediates now that the chart has been drawn.
del scope1_others, response_c6_1_2019_sectorsum, response_c6_1_2019

Now, you can see which sectors matter.\
Sectors with 5% of share or more : 
* Thermal power generation,
* Energy utility networks,
* Air transport,
* Chemicals,
* Oil & gas extraction & production

### scope2 emission in 2019
Scope 2 emission data are listed in question C6.3.\
Scope 2 emission:\
indirect emissions from the generation of purchased or acquired
electricity, steam, heat or cooling consumed by the reporting
company.

In [None]:
# Keep only the answers to question C6.3.
response_c6_3 = response_cc.loc[response_cc["question_number"] == "C6.3"]
def get_scope2_emission(df):
    """Select the rows of ``df`` that hold the location-based Scope 2 emission figure.

    A row is kept when its ``table_columns_unique_reference`` equals the
    reference string of column 1 of question C6.3.
    """
    scope2_reference = "C6.3_c1-Scope 2, location-based"
    is_scope2 = df["table_columns_unique_reference"] == scope2_reference
    return df[is_scope2]
# All location-based Scope 2 rows of question C6.3.
response_c6_3_4years = get_scope2_emission(response_c6_3)
# each organization's scope2 emission in 2019 (2019/01/01 - 2019/12/31):
# the "Reporting year" row, with the answer converted to float for plotting
is_reporting_year = response_c6_3_4years["row_name"] == "Reporting year"
response_c6_3_2019 = response_c6_3_4years[is_reporting_year].astype({"response_value": float})

# One wide axes: a box per sector (outliers hidden) with every data point drawn on top.
fig, ax = plt.subplots(figsize=(30, 10))
scope2_plot_args = dict(x="primary_sector", y="response_value", data=response_c6_3_2019, ax=ax)
sns.boxplot(showfliers=False, **scope2_plot_args)
sns.stripplot(jitter=True, color="black", **scope2_plot_args)
plt.xticks(rotation=90)
plt.title("scope2 emission")
plt.show()

another look at the sector difference with a pie chart


In [None]:
# Total Scope 2 emission per sector, largest first.
# The value column is selected before summing so the text columns of the
# response table are never aggregated (newer pandas no longer drops them silently).
response_c6_3_2019_sectorsum = (
    response_c6_3_2019
    .groupby("primary_sector")[["response_value"]]
    .sum()
    .sort_values("response_value", ascending=False)
)

# convert 45 small-emission sectors to "others"
n_others = 45
scope2_others = pd.Series([response_c6_3_2019_sectorsum.tail(n_others)["response_value"].sum()],
                   index=response_c6_3_2019_sectorsum.columns, name='others')

# DataFrame.append was removed in pandas 2.0; pd.concat adds the single
# "others" row (the Series turned into a one-row frame) instead.
response_c6_3_2019_sectorsum = pd.concat(
    [response_c6_3_2019_sectorsum.iloc[:-n_others, :], scope2_others.to_frame().T]
)
print("Now the number of sectors is", len(response_c6_3_2019_sectorsum))
total_scope2_emission = round(response_c6_3_2019_sectorsum["response_value"].sum())
print("Total scope2 emission: ", total_scope2_emission, "t")
# Put the Scope 2 total in proportion to the Scope 1 total computed earlier.
print("The figure is ", round(total_scope2_emission*100/total_scope1_emission,1), "% of total scope1 emission")

plot_piechart(response_c6_3_2019_sectorsum["response_value"], "scope2 emission")
# Drop the Scope 2 intermediates and the corporate response table itself.
del scope2_others
del response_c6_3_2019_sectorsum
del response_c6_3_2019
del response_cc
del response_cc

Except for the chemicals sector, the scope2 indirect emission is relatively evenly distributed, which means that the scope1 direct emission of a few top sectors is shared fairly evenly by the other sectors that consume their energy.

Here I propose two approaches for reducing GHG emission.
1. approach to energy generators
1. approach to energy consumers.\
(In this notebook I am going to focus on corporate consumers.)

# EDA on cities data(2020)
analyze cities data to consider the two approaches above.

In [None]:
# Load the full 2020 cities response table and print a short overview of it.
city_response_csv = path_cities + "Cities Responses/2020_Full_Cities_Dataset.csv"
city_response = pd.read_csv(city_response_csv)
print("shape of city_2020:", city_response.shape)
print("columns:", city_response.columns.tolist())
print("number of organizations: ", city_response["Organization"].nunique())
print("number of countries: ", city_response["Country"].nunique())
# use the same column names as that used in the dataframe response_cc
shared_column_names = {'Row Number': 'row_number', 'Column Number': 'column_number'}
city_response = city_response.rename(columns=shared_column_names)
city_response.head(n=3)

1. approach to energy generators\
analyze question 8 (Energy section)

In [None]:
# each organization's answer to question 8.0
question_number = "8.0"
city_8_0_rows = city_response[city_response["Question Number"] == question_number]
# Print the question text taken from the first matching row.
print(f"question {question_number}:", city_8_0_rows.head(1)["Question Name"].values)
# Frequency of every answer, with missing answers counted as their own category.
city_8_0 = city_8_0_rows["Response Answer"].value_counts(dropna=False)
plot_piechart(city_8_0, "resoponse to question 8.0")

You can see that many cities regard renewable energy as an important matter.

In [None]:
question_number = "8.0a"
city_8_0_a = city_response[city_response["Question Number"]== question_number]
print(f"question {question_number}:", city_8_0_a.head(1)["Question Name"].values)
# Answer frequencies of the "Base year" and "Target year" columns
# (missing answers are counted as their own category).
city_8_0_a_base = city_8_0_a[city_8_0_a["Column Name"] == "Base year"]["Response Answer"].value_counts(dropna=False)
city_8_0_a_target = city_8_0_a[city_8_0_a["Column Name"] == "Target year"]["Response Answer"].value_counts(dropna=False)
print("Total answers concerning base/target year:", city_8_0_a_base.sum())
print("Responses from", city_8_0[city_8_0.index=="Yes"].values, "organizations")
# Fold the 15 least frequent answers of each series into a single "others" entry.
# value_counts sorts by descending frequency, so those are the last 15 entries.
n_others = 15
others_8_0_a_base = pd.Series([city_8_0_a_base.tail(n_others).sum()],
                   index=['others'], name=city_8_0_a_base.name)
# Series.append was removed in pandas 2.0; use pd.concat, and slice by position
# explicitly with .iloc so the index labels can never be mistaken for positions.
city_8_0_a_base = pd.concat([city_8_0_a_base.iloc[:-n_others], others_8_0_a_base])
others_8_0_a_target = pd.Series([city_8_0_a_target.tail(n_others).sum()],
                   index=['others'], name=city_8_0_a_target.name)
city_8_0_a_target = pd.concat([city_8_0_a_target.iloc[:-n_others], others_8_0_a_target])
titles = ["base year", "target year"]
series = [city_8_0_a_base, city_8_0_a_target]
# Two pies side by side: base year on the left, target year on the right.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
for ax, pie_title, answer_counts in zip(axes, titles, series):
    ax.set_title(pie_title)
    ax.pie(answer_counts.values,
           labels=list(answer_counts.index),
           autopct="%1.1f%%",
           startangle=90,
           counterclock=False)
plt.show()

These graphs suggest that base years vary more among organizations than target years, which makes it difficult to compare the targets that organizations set.\
Half of the responses were "Question not applicable".

I think the difficulty of comparing targets with those of others slows down cities' actions on climate change. We may need to make as great an effort on climate change as corporations do to win their markets. Some system of competition would change our attitude to climate change.

2. approach to energy consumers\
analyze question 8.5 (Energy section)(includes approach 1)

In [None]:
# each organization's answer to question 8.5
question_number = "8.5"
city_8_5_rows = city_response[city_response["Question Number"] == question_number]
# Print the question text taken from the first matching row.
print(f"question {question_number}:", city_8_5_rows.head(1)["Question Name"].values)
# Frequency of every answer, with missing answers counted as their own category.
city_8_5 = city_8_5_rows["Response Answer"].value_counts(dropna=False)
plot_piechart(city_8_5, "response to question 8.5")

Only 44% of respondents said "Yes" or "In progress".

Cities have to take action to improve energy efficiency more quickly. The figure tells you that discussions on efficiency have been overlooked.

analyze target sectors concerning energy efficiency in detail

In [None]:
question_number = "8.5a"
print(f"question {question_number}:", city_response[city_response["Question Number"]== question_number].head(1)["Question Name"].values)
city_8_5_a = city_response[city_response["Question Number"]== question_number]
# Keep only the column that states which energy sector(s) the target applies to,
# then count how often each answer was given (missing answers included).
city_8_5_a = city_8_5_a[city_8_5_a["Column Name"]=="Please indicate to which energy sector(s) the target applies (Multiple choice)"]
city_8_5_a = city_8_5_a["Response Answer"].value_counts(dropna=False)

# Fold the 25 least frequent answers into a single "others" entry.
# value_counts sorts by descending frequency, so those are the last 25 entries.
n_others = 25
others_8_5_a = pd.Series([city_8_5_a.tail(n_others).sum()],
                   index=['others'], name=city_8_5_a.name)
# Series.append was removed in pandas 2.0; use pd.concat, and slice by position
# explicitly with .iloc.
city_8_5_a = pd.concat([city_8_5_a.iloc[:-n_others], others_8_5_a])
print("Total answers:", city_8_5_a.sum())
print("from", city_8_5[city_8_5.index=="Yes"].values, "organizations")
plot_piechart(city_8_5_a, "response to question 8.5a")

* "Question not applicable" made up about 40% 
* lack of variety

I would recommend that cities get more sectors engaged in their efficiency targets. Since all sectors are indirect emitters of greenhouse gas, as shown in the graph of scope2 emission, they should all contribute to some target. Cities can accelerate corporate action by setting a tax, or help new investment in utilities that contribute to efficiency by loosening restrictions.

## Conclusion
In conclusion, I would suggest the following KPIs for cities and corporations:
* Ease of comparing an organization's disclosed figures with those of other organizations (whether it uses the same baseline and the same protocol)
* Variety of sectors, stakeholders and people an organization gets involved in its mitigation action on climate change
* Eagerness of an organization to improve its energy efficiency, regardless of its sector

There is no discussion of emission verification in this notebook, since verification data were removed from the corporations response dataset. However, we have to check whether verification processes work in order to understand precisely the current emission of greenhouse gas and its impact, which will lead to better action on climate change.