In [None]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

import  seaborn as sns
import matplotlib.pyplot as plt
import plotly_express as px

from wordcloud import WordCloud

from nltk.corpus import stopwords
# Combined English + Spanish stop-word list; used further down to strip filler
# words from free-text answers and passed to WordCloud as its stop-word set.
stop = stopwords.words('english') + stopwords.words('spanish') 

# Input folders for the CDP cities and corporations data, relative to the notebook.
PATH = '../input/cdp-unlocking-climate-solutions/Cities/'
PATH2 = '../input/cdp-unlocking-climate-solutions/Corporations/'

# Data Wrangling

Start by loading the data for cities, and merging the response and disclosing dataframes.

In [None]:
# Disclosing data: one CSV per reporting year, stacked newest-first.
disclosing_dir = PATH + "Cities Disclosing/"
cd2020 = pd.read_csv(disclosing_dir + "2020_Cities_Disclosing_to_CDP.csv")
cd2019 = pd.read_csv(disclosing_dir + "2019_Cities_Disclosing_to_CDP.csv")
cd2018 = pd.read_csv(disclosing_dir + "2018_Cities_Disclosing_to_CDP.csv")
cities_dis = pd.concat([cd2020, cd2019, cd2018])

# Response data: same layout, one CSV per reporting year.
responses_dir = PATH + "Cities Responses/"
cr2020 = pd.read_csv(responses_dir + "2020_Full_Cities_Dataset.csv")
cr2019 = pd.read_csv(responses_dir + "2019_Full_Cities_Dataset.csv")
cr2018 = pd.read_csv(responses_dir + "2018_Full_Cities_Dataset.csv")
cities_res = pd.concat([cr2020, cr2019, cr2018])

# Display the column labels of both frames to see which keys they share.
cities_dis.columns, cities_res.columns

Next we join these two dataframes to create a single dataframe `cities` for our queries. Since we're only concerned about survey responses, we perform an inner join on the basis of `Year Reported to CDP` and `Account Number`. The remaining columns used in join depend on `Account Number` and hence do not change for a given row.

The counts of unique cities which responded to each year's questionnaire are given below.

In [None]:
# Keys shared by the disclosing and response frames.
join_keys = ['Year Reported to CDP', 'Account Number', 'Organization', 'Country', 'CDP Region']

# Inner join, then discard the columns the analysis does not need.
cities = (
    pd.merge(cities_dis, cities_res, how='inner', on=join_keys)
    .drop(columns=['Access', 'File Name', 'Last update_x', 'Last update_y'])
)

# Distinct responding accounts per reporting year.
cities.groupby(['Year Reported to CDP'])['Account Number'].nunique()

In [None]:
# One row per distinct (country, city, organization, account) combination, ordered by country.
city_identity_cols = ['Country', 'City', 'Organization', 'Account Number']
cities[city_identity_cols].drop_duplicates().sort_values(by=['Country'])

In [None]:
# City column may have NaN, so Account Number serves as the unique identifier.
unique_accounts = cities[['CDP Region', 'Country', 'Account Number']].drop_duplicates()

# Accounts per (region, country) pair; only the 40 largest pairs are plotted.
location_counts = unique_accounts[['CDP Region', 'Country']].value_counts().sort_values(ascending=False)[:40]

location_counts.unstack(0).plot(
    kind='barh',
    stacked=True,
    cmap='tab20',
    figsize=(12, 8),
    title='No. of cities contributing to the surveys - by country and region',
);

In [None]:
# Helper to query city data from the combined `cities` dataframe.
# By default the 2020 responses are returned.

def get_cities(q, by='Row Number', years=(2020,), cols=('City', 'Account Number')):
    """Return the usable answers to one questionnaire question.

    Rows are read from the module-level ``cities`` dataframe and kept when
    their 'Question Number' equals ``q``, their 'Year Reported to CDP' is in
    ``years``, and their 'Response Answer' is neither missing nor the
    literal 'Question not applicable'.

    Parameters
    ----------
    q : str
        Question number to select, e.g. '2.1'.
    by : str
        Extra column returned alongside the answers (the row identifier
        used by callers when pivoting), e.g. 'Row Number' or 'Row Name'.
    years : iterable of int
        Reporting years to include.
    cols : iterable of str
        Identifying columns to return ahead of ``by``.

    Returns
    -------
    pandas.DataFrame
        Columns ``cols + [by, 'Column Name', 'Response Answer']``, keeping
        the original index labels of ``cities``.

    Side effects
    ------------
    When at least one row matches, prints the question number, the
    question name and the number of matching rows.
    """
    # Build the row mask once; the missing-answer check replaces a later dropna().
    selected = (
        (cities['Question Number'] == q)
        & cities['Year Reported to CDP'].isin(list(years))
        & (cities['Response Answer'] != 'Question not applicable')
        & cities['Response Answer'].notna()
    )
    answers = cities.loc[selected, list(cols) + [by, 'Column Name', 'Response Answer']]

    n_rows = answers.shape[0]
    if n_rows > 0:
        # Take the name from the selected rows so it matches the requested
        # years (question numbering is not guaranteed stable across years).
        question_name = cities.loc[selected, 'Question Name'].iloc[0]
        print(q, question_name, f'({n_rows})')
    return answers

# Cities

The 2020 Cities Questionnaire presents the following parent sections for the questions asked, with respect to which one may begin discussing the environmental and social issues for cities. (The numbering for the sections is as provided in the questionnaire.)

* 1 Governance
* 2 Climate Hazards and Vulnerability
* 3 Adaptation
* 4 City-wide Emissions
* 5 Emissions Reduction
* 6 Opportunities
* 7 Energy
* 10 Transport
* 12 Food
* 13 Waste    
* 14 Water Security

To begin, once we've built our `cities` dataframe (see Data Wrangling), we select a set of key questions from the 2020 Questionnaire which might aid in the identification of KPIs. These are listed together as follows- 

In [None]:
# Key questions from the 2020 questionnaire, one line per section
# ('4.8' is currently left out).
qs = [
    '1.0a',
    '2.0b', '2.1', '2.2', '2.3a',
    '3.0', '3.2a', '3.3',
    '4.4', '4.6a', '4.9', '4.15',
    '5.0a', '5.0b', '5.0c', '5.1', '5.3a', '5.4', '5.5',
    '6.0', '6.2a',
    '8.0a', '8.1', '8.4', '8.5a',
    '9.1',
    '10.1', '10.2', '10.4', '10.7a',
    '12.0', '12.1', '12.3', '12.4', '12.5',
    '13.0',
    '14.1', '14.2a', '14.3',
]

# Called only for the summary line get_cities prints; the returned frames are discarded.
for q in qs:
    get_cities(q)

Let us now start to analyse the broad goals and targets cities have set themselves by looking at their response to Question `1.0a`.   
Please zoom in if the text on the visualizations is too small to read. Enjoy!

Grouping together the main areas cities have set themselves targets for in their master plan shows a primary focus on waste management and emission reduction, with cities showing a massive increase in their waste management goals from 2019 to 2020. The remaining targets see much smaller year-on-year changes.

In [None]:
# Answers to 1.0a for 2019 and 2020, reshaped to one row per (year, city, account, row)
# with one column per 'Column Name'.
c1a_answers = get_cities('1.0a', years=[2019, 2020], cols=['Year Reported to CDP', 'City', 'Account Number'])
c1a = c1a_answers.pivot(
    index=['Year Reported to CDP', 'City', 'Account Number', 'Row Number'],
    columns=['Column Name'],
)['Response Answer']

# The 12 largest (year, goal type) counts, drawn as one bar group per goal type.
goal_type_counts = c1a.groupby('Year Reported to CDP')['Goal type'].value_counts().sort_values(ascending=False)[:12]
goal_type_counts.unstack(0).plot(
    kind='barh',
    title='Main target areas for cities (2019 and 2020)',
    cmap='tab20',
    figsize=(10, 6),
);

And how are these targets, as per the cities, going to be addressed? 

Analysing the responses on methods of implementation with the following word cloud provides some insight on this. A lot of emphasis seems to have been placed on good management and planning. In addition, we can also infer that a number of plans focus on the development and building of new infrastructure and facilities, such as those related to renewable energy.

In [None]:
# Free-text answers, lower-cased, with stop words and tokens of 3 characters or fewer removed.
raw_answers = c1a['How are these goals/targets addressed in the city master plan?'].astype(str)
answers = raw_answers.apply(
    lambda text: ' '.join(word for word in text.lower().split() if len(word) > 3 and word not in stop)
)

wordcloud = WordCloud(
    stopwords=stop,
    background_color='white',
    width=1600,
    height=800,
).generate(' '.join(answers))

fig, ax = plt.subplots(figsize=(16, 8))
ax.imshow(wordcloud)
ax.set_title('Word Cloud: Methods to address city goals')
ax.set_axis_off();

Next we try to get an idea of the impact of climate change and climate hazards on different sectors, and how these differ from country to country. 

Water and sanitation come out to be the most at risk, which is in line with the predictions that a majority of the nations of the world will be facing serious risk to water supply due to the fast diminishing sources of freshwater. The data also tells us that sanitation continues to be one of the most important requirements in developing nations for the minimization of water-borne diseases, many of which are responsible for extreme loss of life, especially in the case of infants.

In [None]:
c2b = get_cities('2.0b', cols=['Country'])

# Keep only the "sectors covered" answers, then count them per (sector, country).
sector_rows = c2b[c2b['Column Name'] == 'Areas/sectors covered by the risk and vulnerability assessment']
sector_country_counts = (
    sector_rows.groupby(['Response Answer'])['Country']
    .value_counts()
    .sort_values(ascending=False)[:220]
    .unstack()
)

sector_ax = sector_country_counts.plot(
    kind='barh',
    stacked=True,
    figsize=(12, 9),
    cmap='tab20',
    title='Sectors vulnerable to climate hazards - by country (2020)',
)
sector_ax.legend(bbox_to_anchor=(1,1));

The following visualization shows the kinds of services that will be most impacted by the various kinds of climate hazards. 

Wild fires, heat waves, sea level rise, and extreme precipitation events, such as storms, feature prominently in the reported data. This is not at all surprising given that various parts of the world have already been seeing a very significant increase in events of these types in the last decade. These include but are not limited to the heat waves across Europe, the wildfires in Australia, California, and the Amazon, and gradual increase in the strength and frequency of storms in the United States.

Moreover, water scarcity continues to affect a number of locations across the planet, and it's nice to see these kinds of climate related threats have started to register amongst the top priorities for cities.

In [None]:
c2_1 = get_cities('2.1')

hazard_columns = [
    'Climate Hazards',
    'Please identify which vulnerable populations are affected',
    'Most relevant assets / services affected overall',
    'Social impact of hazard overall'
]

# A (city, account, row, column) cell can hold several answers; they are joined
# with '^' here and split apart again before counting.
# piv2_1 is a groupby object keyed on the hazard, reused by the following cells.
piv2_1 = (
    c2_1[c2_1['Column Name'].isin(hazard_columns)]
    .pivot_table(index=['City', 'Account Number', 'Row Number'], columns='Column Name', aggfunc=lambda x: '^'.join(x))
    ['Response Answer']
    .groupby('Climate Hazards')
)

assets_col = 'Most relevant assets / services affected overall'
gg = (
    piv2_1[assets_col]
    .apply(lambda x: x.str.split('^').explode())
    .reset_index()
    .groupby('Climate Hazards')[assets_col]
    .value_counts()
    .unstack()
)

# Leave out the free-text "Other, ..." categories.
cols = [col for col in gg.columns if 'Other,' not in col]
assets_ax = gg[cols].plot(
    kind='barh',
    width=0.9,
    logx=True,
    figsize=(10, 20),
    cmap='tab20',
    title='Climate hazards by the areas most affected (2020)',
)
assets_ax.legend(bbox_to_anchor=(0.5, -0.04));

Across all kinds of climate hazards it is clearly seen that the poor and elderly are amongst the most adversely affected. 

This includes everyone from those living in low income or sub-standard housing, to marginalized groups which generally make up a large proportion of poor populations as well as those suffering from chronic diseases and health issues, which is again a demographic that skews towards the disenfranchised and poor.

In [None]:
# Same counting as for assets/services, applied to the vulnerable-populations column.
populations_col = 'Please identify which vulnerable populations are affected'
gg = (
    piv2_1[populations_col]
    .apply(lambda x: x.str.split('^').explode())
    .reset_index()
    .groupby('Climate Hazards')[populations_col]
    .value_counts()
    .unstack()
)

# Leave out the free-text "Other, ..." categories.
cols = [col for col in gg.columns if 'Other,' not in col]
populations_ax = gg[cols].plot(
    kind='barh',
    cmap='tab20',
    logx=True,
    width=0.9,
    figsize=(10,18),
    title='Climate hazards by populations affected (2020)',
)
populations_ax.legend(bbox_to_anchor=(0.4, -0.05));

The next visualization gives us an idea of the kind of impact we are likely to see in the world if the goals of climate action and adaptation are not met. 

With the population of the world set to grow up to 10 billion by the year 2050, we can see that there will be an immense shortage of and therefore a huge increase in demand for resources and public services. These two areas, along with a further increase in risk to already vulnerable populations make up the top social impacts across the entire set of climate hazards.

In [None]:
# Same counting again, this time for the social-impact column.
impact_col = 'Social impact of hazard overall'
gg = (
    piv2_1[impact_col]
    .apply(lambda x: x.str.split('^').explode())
    .reset_index()
    .groupby('Climate Hazards')[impact_col]
    .value_counts()
    .unstack()
)

# Leave out the free-text "Other, ..." categories.
cols = [col for col in gg.columns if 'Other,' not in col]
impact_ax = gg[cols].plot(
    kind='barh',
    width=0.9,
    logx=True,
    figsize=(10, 18),
    cmap='tab20',
    title='Climate hazards by social impact (2020)',
)
impact_ax.legend(bbox_to_anchor=(0.5, -0.05));

The ability to adapt to climate change is an important factor to ensure the thriving of cities. Here we look at the factors most affecting adaptation ability across the globe, and whether they contribute positively or negatively to it.

Cities seem to continue to think that throwing money at an issue solves the problem, and the primary challenge to adapting is budgetary constraints. Now this may very well depend on the exact situation on ground, but it does give us a look into the direction of thinking of policy makers and city administrators. Other factors that support adaptability are community and political engagement, access to education, stability, and access to key resources such as data and basic services.

In [None]:
c2_2 = get_cities('2.2')

# One row per answer row, one column per 'Column Name'.
piv2_2 = c2_2.pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']

# The 53 largest (factor, supports/challenges) counts.
factor_effect_counts = (
    piv2_2.groupby('Factors that affect ability to adapt')['Indicate if this factor either supports or challenges the ability to adapt']
    .value_counts()
    .sort_values(ascending=False)[:53]
    .unstack()
)

factor_effect_counts.plot(
    kind='barh',
    cmap='tab20',
    stacked=True,
    figsize=(12, 8),
    title='Factors affecting adaptation ability (2020)',
);

As has become well known in the last few years, as cities such as Beijing and New Delhi continue to be choked with toxic air primarily consisting of harmful PM 2.5 particulate matter, air pollution remains one of the leading issues affecting cities and the health of people living within them. It is estimated that over 4.2 million people lose their lives each year due to air pollution related health complications, such as chronic respiratory diseases, lung cancer, stroke, etc.

In [None]:
c2_3a = get_cities('2.3a')

# The 14 most frequently reported health issues.
is_health_issue = c2_3a['Column Name'].isin([
    'Identify the climate-related health issues faced by your city',
])
health_issue_counts = c2_3a[is_health_issue]['Response Answer'].value_counts()[:14]
health_issue_counts.plot(
    kind='barh',
    stacked=True,
    figsize=(12, 7),
    title='Climate related health issues faced by city (2020)',
);

Children, the elderly, and the poor are most affected by climate related health issues owing to the fact that they comprise the most vulnerable populations in any given demographic in any part of the world.

In [None]:
# The 12 most frequently reported vulnerable populations for question 2.3a.
is_vulnerable_population = c2_3a['Column Name'].isin([
    'Please identify which vulnerable populations are affected by these climate-related impacts'
])
vulnerable_population_counts = c2_3a[is_vulnerable_population]['Response Answer'].value_counts()[:12]
vulnerable_population_counts.plot(
    kind='barh',
    stacked=True,
    figsize=(12, 6),
    title='Populations vulnerable to Climate related health impacts (2020)',
);

Next let's take a look at the actions being taken to mitigate climate risks. 

The following heatmap shows the climate hazards against the actions being implemented/planned by cities to mitigate them. Flood mapping, tree planting, and long term planning stand out as the primary mitigative actions - owing to the increasing concerns due to rising sea levels and overflooding, rising greenhouse effects, and the need for effective planning much in advance of actually facing the climate issues. 

It is essential that we as a planet don't ignore these threats and come up with well thought out plans for the future of humanity in order to prevent the worst case scenarios of climate change.

In [None]:
c3 = get_cities('3.0')

# The 271 largest (action, hazard) pair counts, laid out as an action x hazard matrix.
hazard_action_counts = (
    c3[c3['Column Name'].isin(['Climate hazards', 'Action'])]
    .pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']
    .groupby('Action')['Climate hazards']
    .value_counts()
    .sort_values(ascending=False)[:271]
    .unstack()
)

fig, ax = plt.subplots(figsize=(16,10))
sns.heatmap(hazard_action_counts, cmap="YlOrRd", annot=True, ax=ax)

plt.xticks(rotation=65, horizontalalignment='right')
plt.title('Climate hazards and mitigatory actions taken by cities (2020)')
plt.show()

The same plans visualized above require resources in order to be implemented. Going in hand with that is the urgent need for the development of infrastructure, not just to counteract increasing greenhouse effects, but also to provide safety and shelter to the vulnerable and affected populations, as well as developing the required tools to scientifically study the environment and come up with methods most suited to reverse its causes and prevent its worst impacts. 

To do this, it is even more essential to raise awareness amongst people (through education or awareness raising campaigns) towards the various climate related issues and garner support for the policies required to change our outdated methods of functioning on Earth.

In [None]:
# The 12 most common means of implementation reported under question 3.0.
means_rows = c3[c3['Column Name'] == 'Means of implementation']
means_rows['Response Answer'].value_counts()[:12].plot(
    kind='barh',
    figsize=(12, 7),
    title='Means of implementation for mitigatory actions (2020)',
);

Here are the sectors sorted according to the number of adaptation actions that apply to them as per the reported data. 

In [None]:
# The 13 sectors most often named for adaptation actions under question 3.0.
action_sector_rows = c3[c3['Column Name'] == 'Sectors/areas adaptation action applies to']
action_sector_rows['Response Answer'].value_counts()[:13].plot(
    kind='barh',
    figsize=(12, 7),
    title='Sectors the actions apply to (2020)',
);

Next we see the sectors that correspond to city plans with respect to the adaptation goals of cities. We see that there is a strong correlation between the sectors corresponding to city adaptation actions and city adaptation plans.

In [None]:
c3_2a = get_cities('3.2a')

# The 13 sectors most often covered by adaptation plans.
plan_sector_rows = c3_2a[c3_2a['Column Name'] == 'Sectors/areas covered by plan that addresses climate change adaptation']
plan_sector_rows['Response Answer'].value_counts()[:13].plot(
    kind='barh',
    figsize=(12, 7),
    title='Sectors covered by climate change adaptation plans (2020)',
);

The following visualization shows the primary goals cities have set themselves with respect to their adaptation efforts. As seen earlier in this notebook, the key hazards cities seem to be focusing on are those related to extreme temperature and precipitation events, such as heat waves and storm events respectively, and those that relate to water scarcity and rising sea levels.

In [None]:
c3_3 = get_cities('3.3')

# Frequency of every hazard named as the subject of an adaptation goal.
goal_hazard_rows = c3_3[c3_3['Column Name'] == 'Climate hazards that adaptation goal addresses']
goal_hazard_rows['Response Answer'].value_counts().plot(
    kind='barh',
    figsize=(12,10),
    title='Climate hazards addressed by adaptations goals (2020)',
);

When coming up with goals, it is important to develop indicators used to monitor the progress towards those goals. In fact, that is the entire point of this CDP competition. Here we try to visualize the kind of metrics cities are using to track their goals by creating a word cloud for the descriptions they provide for their indicators. 

In [None]:
# Free-text descriptions of the indicators used to track adaptation goals.
metric_rows = c3_3[c3_3['Column Name'] == 'Description of metric / indicator used to track goal']
answers3_3 = metric_rows['Response Answer']

wordcloud = WordCloud(
    stopwords=stop,
    background_color='white',
    width=1600,
    height=800,
).generate(' '.join(answers3_3))

fig, ax = plt.subplots(figsize=(16, 8))
ax.imshow(wordcloud)
ax.set_title('Word Cloud: Metrics used to track adaptation goals (2020)')
ax.set_axis_off();

When addressing issues affecting vulnerable groups in a city, it is important to focus on the specific communities that are being predominantly impacted. This is what we see happening by looking at the next word cloud analysing responses relating to transformative action towards vulnerable communities. Other areas of focus seem to be equity, providing support and access to health care, and other initiatives to benefit the people.

In [None]:
# All free-text answers to question 3.5, fed to the word cloud as one string.
c3_5 = get_cities('3.5')
answers3_5 = c3_5['Response Answer']

wordcloud = WordCloud(
    stopwords=stop,
    background_color='white',
    width=1600,
    height=800,
).generate(' '.join(answers3_5))

fig, ax = plt.subplots(figsize=(16, 8))
ax.imshow(wordcloud)
ax.set_title('Word Cloud: Transformative actions addressing vulnerable groups in cities (2020)')
ax.set_axis_off();

What are the gases contributing most to each city's emissions? This is an important question for shaping policy around the reduction of activities that lead to increasing greenhouse effects. The following heatmap shows the GHGs that contribute to city emissions grouped by region. Methane, CO2 and nitrous oxide show a greater contribution in each of the regions, whereas the high counts in North America, Europe and Latin America are mainly due to a greater number of contributing cities than in other regions.

In [None]:
# Region and reported gas only; the row/column bookkeeping is not needed here.
c_4_4 = get_cities('4.4', cols=['CDP Region']).drop(['Row Number', 'Column Name'], axis=1)

# Region x gas table built with len as the aggregation function.
gas_by_region = c_4_4.pivot_table(index=['CDP Region'], columns='Response Answer', aggfunc=len)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('Gases contributing to city emissions by region (2020)')
sns.heatmap(gas_by_region, annot=True, ax=ax);

Next we move to analysing emissions. The following visualization displays the average CO2 emissions by region and sector/subsector. Not surprisingly, a number of cities in developing nations in Asia and the Middle East contribute more towards global emissions, especially in sectors related to unclean energy generation and transportation, both areas which have seen unprecedented growth linked to their developing nation status.

In [None]:
# Question 4.6a is keyed by 'Row Name' (the sector / sub-sector) rather than 'Row Number'.
c4_6a = get_cities('4.6a', by='Row Name', cols=['CDP Region', 'City', 'Account Number'])
piv4_6a = c4_6a.pivot(
    index=['CDP Region', 'City', 'Account Number', 'Row Name'],
    columns='Column Name',
)['Response Answer']

# Mean direct emissions per (region, sector), transposed so sectors form the rows.
direct_emissions = piv4_6a['Direct emissions (metric tonnes CO2e)'].dropna().astype('float64', copy=False)
mean_direct_emissions = direct_emissions.groupby(['CDP Region', 'Row Name']).mean().unstack().T

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(mean_direct_emissions, annot=True, ax=ax)

plt.xticks(rotation=65, horizontalalignment='right')
plt.title('Mean metric tonnes of CO2 emitted by sector/sub-sector and region (2020)')
plt.show()

A breakdown of city GHG emissions by scope of emissions is provided below.

In [None]:
c4_6c = get_cities('4.6c', cols=['City', 'Account Number', 'Column Number'])

# Pivot on (column number, column name), then drop the number level so the
# columns can be addressed by name.
piv4_6c = c4_6c.pivot(index=['City', 'Account Number'], columns=['Column Number', 'Column Name'])['Response Answer']
piv4_6c.columns = piv4_6c.columns.droplevel()

scope_columns = [
    'Scope 1 emissions excluding emissions from grid-supplied energy generation',
    'Scope 1 emissions from grid-supplied energy generation within the city boundary',
    'Total Scope 2 emissions',
    'Total Scope 3 emissions'
]
piv4_6c = piv4_6c[scope_columns].astype('float32')

# Rank cities by total emissions; the helper column is removed before plotting
# and the six lowest-ranked cities are left out.
piv4_6c['sum'] = piv4_6c.sum(axis=1)
ranked_scopes = piv4_6c.sort_values('sum', ascending=False).drop('sum', axis=1)[:-6]

scope_ax = ranked_scopes.plot(
    kind='barh',
    cmap='tab20',
    figsize=(10,12),
    title='Breakdown of city emissions by scope (2020)',
    width=0.8,
    logx=True,
)
scope_ax.legend(bbox_to_anchor=(1,-0.05));


The next visualization analyses the contribution of GHGs in certain sectors. CO2 and CO2 equivalent emissions contribute the most, mainly in transportation and energy generation. We also see a significant amount of methane emissions being contributed in relation to livestock rearing. Other gases have relatively lower contributions.

In [None]:
c4_15 = get_cities('4.15')

# Gas counts per applicable sub-sector (the 'Category' column is excluded before pivoting).
wide4_15 = c4_15[c4_15['Column Name'] != 'Category'].pivot(
    index=['City', 'Account Number', 'Row Number'],
    columns=['Column Name'],
)['Response Answer']
piv4_15 = wide4_15.groupby(['Applicable sub-sector'])['Gas'].value_counts().unstack()

# Order sub-sectors by total count and plot the top 25 without the helper column.
piv4_15['sum'] = piv4_15.sum(axis=1)
top_subsectors = piv4_15.sort_values('sum', ascending=False).drop('sum', axis=1)[:25]
top_subsectors.plot(
    kind='barh',
    cmap='tab20',
    stacked=True,
    figsize=(10, 8),
    title='Gases contributing to sectoral emissions (2020)',
);

A similar visualization now compares GHGs emitted through certain activities or the burning of specific fuels. Once again we see that CO2 and CO2 equivalent emissions are most often found to contribute towards emissions from a variety of different fuels and activities, followed by methane and nitrous oxide.

In [None]:
# Gas counts per fuel type or activity, from the same 4.15 answers
# (this rebinds piv4_15 to the new table).
wide4_15 = c4_15[c4_15['Column Name'] != 'Category'].pivot(
    index=['City', 'Account Number', 'Row Number'],
    columns=['Column Name'],
)['Response Answer']
piv4_15 = wide4_15.groupby(['Fuel type or activity'])['Gas'].value_counts().unstack()

# Order fuels/activities by total count and plot the top 25 without the helper column.
piv4_15['sum'] = piv4_15.sum(axis=1)
top_fuels = piv4_15.sort_values('sum', ascending=False).drop('sum', axis=1)[:25]
top_fuels.plot(
    kind='barh',
    cmap='tab20',
    stacked=True,
    figsize=(12, 8),
    title='Gases emitted by Fuels and Activities (2020)',
);

In [None]:
# c5a['Column Name'].unique()
###
# c5a = get_cities('5.0a', year=2019)
# c5a[~c5a['Column Name'].isin(['Select the initiatives that this target contributes towards'])] \
# .pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']

A good measure for judging the amount of emissions in a city is to calculate the per capita emissions or emission intensity. This gives us a fairer metric to compare different cities on their emissions. The next visualization shows the top cities with maximum emission intensities as per the reported data.

In [None]:
c5b = get_cities('5.0b', cols=['Country', 'City', 'Account Number'])

# The multi-select "initiatives" column is excluded before pivoting; the result
# is then re-indexed by (city, country, account).
target_rows = c5b[~c5b['Column Name'].isin(['Select the initiatives that this target contributes towards'])]
piv5b = (
    target_rows
    .pivot(index=['Country', 'City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']
    .reset_index()
    .set_index(['City', 'Country', 'Account Number'])
)

emissions_goal_col = 'Target year absolute emissions goal (metric tonnes CO2e)'
population_col = 'Projected population in target year'
for col in [emissions_goal_col, population_col]:
    piv5b[col] = piv5b[col].astype('float32')

# Emission intensity in the target year: absolute goal divided by projected population.
piv5b['per capita'] = piv5b[emissions_goal_col] / piv5b[population_col]
piv5b['per capita'].sort_values(ascending=False)[:22].plot(
    kind='barh',
    figsize=(12,10),
    logx=True,
    title='Per capita emissions (in metric tonnes) in target year (as per reported data 2020)',
);

In [None]:
###
# c5c = get_cities('5.0c', year=2019)
# c5c[~c5c['Column Name'].isin(['Select the initiatives that this target contributes towards'])].pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']

What are the methods for emission transfer applied by cities and the consequent total emissions (in metric tonnes) saved?

In [None]:
c5_3a = get_cities('5.3a')
piv5_3a = c5_3a.pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']

# Answers arrive as text; convert the savings column before averaging.
saved_col = 'Emissions saved (metric tonnes CO2e)'
piv5_3a[saved_col] = piv5_3a[saved_col].astype('float64')

# Mean emissions saved per type of transferable emissions, smallest first.
mean_saved = piv5_3a.groupby('Type of transferable emissions')[saved_col].mean().sort_values()
mean_saved.plot(
    kind='barh',
    logx=True,
    title='Metric tonnes of CO2 emissions saved by type of emission transfer method (2020)',
);

Next, we attempt to score some of the mitigation actions the cities are undertaking. This is in order to provide some way to compare the various actions after weighing their costs alongside their benefits.

The proposed score is calculated by considering the amount of renewable energy a certain action causes to be produced, along with the energy savings and reduction in emissions that occur as a direct consequence of undertaking the action. This is then divided by the total cost of the project to account for the financial resources that need to be directed towards the implementation of the action. This results in the final score for the action.

The actions sorted by their scores are visualized below, and key areas to focus on turn out to be the transport and transit sectors, which also happen to be large contributors of greenhouse gas emissions. These high scores are expected mainly due to the increasing demand for and availability of electric transportation technology. Another cause of this would be the steep downward trend in the price of batteries, which are used to store energy for electric transportation.

In [None]:
c5_4 = get_cities('5.4')

# Columns that feed the score, plus the action label they are grouped by.
score_input_columns = [
    'Mitigation action', 'Estimated emissions reduction (metric tonnes CO2e)', 'Energy savings (MWh)', 'Renewable energy production (MWh)',
    'Total cost of the project', 'Total cost provided by the local government', 'Total cost provided by the majority funding source (currency)'
]
piv5_4 = c5_4[c5_4['Column Name'].isin(score_input_columns)] \
    .pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']

# Every column except the action label is numeric text. float64 is used
# (as in the other numeric cells of this notebook) because project costs and
# MWh totals summed per action can exceed float32's ~7 significant digits.
for col in piv5_4.columns:
    if col != 'Mitigation action':
        piv5_4[col] = piv5_4[col].astype('float64')

# Totals per mitigation action.
an = piv5_4.groupby('Mitigation action').sum()

# score = (mean of energy savings and renewable production + emissions reduction) / (1 + total cost)
# The +1 keeps the denominator non-zero for actions whose summed cost is 0.
an['score'] = (
    ((an['Energy savings (MWh)'] + an['Renewable energy production (MWh)']) / 2)
    + an['Estimated emissions reduction (metric tonnes CO2e)']
) / (1 + an['Total cost of the project'])

# Top 50 actions by score (title typo 'redictions' corrected to 'reductions').
an['score'].sort_values(ascending=False)[:50].plot(
    kind='barh',
    logx=True,
    figsize=(10, 12),
    title='Scoring mitigation actions by cost, energy savings, and emission reductions (2020)',
);

The following text output box lists key opportunities that have been identified by cities in addressing climate change, along with the keywords mentioned with them in the reported data - sorted by occurrence counts.

In [None]:
c6 = get_cities('6.0')
piv6 = c6.pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']

# Reduce each free-text description to its keywords: lower-cased tokens longer
# than 3 characters that are not stop words.
descriptions = piv6['Describe how the city is maximizing this opportunity'].astype('str')
piv6['keywords'] = descriptions.apply(
    lambda text: ' '.join(word for word in text.lower().split() if len(word) > 3 and word not in stop)
)

# One row per keyword, still indexed by (city, account, row) so it joins back onto piv6.
s = piv6['keywords'].str.split(expand=True).stack().droplevel(-1).rename('keywords2')
dff = piv6.drop('keywords', axis=1).join(s)

# Keyword counts within each opportunity.
ordered = dff.groupby('Opportunity')['keywords2'].value_counts()

print('\n\nTop opportunity keyword counts:\n')
for opp in ordered.index.get_level_values(0).unique()[:25]:
    top10 = ordered[opp][:10]
    print(opp)
    for keyword, count in top10.items():
        print('-', keyword, count)
    print()

City and business collaboration is necessary to adapt to and speed up the fight against climate change. The main areas of collaboration, along with collaborative activities are displayed in the heatmap below. 

In [None]:
c6_2a = get_cities('6.2a')

# Collaboration area x collaboration type counts; the free-text description is not needed.
wide6_2a = c6_2a.pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']
piv6_2a = (
    wide6_2a.drop('Description of collaboration', axis=1)
    .groupby('Collaboration area')['Type of collaboration']
    .value_counts()
    .unstack()
)

# Leave out the free-text "Other, ..." collaboration types.
cols = [col for col in piv6_2a.columns if 'Other,' not in col]

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(piv6_2a[cols], annot=True, ax=ax);
plt.xticks(rotation=65, horizontalalignment='right')
plt.title('City and business collaborations (2020)')
plt.show()

The following heatmap shows the differences in the sources of energy used to power cities within each region.

In [None]:
# Electricity source mix per city, grouped by region.
c8_1 = get_cities('8.1', cols=['CDP Region', 'City', 'Account Number'])

# One column per energy source; the total and reference-year columns are dropped.
wide8_1 = c8_1.pivot(index=['CDP Region', 'City', 'Account Number'], columns=['Column Name'])['Response Answer']
piv8_1 = wide8_1.drop(['Total - please ensure this equals 100%', 'Year data applies to'], axis=1)

# Remaining columns are numeric text; convert them all at once.
piv8_1 = piv8_1.astype('float64')

fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(piv8_1.groupby(['CDP Region']).mean(), annot=True, ax=ax);
plt.xticks(rotation=65, horizontalalignment='right')
plt.title('Difference in mean source mix of electricity in each region (2020)')
plt.show()

Renewable energy installed in cities depends in large part on the geographical attributes and locations of the cities that enable certain technologies for energy capture/generation. The next visualization shows the division of renewable energy capacity (in MW) in each region by its source.

In [None]:
# Question 8.4: renewable capacity per city and source ('Row Name'),
# averaged for every (region, source) pair.
c8_4 = get_cities('8.4', by='Row Name', cols=['CDP Region', 'City', 'Account Number'])
piv8_4 = c8_4.pivot(index=['CDP Region', 'City', 'Account Number', 'Row Name'], columns=['Column Name'])['Response Answer']

# Missing capacities are replaced by 0, so they take part in the means below.
piv8_4['MW capacity'] = piv8_4['MW capacity'].astype('float32').fillna(0)

mean_capacity = piv8_4['MW capacity'].groupby(['CDP Region', 'Row Name']).mean().unstack()
mean_capacity.plot(
    kind='barh',
    cmap='tab20',
    title='Mean renewable energy capacity (MW) installed in cities by region and source (2020)',
    figsize=(10, 8),
    logx=True,
    width=0.9,
);

The predominant form of passenger transport in cities continues to be private vehicles - by a huge margin. 

As such, cities should explore ways to reduce dependence on private vehicles by building the required infrastructure for well-connected public transport services. Moreover, green zones within city perimeters, or surge pricing within city areas prone to overcrowding, congestion, or poor air quality, could influence a shift towards other forms of travel such as cycling.

In [None]:
# Colour palette for the pie charts (borrowed, not the author's own design).
colors = [
    '#003f5c', '#2f4b7c', '#665191', '#a05195', '#d45087', '#f95d6a',
    '#ff7c43', '#ffa600', '#fcca46', '#a1c181', '#619b8a', '#386641',
]

# Question 10.1: one row per city, one column per transport mode.
# Missing answers are filled with 0 before the numeric cast.
c10_1 = get_cities('10.1', by='Row Name')
piv10_1 = (
    c10_1
    .pivot(index=['City', 'Account Number'], columns=['Column Name'])['Response Answer']
    .fillna(0)
    .astype('float32')
)

mean_share = piv10_1.mean()
ax = mean_share.plot.pie(
    labels=None,
    ylabel='',
    autopct='%1.1f%%',
    figsize=(8, 7),
    pctdistance=0.8,
    textprops={'color': "w"},
    title='Mean share of passenger transport in cities (2020)',
    colors=colors,
)
# Wedge labels are suppressed above; the legend carries the column names instead.
ax.legend(labels=piv10_1.columns, bbox_to_anchor=(1, 0.6));

In [None]:
# Question 10.2: mean 'Mode share' of each freight transport mode ('Row Name')
# across cities. Missing answers are filled with 0 before unstacking, so
# only (city, mode) pairs absent from the data remain NaN afterwards.
c10_2 = get_cities('10.2', by='Row Name')
piv10_2 = c10_2.pivot(index=['City', 'Account Number', 'Row Name'], columns=['Column Name'])['Response Answer'].fillna(0).unstack()['Mode share']
piv10_2 = piv10_2.astype('float32')

# `colors` is the palette defined in the passenger-transport cell above.
ax = piv10_2.mean().plot.pie(autopct='%1.1f%%', figsize=(8, 7), labels=None, ylabel='', pctdistance=0.8, textprops={'color': "w"}, colors=colors,
                             title='Mean share of freight transport in cities (2020)')
# Wedge labels are suppressed above; the legend carries the mode names instead.
ax.legend(labels=piv10_2.columns, bbox_to_anchor=(1, 0.6));

As discussed above, green zones are important in improving air quality and traffic congestion within city limits. The following bar chart shows the total size (in sq. km) of green zones within countries - as per the reported data.

In [None]:
# Question 10.7a: total reported zone size per country, smallest first.
c10_7a = get_cities('10.7a', cols=['Country', 'City', 'Account Number'])
piv10_7a = c10_7a.pivot(index=['Country', 'City', 'Account Number'], columns=['Column Name'])['Response Answer']
piv10_7a['Size (sq. km)'] = piv10_7a['Size (sq. km)'].astype('float32')

zone_area_by_country = piv10_7a.groupby('Country')['Size (sq. km)'].sum().sort_values()
zone_area_by_country.plot.barh(
    figsize=(10, 8),
    title='Cumulative size (in sq. km) of zero emission zones in countries (2020)',
);

Air pollution primarily consists of harmful PM2.5 and PM10 particulate matter, which passes into the lungs and damages the blood vessels.

Air pollution is today one of the major causes of health issues for people living in densely populated cities, especially in developing nations, where control over air quality is generally below par. The next visualization presents a breakdown of the concentrations of the various constituents of air pollution at different time scales.

In [None]:
# Question 10.14: most recent average concentration per city, one bar per 'Row Name'.
# Rows containing any missing value are discarded before pivoting.
c10_14 = get_cities('10.14', by='Row Name').dropna()
piv10_14 = c10_14.pivot(index=['City', 'Account Number', 'Row Name'], columns=['Column Name'])['Response Answer']

concentrations = (
    piv10_14['Average concentration for most recent year available (ug/m3)']
    .astype('float32')
    .dropna()
    .unstack()
)
concentrations.plot(
    kind='barh',
    cmap='tab20',
    figsize=(12, 20),
    logx=True,
    width=0.9,
    title='Air pollution concentrations by city (2020)',
);

The amount of food served by cities shows their dedication towards eliminating hunger. Chennai outshines other cities in terms of total number of meals served.

In [None]:
# Question 12.0: numeric answers per city, without the free-text 'Comment' column.
c12 = get_cities('12.0', by='Row Name')
piv12 = (
    c12
    .pivot(index=['City', 'Account Number'], columns=['Column Name'])['Response Answer']
    .drop('Comment', axis=1)
    .astype('float32')
)

# The 30 cities with the highest 'Number of meals'.
top_meals = piv12.sort_values(by='Number of meals', ascending=False).head(30)
top_meals.plot(
    kind='barh',
    figsize=(10, 12),
    title='Number and tonnes of meals served/sold through city programs (2020)',
);

In [None]:
# Question 12.1: 'Amount' per city, with one column per 'Row Name'.
# Cities without any amount are discarded.
c12_1 = get_cities('12.1', by='Row Name', cols=['Country', 'City', 'Account Number'])
piv12_1 = c12_1.pivot(index=['Country', 'City', 'Account Number', 'Row Name'], columns=['Column Name'])['Response Answer']['Amount'].astype('float32').unstack().dropna(how='all')

# Helper column used only to order the countries; it is dropped before plotting.
piv12_1['sum'] = piv12_1.sum(axis=1)

# The amounts are summed per country, so the chart is labelled by country.
piv12_1.groupby('Country').sum().sort_values(by='sum', ascending=False).drop('sum', axis=1) \
.plot(kind='barh', logx=True, width=0.8, figsize=(10, 12), title='Meat and dairy consumption by country (2020)');

Minimizing city-wide food waste and promoting sustainable foods further show a city's commitment to meeting its carbon goals and making the most of its food resources, while alleviating hunger.

The next two visualizations show how cities say they are promoting sustainable foods, and the total annual food wastage in the city, respectively.

In [None]:
# Question 12.4: for every 'Row Name', count the distinct 'Action implemented' answers.
c12_4 = get_cities('12.4', by='Row Name')
piv12_4 = c12_4.pivot(index=['City', 'Account Number', 'Row Name'], columns=['Column Name'])['Response Answer']

sustainable_food_actions = (
    piv12_4
    .groupby(['Row Name'])['Action implemented']
    .value_counts()
    .unstack()
)
ax = sustainable_food_actions.plot(
    kind='barh',
    stacked=True,
    cmap='tab20',
    title='Promotion of access to sustainable foods',
)
ax.legend(bbox_to_anchor=(1, 1));

In [None]:
# Question 12.5: annual food waste per city, one stacked-bar colour per CDP region.
c12_5 = get_cities('12.5', cols=['CDP Region', 'City', 'Population'])
piv12_5 = c12_5.pivot(index=['CDP Region', 'City', 'Population'], columns=['Column Name'])['Response Answer']
food_waste = piv12_5['Total annual volume of food waste in tonnes'].astype('float32').dropna()

# Move the region (index level 0) into the columns.
waste_by_region = food_waste.unstack(0)

# unstack() returns its rows in index order, so sorting the values before it
# has no effect on the chart. Order the rows afterwards instead, by each
# row's total in descending order. Positional indexing avoids label lookups
# on the MultiIndex.
row_order = waste_by_region.sum(axis=1).to_numpy().argsort()[::-1]
waste_by_region.iloc[row_order].plot(
    kind='barh',
    cmap='tab20',
    stacked=True,
    figsize=(10, 8),
    title='Annual food waste in tonnes by city and region (2020)',
);

The amount of solid waste generation in a city is a useful metric to judge the amount of recycling and waste management a city might require to undertake. However, when comparing different cities, it would be more useful to compare them on the per capita waste generation. 

The same is presented below in the bar chart sorted to indicate the cities responsible for the maximum per capita solid waste.

In [None]:
# Question 13.0: waste generated per inhabitant, for the 50 highest-ranking cities.
c13 = get_cities('13.0', by='Row Name', cols=['Country', 'City', 'Account Number', 'Population'])
answers_13 = c13.pivot(index=['Country', 'City', 'Account Number', 'Population'], columns=['Column Name'])['Response Answer']

# Population moves from the index back into a regular column, and the rows
# are re-keyed by (City, Country) so the bars are labelled that way.
piv13 = answers_13.reset_index().set_index(['City', 'Country'])

waste_tonnes = piv13['Amount of waste generated (tonnes/year)'].astype('float32')
population = piv13['Population'].astype('float32')
per_capita_waste = waste_tonnes.div(population)

top_per_capita = per_capita_waste.sort_values(ascending=False).head(50)
top_per_capita.plot(
    kind='barh',
    logx=True,
    stacked=True,
    figsize=(13, 11),
    cmap='tab20',
    title='Per capita solid waste generation by city (2020)',
);

In [None]:
# Question 14.1: mean numeric answer per country, with one stacked-bar colour per CDP region.
c14_1 = get_cities('14.1', cols=['CDP Region', 'Country'])
c14_1['Response Answer'] = c14_1['Response Answer'].astype('float32')

# Region (group level 0) becomes the columns; countries stay on the rows.
mean_access = c14_1.groupby(['CDP Region', 'Country'])['Response Answer'].mean().unstack(0)
ax = mean_access.plot(
    kind='barh',
    stacked=True,
    title='Mean access to potable water supply by country and region (2020)',
    cmap='tab20',
    figsize=(10, 14),
)
ax.legend(bbox_to_anchor=(1, 1));

The next visualization identifies the main risk drivers affecting water security in cities. These are grouped according to their magnitude and the expected timescales for their materialization.

Declining water quality and other risks that affect access to water, such as stressed water resources, droughts, and inadequate water supply infrastructure are expected to pose serious issues to cities in the near to medium term.

In [None]:
# Question 14.2a: count the anticipated timescales for every
# (risk driver, magnitude) combination.
c14_2a = get_cities('14.2a')
piv14_2a = c14_2a.pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']

# After the two unstack() calls the risk drivers are on the rows, and the
# columns combine timescale and magnitude.
risk_counts = (
    piv14_2a
    .groupby(['Water security risk drivers', 'Estimated magnitude of potential impact'])['Anticipated timescale']
    .value_counts()
    .unstack()
    .unstack()
)

fig, ax = plt.subplots(figsize=(12, 8))
# Only the first 12 rows are shown.
sns.heatmap(risk_counts.head(12), annot=True, ax=ax)
plt.xticks(rotation=65, horizontalalignment='right')
ax.set_title('Risks to water security, and their magnitude & timescales (2020)')
plt.show()

The final visualization of this notebook presents the mitigation actions being undertaken to reduce risks to water security. 

The important actions being considered that work across multiple risks are spreading awareness and increasing investment in improving water supply infrastructure. This makes sense: water conservation requires every individual to do his or her part, and a number of cities report inadequate water supply infrastructure.

In [None]:
# Question 14.3: count the adaptation actions reported against each risk.
c14_3 = get_cities('14.3')
answers_14_3 = c14_3.pivot(index=['City', 'Account Number', 'Row Number'], columns=['Column Name'])['Response Answer']

adaptation_counts = answers_14_3.groupby('Risks')['Adaptation action'].value_counts()
# NOTE(review): unstack() re-orders the rows by index, so this sort likely
# has no effect on the final frame — confirm before relying on it.
adaptation_counts = adaptation_counts.sort_values(ascending=False)

# Risks on the rows, actions on the columns; only the first 17 rows are kept.
piv14_3 = adaptation_counts.unstack().head(17)

# Leave out columns whose name contains 'Other,', then drop the last remaining column.
named_actions = [name for name in piv14_3.columns if 'Other,' not in name]
cols = named_actions[:-1]

piv14_3[cols].plot(
    kind='barh',
    stacked=True,
    figsize=(12, 8),
    cmap='tab20',
    title='Mitigation actions considered and implemented by risk to water security (2020)',
);

Thank you for sticking with this notebook right to the end. I hope this was useful. 