In [None]:
!pip install geopandas
!pip install pyshp
!pip install shapely
!pip install plotly-geo

<center><h1>CDP: Unlocking Climate Solutions</h1></center>
<center><i>Developing a methodology for calculating KPIs that relate to the environmental and social issues that are discussed in the CDP survey data.</i></center>
<br>

<div class="h2">Table of Contents</div>
<ul>
    <li><a href="#ld">Loading Data</a></li>
    <li><a href="#ctq">Questions based on CDP questionnaires by countries and cities</a></li>
    <li><a href="#cdr">Contribution based on CDP Region</a></li>
    <li><a href="#cu">Which countries contributed the most?</a></li>
    <li><a href="#cuct">Countries and CDP region over the years</a></li>
    <li><a href="#res">CDP questionnaires and countries and cities responses</a></li>
    <li><a href="#ans">Percent of question answered by orgs</a></li>
    <li><a href="#dis">City disclosure</a></li>
    <li><a href="#cdc5">CDC 500 Cities Census Tract Data</a></li>
</ul>

In [None]:
%%HTML
<style type="text/css">
/* Banner styling for the section headers (<div class="h2">) used in the markdown cells. */
div.h2 {
    background-color: #6e7b8b;
    color: white;
    padding: 5px;
    padding-right: 300px;
    font-size: 25px;
    max-width: 1500px;
    margin-top: 2px;
    margin-bottom: 10px;
}
</style>

In [None]:
# standard libs
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import json

# Show the kernel's current working directory; the relative "../input/..." paths used
# by the read_csv calls below are resolved against it.
print(os.getcwd())

<a id='ld'></a>
<div class="h2">Loading Data</div>
Let's look at the datasets one by one. First, let's gather details about the cities datasets, covering both the responses and the disclosures of cities.


In [None]:
# Read the 2020 city questionnaire responses into a DataFrame.
cities_2020_csv = "../input/cdp-unlocking-climate-solutions/Cities/Cities Responses/2020_Full_Cities_Dataset.csv"
cities_res_2020 = pd.read_csv(cities_2020_csv)

In [None]:
# Transpose the first rows so that every column of this wide frame is shown as a row.
cities_res_2020.head().T

<a id='ctq'></a>
<div class="h2">Questions based on CDP questionnaires by countries and cities</div>
Now we try to answer some questions using this dataset, in order to understand the data more deeply.
<br>

<ul>
    <li>Contribution based on CDP Region</li>
    <li>Which countries contributed the most?</li>
    <li>Contribution of countries and CDP regions over the years 2018, 2019 and 2020</li>
</ul>

<a id='cdr'></a>
<div class="h2">Contribution based on CDP Region</div>

In [None]:
# Load the 2018-2020 city responses and stack them into one frame so that the
# years can be compared.
CITIES_RESPONSES_DIR = '../input/cdp-unlocking-climate-solutions/Cities/Cities Responses'

cities_res_2020 = pd.read_csv(f'{CITIES_RESPONSES_DIR}/2020_Full_Cities_Dataset.csv')
cities_res_2019 = pd.read_csv(f'{CITIES_RESPONSES_DIR}/2019_Full_Cities_Dataset.csv')
cities_res_2018 = pd.read_csv(f'{CITIES_RESPONSES_DIR}/2018_Full_Cities_Dataset.csv')

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it each
# yearly frame keeps its own 0-based labels and the combined index holds duplicates.
cities_resp = pd.concat(
    [cities_res_2020, cities_res_2019, cities_res_2018],
    ignore_index=True,
)

In [None]:
# Tally responses per (country, CDP region, reporting year), largest first.
# NOTE(review): `.count()` counts the non-null `Organization` rows in each group, so
# `num_orgs` is a number of response rows, not a number of distinct organisations.
group_keys = ['Country', 'CDP Region', 'Year Reported to CDP']
country_count = (
    cities_resp[['Organization'] + group_keys]
    .groupby(group_keys)
    .count()
    .sort_values(by=['Organization'], ascending=False)
    .reset_index()
    .rename(columns={'Organization': 'num_orgs'})
)
country_count.head(10)

In [None]:
# Normalise CDP country spellings to names that the pycountry lookup (which supplies
# the ISO alpha-2 codes used for the flag images) can resolve.
country_name_fixes = {
    'Taiwan, Greater China': 'Taiwan, Province of China',
    'China, Hong Kong Special Administrative Region': 'Hong Kong Special Administrative Region',
    'Bolivia (Plurinational State of)': 'Bolivia, Plurinational State of',
    # The Democratic Republic of the Congo (ISO "CD") is a different country from the
    # Republic of the Congo (ISO "CG"), so it is mapped to the ISO-style DRC name
    # instead of being renamed to "Republic of the Congo".
    'Democratic Republic of the Congo': 'Congo, The Democratic Republic of the',
    'Venezuela (Bolivarian Republic of)': 'Venezuela, Bolivarian Republic of',
}
country_count['Country'] = country_count['Country'].replace(country_name_fixes)

In [None]:
# adding two letter abbreviation to countries
import pycountry
def country_codes(country_name):
    """Return the two-letter (alpha-2) code of the best fuzzy match for a country name.

    Parameters
    ----------
    country_name : str
        Country name to look up with ``pycountry.countries.search_fuzzy``.

    Returns
    -------
    str or None
        The ``alpha_2`` attribute of the first match, or ``None`` when the lookup
        raises ``LookupError`` (no match). Returning ``None`` keeps one unknown
        spelling from aborting the whole column computation.

    Notes
    -----
    The name ``country_codes`` is rebound to a DataFrame in the "City disclosure"
    section, so this helper is only callable before that cell has run.
    """
    try:
        return pycountry.countries.search_fuzzy(country_name)[0].alpha_2
    except LookupError:
        return None

# country_count has one row per (country, region, year), so the same name repeats.
# Resolve every distinct name once and map the result back onto the rows instead of
# running a fuzzy search per row.
code_by_country = {name: country_codes(name) for name in country_count['Country'].unique()}
country_count['codes'] = country_count['Country'].map(code_by_country)

In [None]:
# Preview the first rows of country_count, which now carries the `codes` column.
country_count.head()

In [None]:

# Share of the 2020 responses contributed by each CDP region, drawn as a donut chart.
country_2020 = country_count[country_count['Year Reported to CDP'] == 2020]
cdp_reg_count = (
    country_2020.groupby(['CDP Region'])['num_orgs']
    .sum()
    .rename("count")
    .sort_values(ascending=False)
)
print(cdp_reg_count)

# Take the labels from the same Series as the sizes. Using the regions of the whole
# (all-years) table here would list them in a different order, or even a different
# number, so wedges would carry the wrong region names.
group_names = list(cdp_reg_count.index)
print(group_names)
group_size = list(cdp_reg_count / cdp_reg_count.sum() * 100)
print(group_size)

# Create colors
a, b, c = [plt.cm.Blues, plt.cm.gist_heat, plt.cm.Greens]
# One shade per region, spread over the colormap, however many regions there are.
wedge_colors = [b(shade) for shade in np.linspace(0.1, 0.9, len(group_size))]

# First Ring (outside)
fig, ax = plt.subplots()
ax.axis('equal')
mypie, _ = ax.pie(group_size, radius=2.5, labels=group_names, colors=wedge_colors)
# width < radius turns the pie into a ring.
plt.setp(mypie, width=1.0, edgecolor='white')

# show it
plt.show()


<a id='cu'></a>
<div class="h2">Which countries contributed the most?</div>


In [None]:
import requests
from io import BytesIO
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
# Colormaps used to shade the charts.
a, b, c = plt.cm.Blues, plt.cm.gist_heat, plt.cm.Greens

# Rows reported in 2020; country_count is already sorted by num_orgs (descending),
# so the first five rows are the five largest entries.
is_2020 = country_count['Year Reported to CDP'] == 2020
country_2020 = country_count[is_2020]
top5_countries = country_2020.head(5)

def offset_image(x, y, label, bar_is_too_short, ax):
    """Download the flag for ``label`` and place it next to the point (x, y) on ``ax``.

    Parameters
    ----------
    x, y : float
        Anchor point in data coordinates (the end of the bar and its row position).
    label : str
        Country code inserted into the countryflags.io URL.
    bar_is_too_short : bool
        When true the flag is anchored at x = 0 instead of at the end of the bar.
    ax : matplotlib.axes.Axes
        Axes that receives the flag image.

    Returns
    -------
    None
        The image is added to ``ax`` as a side effect. If the flag cannot be
        downloaded or decoded, a message is printed and the axes are left unchanged,
        so the bar chart is still drawn.
    """
    url = f'https://www.countryflags.io/{label}/flat/64.png'
    try:
        # A timeout keeps an unreachable flag service from hanging the notebook.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        img = plt.imread(BytesIO(response.content))
    except (requests.RequestException, OSError, ValueError) as exc:
        print(f'Skipping flag for {label!r}: {exc}')
        return

    im = OffsetImage(img, zoom=0.65)
    im.image.axes = ax
    x_offset = -25  # shift the flag 25 points to the left of the anchor
    if bar_is_too_short:
        x = 0
    ab = AnnotationBbox(im, (x, y), xybox=(x_offset, 0), frameon=False,
                        xycoords='data', boxcoords="offset points", pad=0)
    ax.add_artist(ab)


# Horizontal bar chart: share of all 2020 responses held by each of the five largest
# entries, with the matching country flag drawn on every bar.
labels = list(top5_countries['codes'])
# Colormap inputs are expected in [0, 1]; 1.0 is the darkest shade of `Blues`.
colors = [a(1.0), a(0.8), a(0.6), a(0.4), a(0.1)]
values = list(top5_countries['num_orgs'] / country_2020['num_orgs'].sum() * 100)

height = 0.9
fig, ax = plt.subplots()
ax.barh(y=labels, width=values, height=height, color=colors, align='center', alpha=1.0)
ax.set_xlabel('Share of 2020 responses (%)')
ax.set_title('Top 5 countries by share of 2020 responses')

max_value = max(values)
for i, (label, value) in enumerate(zip(labels, values)):
    # Bars shorter than a tenth of the longest one get their flag anchored at x = 0.
    offset_image(value, i, label, bar_is_too_short=value < max_value / 10, ax=ax)
fig.subplots_adjust(left=0.15)
plt.show()

<a id='cuct'></a>
<div class="h2">Countries and CDP region over the years </div>



In [None]:
import plotly.express as px

# Interactive treemap: the first level is the reporting year, the second the country.
# Tiles are sized by num_orgs and coloured by CDP region.
treemap_options = {
    'path': ['Year Reported to CDP', 'Country'],
    'values': 'num_orgs',
    'color': 'CDP Region',
}
fig = px.treemap(country_count, **treemap_options)
fig.update_layout(uniformtext={'minsize': 28})
fig.show()

<p>Using the interactive treemap above, we can see each country's participation over the years and whether its number of recorded responses increased or decreased from year to year. Click on a year or a country to expand the view.</p>
For example (the figures are the <code>num_orgs</code> values, i.e. the number of response rows recorded for the country, not the number of distinct organizations):
<ol>
    <li>Denmark has 5,853 response rows in 2018, rising to 7,394 in 2019 and further to 20,697 in 2020.</li>
    <li>India has 1,924 response rows in 2018, rising to 7,577 in 2019, but only 2,512 in 2020.</li>
</ol>

<a id='res'></a>
<div class="h2">CDP questionnaires and countries and cities responses</div>



In [None]:
# Replace a missing `Response Answer` with the explicit text 'No Response', so that the
# string filters in the following cells can match unanswered questions.
cities_resp['Response Answer'] = cities_resp['Response Answer'].fillna('No Response')

In [None]:
# List the distinct `Parent Section` values present in the combined responses.
cities_resp['Parent Section'].unique()

In [None]:
# Rows with a substantive answer: `Response Answer` contains neither placeholder text.
no_info = ['Question not applicable', 'No Response']
response_columns = [
    "Account Number", "Organization", "Year Reported to CDP", "Country",
    "Parent Section", "Section", "Question Name", "Question Number", "Response Answer",
]
# na=True marks values that cannot be searched as placeholders, so they are excluded
# below exactly as the `== 0` comparison excluded them.
is_placeholder = cities_resp["Response Answer"].str.contains('|'.join(no_info), na=True)
ques_responded = cities_resp[~is_placeholder][response_columns]
ques_responded.head()

In [None]:
import squarify
import seaborn as sns
plt.style.use('fivethirtyeight')

# Ten sections with the most substantive answers. The value_counts Series is used
# directly: `.to_frame()` names its column 'Section' on pandas < 2.0 but 'count' on
# pandas >= 2.0, so indexing the frame with 'Section' is version-dependent.
top_sections = ques_responded['Section'].value_counts().head(10)

# Create the figure once at its final size and hand the axes to squarify explicitly.
fig, ax = plt.subplots(figsize=(30, 25), subplot_kw={'aspect': 'equal'})
squarify.plot(sizes=top_sections.values, label=top_sections.index,
              color=sns.color_palette("YlGn", 10), alpha=1, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
ax.set_title("Overall top 10 Sections with Responded Answer", fontsize=18, fontweight="bold")
plt.show()

In [None]:
# Rows whose `Response Answer` contains the text "Question not applicable".
not_applicable_info = ['Question not applicable']
response_columns = [
    "Account Number", "Organization", "Year Reported to CDP", "Country",
    "Parent Section", "Section", "Question Name", "Question Number", "Response Answer",
]
# na=False: values that cannot be searched never count as a match.
is_not_applicable = cities_resp["Response Answer"].str.contains('|'.join(not_applicable_info), na=False)
ques_not_appl = cities_resp[is_not_applicable][response_columns]
ques_not_appl.head()

In [None]:
import squarify
import seaborn as sns
plt.style.use('fivethirtyeight')

# Ten sections with the most "Question not applicable" answers. The value_counts
# Series is used directly: `.to_frame()` names its column 'Section' on pandas < 2.0
# but 'count' on pandas >= 2.0, so indexing the frame with 'Section' is
# version-dependent.
top_sections = ques_not_appl['Section'].value_counts().head(10)

# Create the figure once at its final size and hand the axes to squarify explicitly.
fig, ax = plt.subplots(figsize=(30, 25), subplot_kw={'aspect': 'equal'})
squarify.plot(sizes=top_sections.values, label=top_sections.index,
              color=sns.color_palette("Spectral", 18), alpha=1, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
ax.set_title("Overall top 10 Sections with Question not applicable", fontsize=18, fontweight="bold")
plt.show()

In [None]:
# Rows whose `Response Answer` contains the text "No Response".
no_resp_info = ['No Response']
response_columns = [
    "Account Number", "Organization", "Year Reported to CDP", "Country",
    "Parent Section", "Section", "Question Name", "Question Number", "Response Answer",
]
# na=False: values that cannot be searched never count as a match.
is_no_response = cities_resp["Response Answer"].str.contains('|'.join(no_resp_info), na=False)
ques_no_resp = cities_resp[is_no_response][response_columns]
ques_no_resp.head()

In [None]:
# Number of "No Response" rows per question and reporting year.
question_keys = ['Parent Section', 'Section', 'Question Number', 'Year Reported to CDP']
no_res_count = (
    ques_no_resp
    .groupby(question_keys)
    .size()
    .reset_index(name='counts')
)
no_res_count.head()

In [None]:
import squarify
import seaborn as sns
plt.style.use('fivethirtyeight')

# Ten sections with the most "No Response" answers. The value_counts Series is used
# directly: `.to_frame()` names its column 'Section' on pandas < 2.0 but 'count' on
# pandas >= 2.0, so indexing the frame with 'Section' is version-dependent.
top_sections = ques_no_resp['Section'].value_counts().head(10)

# Create the figure once at its final size and hand the axes to squarify explicitly.
fig, ax = plt.subplots(figsize=(30, 25), subplot_kw={'aspect': 'equal'})
squarify.plot(sizes=top_sections.values, label=top_sections.index,
              color=sns.color_palette("RdYlBu", 18), alpha=1, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(" Overall top 10 Sections with No Response", fontsize=18, fontweight="bold")
plt.show()

<a id='ans'></a>
<div class="h2">Percent of question answered by orgs</div>


In [None]:
# Rows without a substantive answer: `Response Answer` contains one of the
# placeholder texts.
no_info = ['Question not applicable', 'No Response']
response_columns = [
    "Account Number", "Organization", "Year Reported to CDP", "Country",
    "Parent Section", "Section", "Question Name", "Question Number", "Response Answer",
]
# na=False: values that cannot be searched never count as a match.
is_placeholder = cities_resp["Response Answer"].str.contains('|'.join(no_info), na=False)
no_responded = cities_resp[is_placeholder][response_columns]
no_responded.head()

In [None]:
# Stacked bar chart, for a single country, of how the placeholder answers
# ("No Response" / "Question not applicable") are spread over questionnaire sections.
country_name = "Norway"
country_no_resp = no_responded.loc[no_responded["Country"] == country_name]

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever spelling this installation has.
style_name = next(
    (name for name in ('seaborn-v0_8-white', 'seaborn-white') if name in plt.style.available),
    'default',
)
plt.style.use(style_name)

resp_by_section = country_no_resp.groupby(['Section', 'Response Answer']).size()
resp_by_section.unstack().plot(kind='bar', stacked=True, colormap='Wistia', figsize=(13, 11), grid=False)
plt.title(f'{country_name}-Stacked Barplot of Section and their Response Answer distribution',
          fontsize=18, fontweight="bold")
plt.ylabel('Count of Response Answer in a particular Section', fontsize=16)
plt.xlabel('Section', fontsize=16)
plt.show()

<a id='dis'></a>
<div class="h2"> City disclosure</div>


In [None]:
# Lookup table of country name -> alpha-2 / alpha-3 code, read from a ';'-separated file.
# NOTE: this rebinds `country_codes`, which earlier in the notebook named the
# pycountry helper function; that helper is no longer callable after this cell.
code_columns = ['Country', 'Alpha-2 code', 'Alpha-3 code']
country_codes = pd.read_csv(
    '../input/alpha-country-codes/Alpha__2_and_3_country_codes.csv',
    sep=';',
)[code_columns]

In [None]:
# Rename entries of the code table to the country spellings used in the CDP data, so
# that the merge on 'Country' below finds them.
code_table_name_fixes = {
    # CDP spells South Korea "Republic of Korea". Renaming the *Democratic People's*
    # Republic (North Korea) to it, as was done before, attaches North Korea's codes
    # to South Korea.
    # NOTE(review): assumes the code file lists South Korea under one of these two
    # spellings - confirm against the file.
    'Korea, Republic of': 'Republic of Korea',
    'South Korea': 'Republic of Korea',
    'United Kingdom': 'United Kingdom of Great Britain and Northern Ireland',
    'Taiwan': 'Taiwan, Greater China',
    'Russia': 'Russian Federation',
    'Palestinian Territory, Occupied': 'State of Palestine',
    'Congo, the Democratic Republic of the': 'Democratic Republic of the Congo',
    'Bolivia': 'Bolivia (Plurinational State of)',
    'Vietnam': 'Viet Nam',
    'Hong Kong': 'Hong Kong Special Administrative Region',
    'Venezuela': 'Venezuela (Bolivarian Republic of)',
    'United States Of America': 'United States of America',
}
country_codes['Country'] = country_codes['Country'].replace(code_table_name_fixes)

In [None]:
# Read the 2018 "Cities Disclosing to CDP" table and preview it transposed.
cities_dis_csv = '../input/cdp-unlocking-climate-solutions/Cities/Cities Disclosing/2018_Cities_Disclosing_to_CDP.csv'
cities_dis = pd.read_csv(cities_dis_csv)
cities_dis.head().T

In [None]:
# Attach the alpha-2 / alpha-3 codes to every disclosure row. A left join keeps rows
# whose country name has no entry in the code table.
cities_dis = cities_dis.merge(country_codes, how='left', on='Country')
cities_dis.head().T

In [None]:
import plotly.graph_objects as go

# The same country can appear on many rows of cities_dis (presumably one row per
# disclosing city - confirm), but a choropleth needs exactly one value per location.
# Sum the populations per country so every country is coloured by its total rather
# than by whichever single row happens to be used.
population_by_country = cities_dis.groupby('Country', as_index=False)['Population'].sum()

fig = go.Figure(data=go.Choropleth(
    locations=population_by_country['Country'],
    locationmode='country names',
    z=population_by_country['Population'],
    text=population_by_country['Country'],
    colorscale='Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_tickprefix='',
    colorbar_title='Population',
))

fig.update_layout(
    title_text='2018 population of cities disclosing to CDP, by country',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    )
)

fig.show()

<a id='cdc5'></a>
<div class="h2"> CDC 500 Cities Census Tract Data</div>
<br>
<p>This dataset covers a wide range of health indicators such as cancer, asthma, tobacco use, "behavioral risk factors", and more. These are relevant to social equity as it relates to the environment, and the asthma prevalence percentage in each city/state can also serve as a rough estimate of air/environment quality.</p>


In [None]:
# Read the CDC "500 Cities" census-tract table (2019 release) and preview it.
cdc_census_csv = "../input/cdp-unlocking-climate-solutions/Supplementary Data/CDC 500 Cities Census Tract Data/500_Cities__Census_Tract-level_Data__GIS_Friendly_Format___2019_release.csv"
CDC_Census = pd.read_csv(cdc_census_csv)
CDC_Census.head()

## Multiple state visualization

In [None]:
import plotly.figure_factory as ff

# County-level map of asthma prevalence for the New England states.
NE_states = ['CT', 'ME', 'MA', 'NH', 'RI', 'VT']

# .copy() makes the subset an independent frame, so adding the FIPS column below does
# not assign into a slice of CDC_Census (pandas' SettingWithCopyWarning).
CDC_Census_r = CDC_Census[CDC_Census['StateAbbr'].isin(NE_states)].copy()
# The first five characters after the "-" in Place_TractID are used as the county code.
# NOTE(review): assumes Place_TractID looks like "<place id>-<tract FIPS>" - confirm.
CDC_Census_r['FIPS'] = CDC_Census_r['Place_TractID'].apply(lambda x: str(x).split("-", 1)[1][:5])

# Many tracts share one county code, while the map has one shape per county. Reduce to
# a single value per county: the unweighted mean of its tract prevalences (missing
# values are skipped, and counties with no value at all are dropped).
asthma_by_county = CDC_Census_r.groupby('FIPS')['CASTHMA_CrudePrev'].mean().dropna()
values = asthma_by_county.tolist()
fips = asthma_by_county.index.tolist()

colorscale = [
    'rgb(68.0, 1.0, 84.0)',
    'rgb(66.0, 64.0, 134.0)',
    'rgb(38.0, 130.0, 142.0)',
    'rgb(63.0, 188.0, 115.0)',
    'rgb(216.0, 226.0, 25.0)'
]
# Four interior break points split the observed range into five bins, one per colour,
# so the colour scale defined above is actually applied to the map.
endpts = list(np.linspace(min(values), max(values), len(colorscale) + 1)[1:-1])

fig = ff.create_choropleth(
    fips=fips, values=values,
    scope=['Connecticut', 'Maine', 'Massachusetts', 'New Hampshire', 'Rhode Island', 'Vermont'],
    colorscale=colorscale, binning_endpoints=endpts, round_legend_values=True,
    county_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    state_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    legend_title='asthma %', title="Asthma prevalence percentage on multiple state"
)
fig.update_layout(
    legend_x = 0,
    annotations = {'x': -0.12, 'xanchor': 'left'}
)

fig.layout.template = None
fig.show()

## Single state visualization

In [None]:
import plotly.figure_factory as ff

# County-level map of asthma prevalence for Florida.
# .copy() makes the subset an independent frame, so adding the FIPS column below does
# not assign into a slice of CDC_Census (pandas' SettingWithCopyWarning).
CDC_Census_s = CDC_Census[CDC_Census['StateAbbr'] == 'FL'].copy()
# The first five characters after the "-" in Place_TractID are used as the county code.
# NOTE(review): assumes Place_TractID looks like "<place id>-<tract FIPS>" - confirm.
CDC_Census_s['FIPS'] = CDC_Census_s['Place_TractID'].apply(lambda x: str(x).split("-", 1)[1][:5])

# Many tracts share one county code, while the map has one shape per county. Reduce to
# a single value per county: the unweighted mean of its tract prevalences. Missing
# values are skipped, which also keeps NaN out of the min/max used for the bins.
asthma_by_county = CDC_Census_s.groupby('FIPS')['CASTHMA_CrudePrev'].mean().dropna()
values = asthma_by_county.tolist()
fips = asthma_by_county.index.tolist()

# Four evenly spaced bin edges from the smallest to the largest county value.
endpts = list(np.mgrid[min(values):max(values):4j])
colorscale = ["#030512","#1d1d3b","#323268","#3d4b94","#3e6ab0",
              "#4989bc","#60a7c7","#85c5d3","#b7e0e4","#eafcfd"]
fig = ff.create_choropleth(
    fips=fips, values=values, scope=['Florida'], show_state_data=True,
    colorscale=colorscale, binning_endpoints=endpts, round_legend_values=True,
    plot_bgcolor='rgb(229,229,229)',
    paper_bgcolor='rgb(229,229,229)',
    legend_title='ASTHMA %',
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
    state_outline={'color': 'rgb(15, 15, 55)','width': 0.5},
    title="Asthma percentage in various cities of Florida State",
    exponent_format=True,
)
fig.layout.template = None
fig.show()

<p>Thanks a lot for having a look at this notebook. I hope you liked it. Any suggestions and feedback are always welcome.</p>

<b>Please Upvote this notebook as it encourages me in doing better.</b>