In this notebook, we explore a section of the data provided under 'City-Response'.

CDP currently collaborates with cities across the world, so in this notebook we visualize the data those cities have contributed, and a few of the aspects covered in the survey, from a regional perspective.

In [None]:
from glob import glob
import pandas as pd
import numpy as np
from tqdm import tqdm
from difflib import SequenceMatcher
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

import seaborn as sns
import plotly as ply
import altair as alt
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [None]:


# Inject a CSS rule for the notebook's scrolling output container.
# `display`/`HTML` are imported from the public `IPython.display` module; the
# `IPython.core.display` path used previously is deprecated for `display`.
# NOTE(review): the two CSS declarations are separated by ',' instead of ';',
# so browsers will most likely discard the rule — confirm the intended
# height/width before changing the string.
from IPython.display import display, HTML
display(HTML("<style>div.output_scroll { height: 4444em,width: 4444em; }</style>"))

In [None]:
# Per-year lists of CSV paths under the competition input directory.
# For each year, the first pattern matches files whose name starts with the
# year two directory levels down; the second matches files whose name
# contains the year one directory level down.
data_2018 = [
    *glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/2018*.csv'),
    *glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*2018*.csv'),
]
data_2019 = [
    *glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/2019*.csv'),
    *glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*2019*.csv'),
]
data_2020 = [
    *glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/2020*.csv'),
    *glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*2020*.csv'),
]

In [None]:
# Paths of the city questionnaire exports: the full response datasets and the
# lists of cities disclosing to CDP (both matched two directory levels down).
cities_response_files, cities_disclosing_files = (
    glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/*Full_Cities_Dataset.csv'),
    glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/*Cities_Disclosing_to_CDP.csv'),
)

In [None]:
# Read each matched CSV and stack the frames row-wise into one frame per dataset.
disclosing_frames = [pd.read_csv(csv_path) for csv_path in cities_disclosing_files]
cities_disclosing = pd.concat(disclosing_frames)

response_frames = [pd.read_csv(csv_path) for csv_path in cities_response_files]
cities_response = pd.concat(response_frames)

In [None]:
# Paths of the corporate exports (three directory levels down): disclosure
# lists and full response datasets for climate change and water security.
(
    corporate_climate_change_disclosing_files,
    corporate_water_security_disclosing_files,
    corporate_climate_change_response_files,
    corporate_water_security_response_files,
) = (
    glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/*/*Corporates_Disclosing_to_CDP_Climate_Change*.csv'),
    glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/*/*Corporates_Disclosing_to_CDP_Water_Security*.csv'),
    glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/*/*Full_Climate_Change_Dataset*.csv'),
    glob('/kaggle/input/cdp-unlocking-climate-solutions/*/*/*/*Full_Water_Security_Dataset*.csv'),
)

In [None]:
def _read_and_stack(csv_paths):
    """Read every CSV in ``csv_paths`` and concatenate the frames row-wise."""
    return pd.concat([pd.read_csv(csv_path) for csv_path in csv_paths])


# One combined frame per corporate dataset.
corporate_climate_change_disclosing = _read_and_stack(corporate_climate_change_disclosing_files)
corporate_water_security_disclosing = _read_and_stack(corporate_water_security_disclosing_files)
corporate_climate_change_response = _read_and_stack(corporate_climate_change_response_files)
corporate_water_security_response = _read_and_stack(corporate_water_security_response_files)

In [None]:
# Country spellings found in the world-cities file, mapped to the spellings
# they are replaced with before merging on 'Country' with the CDP responses.
WORLDCITIES_TO_CDP_COUNTRY = {
    'Bolivia': 'Bolivia (Plurinational State of)',
    'Hong Kong': 'China, Hong Kong Special Administrative Region',
    'Côte D’Ivoire': "Côte d'Ivoire",
    'Congo (Kinshasa)': 'Democratic Republic of the Congo',
    'Korea, South': 'Republic of Korea',
    'Moldova': 'Republic of Moldova',
    'Russia': 'Russian Federation',
    'West Bank': 'State of Palestine',
    'Taiwan': 'Taiwan, Greater China',
    'United Kingdom': 'United Kingdom of Great Britain and Northern Ireland',
    'Tanzania': 'United Republic of Tanzania',
    'United States': 'United States of America',
    'Venezuela': 'Venezuela (Bolivarian Republic of)',
    'Vietnam': 'Viet Nam',
}

worldcities = pd.read_csv('/kaggle/input/world-cities/worldcities.csv').rename(columns={'country':'Country'})

# Assign the replaced column back rather than calling replace(inplace=True) on
# the column selection: that form is chained assignment and does not update
# the parent frame when pandas Copy-on-Write is active.
worldcities['Country'] = worldcities['Country'].replace(WORLDCITIES_TO_CDP_COUNTRY)

# Attach the CDP region of each country (left join keeps countries that are
# absent from the CDP responses; their 'CDP Region' is NaN).
worldcities = worldcities.merge(cities_response[['Country','CDP Region']].drop_duplicates(),on='Country',how='left')

In [None]:
# --- Participation chart: CDP organizations vs. cities with 100K+ population, per region ---

# Distinct (city, region) pairs for cities that have a CDP region and a
# population above 100,000, then counted per region.
world_data = worldcities[['city','CDP Region']][(~worldcities['CDP Region'].isna())&(worldcities['population']>100000)].drop_duplicates()
world_data = pd.DataFrame(world_data.groupby(['CDP Region'])['city'].count()).rename(columns={'city':'Total City Count'}).reset_index()


# Faint (opacity 0.2) horizontal bars: total city count per region.
world_bars = alt.Chart(world_data).mark_bar(size=17).encode(
    y=alt.Y('CDP Region'),
    x=alt.X('Total City Count', stack='zero'),
    opacity=alt.value(0.2),

                    )
world_bars.height = 300
world_bars.width = 650


# Distinct (organization, region) pairs in the city responses, counted per region.
cdp_data = cities_response[['Organization','CDP Region']].drop_duplicates()
cdp_data = pd.DataFrame(cdp_data.groupby(['CDP Region'])['Organization'].count()).rename(columns={'Organization':'Total Organization Count'}).reset_index()


# Stronger (opacity 0.8) horizontal bars: participating organization count per region.
cdp_bars = alt.Chart(cdp_data).mark_bar(size=17).encode(
    y=alt.Y('CDP Region'),
    x=alt.X('Total Organization Count', stack='zero'),
    opacity=alt.value(0.8),
                    )

cdp_bars.height = 300
cdp_bars.width = 650



# Participation percentage = organizations / cities * 100, per region
# (inner merge: only regions present in both counts are kept).
perc_data = world_data.merge(cdp_data,on='CDP Region')
perc_data['Percentage of participation'] = (perc_data['Total Organization Count']/perc_data['Total City Count'])*100
perc_data['text'] = perc_data['Percentage of participation'].apply(lambda x: 'Percentage of participation : '+str(np.round(x,2)))

# Text label positioned at the city-count value of each region, shifted right by dx.
text = alt.Chart(perc_data).mark_text(dx=78, dy=1, color='#0e7735',size=10).encode(
    y=alt.Y('CDP Region'),
    x=alt.X('Total City Count', stack='zero'),
    detail='Percentage of participation:N',
    text=alt.Text('text'),
    )

# Chart without data that only carries a colour scale and a bottom legend.
# NOTE(review): presumably added just to render a legend for the two bar
# layers, whose colours are not encoded — confirm the range matches the bars.
legend = alt.Chart().mark_bar().encode(color=alt.Color('Legend:N', 
              scale=alt.Scale(domain=['Total City Count',
                                      'Total Organization Count'],
                              range=[ '#bed8ec','#3181bd']),
                    legend=alt.Legend(orient='bottom')))

legend.height = 300
legend.width = 650

# Layer organization bars, city bars and labels, and attach title/subtitle.
comb = (cdp_bars+world_bars+text).properties(title={
    'text':['Percentage of participation at city level',
           '(Considers only the countries, present in CDP data / cities having 100K+ population in region)'],
     'subtitle':['Total City Count: Total no. of cities with 100K+ population at the region ,    Total Organization Count : Total no. of city-based organizations collaborating with CDP at the region'],
           })
    



# Displayed as the cell output.
(comb+legend)


# Percentage of participation at city level:
#### The visualization above tells us about the participation level from each CDP region.
It compares the number of cities currently participating in each region with the total number of cities with 100K+ population in that region.
The percentage shows where CDP can improve collaboration, and by how much.

The chart shows, 
* 'Latin America' and 'North America' are considerably ahead of the other regions.
* The 'East Asia' and 'South and West Asia' regions have quite low participation.

In [None]:
# Responses to question 2.0 (charted below as the status of the climate change
# risk and vulnerability assessment): one row per organization / year / answer.
data = cities_response[(cities_response['Question Number']=='2.0')][['Response Answer','CDP Region','Organization','Year Reported to CDP']].drop_duplicates()
data = data.sort_values(by=['Organization','Year Reported to CDP'])

# Scaffold with one row per (organization, year) for all three survey years,
# so that years in which an organization did not answer show up as NaN.
organizations = list(data['Organization'].unique())
survey_years = [2018, 2019, 2020]
final_data = pd.DataFrame({
    'Organization': organizations*len(survey_years),
    'Year Reported to CDP': [year for year in survey_years for _ in organizations],
})
final_data = final_data.merge(data[['Organization','Year Reported to CDP','Response Answer']],how='left',on=['Organization','Year Reported to CDP'])
final_data = final_data.merge(data[['Organization','CDP Region']].drop_duplicates(),how='left',on=['Organization'])

# Sort so each organization's rows are consecutive and ordered by year; the
# fresh 0..n-1 index makes row labels equal to row positions, which the
# "previous row" lookups below rely on.
final_data = final_data.sort_values(by=['Organization','Year Reported to CDP']).reset_index(drop=True)

# A missing 2018 answer has no earlier year to inherit from.
idx = final_data[(final_data['Response Answer'].isna())&(final_data['Year Reported to CDP']==2018)].index.values
final_data.loc[idx, 'Response Answer'] = 'Do not know'

# A missing 2019 / 2020 answer inherits the answer of the row just above it
# (the same organization's previous year). 2019 is filled before 2020 so that
# 2020 can inherit an already-filled 2019 value.
# Writes go through .loc on the frame: assigning via
# final_data['Response Answer'].iloc[...] is chained assignment and is not
# guaranteed to update the frame (it does not under pandas Copy-on-Write).
for fill_year in (2019, 2020):
    idx = final_data[(final_data['Response Answer'].isna())&(final_data['Year Reported to CDP']==fill_year)].index.values
    final_data.loc[idx, 'Response Answer'] = final_data['Response Answer'].iloc[idx-1].values

# Share (in %) of each answer within every (region, year) group.
region_year = final_data.groupby(['CDP Region','Year Reported to CDP'])
answer_counts = region_year['Response Answer'].value_counts().sort_index()
group_sizes = region_year['CDP Region'].count().sort_index()
data2 = pd.DataFrame((answer_counts/group_sizes)*100)
data2 = data2.reset_index()
data2 = data2.rename(columns={0:'Percentage','Response Answer':'Status of climate change risk and vulnerability assessment'})

# Append explicit 0% rows for Africa / 2018 for two statuses; each row is
# written at label -1 and the index is then shifted up by one.
for zero_status in ('Not intending to undertake',
                    'Intending to undertake in the next 2 years'):
    data2.loc[-1] = ['Africa',2018,zero_status,0.0]
    data2.index = data2.index + 1

data2 = data2.sort_values(by=['Status of climate change risk and vulnerability assessment'],ascending=False)
# alt.Legend(orient='top-left')

# --- Stacked status chart per CDP region, filtered by a year slider ---

# Stacked horizontal bars: one segment per assessment status, with a fixed
# status -> colour mapping and the legend placed at the bottom.
bars = alt.Chart(data2).mark_bar(size=18).encode(
    y=alt.Y('CDP Region'),
    x=alt.X('Percentage', stack='zero'),
    color=alt.Color('Status of climate change risk and vulnerability assessment',\
                    legend=alt.Legend(orient='bottom'),
                    scale=alt.Scale(
            domain=['Yes', 'Not intending to undertake', 'No',
       'Intending to undertake in the next 2 years',
       'Intending to undertake in future', 'In progress', 'Do not know'],
                        range=['#46ab5e','orange','#dc2a25','#bbccdd','teal','#d18954','#5fb0c0']
                    ))
).properties(title={
    'text':['Status of climate risk and vulnerability assessment for each CDP region (in percentage)',
           '(Includes all the organizations reported till 2020)'],
    })

bars.height = 460
bars.width = 650


# White percentage labels using the same stacked x position as the bars,
# one per status (via `detail`), formatted with the '2d' format string.
text = alt.Chart(data2).mark_text(dx=-10, dy=1, color='white',size=12).encode(
    y=alt.Y('CDP Region'),
    x=alt.X('Percentage', stack='zero'),
    detail='Status of climate change risk and vulnerability assessment:N',
    text=alt.Text('Percentage', format='2d'),
)

text.height = 460
text.width = 650

# Range slider (2018..2020, step 1) bound to a single-value selection on
# 'Year Reported to CDP', initialised to 2018.
year_slider = alt.binding_range(name='Year Reported to CDP',min=2018, max=2020, step=1)
slider_selection = alt.selection_single(bind=year_slider, 
                                        fields=['Year Reported to CDP'], 
                                        name="Year Reported to CDP",
                                        init={'Year Reported to CDP':2018})

# Layer labels over bars.
comb = bars+text

# Attach the slider and keep only the rows of the selected year.
filter_year = comb.add_selection(
    slider_selection
).transform_filter(
    slider_selection
)

# Displayed as the cell output.
filter_year

# Status of climate change risk and vulnerability assessment:

#### This shows, where the participating cities are in terms of assessing climate change risk and vulnerability in each CDP region.
The assessment statuses, as defined by CDP, are:
1. Yes
2. Not intending to undertake
3. No
4. Intending to undertake in the next 2 years
5. Intending to undertake in future
6. In progress
7. Do not know


* **The numbers in the chart are, for each region and year, the percentage of all cities that participated at any point over the last 3 years that fall into each status segment.
The slider in the chart shows how the numbers have changed over the years. Any city without a 2018 response is counted under 'Do not know' for that year; a missing 2019 or 2020 response is filled with the city's status from the previous year.**


* **The final status for 2020 shows that, although participation across the Asian regions is low, almost 80% of the participating cities in 'South and West Asia', 'East Asia' and 'Southeast Asia and Oceania' have conducted a climate change risk and vulnerability assessment.**

In [None]:
def get_regional_summary(info,feature):
    """Describe ``feature`` for every (CDP Region, Year Reported to CDP) group.

    Parameters
    ----------
    info : pandas.DataFrame
        Frame containing 'CDP Region', 'Year Reported to CDP' and ``feature``.
    feature : str
        Name of the column to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per group, with the two grouping keys as ordinary columns
        followed by the statistics produced by ``describe()``.
    """
    by_region_and_year = info.groupby(['CDP Region','Year Reported to CDP'])
    summary = by_region_and_year[feature].describe()
    return summary.reset_index()


# Per-organization coordinates; the map cells read its 'lat' / 'long' columns
# after merging on 'Organization'.
location_values = pd.read_csv('/kaggle/input/comp-dataset/location_values.csv')

# Select the climate-hazard responses.
# The 2018 response options are not in sync with 2019 and 2020, so 2018 is
# taken from question 2.2a while 2019/2020 are taken from question 2.1.
hazard_rows_2019_2020 = (cities_response['Question Number']=='2.1')&(cities_response['Year Reported to CDP'].isin([2019,2020]))
hazard_rows_2018 = (cities_response['Question Number']=='2.2a')&(cities_response['Year Reported to CDP']==2018)
data = cities_response[hazard_rows_2019_2020|hazard_rows_2018][['Year Reported to CDP',
                  'CDP Region',
                  'Column Name',
                  'Country',
                  'Question Name',
                  'Organization',
                  'Response Answer']].drop_duplicates().reset_index(drop=True)

# Unify two column labels under one name. The result is assigned back to the
# column: replace(inplace=True) on a column selection is chained assignment
# and does not update the frame under pandas Copy-on-Write.
data['Column Name'] = data['Column Name'].replace(['Current consequence of hazard','Magnitude of impact'],'Current magnitude of hazard')
data['Question Name'] = 'Most significant climate hazards faced by city'

# The 11 most frequent answers in the 'Social impact of hazard overall' column.
is_social_impact = data['Column Name']=='Social impact of hazard overall'
social_answer_counts = data[is_social_impact]['Response Answer'].value_counts()
CDP_enlisted_major_impacts = list(social_answer_counts.index[:11])

# One row per organization / region / country / year and one count column per
# retained social aspect (0.0 where the aspect was not reported).
social_pivot_columns = ['Year Reported to CDP','CDP Region','Country','Organization','Response Answer','Column Name']
social_rows = data[social_pivot_columns][is_social_impact&(data['Response Answer'].isin(CDP_enlisted_major_impacts))]
social_impact_info = pd.pivot_table(
    social_rows,
    index=['Organization','CDP Region','Country','Year Reported to CDP'],
    columns='Response Answer',
    aggfunc='count',
    fill_value=0.0,
)

# Row total across the aspect columns, then expressed as a percentage of the
# number of retained aspects.
social_impact_info['Total no. of impacted social aspects'] = social_impact_info.sum(axis=1)
social_impact_info = social_impact_info.reset_index()
social_aspect_share = 100*social_impact_info['Total no. of impacted social aspects']/len(CDP_enlisted_major_impacts)
social_impact_info['percentage of CDP enlisted social aspects impacted'] = social_aspect_share.round(2)



# Rows describing which assets / services were affected (the column label
# differs between survey years), without rows containing missing values,
# ordered by year and organization.
# Built as one chain so the result is a fresh frame: calling dropna / sort_values
# with inplace=True on a filtered slice mutates a potential copy and triggers
# pandas' SettingWithCopy machinery.
affected_services = (
    data[data['Column Name'].isin(['Most relevant assets / services affected overall',
                                   'Top three assets/ services affected'])]
    .dropna()
    .sort_values(by=['Year Reported to CDP','Organization'])
)

# The 20 most frequently reported services / sectors.
CDP_enlisted_sectors = list(affected_services['Response Answer'].value_counts()[:20].index.values)

# One row per year / region / country / organization and one count column per
# retained sector (0.0 where the sector was not reported).
affected_services_info = pd.pivot_table(affected_services[['Year Reported to CDP','CDP Region','Country','Organization','Response Answer','Column Name']][affected_services['Response Answer'].isin(CDP_enlisted_sectors)],index=['Year Reported to CDP','CDP Region','Country','Organization'],columns='Response Answer',aggfunc='count',fill_value=0.0)

# Row total across the sector columns, then expressed as a percentage of the
# number of retained sectors.
affected_services_info['Total no. of impacted services'] = affected_services_info.sum(axis=1)
affected_services_info = affected_services_info.reset_index()
affected_services_info['percentage of CDP enlisted services impacted'] = np.round(100*(affected_services_info['Total no. of impacted services']/len(CDP_enlisted_sectors)),4)



# Descriptive statistics per (region, year) for the two city-level impact
# percentages.
social_impact_summary = get_regional_summary(
    info=social_impact_info,
    feature='percentage of CDP enlisted social aspects impacted',
)
affected_services_summary = get_regional_summary(
    info=affected_services_info,
    feature='percentage of CDP enlisted services impacted',
)


CDP begins the survey on cities with a set of questions on the public services/assets that are crucial to keeping city operations up and running. The questions assess how these services are affected by climate-related hazards, and how the hazards have impacted different aspects of society. In the two sections below, we explore the data submitted by the cities on these two categories.

# **Exploration of impacted Services/Assets**


1. **Global Severity Map of Impacted Services** : This global map shows, as a percentage, how many of the CDP enlisted services and assets are impacted in each city as per its last reported status. The colorbar beside the map shows the severity scale. A brighter spot represents a city with higher severity.

2. **Regional average of city-based impact% over last 3 years** : This section shows, for each CDP region, how the city-based impact percentages have evolved over the last 3 years. The numbers shown when hovering over the chart tell you that ***the European and North American regions show a jump from 35% to 40% and from 43% to 48% respectively between 2019 and 2020, while the other regions show a jump of around 3%***. In reality it could be higher, because apart from Europe and North America (USA & Canada), the other regions have lower participation in 2020. Guess we all have reasons to be busy with Covid!



3. **Trendline of global average impact%** : Trendline shows how the impact percentage has grown over the years globally.

4. **Service-wise analysis : % of cities having the corresponding sector affected** : This section shows the services/assets that are hit most often in cities across the world.
Water supply & sanitation tops the chart, with 81% of cities across the world reporting an impact, which also helps explain why public health comes next.
In a way, this shows why CDP has chosen to run a dedicated survey on water security.

5. **Region-wise analysis on - "Water supply & sanitation" and "Public health"** : When we explore how each region is doing on these two most affected services, we can see a ***strong and expected correlation in all the regions except the 'Middle East'*** (highlighted bars). All the Middle Eastern cities have reported that water supply & sanitation is impacted; however, the impact on public health services is the lowest when compared with the other regions.


**Severity value and other details of a city can be found by hovering over the city in the map**

In [None]:

# Replace the column labels left by the pivot with flat names: the four key
# columns, one column per enlisted sector, then the two derived totals.
# NOTE(review): the list is positional — it must match the pivot's column
# order and count exactly, and re-running this cell after the merge below
# will fail with a length mismatch.
affected_services_info.columns = ['Year Reported to CDP', 'CDP Region','Country',
       'Organization','Commercial',
       'Community & culture',
       'Education',
       'Emergency management',
       'Emergency services', 'Energy',
       'Environment',
       'Environment, biodiversity, forestry',
       'Food & agriculture',
       'Industrial',
       'Information & communications technology',
       'Land use planning',
       'Law & order', 'Public health',
       'Residential',
       'Society / community & culture',
       'Tourism', 'Transport',
       'Waste management',
       'Water supply & sanitation',
       'Total no. of impacted services',
       'percentage of CDP enlisted services impacted']

# Attach the location columns for each organization (left join keeps
# organizations that have no entry in location_values).
affected_services_info = affected_services_info.merge(location_values,on='Organization',how='left')


# Hover label per row. Row values are read by column label: integer keys such
# as row[1] on a label-indexed Series rely on positional fallback, which is
# deprecated in pandas.
affected_services_info['text'] = affected_services_info[['Organization','Country','percentage of CDP enlisted services impacted']].apply(
    lambda row: 'Country : '+row['Country']+', Organization : '+row['Organization']+', % of impact: '+str(row['percentage of CDP enlisted services impacted']),
    axis=1)

# Latest reported row of every organization (sorted by year, last one kept).
affected_services_last_status = affected_services_info.sort_values(by=['Year Reported to CDP','Organization']).drop_duplicates(subset=['Organization'],keep='last')

# Map trace: one marker per organization at its latest reported status,
# coloured by the percentage of enlisted services impacted.
# Every per-point array comes from `affected_services_last_status`; `text`
# previously read the all-years frame, whose length and row order do not
# match the lon/lat/color arrays.
smb = go.Scattermapbox(name='severity map',
        lon = affected_services_last_status['long'],
        lat = affected_services_last_status['lat'],
        text = affected_services_last_status["percentage of CDP enlisted services impacted"],
        mode = 'markers',
        hovertext=affected_services_last_status['text'],
        marker = dict(
            size = 10,
            opacity = 0.8,
            # 'Reds' reversed, with the colour range running from 0 to the
            # largest observed percentage.
            reversescale = True,
            autocolorscale = False,
            colorscale = 'Reds',
            cmin = 0,
            color = affected_services_last_status["percentage of CDP enlisted services impacted"],
            cmax = affected_services_last_status["percentage of CDP enlisted services impacted"].max(),
            colorbar_title="Impact% - Severity scale",
            colorbar_thickness=10,
            colorbar_title_side='right',
            colorbar_len=.3,
            colorbar_xanchor='left',
            colorbar_yanchor='bottom',
            colorbar_y=0.7
        ))

# One bar trace per survey year: regional mean of the city-level impact %.
regional_mean = [go.Bar(x=affected_services_summary["CDP Region"][affected_services_summary['Year Reported to CDP']==year],
                        y=affected_services_summary["mean"][affected_services_summary['Year Reported to CDP']==year],
             showlegend=True,orientation='v',textfont_size=2,
             hovertext='Mean % of CDP enlisted services affected across region, on year - '+str(year),
             ) for year in [2018,2019,2020]]

# Global mean of the city-level impact % per survey year.
# NOTE(review): x is hard-coded, so this assumes the grouped result has
# exactly the years 2018, 2019 and 2020 in that order.
regional_scatter = go.Scatter(x=[2018,2019,2020], name='Trendline',
                               y=affected_services_info.groupby(['Year Reported to CDP'])["percentage of CDP enlisted services impacted"].mean(),
                               mode='lines+markers')

# Per sector: sum of the latest-status sector column divided by the number of
# latest-status rows, in %, highest first.
services_temp = (affected_services_last_status[CDP_enlisted_sectors].sum(axis=0)/affected_services_last_status.shape[0]).sort_values(ascending=False)*100
services_temp_trace = go.Bar(x=services_temp.index.values, y=services_temp.values,
             showlegend=True,text=np.round(services_temp.values,2),textposition='auto'
             )

# Same ratio computed per region; the public-health series follows the region
# order of the (descending) water series so the two bar groups line up.
services_temp2 = affected_services_last_status.groupby(['CDP Region'])[CDP_enlisted_sectors].sum()/affected_services_last_status.groupby(['CDP Region'])[CDP_enlisted_sectors].count()
water_temp = services_temp2['Water supply & sanitation'].sort_values(ascending=False)*100
health_temp = services_temp2['Public health'][water_temp.index.values]*100
water_health_temp_trace = [go.Bar(x=water_temp.index.values, y=water_temp.values,
             showlegend=True,text=np.round(water_temp.values,2),textposition='auto',
            hovertext=['Water Supply & Sanitation : % of affected cities in the region : '+str(i) for i in np.round(water_temp.values,2)],orientation='v'
             ),
 go.Bar(x=health_temp.index.values, y=health_temp.values,text=np.round(health_temp.values,2),
             showlegend=True,textposition='auto',
        hovertext=['Public health : % of affected cities in the region : '+str(i) for i in np.round(health_temp.values,2)],orientation='v'
             )]


# Highlight the first bar (the region with the highest water value) in both
# traces. The number of remaining bars is derived from the data instead of
# being hard-coded to 7.
other_region_count = len(water_temp) - 1
water_health_temp_trace[0].marker.color = ["rgb(255,247,188)"]+["rgb(165,15,21)"]*other_region_count
water_health_temp_trace[1].marker.color = ["rgb(255,247,188)"]+["rgb(35,139,69)"]*other_region_count
water_health_temp_trace[0].width=.3
water_health_temp_trace[1].width=.3


# 4-row layout: full-width map, (regional bars | global trendline),
# full-width per-sector bars, full-width water-vs-health bars.
fig1 = make_subplots(
    rows=4, cols=2,
    specs=[
           [{"type": "scattermapbox",'colspan':2},None],
           [{"type": "bar",'colspan':1},{"type": "scatter",'colspan':1}],
           [{"type": "bar",'colspan':2},None],
           [{"type": "bar",'colspan':2},None]
          ],
    subplot_titles=['<b>Global Severity map of impacted services</b>',
                    '<b>Regional average of city-based impact% over last 3 years','<b>  Trendline of global average impact%</b>',
                    '<b>Service-wise analysis : % of cities having the corresponding sector affected</b>',
                    '<b>Region-wise analysis on - "Water supply & sanitation" and "Public health"</b>'
                    ],
    vertical_spacing=0.14,horizontal_spacing=0.2,
    row_heights=[0.5,0.15,0.10,0.3],
                    )

fig1.add_trace(smb,row=1,col=1)
fig1.update_layout(
        mapbox_style="carto-darkmatter",
        height=1600,
        width=900,
        )

fig1.update_layout(
    template="plotly_dark",
    margin=dict(r=25, t=25, b=140, l=100),
    showlegend=False,
    bargroupgap=0.25,
    bargap=0.25
)


# add_trace(row=, col=) replaces the deprecated append_trace; traces are added
# in the same order as before.
fig1.add_trace(regional_mean[0],row=2,col=1)
fig1.add_trace(regional_mean[1],row=2,col=1)
fig1.add_trace(regional_mean[2],row=2,col=1)

fig1.add_trace(regional_scatter,row=2,col=2)


fig1.add_trace(services_temp_trace,row=3,col=1)

fig1.add_trace(water_health_temp_trace[0],row=4,col=1)
fig1.add_trace(water_health_temp_trace[1],row=4,col=1)


# Same opacity for every trace in the figure.
fig1.update_traces(opacity=0.65)
# Kept as the last expression so the figure is rendered as the cell output.
fig1.update_layout(font_color="rgb(199,233,180)",font_family='sans-serif')


# Exploration of Impact on Social Aspects

1. **Global Severity map of impacted social aspects**: This global map shows, as a percentage, how many of the CDP enlisted social aspects are impacted in each city as per its last reported status. The colorbar beside the map shows the severity scale. A brighter spot on the map represents a city with higher severity.

2. **Regional average of city-based impact% over last 2 years** : This section shows, for each CDP region, how the city-based impact percentages have evolved over the last 2 years, as the 2018 data doesn't include this. Again, the European and North American (USA & Canada) regions show a jump of around 5-7% in impact. For the other regions, although the survey numbers show a jump of around 3%, this is most likely because of lower participation in 2020 in those regions.

3. **Trendline : global avg. impact% on social aspects** : The trendline shows a jump of approximately 3.5% in the percentage of impacted social aspects globally. But as discussed in the previous section, the actual numbers could be closer to those of the European and North American regions.

4. **Service-wise analysis : % of cities having the corresponding social aspect affected** : 'Increased risk to already vulnerable populations', 'Increased demand for public services' and 'Increased demand for healthcare services' are the top 3 most impacted social aspects globally.

5. **Region-wise: "vulnerable population\'s risk" & "demand for public services"** : The regional exploration shows that around 92% of the cities in both the African and North American regions report an increased risk to already vulnerable populations. European cities show the lowest number on this.
In contrast, increased demand for public services is lowest in African cities and highest in North American and 'Southeast Asia and Oceania' cities.


**Severity value and other details of a city can be found by hovering over the city in the map**

In [None]:



# Replace the column labels left by the pivot with flat names: the four key
# columns, one column per enlisted social aspect, then the two derived totals.
# NOTE(review): the list is positional — it must match the pivot's column
# order and count exactly, and re-running this cell after the merge below
# will fail with a length mismatch.
social_impact_info.columns = ['Organization', 'CDP Region','Country',
       'Year Reported to CDP','Fluctuating socio-economic conditions',
        'Increased conflict and/or crime',
        'Increased demand for healthcare services',
        'Increased demand for public services',
        'Increased incidence and prevalence of disease and illness',
        'Increased resource demand',
        'Increased risk to already vulnerable populations',
        'Loss of tax base to support public services',
        'Loss of traditional jobs',
        'Migration from rural areas to cities',
        'Population displacement',
        'Total no. of impacted social aspects',
        'percentage of CDP enlisted social aspects impacted']

# Attach the location columns for each organization (left join keeps
# organizations that have no entry in location_values).
social_impact_info = social_impact_info.merge(location_values,on='Organization',how='left')


# Hover label per row. Row values are read by column label: integer keys such
# as row[1] on a label-indexed Series rely on positional fallback, which is
# deprecated in pandas.
social_impact_info['text'] = social_impact_info[['Organization','Country','percentage of CDP enlisted social aspects impacted']].apply(
    lambda row: 'Country : '+row['Country']+', Organization : '+row['Organization']+', % of impact: '+str(row['percentage of CDP enlisted social aspects impacted']),
    axis=1)


# Latest reported row of every organization (sorted by year, last one kept).
# The mid-cell expression that used to follow this line was evaluated and
# discarded (it was not the cell's last statement), so it has been removed.
social_impact_last_status = social_impact_info.sort_values(by=['Year Reported to CDP','Organization']).drop_duplicates(subset=['Organization'],keep='last')


# Map trace: one marker per organization at its latest reported status,
# coloured by the percentage of enlisted social aspects impacted.
# Every per-point array comes from `social_impact_last_status`; `text`
# previously read the all-years frame, whose length and row order do not
# match the lon/lat/color arrays.
smb = go.Scattermapbox(name='Severity map',
        lon = social_impact_last_status['long'],
        lat = social_impact_last_status['lat'],
        text = social_impact_last_status["percentage of CDP enlisted social aspects impacted"],
        mode = 'markers',
        hovertext=social_impact_last_status['text'],
        marker = dict(
            size = 10,
            opacity = 0.8,
            # 'Reds' reversed, with the colour range running from 0 to the
            # largest observed percentage.
            reversescale = True,
            autocolorscale = False,
            colorscale = 'Reds',
            cmin = 0,
            color = social_impact_last_status["percentage of CDP enlisted social aspects impacted"],
            cmax = social_impact_last_status["percentage of CDP enlisted social aspects impacted"].max(),
            colorbar_title="Impact% - Severity scale",
            colorbar_thickness=10,
            colorbar_title_side='right',
            colorbar_len=.3,
            colorbar_xanchor='left',
            colorbar_yanchor='bottom',
            colorbar_y=0.7
        ))

# One bar trace per survey year: regional mean of the city-level impact %.
regional_mean = [go.Bar(x=social_impact_summary["CDP Region"][social_impact_summary['Year Reported to CDP']==year],
                        y=social_impact_summary["mean"][social_impact_summary['Year Reported to CDP']==year],
             showlegend=True,orientation='v',textfont_size=2,
             hovertext='Mean % of CDP enlisted social aspects affected across region, on year - '+str(year),
             ) for year in [2019,2020]]

# Global mean of the city-level impact % per survey year.
# NOTE(review): x is hard-coded, so this assumes the grouped result has
# exactly the years 2019 and 2020 in that order.
regional_scatter = go.Scatter(name='Trendline',
                               x=[2019,2020],
                               y=social_impact_info.groupby(['Year Reported to CDP'])["percentage of CDP enlisted social aspects impacted"].mean(),
                               mode='lines+markers')

# Per social aspect: sum of the latest-status aspect column divided by the
# number of latest-status rows, in %, highest first.
aspects_temp = (social_impact_last_status[CDP_enlisted_major_impacts].sum(axis=0)/social_impact_last_status.shape[0]).sort_values(ascending=False)*100
aspects_temp_trace = go.Bar(x=aspects_temp.index.values, y=aspects_temp.values,
             showlegend=True,text=np.round(aspects_temp.values,2),textposition='auto'
             )

# Same ratio computed per region; the public-services series follows the
# region order of the (descending) vulnerable-population series so the two
# bar groups line up.
services_temp2 = social_impact_last_status.groupby(['CDP Region'])[CDP_enlisted_major_impacts].sum()/social_impact_last_status.groupby(['CDP Region'])[CDP_enlisted_major_impacts].count()
vul_temp = services_temp2['Increased risk to already vulnerable populations'].sort_values(ascending=False)*100
public_temp = services_temp2['Increased demand for public services'][vul_temp.index.values]*100
vul_public_temp_trace = [go.Bar(x=vul_temp.index.values, y=vul_temp.values,
             showlegend=True,text=np.round(vul_temp.values,2),textposition='auto',
            # Hover values come from vul_temp; they were previously built from
            # `water_temp`, a series left over from the services cell.
            hovertext=['Increased risk to already vulnerable populations: % of affected cities in the region : '+str(i) for i in np.round(vul_temp.values,2)],orientation='v'
             ),
 go.Bar(x=public_temp.index.values, y=public_temp.values,text=np.round(public_temp.values,2),
             showlegend=True,textposition='auto',
        hovertext=['Increased demand for public services: % of affected cities in the region : '+str(i) for i in np.round(public_temp.values,2)],orientation='v'
             )]


# Highlight the first bar (the region with the highest vulnerable-population
# value) in both traces. The number of remaining bars is derived from the
# data instead of being hard-coded to 7.
other_region_count = len(vul_temp) - 1
vul_public_temp_trace[0].marker.color = ["rgb(255,247,188)"]+["rgb(165,15,21)"]*other_region_count
vul_public_temp_trace[1].marker.color = ["rgb(255,247,188)"]+["rgb(35,139,69)"]*other_region_count


# 4-row layout: full-width map, (regional bars | global trendline),
# full-width per-aspect bars, full-width vulnerable-vs-public-services bars.
fig2 = make_subplots(
    rows=4, cols=2,
    specs=[
           [{"type": "scattermapbox",'colspan':2},None],
           [{"type": "bar",'colspan':1},{"type": "scatter",'colspan':1}],
           [{"type": "bar",'colspan':2},None],
           [{"type": "bar",'colspan':2},None]
          ],
    subplot_titles=['<b>Global Severity map of impacted social aspects</b>',
                    '<b>Regional avg. of city-based impact% over years','  <b>Trendline : global avg. impact% </b>',
                    '<b>Service-wise analysis : % of cities having the corresponding social aspect affected</b>',
                    '<b>Region-wise: "vulnerable population\'s risk" & "demand for public services"</b>'
                    ],
    vertical_spacing=0.16,horizontal_spacing=0.2,
    row_heights=[0.5,0.15,0.1,0.25],
                    )

fig2.add_trace(smb,row=1,col=1)
fig2.update_layout(
        mapbox_style="carto-darkmatter",
        height=1600,
        width=900,
        )

fig2.update_layout(
    template="plotly_dark",
    margin=dict(r=25, t=25, b=140, l=100),
    showlegend=False,
    bargroupgap=0.25,
    bargap=0.25
)


# add_trace(row=, col=) replaces the deprecated append_trace; traces are added
# in the same order as before. Only two yearly bar traces exist here
# (2019 and 2020).
fig2.add_trace(regional_mean[0],row=2,col=1)
fig2.add_trace(regional_mean[1],row=2,col=1)

fig2.add_trace(regional_scatter,row=2,col=2)


fig2.add_trace(aspects_temp_trace,row=3,col=1)
fig2.add_trace(vul_public_temp_trace[0],row=4,col=1)
fig2.add_trace(vul_public_temp_trace[1],row=4,col=1)


# Same opacity for every trace in the figure.
fig2.update_traces(opacity=0.65)

# Kept as the last expression so the figure is rendered as the cell output.
fig2.update_layout(font_color="rgb(199,233,180)",font_family='sans-serif')
