## Do trends in CO2 emissions impact natural disasters? Could governments save taxpayer dollars by reducing CO2 emissions?

### Import Libraries

In [None]:
!pip install -q geopandas
!pip install -q jsonschema==3.2

import pandas as pd
import altair as alt
import numpy as np
import re
import geopandas as gpd
from IPython.display import display, HTML

import warnings
warnings.filterwarnings('ignore')

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 KB[0m [31m164.2 kB/s[0m eta [36m0:00:00[0m
[?25h

### Load the Data

In [None]:
# Every input file is served from the same GitHub repository, so each URL is
# built from a single base.
DATA_ROOT = 'https://raw.githubusercontent.com/Prateek-MSDS/Do-trends-in-C02-emissions-impact-natural-disasters-/main/'

# Tabular inputs (CSV / Excel).
event = pd.read_csv(DATA_ROOT + 'events-US-1980-2022.csv')
emissions = pd.read_csv(DATA_ROOT + 'CO2_Emissions_1960-2018.csv')
disaster = pd.read_csv(DATA_ROOT + 'natural-disasters.csv')
disaster_cleaned = pd.read_excel(DATA_ROOT + 'Disasters_renamed.xlsx')

# Country shapes used by the map charts.
data_world = gpd.read_file(DATA_ROOT + 'countries.geojson')


### Data Cleaning

##### Emissions Data Frame

In [None]:
# Emissions table: a 'Country Name' column plus, presumably, one numeric column
# per year (1960-2018 going by the file name) -- TODO confirm the schema.

# Snapshot of the raw table, taken before the median row is appended below.
emissions_new = emissions.copy()

# Append a 'median_year' row holding the per-column median across countries.
# numeric_only=True keeps the text 'Country Name' column out of the reduction;
# without it pandas >= 2.0 raises TypeError instead of silently skipping it.
emissions.loc['median_year'] = emissions.median(numeric_only=True)

# The appended row has no country name yet; label it so it can be selected.
emissions['Country Name'] = emissions['Country Name'].fillna(value='Global Median')

# Per-column mean / median over all rows (the 'Global Median' row included).
avg_year = emissions.mean(axis=0, numeric_only=True)
median_year = emissions.median(axis=0, numeric_only=True)

# Long form of the full table: one row per (country, year).
global_emissions = pd.melt(emissions, ['Country Name'], var_name='Year', value_name='Emissions')

# Countries highlighted in the trend chart, plus the global median row.
highlighted = ['Saudi Arabia', 'United Arab Emirates', 'Qatar', 'Chad', 'Malawi',
               'United States', 'China', 'India', 'Global Median']
countries = emissions[emissions['Country Name'].isin(highlighted)]

countries_emissions = pd.melt(countries, ['Country Name'], var_name='Year', value_name='Emissions')

# Split the long table into the country lines and the median line.
is_median_row = countries_emissions['Country Name'] == 'Global Median'
filter_countries = countries_emissions[~is_median_row]
filter_median = countries_emissions[is_median_row]

# Year columns covering the last two decades of the data (1998-2018 inclusive).
col_list = [str(year) for year in range(1998, 2019)]

df_emis_world = emissions.copy()

# Converting dataframe from wide to long form:
df_ems_world_tidy = pd.melt(df_emis_world, id_vars=['Country Name'], var_name='Year', value_name='CO2')

# Median CO2 value per year. Only 'CO2' is aggregated so the text
# 'Country Name' column never reaches median().
df_median_emis = df_ems_world_tidy.groupby('Year', as_index=False)['CO2'].median()

# Year labels come from column names (strings); make them numeric so they can
# be compared and later merged with the event table's integer years.
df_median_emis['Year'] = df_median_emis['Year'].astype(int)

# Considering the years from 1980:
df_median_emis = df_median_emis[df_median_emis['Year'] >= 1980]


##### Event Data Frame

In [None]:
# 'Begin Date' is reduced to its year by dropping the last four digits
# (presumably a YYYYMMDD integer -- TODO confirm against the CSV). Values that
# are already below 10000 are left untouched so that re-running this cell does
# not collapse the years to 0. `event` itself is updated because the chart
# cells further down read the year from event['Begin Date'].
begin_date = event['Begin Date']
event['Begin Date'] = begin_date.where(begin_date < 10000, begin_date // 10000).astype(int)

# Work on a derived frame (drop/rename return new objects) instead of aliasing
# `event`: unused columns removed, cost column given a short name.
df_event_us = (
    event
    .drop(['End Date', 'Deaths'], axis=1)
    .rename(columns={'Total CPI-Adjusted Cost (Millions of Dollars)': 'Cost_US'})
)

# Dropping the rows whose 'Cost_US' contains letters rather than a number.
# Missing costs are kept here (treated as "no letters").
has_letters = df_event_us['Cost_US'].str.contains("[a-zA-Z]").fillna(False)
df_event_us = df_event_us[~has_letters]

# Considering the years only till 2018. The explicit copy makes the column
# assignment below act on an independent frame rather than on a slice.
df_event_us = df_event_us[df_event_us['Begin Date'] < 2019].copy()

# Converting 'Cost_US' from object to float type
df_event_us['Cost_US'] = df_event_us['Cost_US'].astype(float)

# Total cost per year in USA. Only 'Cost_US' is summed: summing the whole frame
# would also try to "add up" the remaining text columns on pandas >= 2.0.
df_sum_cost_us = (
    df_event_us
    .groupby('Begin Date', as_index=False)['Cost_US'].sum()
    .rename(columns={'Begin Date': 'Year'})
)

# Yearly median CO2 joined with yearly US disaster cost; years without cost
# data keep a missing 'Cost_US' because of the left join.
df_merge_world_us = pd.merge(df_median_emis, df_sum_cost_us, how='left', on='Year')

df_merge_world_us['Cost_per_C02'] = df_merge_world_us['Cost_US'] / df_merge_world_us['CO2']

#### Data World Dataframe

In [None]:
# Changing the column name and country names of 'data_world' table in accordance with the merging tables:
data_world.rename(columns = {'ADMIN': 'Country Name'}, inplace = True)

data_world['Country Name'] = data_world['Country Name'].replace(
    ['United States of America', 'Russia', 'Yemen', 'Democratic Republic of the Congo', 'Republic of Congo' 'United Republic of Tanzania', 'Egypt', 'Venezuela',
     'United Republic of Tanzania'], 
    ['United States','Russian Federation', 'Yemen, Rep.', 'Congo', 'Tanzania', 'Egypt, Arab Rep.', 'Venezuela, RB', 'Tanzania'])


# Merging the CO2 emissions table and World data table:
df_world = pd.merge(emissions, data_world, how='left', on = 'Country Name')
df_world['mean_emission'] = df_world[col_list].mean(axis=1)

df_filter = df_world.filter(['Country Name', 'mean_emission'])
df_filter['Country Name'] = df_filter['Country Name'].replace(['Congo, Dem. Rep.' ], ['Congo'])
df_world_new = pd.merge(data_world, df_filter, how='left', on = 'Country Name')
dff = df_world_new.sort_values('mean_emission',ascending = False).head(20) 


#### Disaster Cleaned Dataframe

In [None]:
df_disaster = disaster_cleaned

# Only the rows from 1998 onwards are aggregated.
df_dis = df_disaster[df_disaster['year'] >= 1998]

# Per-country totals. Only the two measures used downstream are summed;
# summing the whole frame would also add up 'year' and, on pandas >= 2.0,
# attempt to sum any text columns.
measure_cols = ['people_affected_disasters', 'affected_disasters_million']
df_sum = df_dis.groupby('Country', as_index=False)[measure_cols].sum()

df_filter_dis = df_sum.rename(columns={'Country': 'Country Name'})

# Remove the 'World' row before any ranking so that it cannot appear among
# the "top" countries.
df_filter_dis = df_filter_dis[df_filter_dis['Country Name'] != 'World']

# Switch these country shapes back to their short names for the merge with the
# disaster table. NOTE: this updates data_world in place, undoing part of the
# renaming done for the emissions merge.
data_world['Country Name'] = data_world['Country Name'].replace({
    'Russian Federation': 'Russia',
    'Egypt, Arab Rep.': 'Egypt',
    'Venezuela, RB': 'Venezuela',
    'Yemen, Rep.': 'Yemen',
})

# The 20 countries with the highest 'affected_disasters_million' total.
df_dis_top = (
    df_filter_dis
    .sort_values('affected_disasters_million', ascending=False)
    .dropna(subset=['affected_disasters_million'])
    .head(20)
)

# Merging the Disasters table and World data (with geographic locations of countries) table:
df_world_disasters = pd.merge(data_world, df_filter_dis, how='left', on='Country Name')



##### Disaster Data Frame

In [None]:
# Filter out only the columns required
disaster_new = disaster[disaster.columns[disaster.columns.str.contains('^Number of deaths from|^Number of people affected|^Year|^Entity')]]

# remove coulms with GDP and mass
disaster_new = disaster_new[disaster_new.columns.drop(list(disaster_new.filter(regex='GDP|mass')))]

# filter out additional colunms
disaster_new = disaster_new.iloc[:, np.r_[0:2,3:5,
                           6:8,9:11,
                           12:14,15:17,
                           18:20,21:23,
                           24:26,27:29,
                           30:32,33:35]]


# Rename Extra words from the column names
disaster_new.columns = disaster_new.columns.str.replace(r'.*of', '', regex=True)
disaster_new.columns = disaster_new.columns.str.strip()

# Tranpose the Data
disaster_new = pd.melt(disaster_new, id_vars =['Entity','Year'], var_name="Col", value_name="Count")

# Create a new column with flags Deaths and People Affected
disaster_new['the_Impact'] = disaster_new['Col'].str.split().str[0]
disaster_new['the_Impact'] = np.where(disaster_new['the_Impact']=='deaths', 'Deaths', 'Affected')

# Extract the Disaster name from the columns
disaster_new['Disasters'] = disaster_new['Col'].str.extract(r'(?=from|by)\s*(.*)')
disaster_new['Disasters'] = disaster_new['Disasters'].str.split(" ", 1).str[1]

# Replace the value for Total count of Disaster
disaster_new['Disasters'] = disaster_new['Disasters'].replace('disasters', 'all')
disaster_new['Disasters'] = disaster_new['Disasters'].replace('extreme temperatures', 'Extreme temp')

# Capitalize first letter of the Disaster
disaster_new['Disasters'] = disaster_new['Disasters'].str.capitalize()

# Fill NAN values
disaster_new['Count'] = disaster_new['Count'].fillna(0)

# Re-order the Data Frame
disaster_new = disaster_new[['Entity', 'Year','the_Impact','Disasters','Count']]

# add a columns to bin year by 10 years
disaster_new['Year_Bins'] = np.where(disaster_new['Year']<=1910, 1910,
                            np.where((disaster_new['Year']>1910) & (disaster_new['Year']<=1920), 1920,
                            np.where((disaster_new['Year']>1920) & (disaster_new['Year']<=1930), 1930,
                            np.where((disaster_new['Year']>1930) & (disaster_new['Year']<=1940), 1940,
                            np.where((disaster_new['Year']>1940) & (disaster_new['Year']<=1950), 1950,
                            np.where((disaster_new['Year']>1950) & (disaster_new['Year']<=1960), 1960,
                            np.where((disaster_new['Year']>1960) & (disaster_new['Year']<=1970), 1970,
                            np.where((disaster_new['Year']>1970) & (disaster_new['Year']<=1980), 1980,
                            np.where((disaster_new['Year']>1980) & (disaster_new['Year']<=1990), 1990,
                            np.where((disaster_new['Year']>1990) & (disaster_new['Year']<=2000), 2000,
                            np.where((disaster_new['Year']>2000) & (disaster_new['Year']<=2010), 2010, 2020)))))))))))




# Filter records for the World
world_disasters = disaster_new.loc[disaster_new['Entity']=='World', ]
# Filter records for the USA
usa_disasters = disaster_new.loc[disaster_new['Entity']=='United States', ]



### Most affected countries in the last two decades

In [None]:
# Choropleth of 'affected_disasters_million' per country shape.

# Horizontal gradient legend placed at a fixed position inside the view.
affected_legend = alt.Legend(
    title="People Affected Per Million",
    orient="none",
    titleAnchor='middle',
    direction='horizontal',
    tickCount=7,
    legendX=320,
    legendY=460,
)

affected_color = alt.Color(
    'affected_disasters_million:Q',
    scale=alt.Scale(scheme='yelloworangered'),
    legend=affected_legend,
)

affected_tooltip = [
    'Country Name:N',
    alt.Tooltip('affected_disasters_million:Q', format='.0f',
                title='People affected (per million)'),
]

disasters_r = (
    alt.Chart(df_world_disasters)
    .mark_geoshape(stroke='#aaa', strokeWidth=0.25)
    .project(type='mercator')
    .encode(affected_color, tooltip=affected_tooltip)
    .properties(width=850, height=500,
                title='Most Affected Countries in the Last Two Decades')
    .configure_title(fontSize=18, anchor='middle', offset=10)
    .configure_view(stroke=None)
    .configure_legend(
        gradientLength=300,
        gradientThickness=10,
        gradientStrokeColor='black',
        gradientStrokeWidth=1,
    )
)

disasters_r

### Countries with the most people affected in the last two decades

In [None]:
# Five countries with the largest 'people_affected_disasters' total, with the
# count also expressed in millions for the y-axis.
df_total_top = (
    df_filter_dis
    .sort_values('people_affected_disasters', ascending=False)
    .dropna(subset=['people_affected_disasters'])
    .head(5)
    .assign(Count_in_millions=lambda top: top['people_affected_disasters'] / 1000000)
)

# Countries on x, ordered by descending bar height.
country_axis = alt.Axis(
    labelAngle=0,
    labelFontWeight=700,
    labelFontSize=12,
    title=None,
    tickSize=0,
    labelPadding=7,
)

# People affected (millions) on a square-root scale with a fixed domain.
affected_axis = alt.Axis(
    title='Number of People Affected (in Millions)',
    titleFontSize=15,
    minExtent=38,
    labelPadding=7,
    labelFontWeight=700,
    titleFontWeight=400,
    tickSize=0,
    format='.0f',
)

(
    alt.Chart(df_total_top)
    .mark_bar(color='#e57a44', opacity=.8)
    .encode(
        alt.X('Country Name:N', sort='-y', axis=country_axis),
        alt.Y('Count_in_millions:Q', axis=affected_axis,
              scale=alt.Scale(type='sqrt', domain=[0, 2400])),
    )
    .properties(width=570, height=350,
                title='Top 5 Countries with Most People Affected in the last Two Decades')
    .configure_title(fontSize=17, font='Times New Roman', anchor='middle', offset=12)
)


                                  


### Global trend of CO2 emissions

In [None]:
# Layered line chart: one line per highlighted country plus a dashed line for
# the global median, both on a square-root y scale.

# Both line layers share one size. They previously declared different widths
# (650 and 710); newer Altair releases are expected to reject layers whose
# width/height disagree, and only one size can apply to a layered view anyway.
# NOTE(review): 650 is the country layer's original width -- confirm the
# rendered layout (the 'Global Median' label is positioned in pixels).
TREND_WIDTH = 650
TREND_HEIGHT = 400

emissions_y = alt.Y('Emissions:Q', title='CO2 Emissions (metric tons per capita)',
                    scale=alt.Scale(type='sqrt'))

# Named `countries_lines` rather than `countries` so the chart does not
# overwrite the `countries` DataFrame built in the data-cleaning cell.
countries_lines = alt.Chart(filter_countries).mark_line(strokeWidth=2.5).encode(
    alt.X('Year:T'),
    emissions_y,
    alt.Color('Country Name:N', title='Country Name'),
).properties(width=TREND_WIDTH, height=TREND_HEIGHT)

median = alt.Chart(filter_median).mark_line(
    strokeWidth=3, strokeDash=[8, 4], color="#F60030"
).encode(
    alt.X('Year:T', title=''),
    emissions_y,
).properties(title='CO2 Emissions per Year', width=TREND_WIDTH, height=TREND_HEIGHT)

# Label for the dashed median line, placed at fixed pixel coordinates.
text = alt.Chart({'values': [{}]}).mark_text(
    text='Global Median', align="left", color="#F60030"
).encode(x=alt.value(520), y=alt.value(325))

alt.layer(countries_lines + median + text).configure_axis(
    labelAngle=0,
    labelFontSize=11,
    titleFontSize=13,
).configure_title(
    fontSize=15, anchor='middle', offset=10
).configure_legend(
    titleFontSize=12, labelFontSize=11, symbolSize=200, symbolStrokeWidth=4
)

### Count of Disasters in the US

In [None]:
# Convert to datetime
#event['Begin Date'] = pd.to_datetime(event['Begin Date'], format='%Y')

# Extract the disasters list
disaster_list = event['Disaster'].unique() # get unique field values
disaster_list = list(filter(lambda d: d is not None, disaster_list)) # filter out None values
disaster_list.sort() # sort alphabetically


selectType = alt.selection_single(
    name='Select', 
    fields=['Disaster'],
    init={'Disaster': 'Severe Storm'}, 
    bind=alt.binding_select(options=disaster_list) 
)

total_count = alt.Chart(event).mark_bar(color = '#e57a44', opacity = .6).transform_aggregate(
    groupby = ['Begin Date'],
    Count = 'count(Disaster)'
).encode(
    alt.X('Begin Date:N' , axis=alt.Axis(title = ''
                          , labelPadding=5
                          )),
    alt.Y('Count:Q'
          , axis=alt.Axis(title = ''
                          , labelPadding=5
                          , tickSize=0)
          , stack = None)
)

disaster_count = alt.Chart(event).mark_bar(color = '#e57a44').transform_aggregate(
    groupby = ['Disaster','Begin Date'],
    Count = 'count(Disaster)'
).add_selection(selectType).encode(
    alt.X('Begin Date:N', title = ''),
    alt.Y('Count:Q', title = '', stack = None),
    opacity = alt.condition(selectType, alt.value(0.8), alt.value(0))
)

(disaster_count + total_count).properties(title = "Count of Disasters in the US per Year"
                                          ,width=735, height=280).configure_axisBottom(
    labelAngle = 60).configure_view(strokeWidth=0).configure_title(fontSize=15,
                                                                   anchor='middle',
                                                                   offset = 10)

### Number of Deaths/Affected People per Year by Catastrophes - Static

    The data is grouped into 10-year bins from 1900 through 2020, and the y-axis shows the number of people impacted.

In [None]:
# force altair to display all the rows
alt.data_transformers.disable_max_rows()


# World Bar chart
world = alt.Chart(world_disasters).mark_bar(opacity = 0.8
                                            , fill = "#e57a44").transform_filter(
    'datum.the_Impact == "Deaths"'
).transform_filter(
    'datum.Disasters == "All"'
).transform_calculate(
    deaths='datum.Count' 
).encode(
    alt.X('Year_Bins:O', title='',axis=alt.Axis(ticks=False, domain=False)),
    alt.Y('sum(deaths):Q', title=' ',axis=alt.Axis(ticks=False, domain=False))
).properties(width=735, 
             height=280, 
             title='Deaths by Catastrophes Around the World in every Decade')




# USA Bar plot
usa = alt.Chart(usa_disasters).mark_bar(opacity = 0.8
                                        , fill = "#e57a44").transform_filter(
    'datum.the_Impact == "Deaths"'
).transform_filter(
    'datum.Disasters == "All"'
).transform_calculate(
    deaths='datum.Count' 
).encode(
    alt.X('Year_Bins:O', title='',axis=alt.Axis(ticks=False, domain=False)),
    alt.Y('sum(deaths):Q', title=' ',axis=alt.Axis(ticks=False, domain=False))
).properties(width=735, 
             height=280, 
             title='Deaths by Catastrophes in the USA in every Decade')


# Concatenate the view
(world & usa).configure_axis(
     labelAngle=0,
     labelFontSize=11,
     titleFontSize=13).configure_title(fontSize=15).configure_legend(
 titleFontSize=12,
 labelFontSize=11).configure_view(strokeWidth=0)

In [None]:
# force altair to display all the rows
alt.data_transformers.disable_max_rows()


def _affected_by_decade_chart(data, title, height):
    """Bar chart of 'Affected' counts (in millions) for 'All', years >= 1980, per Year_Bins value."""
    plain_axis = alt.Axis(ticks=False, domain=False)
    return (
        alt.Chart(data)
        .mark_bar(opacity=0.8, fill="#e57a44")
        .transform_filter('datum.the_Impact == "Affected"')
        .transform_filter('datum.Year >= 1980')
        .transform_filter('datum.Disasters == "All"')
        # The calculated field keeps the name 'deaths' but holds people
        # affected, scaled to millions.
        .transform_calculate(deaths='datum.Count/1000000')
        .encode(
            alt.X('Year_Bins:O', title='', axis=plain_axis),
            alt.Y('sum(deaths):Q', title=' ', axis=plain_axis),
        )
        .properties(width=635, height=height, title=title)
    )


# World Bar chart
world = _affected_by_decade_chart(
    world_disasters,
    'People Affected (in Millions) by Catastrophes Around the World in every Decade',
    height=285,
)

# USA Bar plot
usa = _affected_by_decade_chart(
    usa_disasters,
    'People Affected (in Millions) by Catastrophes in the USA in every Decade',
    height=280,
)

# Concatenate the view (world on top, USA below)
(world & usa).configure_axis(
    labelAngle=0,
    labelFontSize=11,
    titleFontSize=13,
).configure_title(
    fontSize=15
).configure_legend(
    titleFontSize=12,
    labelFontSize=11,
).configure_view(strokeWidth=1)

### Percentage of People Impacted by Catastrophes - Interactive

In [None]:
# force altair to display all the rows
alt.data_transformers.disable_max_rows()

# CSS manipulation to set the interactive bindings posiiton
display(HTML("""
<style>
form.vega-bindings {
  position: absolute;
  left: 820px;
  top: 50px;
}
</style>
"""))

#### World ########

# Extract Whether affected or death for the world
deaths_affected_list = world_disasters['the_Impact'].unique().tolist()

# Interactive Selections
selection = alt.selection_single(
    name='Select',
    fields=['the_Impact'],
    init={'the_Impact': 'Deaths'},
    bind={'the_Impact': alt.binding_radio(options=deaths_affected_list)}
)


# World Plot
world = alt.Chart(world_disasters).mark_bar(opacity = 0.8
                                    , fill = "#e57a44"
                                    ,cornerRadiusTopLeft=3
                                    ,cornerRadiusTopRight=3).transform_filter(
    alt.FieldOneOfPredicate(field='Disasters'
                            , oneOf=["Earthquakes"
                                     , "Floods"
                                     , "Volcanic activity"
                                     ,"Drought"
                                     ,"Landslides"
                                     ,"Storms"
                                     ,"Wildfires"
                                     ,"Extreme temp"])).add_selection(
    selection
).transform_filter(
    selection
).transform_aggregate(diss_count = 'sum(Count)'
                      ,groupby=['Disasters']).transform_joinaggregate(
    Total='sum(diss_count)',
).transform_calculate(
    PercentOfTotal="(datum.diss_count / datum.Total)"
).encode(
    alt.X('Disasters:N', title='', sort='-y',axis=alt.Axis(ticks=False, domain=False)),
    alt.Y('PercentOfTotal:Q', title=' ',axis=alt.Axis(ticks=False, domain=False,format='%'))
).properties(width=750, 
             height=280, 
             title='Percentage of People Impacted by Catastrophes Around the World')


######## USA###########

# Extract Whether affected or death for the usa
deaths_affected_list_usa = usa_disasters['the_Impact'].unique().tolist()

# Interactive Selections for USA
selection_usa = alt.selection_single(
    name='Select',
    fields=['the_Impact'],
    init={'the_Impact': 'Deaths'},
    bind={'the_Impact': alt.binding_radio(options=deaths_affected_list_usa)}
)


usa = alt.Chart(usa_disasters).mark_bar(opacity = 0.8
                                    , fill = "#e57a44"
                                    ,cornerRadiusTopLeft=3
                                    ,cornerRadiusTopRight=3).transform_filter(
    alt.FieldOneOfPredicate(field='Disasters'
                            , oneOf=["Earthquakes"
                                     , "Floods"
                                     , "Volcanic activity"
                                     ,"Drought"
                                     ,"Landslides"
                                     ,"Storms"
                                     ,"Wildfires"
                                     ,"Extreme temp"])).add_selection(
    selection_usa
).transform_filter(
    selection_usa
).transform_aggregate(diss_count = 'sum(Count)'
                      ,groupby=['Disasters']).transform_joinaggregate(
    Total='sum(diss_count)',
).transform_calculate(
    PercentOfTotal="(datum.diss_count / datum.Total)"
).encode(
    alt.X('Disasters:N', title='', sort='-y',axis=alt.Axis(ticks=False, domain=False)),
    alt.Y('PercentOfTotal:Q', title=' ',axis=alt.Axis(ticks=False, domain=False,format='%'))
).properties(width=750, 
             height=280, 
             title='Percentage of People Impacted by Catastrophes in the USA')

text = alt.Chart({'values':[{}]}).mark_text(text='Percentages are based off the total count from the year 1980 to 2022'
                                            , align="left",color = "gray").encode(
 x=alt.value(250), y=alt.value(315))

alt.vconcat(world , usa+text, spacing = 30).configure_axis(
     labelAngle=0,
     labelFontSize=11,
     titleFontSize=13).configure_title(fontSize=15).configure_legend(
 titleFontSize=12,
 labelFontSize=11).configure_view(strokeWidth=0)


### Cost of disasters for USA and Global CO2 emissions

In [None]:
# Line plot to show the Total cost of disasters per year for USA

# Year labels shown on the shared x-axis.
year_ticks = [1980, 2000, 2018]

# Line plot to show the Total cost of disasters per year for USA
cost_axis = alt.Axis(
    title='Cost of disasters for USA (Million Dollars)',
    titleFontSize=14,
    minExtent=60,
    titleFontWeight=400,
    labelColor='#e57a44',
    labelFontWeight=700,
    labelFontSize=12,
    tickSize=0,
    labelPadding=5,
)

line_cost = (
    alt.Chart(df_sum_cost_us)
    .mark_line(strokeWidth=3, interpolate='monotone')
    .transform_fold(fold=['Cost_US'], as_=['variable', 'value'])
    .encode(
        alt.X('Year:O', axis=alt.Axis(title='', values=year_ticks)),
        alt.Y('value:Q', axis=cost_axis, scale=alt.Scale(type='sqrt')),
        alt.Color('variable:N', title='', legend=None,
                  scale=alt.Scale(domain=['Cost_US'], range=['#e57a44'])),
    )
    .properties(width=800, height=350)
)

# Line plot to show the global CO2 emissions (metric tons/person) per year
co2_axis = alt.Axis(
    title='CO2 emissions (metric tons/person)',
    titleFontSize=14,
    titleFontWeight=400,
    minExtent=30,
    labelColor='#1f78b4',
    labelFontWeight=700,
    labelFontSize=12,
    labelAngle=-90,
    labelPadding=5,
    tickSize=0,
)

line_ems_world = (
    alt.Chart(df_median_emis)
    .mark_line(strokeWidth=3, interpolate='monotone')
    .transform_fold(fold=['CO2'], as_=['variable', 'value'])
    .encode(
        alt.X('Year:O', axis=alt.Axis(title='', values=year_ticks)),
        alt.Y('value:Q', axis=co2_axis),
        alt.Color('variable:N', title='', legend=None,
                  scale=alt.Scale(domain=['CO2'], range=['#1f78b4'])),
    )
    .properties(width=800, height=350)
)

# Dashed regression line for total costs. The points themselves are invisible
# (opacity 0); this layer also supplies the styled x-axis.
year_axis = alt.Axis(
    title='',
    labelAngle=0,
    values=year_ticks,
    tickSize=0,
    labelPadding=10,
    labelFontWeight=700,
    labelFontSize=12,
)

point_cost = (
    alt.Chart(df_sum_cost_us)
    .mark_point(opacity=0)
    .encode(
        alt.X('Year:O', axis=year_axis),
        alt.Y('Cost_US:Q', axis=None, scale=alt.Scale(type='sqrt')),
    )
    .properties(width=800, height=350)
)

cost_trend = point_cost.transform_regression('Year', 'Cost_US').mark_line(
    color="#e57a44", strokeDash=[6, 6])
reg_cost = point_cost + cost_trend

# Combined line plots for 'CO2 EMISSIONS (METRIC TONS/PERSON)' globally and 'TOTAL COSTS OF DISASTERS' for USA
(
    alt.layer(reg_cost, line_ems_world, line_cost)
    .resolve_scale(y='independent', color='independent')
    .properties(title='Yearly Global CO2 Emissions and Total Cost of Disaster for the USA')
    .configure_title(fontSize=18, font='Times New Roman', anchor='middle', offset=20)
)


### Scatterplot with a regression line for visualizing the cost of disasters per unit of CO2 emissions

In [None]:
# Yearly 'Cost_per_C02' as points, with a fitted regression line on top.
scatter_x_axis = alt.Axis(
    title='',
    labelAngle=0,
    labelPadding=8,
    titleFontSize=14,
    titleFontWeight=400,
    minExtent=30,
    labelFontWeight=600,
    labelFontSize=10,
    values=[1980, 2000, 2018],
    tickSize=0,
)

scatter_y_axis = alt.Axis(
    title='Cost of disasters per CO2 Emissions',
    titleFontSize=14,
    titleFontWeight=400,
    minExtent=30,
    labelFontWeight=700,
    labelFontSize=10,
    tickSize=0,
    labelPadding=8,
)

scatter = (
    alt.Chart(df_merge_world_us)
    .mark_point(color='#e57a44')
    .encode(
        alt.X('Year:N', axis=scatter_x_axis),
        alt.Y('Cost_per_C02:Q', axis=scatter_y_axis, scale=alt.Scale(type='sqrt')),
    )
    .properties(width=800, height=350)
)

# The chart title is attached to the regression layer.
trend_line = (
    scatter
    .transform_regression('Year', 'Cost_per_C02')
    .mark_line(strokeWidth=2, color='#e57a44')
    .properties(title='Yearly Economic Costs per CO2 Emission for the USA')
)
reg_scatter = scatter + trend_line

# Footnote drawn at fixed pixel coordinates.
text = alt.Chart({'values': [{}]}).mark_text(
    text='Cost of Disasters per CO2 emissions is calulated as Cost (in Millions US Dollars) per CO2 emissions (Metric Tons per Capita)',
    align="left",
    color="gray",
).encode(x=alt.value(100), y=alt.value(390))

alt.layer(reg_scatter, text).configure_title(
    fontSize=18, font='Times New Roman', anchor='middle', offset=12)