## Libraries

In [1]:
import sys
import os
import datetime as dt
import numpy as np
import pandas as pd
import geopandas as gpd
import altair as alt
from vega_datasets import data

In [2]:
# Resolve the project root only once: a `mainDir` that already exists in the
# kernel's globals is left untouched, otherwise the parent of the current
# working directory is used.
if 'mainDir' not in globals():
    mainDir = os.path.dirname(os.getcwd())
print(mainDir)

/Users/lassescheele/Documents/Projects/altair-climate-change


In [3]:
# Raw inputs are read from <mainDir>/data/raw; <mainDir>/data/processed is the
# sibling folder for processed data.
dir_input, dir_output = (
    os.path.join(mainDir, 'data', subfolder) for subfolder in ('raw', 'processed')
)

## Settings

In [4]:
# Temperature records before this year are dropped right after loading.
first_relevant_year = 1850

# Inclusive bounds of the baseline period that anomalies are measured against.
start_reference_period, end_reference_period = 1961, 1990

In [5]:
# ISO 3166-1 alpha-2 codes of the countries shown in the dashboard.
# An empty list disables the country filter applied to the shapes.
list_relevant_iso = [
    'CA', 'US', 'NI', 'BR', 'AR',
    'GL', 'IS', 'DE', 'ES',
    'EG', 'MR', 'MG', 'ZA', 'CD',
    'SA', 'RU', 'CN', 'JP', 'IN', 'ID',
    'AU', 'NZ',
]

## Read data

### Read shapes

In [6]:
# Load the country polygons from the raw-data folder; the trailing expression
# shows (rows, columns) of the loaded frame.
path_shapes = os.path.join(dir_input, "countries.geojson")
gdf = gpd.read_file(path_shapes)
gdf.shape

(255, 4)

In [7]:
# Sanity check: list every shape whose ADMIN name contains "congo"
# (case-insensitive), to see how the two Congo states are named in the shapes.
is_congo = gdf['ADMIN'].str.lower().str.contains('congo')
gdf.loc[is_congo]

Unnamed: 0,ADMIN,ISO_A3,ISO_A2,geometry
47,Democratic Republic of the Congo,COD,CD,"MULTIPOLYGON (((12.99459 -5.86824, 12.96930 -5..."
48,Republic of Congo,COG,CG,"MULTIPOLYGON (((17.62728 3.62632, 17.70913 3.6..."


In [8]:
# Use 'Country' as the shared key name, then keep only the configured
# countries (an empty ISO list means "keep everything").
gdf = gdf.rename(columns={'ADMIN': 'Country'})
if list_relevant_iso:
    is_relevant_shape = gdf['ISO_A2'].isin(list_relevant_iso)
    gdf = gdf.loc[is_relevant_shape]
gdf.shape

(22, 4)

In [9]:
# Country names of the remaining shapes, in order of first appearance.
list_relevant_countries = gdf['Country'].drop_duplicates().tolist()

### Read climate data

In [10]:
# Load the per-country temperature table from the raw-data folder; the
# trailing expression shows (rows, columns).
path_temperatures = os.path.join(dir_input, "GlobalLandTemperaturesByCountry.csv")
df = pd.read_csv(path_temperatures)
df.shape

(577462, 4)

In [11]:
# Keep only the date, the temperature and the country name.
columns_to_keep = ['dt','AverageTemperature','Country']
df = df[columns_to_keep]

In [12]:
# Parse the date strings, keep only records from `first_relevant_year` onward
# and derive the calendar columns used for grouping later on.
df['dt'] = pd.to_datetime(df['dt'], format="%Y-%m-%d")
# .copy() detaches the filtered rows from the original frame, so the column
# assignments below write to an independent DataFrame instead of a slice.
df = df.loc[df['dt'].dt.year >= first_relevant_year].copy()
df['Year'] = df['dt'].dt.year
df['Month'] = df['dt'].dt.month
# Only the last expression of a cell is displayed, so the preview is the
# single trailing expression.
df.head(3)

Unnamed: 0,dt,AverageTemperature,Country,Year,Month
1274,1850-01-01,-9.083,Åland,1850,1
1275,1850-02-01,-2.309,Åland,1850,2
1276,1850-03-01,-4.801,Åland,1850,3


In [13]:
# Maps country names as they appear in the temperature table (keys) to the
# names used in the shapes table (values), so both can be matched on 'Country'.
# NOTE(review): some keys look misspelled or truncated (e.g. 'Palestina',
# 'Turks And Caicas Islands', 'South Georgia And The South Sandwich Isla');
# presumably they mirror the CSV verbatim - verify against the data before
# "correcting" them, because a changed key would silently stop matching.
country_names_dict = {
    'Antigua And Barbuda':'Antigua and Barbuda',
    'Bahamas':'The Bahamas',
    'Bosnia And Herzegovina':'Bosnia and Herzegovina',
    'Burma':'Myanmar',
    'Congo':'Republic of Congo',
    'Congo (Democratic Republic Of The)':'Democratic Republic of the Congo',
    "Côte D'Ivoire":'Ivory Coast',
    'Falkland Islands (Islas Malvinas)':'Falkland Islands',
    'Federated States Of Micronesia':'Federated States of Micronesia',
    'French Southern And Antarctic Lands':'French Southern and Antarctic Lands',
    'Heard Island And Mcdonald Islands':'Heard Island and McDonald Islands',
    'Hong Kong':'Hong Kong S.A.R.',
    'Isle Of Man':'Isle of Man',
    'Macau':'Macao S.A.R',
    'Palestina':'Palestine',
    'Saint Barthélemy':'Saint Barthelemy',
    'Saint Kitts And Nevis':'Saint Kitts and Nevis',
    'Saint Pierre And Miquelon':'Saint Pierre and Miquelon',
    'Saint Vincent And The Grenadines':'Saint Vincent and the Grenadines',
    'Sao Tome And Principe':'Sao Tome and Principe',
    'Serbia':'Republic of Serbia',
    'South Georgia And The South Sandwich Isla':'South Georgia and South Sandwich Islands',
    'Tanzania':'United Republic of Tanzania',
    'Timor Leste':'East Timor',
    'Trinidad And Tobago':'Trinidad and Tobago',
    'Turks And Caicas Islands':'Turks and Caicos Islands',
    'United States':'United States of America',
    'Virgin Islands':'United States Virgin Islands',
    'Åland':'Aland'
}

In [14]:
# Harmonise the temperature table's country names with the shapes table in a
# single vectorised pass instead of one full-frame scan per dictionary entry.
# No replacement value is itself a key of the mapping, so the simultaneous
# replacement gives the same result as applying the entries one after another.
df['Country'] = df['Country'].replace(country_names_dict)

In [15]:
# Restrict the temperature records to the countries that have a shape
# (an empty country list leaves the table unfiltered).
if list_relevant_countries:
    is_relevant_country = df['Country'].isin(list_relevant_countries)
    df = df.loc[is_relevant_country]
df.shape

(43016, 5)

## Prepare data

### Heatmap data

In [16]:
# Baseline: mean temperature per (Country, Month) over the inclusive
# reference period, kept as a one-column frame indexed by (Country, Month).
in_reference_period = df['Year'].between(start_reference_period, end_reference_period)
df_reference = (
    df.loc[in_reference_period]
    .groupby(['Country','Month'])['AverageTemperature']
    .mean()
    .to_frame()
)

In [17]:
# Temperature anomaly: each record minus the reference-period mean of the same
# (Country, Month). The baseline is looked up for all rows at once with a
# keyed join, replacing the former countries x 12 full-frame masks and the
# manual progress counter that they required.
# The join keeps df's index and row order, so the subtraction aligns row by
# row. A (Country, Month) pair without a baseline yields NaN instead of
# raising a KeyError.
reference_mean = df[['Country','Month']].join(
    df_reference['AverageTemperature'].rename('reference_mean'),
    on=['Country','Month']
)['reference_mean']
df['AverageTemperature_ref'] = df['AverageTemperature'] - reference_mean

10 of 22 countries
20 of 22 countries


### Map data (centroids)

In [18]:
# Mean anomaly per country over all records after the reference period.
after_reference_period = df['Year'] > end_reference_period
df_stats_countries = (
    df.loc[after_reference_period]
    .groupby('Country')['AverageTemperature_ref']
    .mean()
    .reset_index()
)

In [19]:
# Attach the post-reference mean anomaly to every shape; the left join keeps
# shapes without temperature data.
gdf = pd.merge(
    gdf,
    df_stats_countries,
    on='Country', how='left'
).rename(columns={'AverageTemperature_ref':'AverageTemperature_sinceRef'})

# Compute the centroids once and reuse them for all three derived columns
# (previously the geometry centroids were recomputed for each column).
centroids = gdf.centroid
gdf['centroid'] = centroids
gdf['longitude'] = centroids.x
gdf['latitude'] = centroids.y

In [20]:
# Hand-picked [longitude, latitude] positions that replace the computed
# centroids for selected countries.
# NOTE(review): presumably the geometric centroids of these two countries sit
# in an unhelpful spot for a map marker - confirm the reason.
manual_positions = {
    'US': [-98.5,39.5],
    'CA': [-96.5,54.5],
}
for iso_code, lon_lat in manual_positions.items():
    gdf.loc[gdf['ISO_A2']==iso_code,['longitude','latitude']] = lon_lat

## Create dashboard (light mode)

In [21]:
# Inclusive range of years shown in the trend chart and the heatmap.
first_analysis_year, last_analysis_year = 1850, 2013

In [22]:
# Header texts of the dashboard; each list becomes the single 'text' column
# of a small frame that feeds a text mark.
title_text = [
    "Climate change is a global phenomenon",
]
credentials_text = [
    "Source: Berkeley Earth | Visualization: Lasse Scheele (@LasSchee)",
]

In [23]:
# Choose how Altair hands the data to the chart. The two disabled lines are
# the alternatives; only the 'default' transformer is active.
#alt.data_transformers.enable('data_server') # data will be served in the background rather than embedded in the chart specification
#alt.data_transformers.enable('json')        # data will be serialized to disk and referenced by URL
alt.data_transformers.enable('default')    # data will be fully embedded in the notebook

DataTransformerRegistry.enable('default')

In [24]:
# Lift Altair's row limit so the complete temperature table can be embedded
# in the chart specification.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [25]:
#df['AverageTemperature_ref'].quantile([0.05,.25,.5,.75,.95]).values.tolist()

In [26]:
# Visual theme of the dashboard. Trailing comments on a value list
# alternative values that were tried.
# NOTE(review): `mode` is not referenced anywhere else in the visible notebook.
mode = 'light_mode'

# General colors: chart background, titles, and axis/legend labels
color_background = 'white' # '#FFFCF2'
color_titles = 'black' # '#4f4f4f'
color_labels = '#4f4f4f' # '#6e6e6e'

# Colors background map: globe fill, graticule lines, country fill and outline
fill_sphere = '#D5F5FF' # '#7FD1D8'
stroke_color_graticule = 'white'
stroke_width_graticule = 1.0
fill_countries = '#DED9D8' # '#DED5D2' # 'lightgrey'
stroke_color_countries = '#978984' # 'grey'

# Colors points: country markers on the map; the selected country gets its
# own fill color, size and stroke width
stroke_color_points = color_titles
color_points_selected = '#FF00E8'
color_points_not_selected = '#bbb1ae' # stroke_color_countries # '#2ECC71'
size_points_selected = 400
size_points_not_selected = 200
stroke_width_points_selected = 4
stroke_width_points_not_selected = 3

# Colors heatmap: breakpoints of the temperature difference (domain) and the
# color assigned to each breakpoint (range); the background color is used for
# the grid drawn underneath the heatmap
domain_heatmap = [-5,-.5,0,.5,5]
range_heatmap = ['#0571b0','#92c5de','#f7f7f7','#f4a582','#ca0020']
color_background_heatmap = '#e0e0e0'

# Colors trend: the selected country's line reuses the point highlight color;
# all other lines are thinner and mostly transparent
color_trend_selected = color_points_selected # '#FF5733'
color_trends_not_selected = color_points_not_selected # color_labels
stroke_width_trends_selected = 2.5
stroke_width_trends_not_selected = 1.25
opacity_trends_not_selected = 0.2
# Horizontal rule at a temperature difference of zero
color_zero_line = 'grey' # '#008FFF'
stroke_width_zero_line = 1.2

In [27]:
# Generators for the globe outline and the latitude/longitude grid.
sphere = alt.sphere()
graticule = alt.graticule()

# Data behind the individual dashboard layers.
source_map = alt.topo_feature(data.world_110m.url, 'countries')
source_points = gdf[['Country','longitude','latitude']]

# Monthly anomalies within the inclusive analysis period ...
in_analysis_period = df['Year'].between(first_analysis_year, last_analysis_year)
heatmap_columns = ['Year','Month','Country','AverageTemperature_ref']
source_heatmap = df.loc[in_analysis_period, heatmap_columns]

# ... and their mean per country and year for the trend lines.
source_trend = (
    source_heatmap
    .groupby(['Country','Year'])['AverageTemperature_ref']
    .mean()
    .reset_index()
)

# Pixel sizes of the map and of the two plots below it.
width_map = 900
height_map = 450
width_plots = width_map*1.25
# The plots have one column per year of the inclusive analysis period and the
# heatmap has 12 month rows; dividing the width by (years / 12) gives the
# height at which a heatmap cell is as tall as it is wide.
# The year count is inclusive (+1), matching the year range the heatmap is
# built from; this also avoids a division by zero when the first and last
# analysis year coincide.
n_analysis_years = last_analysis_year - first_analysis_year + 1
height_plots = width_plots/(n_analysis_years/12)

# One shared single-country selection drives every linked view; it starts on
# `default_country`.
default_country = "Germany"
selector = alt.selection(
    type="single", fields=['Country'], init={"Country": default_country}
)


def _by_selection(value_selected, value_not_selected):
    """Return a condition that yields one constant for the selected country and another otherwise."""
    return alt.condition(selector, alt.value(value_selected), alt.value(value_not_selected))


# Map markers: the selected country is recolored, enlarged and outlined thicker.
color_points = _by_selection(color_points_selected, color_points_not_selected)
size_points = _by_selection(size_points_selected, size_points_not_selected)
strokeWidth_points = _by_selection(stroke_width_points_selected, stroke_width_points_not_selected)

# Trend lines: every country maps to the same highlight color, so the color
# scale only takes effect for the selected line; all others use a fixed value.
domain_trends = list_relevant_countries
range_trends = [color_trend_selected] * len(list_relevant_countries)
color_trends = alt.condition(
    selector,
    alt.Color('Country:N', scale=alt.Scale(domain=domain_trends, range=range_trends), legend=None),
    alt.value(color_trends_not_selected))
strokeWidth_trends = _by_selection(stroke_width_trends_selected, stroke_width_trends_not_selected)
opacity_trends = _by_selection(1, opacity_trends_not_selected)

# Dashboard headline: a bold, centered text mark shifted right by half the
# plot width.
source_title = pd.DataFrame({'text': title_text})
title_style = dict(
    size=16,
    fontWeight='bold',
    align='center',
    color=color_titles,
    dx=width_plots/2,
)
title = alt.Chart(source_title).mark_text(**title_style).encode(text="text:N")

# Static base map, layered bottom to top: globe fill, graticule, countries.
layer_sphere = alt.Chart(sphere).mark_geoshape(fill=fill_sphere)
layer_graticule = alt.Chart(graticule).mark_geoshape(
    stroke=stroke_color_graticule, strokeWidth=stroke_width_graticule
)
layer_countries = alt.Chart(source_map).mark_geoshape(
    fill=fill_countries, stroke=stroke_color_countries, strokeWidth=0.5
)
background = (
    alt.layer(layer_sphere, layer_graticule, layer_countries)
    .project('naturalEarth1')
    .properties(width=width_map, height=height_map)
)

# Clickable country markers; color, size and outline width follow the shared
# country selection, which is registered on this layer.
points_base = alt.Chart(source_points).mark_circle(stroke=stroke_color_points, opacity=1.0)
points = points_base.encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    color=color_points,
    size=size_points,
    strokeWidth=strokeWidth_points,
    tooltip=['Country:N'],
).add_selection(selector)

# Horizontal rule at an anomaly of 0, drawn underneath the trend lines.
source_zero_line = pd.DataFrame({'AverageTemperature_ref': [0]})
zero_line = (
    alt.Chart(source_zero_line)
    .mark_rule(color=color_zero_line, strokeWidth=stroke_width_zero_line)
    .encode(y='AverageTemperature_ref:Q')
)

# Yearly mean anomaly per country, one line each. The selected country is
# highlighted through the selection-driven color, opacity and stroke width;
# all other lines stay visible but faded.
trends = alt.Chart(
    source_trend,
    title=f'Average yearly temperature {first_analysis_year}-{last_analysis_year} (compared to the period {start_reference_period}-{end_reference_period})'
).mark_line(
).encode(
    x=alt.X('Year:O', sort=alt.EncodingSortField('Year', order='ascending')),
    y=alt.Y('AverageTemperature_ref:Q', title='°C'),
    color=color_trends,
    opacity=opacity_trends,
    strokeWidth=strokeWidth_trends,
    tooltip=[
        'Country:N','Year:Q',
        alt.Tooltip(
            'AverageTemperature_ref:Q',
            # The plotted value is the mean over a whole year, so the tooltip
            # is labelled "yearly" (it previously carried the heatmap's
            # "monthly" label).
            title=f'Average yearly temperature change compared to {start_reference_period}-{end_reference_period} (°C)',
            format='+.2f'
        ),
    ]
).add_selection(
    selector
).properties(
    width=width_plots,
    height=height_plots,
)

# Uniformly colored grid with one cell per (Year, Month) of the analysis
# period, drawn underneath the heatmap so that cells without a heatmap value
# still show a background color.
year_month_grid = pd.MultiIndex.from_product(
    [range(first_analysis_year,last_analysis_year+1), range(1,13)],
    names=['Year', 'Month']
)
source_heatmap_background = pd.DataFrame(index=year_month_grid).reset_index()
heatmap_background = (
    alt.Chart(source_heatmap_background)
    .mark_rect(color=color_background_heatmap, strokeWidth=0.5)
    .encode(
        x=alt.X('Year:O', sort=alt.EncodingSortField('Year', order='ascending')),
        y=alt.Y('Month:O', sort=alt.EncodingSortField('Month', order='ascending')),
    )
    .properties(width=width_plots, height=height_plots)
)

# Month-by-year heatmap of the anomaly, filtered to the selected country.
heatmap_color = alt.Color(
    'AverageTemperature_ref:Q',
    scale=alt.Scale(domain=domain_heatmap, range=range_heatmap),
    title='Temperature difference (°C)'
)
heatmap_tooltip = [
    'Country:N','Year:O','Month:O',
    alt.Tooltip(
        'AverageTemperature_ref:Q',
        title=f'Average monthly temperature change compared to {start_reference_period}-{end_reference_period} (°C)',
        format='+.2f'),
]
heatmap = (
    alt.Chart(
        source_heatmap,
        title=f'Average monthly temperature {first_analysis_year}-{last_analysis_year} (compared to the period {start_reference_period}-{end_reference_period})'
    )
    .mark_rect()
    .encode(
        x=alt.X('Year:O', sort=alt.EncodingSortField('Year', order='ascending')),
        y=alt.Y('Month:O', sort=alt.EncodingSortField('Month', order='ascending')),
        color=heatmap_color,
        tooltip=heatmap_tooltip,
    )
    .add_selection(selector)
    .transform_filter(selector)
    .properties(width=width_plots, height=height_plots)
)

# Source/author line: right-aligned text shifted right by the plot width.
source_credentials = pd.DataFrame({'text': credentials_text})
credentials_style = dict(
    size=12,
    align='right',
    dx=width_plots,
    color=color_labels,
)
credentials = alt.Chart(source_credentials).mark_text(**credentials_style).encode(text="text:N")

# Assemble the dashboard top to bottom: header, map, trend lines, heatmap.
row_header = title + credentials
row_map = background + points
row_trends = zero_line + trends
row_heatmap = heatmap_background + heatmap

chart = (
    alt.vconcat(row_header, row_map, row_trends, row_heatmap, center=True)
    .configure(background=color_background)
    .configure_axis(titleColor=color_titles, labelColor=color_labels)
    .configure_legend(orient='top', titleColor=color_titles, labelColor=color_labels)
    .configure_title(color=color_titles)
    .configure_view(stroke=None)  # no frame around the individual views
)
chart

In [28]:
# Base name of the exported files; encodes the analysis and reference periods.
file_name = (
    f"countries_centroids_tavg_{first_analysis_year}-{last_analysis_year}"
    f"_ref{start_reference_period}-{end_reference_period}"
)
print(file_name)

countries_centroids_tavg_1850-2013_ref1961-1990


In [29]:
# Export the dashboard as interactive HTML (docs/) and as a PNG (plots/).
path_html = os.path.join(mainDir,'docs',file_name+'.html')
path_png = os.path.join(mainDir,'plots',file_name+'.png')

# Create the target folders first so the export also works on a fresh
# checkout where they do not exist yet.
for path_out in (path_html, path_png):
    os.makedirs(os.path.dirname(path_out), exist_ok=True)

chart.save(path_html)
chart.save(path_png, scale_factor=2.5)