# Libraries

In [1]:
import sys
import os
import math as m
import glob
import datetime as dt
import numpy as np
import pandas as pd
import geopandas as gpd
import altair as alt
from vega_datasets import data

In [2]:
if not 'mainDir' in globals():
    mainDir = os.path.dirname(os.path.dirname(os.getcwd())) # Get parent dir: os.path.dirname()
print(mainDir)

/Users/lassescheele/Documents/Projects/altair-climate-change


In [3]:
dir_input = os.path.join(mainDir,'data','raw')
dir_output = os.path.join(mainDir,'data','processed')

In [4]:
alt.data_transformers.enable('default')
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Define plotting functions

In [5]:
def create_percentile_areas(source, list_percentiles, x_axis_title, y_axis_title, iter_year=None):
    list_elements = list()
    
    if not iter_year == None:
        source['Year'] = iter_year
        source['Timestamp'] = pd.to_datetime(source['Year'].astype(str).str.zfill(2)+'-'+source['Month-Day'])
        x_column_str = 'Timestamp:T'
    else:
        x_column_str = 'Month-Day:O'
    base = alt.Chart(source).encode(
        alt.X(x_column_str, axis=alt.Axis(title=x_axis_title, format = ("%b"), labelAngle=-45))
    )

    area_percentile_1 = base.mark_area(
        #interpolate='monotone',
        opacity=0.25,
        color=color_percentiles
    ).encode(
        alt.Y('Max:Q',axis=alt.Axis(title=y_axis_title)),
        alt.Y2('Min:Q'),
        #tooltip=[
        #    x_column_str,
        #    alt.Tooltip('Max:Q',title='Max',format='+.2f'),
        #    alt.Tooltip('Min:Q',title='Min',format='+.2f'),
        #]
    )
    list_elements.append(area_percentile_1)

    area_percentile_2 = base.mark_area(
        interpolate='monotone',
        opacity=0.25,
        color=color_percentiles
    ).encode(
        alt.Y(f'{int(100*list_percentiles[3])}th percentile:Q'),
        alt.Y2(f'{int(100*list_percentiles[1])}th percentile:Q')
    )
    list_elements.append(area_percentile_2)

    line_median = base.mark_line(
        interpolate='monotone',
        opacity=0.5,
        color=color_percentiles,
        strokeWidth=3
    ).encode(
        alt.Y('Median:Q'),
    )
    list_elements.append(line_median)
    
    return list_elements

In [90]:
def create_line_from_oberservations(source, column, x_axis_title, y_axis_title, tooltip_title, line_color):
    line_oberservations = alt.Chart(
        source
    ).mark_line(
        interpolate='monotone',
        #opacity=0.4,
        strokeWidth=2,
        color=line_color
    ).encode(
        #alt.X('Month-Day:O'),
        alt.X('Timestamp:T',axis=alt.Axis(title=x_axis_title)),
        alt.Y(f'{column}:Q',axis=alt.Axis(title=y_axis_title)),
        #color=alt.Color(
        #    'Year:O',
        #    title='Year',
        #    legend=None
        #),
        tooltip=[
            'Timestamp:T',
            alt.Tooltip(
                'Value:Q',
                title=tooltip_title,
                format='+.2f'),
        ]
    )
    return line_oberservations

In [7]:
def create_df_extra_legend():
    start_x = -1
    end_x = 1
    center_value = 0.0
    step = 0.4
    source_vertline = [
        {
            "start_x": start_x,
            "end_x": end_x,
            "start_y": center_value + step/8,
            "end_y": center_value - step/8,
            "start_x_label": end_x,
            "start_y_label": center_value,
            "label": "Median",
            "opacity": 0.5
        },
        {
            "start_x": start_x,
            "end_x": end_x,
            "start_y": center_value + 1.25*step,
            "end_y": center_value - 1.25*step,
            "start_x_label": end_x,
            "start_y_label": center_value + 1*step,
            "label": f"Range between {int(100*list_percentiles[1])}th & {int(100*list_percentiles[3])}th percentile",
            "opacity": 0.25
        },
        {
            "start_x": start_x,
            "end_x": end_x,
            "start_y": center_value + 2.5*step,
            "end_y": center_value - 2.5*step,
            "start_x_label": end_x,
            "start_y_label": center_value + 2*step,
            "label": "Range between min & max",
            "opacity": 0.25
        },
    ]
    df_legend = pd.DataFrame(source_vertline)
    return df_legend

In [8]:
def create_extra_legend(source, iter_year=None):
    list_extra_legend = list()
    
    if not iter_year == None:
        source['Year'] = iter_year
        source['start_timestamp'] = pd.to_datetime(source['Year'].astype(str).str.zfill(2)+'-'+source['start_day'])
        source['end_timestamp'] = pd.to_datetime(source['Year'].astype(str).str.zfill(2)+'-'+source['end_day'])
        source['label_timestamp'] = pd.to_datetime(source['Year'].astype(str).str.zfill(2)+'-'+source['label_day'])
        x1_column_str = 'start_timestamp:T'
        x2_column_str = 'end_timestamp:T'
        xlabel_column_str = 'label_timestamp:T'
    else:
        x1_column_str = 'start_day:O'
        x2_column_str = 'end_day:O'
        xlabel_column_str = 'label_day:O'
    
    frame = alt.Chart(
        source.loc[source['label']=='Range between min & max']
    ).mark_rect(
        fill='white'
    ).encode(
        x=alt.X(x1_column_str),
        x2=alt.X2(x2_column_str),
        y=alt.Y('start_value:Q'),
        y2=alt.Y2('end_value:Q'),
    )
    list_extra_legend.append(frame)
    
    symbols = alt.Chart(
        source
    ).mark_rect(
        fill=color_percentiles
    ).encode(
        opacity=alt.Opacity('opacity:Q', legend=None),
        x=alt.X(x1_column_str),
        x2=alt.X2(x2_column_str),
        y=alt.Y('start_value:Q'),
        y2=alt.Y2('end_value:Q'),
    )
    list_extra_legend.append(symbols)
    
    text = alt.Chart(source).mark_text(
        #align='right',
        align='left',
        baseline='middle',
        dx=5,
        #dy=-135,
        size=11
    ).encode(
        x=xlabel_column_str,
        y='label_value:Q',
        text='label',
        color=alt.value('#666666')
    )
    list_extra_legend.append(text)

    return list_extra_legend

In [9]:
def create_extra_legend2(source):
    list_extra_legend = list()
    
    frame = alt.Chart(
        source.loc[source['label']=='Range between min & max']
    ).mark_rect(
        fill='white'
    ).encode(
        x=alt.X('start_x:Q', axis=None),
        x2=alt.X2('end_x:Q'),
        y=alt.Y('start_y:Q', axis=None),
        y2=alt.Y2('end_y:Q'),
    )
    list_extra_legend.append(frame)
    
    symbols = alt.Chart(
        source
    ).mark_rect(
        fill=color_percentiles
    ).encode(
        opacity=alt.Opacity('opacity:Q', legend=None),
        x=alt.X('start_x:Q', axis=None),
        x2=alt.X2('end_x:Q'),
        y=alt.Y('start_y:Q', axis=None),
        y2=alt.Y2('end_y:Q'),
    )
    list_extra_legend.append(symbols)
    
    text = alt.Chart(source).mark_text(
        #align='right',
        align='left',
        baseline='middle',
        dx=5,
        #dy=-135,
        size=11
    ).encode(
        x=alt.X('start_x_label:Q', axis=None),
        y=alt.Y('start_y_label:Q', axis=None),
        text='label',
        color=alt.value('#666666')
    )
    list_extra_legend.append(text)

    return list_extra_legend

# Process Mean Temperature

In [44]:
city = 'hamburg'
station_id = '01975'
parameter_str = 'mean temperature at 2 m above ground'

## Read data

In [45]:
df_products = pd.read_csv(os.path.join(dir_input,'dwd_daily_station_data','cdc_download_2020-04-30_21_14','data','prd.csv'))
#df_products
index = df_products.loc[df_products['Produkt_Titel'].str.contains(parameter_str)].index[0]
product_code = df_products.loc[index,'Produkt_Code']
product_title = df_products.loc[index,'Produkt_Titel'].replace('Daily station observations of ','')
product_title = product_title[0].capitalize()+product_title[1:]
product_unit = df_products.loc[index,'Einheit']
print(product_code, product_title, product_unit)

TMK_MN004 Mean temperature at 2 m above ground in °C °C


In [46]:
df_data = pd.read_csv(os.path.join(dir_input,'dwd_daily_station_data','cdc_download_2020-04-30_21_14','data',f'data_{product_code}.csv'))#.replace(-999,np.nan)
df_data = df_data[['Zeitstempel','Wert']].rename(columns={'Zeitstempel':'Timestamp','Wert':'Value'})
df_data['Timestamp'] = pd.to_datetime(df_data['Timestamp'], format="%Y%m%d")
df_data['Year'] = df_data['Timestamp'].dt.year
df_data['Month'] = df_data['Timestamp'].dt.month
df_data['Week'] = df_data['Timestamp'].dt.week
df_data['Day'] = df_data['Timestamp'].dt.day
df_data['Month-Day'] = df_data['Month'].astype(str).str.zfill(2)+"-"+df_data['Day'].astype(str).str.zfill(2)
df_data.info(null_counts=True)
#df_data

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30801 entries, 0 to 30800
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Timestamp  30801 non-null  datetime64[ns]
 1   Value      30801 non-null  float64       
 2   Year       30801 non-null  int64         
 3   Month      30801 non-null  int64         
 4   Week       30801 non-null  int64         
 5   Day        30801 non-null  int64         
 6   Month-Day  30801 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(4), object(1)
memory usage: 1.6+ MB


In [47]:
# Remove leap days
df_data = df_data.loc[
    df_data['Month-Day']!="02-29"
]

## Calculate stats

In [48]:
stats_column = 'Value'

### Get percentiles per month and day

In [49]:
# Group data
list_groups = ['Month-Day']
list_percentiles = [0.0,0.05,0.5,0.95,1.0]
df_daily_percentiles = df_data[list_groups+[stats_column]].groupby(list_groups).quantile(list_percentiles)
# Reorder and rename data
df_daily_percentiles = df_daily_percentiles.unstack(1)
df_daily_percentiles.columns = df_daily_percentiles.columns.droplevel()
df_daily_percentiles = df_daily_percentiles.rename(columns={
    0.0:'Min',list_percentiles[1]:f'{int(100*list_percentiles[1])}th percentile',0.5:'Median',list_percentiles[3]:f'{int(100*list_percentiles[3])}th percentile',1.0:'Max'
})
# Create month & day columns
df_daily_percentiles = df_daily_percentiles.reset_index()
df_daily_percentiles

Unnamed: 0,Month-Day,Min,5th percentile,Median,95th percentile,Max
0,01-01,-12.8,-9.400,1.40,7.640,9.5
1,01-02,-13.5,-8.780,1.10,7.400,10.9
2,01-03,-12.2,-7.080,0.80,7.780,10.1
3,01-04,-11.2,-7.680,0.70,8.240,10.6
4,01-05,-13.0,-6.960,0.90,8.000,10.9
...,...,...,...,...,...,...
360,12-27,-7.6,-5.680,1.65,8.370,10.2
361,12-28,-11.3,-6.770,1.60,8.555,10.9
362,12-29,-7.9,-5.185,1.55,7.385,11.2
363,12-30,-9.9,-5.585,1.90,7.025,8.1


## Create plots

### Settings

In [50]:
start_year = 2000
end_year = 2020

In [51]:
width_plot = 300
height_plot = 150
number_columns = 3

In [52]:
# Colors general
color_axis_title = '#FF4500'

# Color percentiles
color_percentiles = '#C0C0C0'

# Lines
domain_recent_years = list(range(start_year,end_year+1))
range_recent_years = [
    '#1b9e77',
    '#d95f02',
    '#7570b3'
][0:len(domain_recent_years)]

line_color = 'red'

In [53]:
# Set general colors
color_background = 'white' # '#FFFCF2'
color_titles = 'black' # '#4f4f4f'
color_labels = '#4f4f4f' # '#6e6e6e'

# Set colors background map
fill_sphere = '#D5F5FF' # '#7FD1D8'
stroke_color_graticule = 'white'
stroke_width_graticule = 1.0
fill_countries = '#DED9D8' # '#DED5D2' # 'lightgrey'
stroke_color_countries = '#978984' # 'grey'

# Set colors points
stroke_color_points = color_titles
color_points_selected = '#FF00E8'
color_points_not_selected = '#bbb1ae' # stroke_color_countries # '#2ECC71'
size_points_selected = 400
size_points_not_selected = 200
stroke_width_points_selected = 4
stroke_width_points_not_selected = 3

# Set colors heatmap
domain_heatmap = [-5,-.5,0,.5,5]
range_heatmap = ['#0571b0','#92c5de','#f7f7f7','#f4a582','#ca0020']
color_background_heatmap = fill_countries # '#e0e0e0'

# Set colors trends
color_trend_selected = color_points_selected # '#FF5733'
color_trends_not_selected = color_points_not_selected # color_labels
stroke_width_trends_selected = 2.5
stroke_width_trends_not_selected = 1.25
opacity_trends_not_selected = 0.2
color_zero_line = 'grey' # '#008FFF'
stroke_width_zero_line = 1.2

In [54]:
start_day = "08-01"
end_day = "09-01"
#center_value = df_daily_percentiles['Max'].max() - ((df_daily_percentiles['Max'].max() - df_daily_percentiles['Min'].min())/10)
center_value = df_daily_percentiles['Min'].min() + ((df_daily_percentiles['Max'].max() - df_daily_percentiles['Min'].min())/10)
step = ((df_daily_percentiles['Max'].max() - df_daily_percentiles['Min'].min())/35)
source_vertline = [
    {
        "start_day": start_day,
        "end_day": end_day,
        "label_day": start_day,
        "start_value": center_value + step/8,
        "end_value": center_value - step/8,
        "label_value": center_value,
        "label": "Median",
        "opacity": 0.5
    },
    {
        "start_day": start_day,
        "end_day": end_day,
        "label_day": start_day,
        "start_value": center_value + 1.25*step,
        "end_value": center_value - 1.25*step,
        "label_value": center_value + 1*step,
        "label": "Range between 25th & 75th percentile",
        "opacity": 0.25
    },
    {
        "start_day": start_day,
        "end_day": end_day,
        "label_day": start_day,
        "start_value": center_value + 2.5*step,
        "end_value": center_value - 2.5*step,
        "label_value": center_value + 2*step,
        "label": "Range between min & max",
        "opacity": 0.25
    },
]
df_legend = pd.DataFrame(source_vertline)
df_legend

Unnamed: 0,start_day,end_day,label_day,start_value,end_value,label_value,label,opacity
0,08-01,09-01,08-01,-17.255714,-17.624286,-17.44,Median,0.5
1,08-01,09-01,08-01,-15.597143,-19.282857,-15.965714,Range between 25th & 75th percentile,0.25
2,08-01,09-01,08-01,-13.754286,-21.125714,-14.491429,Range between min & max,0.25


### Create title

In [55]:
# Create title
title_text = [product_title]
source_title = pd.DataFrame({'text': title_text})
title = alt.Chart(
    source_title
).mark_text(
    size=16,
    fontWeight='bold',
    align='center',
    color=color_titles,
    dx=number_columns * width_plot / 2,
).encode(
    text="text:N",
)
# Create credentials
credentials_text = ["Data: DWD (dwd.de)"]
source_credentials = pd.DataFrame({'text': credentials_text})
credentials1 = alt.Chart(
    source_credentials
).mark_text(
    size=11,
    align='right',
    dx=number_columns*width_plot,
    dy=-5,
    color=color_labels
).encode(
    text="text:N",
)
credentials_text = ["Visualization: Lasse Scheele (@LasSchee)"]
source_credentials = pd.DataFrame({'text': credentials_text})
credentials2 = alt.Chart(
    source_credentials
).mark_text(
    size=11,
    align='right',
    dx=number_columns*width_plot,
    dy=10,
    color=color_labels
).encode(
    text="text:N",
)
# Create extra legend
legend = alt.layer(
    *create_extra_legend2(create_df_extra_legend())
).properties(
    width=width_plot/7, height=height_plot/3,
)
title + credentials1 + credentials2 + legend

### Create chart for each year

In [56]:
list_charts = list()
for iter_year in range(start_year,end_year+1):
    #print(iter_year)
    
    # Create area percentiles
    list_percentile_areas = create_percentile_areas(df_daily_percentiles.copy(), list_percentiles, '', '', iter_year)
    
    # Create line from observations
    line_recent_years = create_line_from_oberservations(df_data.loc[(df_data['Year']==iter_year)], stats_column, '', '', product_title, line_color)
    
    # Add extra legend
    #if iter_year == start_year:
    #    list_extra_legend = create_extra_legend(df_legend.copy(), iter_year)
    #else:
    #    list_extra_legend = list()
    list_extra_legend = list()
    
    # Create chart
    chart = alt.layer(
        *list_extra_legend, *list_percentile_areas, line_recent_years
    ).properties(
        width=width_plot, height=height_plot,
        title=f"{iter_year}"
    )
    list_charts.append(chart)

### Combine charts to rows

In [57]:
list_charts_rows = list()
for index_row in range(0,m.ceil(len(list_charts)/number_columns)):
    start_index = index_row * number_columns
    end_index = (index_row+1) * number_columns
    #print(index_row, start_index, end_index)
    list_charts_row = list_charts[start_index:end_index]
    chart = alt.hconcat(
        *list_charts_row,
        center=True
    )
    list_charts_rows.append(chart)

### Combine title and rows of charts

In [58]:
chart = alt.vconcat(
    title + credentials1 + credentials2 + legend,
    *list_charts_rows,
    #title=f"{product_title}",
    center=True
).configure_view(
    stroke=None
)
chart

# Process Precipitation Height

In [59]:
city = 'hamburg'
station_id = '01975'
parameter_str = 'precipitation height in mm'

## Read data

In [60]:
df_products = pd.read_csv(os.path.join(dir_input,'dwd_daily_station_data','cdc_download_2020-04-30_21_14','data','prd.csv'))
#df_products
index = df_products.loc[df_products['Produkt_Titel'].str.contains(parameter_str)].index[0]
product_code = df_products.loc[index,'Produkt_Code']
product_title = df_products.loc[index,'Produkt_Titel'].replace('Daily station observations ','')
product_title = product_title[0].capitalize()+product_title[1:]
product_unit = df_products.loc[index,'Einheit']
print(product_code, product_title, product_unit)

RS_MN006 Precipitation height in mm mm


In [61]:
df_data = pd.read_csv(os.path.join(dir_input,'dwd_daily_station_data','cdc_download_2020-04-30_21_14','data',f'data_{product_code}.csv'))#.replace(-999,np.nan)
df_data = df_data[['Zeitstempel','Wert']].rename(columns={'Zeitstempel':'Timestamp','Wert':'Value'})
df_data['Timestamp'] = pd.to_datetime(df_data['Timestamp'], format="%Y%m%d")
df_data['Year'] = df_data['Timestamp'].dt.year
df_data['Month'] = df_data['Timestamp'].dt.month
df_data['Week'] = df_data['Timestamp'].dt.week
df_data['Day'] = df_data['Timestamp'].dt.day
df_data['Month-Day'] = df_data['Month'].astype(str).str.zfill(2)+"-"+df_data['Day'].astype(str).str.zfill(2)
df_data.info(null_counts=True)
#df_data

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30801 entries, 0 to 30800
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Timestamp  30801 non-null  datetime64[ns]
 1   Value      30801 non-null  float64       
 2   Year       30801 non-null  int64         
 3   Month      30801 non-null  int64         
 4   Week       30801 non-null  int64         
 5   Day        30801 non-null  int64         
 6   Month-Day  30801 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(4), object(1)
memory usage: 1.6+ MB


In [62]:
# Remove leap days
df_data = df_data.loc[
    df_data['Month-Day']!="02-29"
]

## Calculate cumulative data

In [63]:
df_data['Value cumulative'] = df_data.groupby(['Year'])['Value'].cumsum()

## Calculate stats

In [64]:
stats_column = 'Value cumulative'

### Get percentiles per month and day

In [65]:
# Group data
list_groups = ['Month-Day']
#list_percentiles = [0.0,0.25,0.5,0.75,1.0]
list_percentiles = [0.0,0.05,0.5,0.95,1.0]
df_daily_percentiles = df_data[list_groups+[stats_column]].groupby(list_groups).quantile(list_percentiles)
# Reorder and rename data
df_daily_percentiles = df_daily_percentiles.unstack(1)
df_daily_percentiles.columns = df_daily_percentiles.columns.droplevel()
df_daily_percentiles = df_daily_percentiles.rename(columns={
    0.0:'Min',list_percentiles[1]:f'{int(100*list_percentiles[1])}th percentile',0.5:'Median',list_percentiles[3]:f'{int(100*list_percentiles[3])}th percentile',1.0:'Max'
})
# Create month & day columns
df_daily_percentiles = df_daily_percentiles.reset_index()
df_daily_percentiles

Unnamed: 0,Month-Day,Min,5th percentile,Median,95th percentile,Max
0,01-01,0.0,0.000,0.90,9.040,21.3
1,01-02,0.0,0.000,2.20,20.100,33.2
2,01-03,0.0,0.000,4.70,27.940,39.2
3,01-04,0.0,0.000,6.60,32.900,40.9
4,01-05,0.0,0.300,9.10,38.320,50.5
...,...,...,...,...,...,...
360,12-27,388.7,537.445,741.75,973.435,1071.0
361,12-28,389.2,537.700,745.30,975.635,1071.0
362,12-29,392.0,539.260,746.25,979.250,1071.2
363,12-30,392.3,544.955,746.25,979.420,1071.8


## Create plots

### Settings

In [34]:
start_year = 2000
end_year = 2020

In [35]:
width_plot = 300
height_plot = 150
number_columns = 3

In [82]:
# Colors general
color_axis_title = '#FF4500'

# Color percentiles
color_percentiles = '#FF4500' # '#C0C0C0'

# Lines
domain_recent_years = list(range(start_year,end_year+1))
range_recent_years = [
    '#1b9e77',
    '#d95f02',
    '#7570b3'
][0:len(domain_recent_years)]

line_color = 'blue'

In [83]:
# Set general colors
color_background = 'white' # '#FFFCF2'
color_titles = 'black' # '#4f4f4f'
color_labels = '#4f4f4f' # '#6e6e6e'

# Set colors background map
fill_sphere = '#D5F5FF' # '#7FD1D8'
stroke_color_graticule = 'white'
stroke_width_graticule = 1.0
fill_countries = '#DED9D8' # '#DED5D2' # 'lightgrey'
stroke_color_countries = '#978984' # 'grey'

# Set colors points
stroke_color_points = color_titles
color_points_selected = '#FF00E8'
color_points_not_selected = '#bbb1ae' # stroke_color_countries # '#2ECC71'
size_points_selected = 400
size_points_not_selected = 200
stroke_width_points_selected = 4
stroke_width_points_not_selected = 3

# Set colors heatmap
domain_heatmap = [-5,-.5,0,.5,5]
range_heatmap = ['#0571b0','#92c5de','#f7f7f7','#f4a582','#ca0020']
color_background_heatmap = fill_countries # '#e0e0e0'

# Set colors trends
color_trend_selected = color_points_selected # '#FF5733'
color_trends_not_selected = color_points_not_selected # color_labels
stroke_width_trends_selected = 2.5
stroke_width_trends_not_selected = 1.25
opacity_trends_not_selected = 0.2
color_zero_line = 'grey' # '#008FFF'
stroke_width_zero_line = 1.2

In [84]:
start_day = "03-15"
end_day = "04-01"
center_value = df_daily_percentiles['Max'].max() - ((df_daily_percentiles['Max'].max() - df_daily_percentiles['Min'].min())/10)
#center_value = df_daily_percentiles['Min'].min() + ((df_daily_percentiles['Max'].max() - df_daily_percentiles['Min'].min())/10)
step = ((df_daily_percentiles['Max'].max() - df_daily_percentiles['Min'].min())/35)
source_vertline = [
    {
        "start_day": start_day,
        "end_day": end_day,
        "label_day": end_day,
        "start_value": center_value + step/8,
        "end_value": center_value - step/8,
        "label_value": center_value,
        "label": "Median",
        "opacity": 0.5
    },
    {
        "start_day": start_day,
        "end_day": end_day,
        "label_day": end_day,
        "start_value": center_value + 1.25*step,
        "end_value": center_value - 1.25*step,
        "label_value": center_value + 1*step,
        "label": f"Range between {int(100*list_percentiles[1])}th & {int(100*list_percentiles[3])}th percentile",
        "opacity": 0.25
    },
    {
        "start_day": start_day,
        "end_day": end_day,
        "label_day": end_day,
        "start_value": center_value + 2.5*step,
        "end_value": center_value - 2.5*step,
        "label_value": center_value + 2*step,
        "label": "Range between min & max",
        "opacity": 0.25
    },
]
df_legend = pd.DataFrame(source_vertline)
df_legend

Unnamed: 0,start_day,end_day,label_day,start_value,end_value,label_value,label,opacity
0,03-15,04-01,04-01,968.447857,960.792143,964.62,Median,0.5
1,03-15,04-01,04-01,1002.898571,926.341429,995.242857,Range between 5th & 95th percentile,0.25
2,03-15,04-01,04-01,1041.177143,888.062857,1025.865714,Range between min & max,0.25


In [85]:
df_legend = create_df_extra_legend()
df_legend

Unnamed: 0,start_x,end_x,start_y,end_y,start_x_label,start_y_label,label,opacity
0,-1,1,0.05,-0.05,1,0.0,Median,0.5
1,-1,1,0.5,-0.5,1,0.4,Range between 5th & 95th percentile,0.25
2,-1,1,1.0,-1.0,1,0.8,Range between min & max,0.25


### Create title

In [86]:
# Create title
title_text = [product_title]
source_title = pd.DataFrame({'text': title_text})
title = alt.Chart(
    source_title
).mark_text(
    size=16,
    fontWeight='bold',
    align='center',
    color=color_titles,
    dx=number_columns * width_plot / 2,
).encode(
    text="text:N",
)
# Create credentials
credentials_text = ["Data: DWD (dwd.de)"]
source_credentials = pd.DataFrame({'text': credentials_text})
credentials1 = alt.Chart(
    source_credentials
).mark_text(
    size=11,
    align='right',
    dx=number_columns*width_plot,
    dy=-5,
    color=color_labels
).encode(
    text="text:N",
)
credentials_text = ["Visualization: Lasse Scheele (@LasSchee)"]
source_credentials = pd.DataFrame({'text': credentials_text})
credentials2 = alt.Chart(
    source_credentials
).mark_text(
    size=11,
    align='right',
    dx=number_columns*width_plot,
    dy=10,
    color=color_labels
).encode(
    text="text:N",
)
# Create extra legend
legend = alt.layer(
    *create_extra_legend2(create_df_extra_legend())
).properties(
    width=width_plot/7, height=height_plot/3,
)
title + credentials1 + credentials2 + legend

### Create chart for each year

In [91]:
list_charts = list()
for iter_year in range(start_year,end_year+1):
    #print(iter_year)
    
    # Create area percentiles
    list_percentile_areas = create_percentile_areas(df_daily_percentiles.copy(), list_percentiles, '', '', iter_year)
    
    # Create line from observations
    line_recent_years = create_line_from_oberservations(df_data.loc[(df_data['Year']==iter_year)], stats_column, '', '', product_title, line_color)
    
    # Add extra legend
    #if iter_year == start_year:
    #    list_extra_legend = create_extra_legend(df_legend.copy(), iter_year)
    #else:
    #    list_extra_legend = list()
    list_extra_legend = list()
    
    # Create chart
    chart = alt.layer(
        *list_extra_legend, *list_percentile_areas, line_recent_years
    ).properties(
        width=width_plot, height=height_plot,
        title=f"{iter_year}"
    )
    list_charts.append(chart)

### Combine charts to rows

In [92]:
list_charts_rows = list()
for index_row in range(0,m.ceil(len(list_charts)/number_columns)):
    start_index = index_row * number_columns
    end_index = (index_row+1) * number_columns
    #print(index_row, start_index, end_index)
    list_charts_row = list_charts[start_index:end_index]
    chart = alt.hconcat(
        *list_charts_row,
        center=True
    )
    list_charts_rows.append(chart)

### Combine title and rows of charts

In [93]:
chart = alt.vconcat(
    title + credentials1 + credentials2 + legend,
    *list_charts_rows,
    #title=f"{product_title}",
    center=True
).configure_view(
    stroke=None
)
chart