In [24]:
from math import pi

import pandas as pd

from bokeh.io import show
from bokeh.models import BasicTicker, ColorBar, LinearColorMapper, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.palettes import brewer

In [25]:
# Load the cleaned temperature table, parse DATE into datetimes and use it as
# the index. The index is built from the column's values, so DATE also stays
# available as a regular column.
data = (
    pd.read_csv('tempdata_clean.csv')
    .assign(DATE=lambda frame: pd.to_datetime(frame['DATE']))
    .pipe(lambda frame: frame.set_index(frame['DATE']))
)

In [26]:
# Day-of-year number of every row, taken from the datetime index; used as the
# grouping key for the per-day mean.
data = data.assign(dayofyear=data.index.dayofyear)

In [27]:
# Turn absolute temperatures into anomalies: subtract from every reading the
# mean of all readings that share its day-of-year number.
# NOTE(review): day-of-year numbers shift by one after February in leap years,
# while the heatmap is later keyed by calendar day ('%m-%d') - confirm this
# alignment is intended.
data['mean_for_this_day'] = data.groupby('dayofyear')['temp'].transform('mean')
data['temp'] = data['temp'].sub(data['mean_for_this_day'])

# Display only: drop() returns a new frame and the result is not assigned, so
# `data` itself keeps both helper columns.
data.drop(columns=['mean_for_this_day', 'dayofyear'])

Unnamed: 0_level_0,DATE,temp
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
1876-01-01,1876-01-01,2.094483
1876-01-02,1876-01-02,2.137931
1876-01-03,1876-01-03,-0.204138
1876-01-04,1876-01-04,-6.211724
1876-01-05,1876-01-05,-10.008276
...,...,...
2020-09-26,2020-09-26,-2.368056
2020-09-27,2020-09-27,-2.722917
2020-09-28,2020-09-28,1.344444
2020-09-29,2020-09-29,1.848611


In [28]:
# Calendar year of each row as a string, so it can serve as a categorical
# axis label.
data = data.assign(Year=data.index.year.astype(str))

In [29]:
# Month-day label ('MM-DD') for each row, derived from the datetime index.
data = data.assign(day=data.index.strftime('%m-%d'))

# Name the columns axis 'day'; this only changes the header shown when the
# frame is displayed.
data.columns = data.columns.rename('day')

# Cast the labels to str (strftime already yields strings).
data = data.astype({'day': str})
data.head(n=5)

day,DATE,temp,dayofyear,mean_for_this_day,Year,day
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1876-01-01,1876-01-01,2.094483,1,0.105517,1876,01-01
1876-01-02,1876-01-02,2.137931,2,0.362069,1876,01-02
1876-01-03,1876-01-03,-0.204138,3,0.504138,1876,01-03
1876-01-04,1876-01-04,-6.211724,4,0.411724,1876,01-04
1876-01-05,1876-01-05,-10.008276,5,0.208276,1876,01-05


In [30]:
# Left-pad the day labels with zeros to a minimum width of 3. The 'MM-DD'
# labels produced by strftime('%m-%d') are already 5 characters long, so they
# pass through unchanged.
data['day'] = data['day'].str.zfill(3)


In [31]:
# Re-inspect the first rows after the zero-padding step on the 'day' column.
data.head()

day,DATE,temp,dayofyear,mean_for_this_day,Year,day
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1876-01-01,1876-01-01,2.094483,1,0.105517,1876,01-01
1876-01-02,1876-01-02,2.137931,2,0.362069,1876,01-02
1876-01-03,1876-01-03,-0.204138,3,0.504138,1876,01-03
1876-01-04,1876-01-04,-6.211724,4,0.411724,1876,01-04
1876-01-05,1876-01-05,-10.008276,5,0.208276,1876,01-05


In [32]:
# Reshape from long to wide: one row per Year, one column per 'MM-DD' day,
# cells holding the temperature anomaly.
# Note: this rebinds `data` to the wide table, so the cell cannot be re-run on
# its own - the wide frame no longer has the 'Year', 'day' and 'temp' columns.
data = pd.pivot(data, index='Year', columns='day', values='temp')


In [33]:
# Axis categories for the heatmap: row labels (years) and column labels (days)
# of the wide table.
years = data.index.tolist()
days = data.columns.tolist()

# Flatten the wide table back into long form: one row per (Year, day) pair
# with the anomaly in a 'temp' column.
df = data.stack().to_frame('temp').reset_index()

In [34]:
# Pin the column labels the plotting cells refer to ('@Year', '@day', '@temp').
# Presumably reset_index() already produced these names; this makes it explicit.
df.columns = pd.Index(['Year', 'day', 'temp'])

In [35]:
# 11-step diverging red/blue palette; its middle entry is the neutral colour.
colors = brewer['RdBu'][11]

# `temp` holds anomalies (deviation from the per-day mean), so the colour scale
# must be symmetric about zero. Mapping [min, max] directly would put the
# neutral colour at the midpoint of the observed range instead of at zero
# anomaly.
temp_limit = df.temp.abs().max()
mapper = LinearColorMapper(palette=colors, low=-temp_limit, high=temp_limit,
                           nan_color='#a9a9a9')

# Toolbar tools enabled on the figure.
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

In [36]:
# Heatmap canvas: years as categorical x axis (drawn along the top), days as
# categorical y axis in reversed order.
# NOTE(review): plot_width / plot_height are the older Bokeh keyword names;
# confirm they are still accepted by the installed Bokeh version.
p = figure(
    title=f"Temperatures in Berlin ({years[0]} - {years[-1]})",
    x_range=years,
    y_range=days[::-1],
    x_axis_location="above",
    plot_width=1000,
    plot_height=1000,
    tools=TOOLS,
    toolbar_location='below',
    tooltips=[('date', '@day @Year'), ('temp', '@temp °C')],
)

In [37]:
# Tick labels: small font, no gap to the axis, x labels rotated by pi/3 radians.
p.axis.major_label_text_font_size = "7px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

# Hide major tick marks, axis lines and grid lines so only the cells remain.
p.axis.major_tick_line_color = None
p.axis.axis_line_color = None
p.grid.grid_line_color = None

In [38]:
# One unit-sized rectangle per (Year, day) row of df, filled according to the
# 'temp' value passed through the colour mapper; no outline.
p.rect(
    source=df,
    x="Year",
    y="day",
    width=1,
    height=1,
    line_color=None,
    fill_color=dict(field='temp', transform=mapper),
)

In [39]:
# Colour legend driven by the same mapper as the rectangles. One tick is
# requested per palette colour; labels use the printf "%f" format.
color_bar = ColorBar(
    color_mapper=mapper,
    ticker=BasicTicker(desired_num_ticks=len(colors)),
    formatter=PrintfTickFormatter(format="%f"),
    major_label_text_font_size="7px",
    label_standoff=6,
    border_line_color=None,
    location=(0, 0),
)

# Attach the legend to the right-hand side of the figure.
p.add_layout(color_bar, 'right')

In [40]:
show(p)      # render the finished heatmap figure `p` built in the cells above