In [1]:
%%time
# import necessary libraries
import math

import pandas as pd
from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter, HoverTool
from bokeh.plotting import figure, show
from bokeh.transform import transform
from bokeh.palettes import Viridis256
from bokeh.io import output_notebook

# Route Bokeh output to the notebook so show() renders inline.
output_notebook()

# Read the raw penguin measurements from disk.
df = pd.read_csv("data/penguins.csv")

# Keep only the float64 columns for the heatmap.
# NOTE(review): integer-typed numeric columns are excluded by this filter --
# confirm that is intended for this dataset.
numeric_data = df.select_dtypes(include=['float64'])

# Pairwise correlations between the selected columns, rounded to two
# decimals so the on-cell labels stay short.
corr_matrix = numeric_data.corr().round(2)

# Reshape the square matrix into long form (one row per cell) because the
# Bokeh data source below is addressed by column name. From here on `df`
# holds the long-form correlations, not the raw CSV data.
df = corr_matrix.stack().to_frame(name='value').reset_index()
df.columns = ['x', 'y', 'value']

# Colour scale: the Viridis palette in reverse order, stretched linearly
# between the smallest and largest correlation present in the data.
colors = list(reversed(Viridis256))
mapper = LinearColorMapper(palette=colors, low=df.value.min(), high=df.value.max())
source = ColumnDataSource(df)

# Categorical axes built from the correlation matrix labels; the y range is
# reversed so the diagonal runs from top-left to bottom-right.
# "hover" is intentionally NOT listed in `tools`: listing it creates a default
# HoverTool in addition to the customised one added below, which results in
# two tooltips being shown for every cell.
p = figure(title="Correlation Heatmap of Penguin Dataset", x_range=list(corr_matrix.columns), y_range=list(corr_matrix.columns[::-1]),
           x_axis_location="above", width=500, height=500,
           tools="save,pan,box_zoom,reset,wheel_zoom", toolbar_location='below')

# One unit square per (x, y) pair, filled according to its correlation value.
cells = p.rect(x="x", y="y", width=1, height=1, source=source,
               line_color=None, fill_color=transform('value', mapper))

# Single hover tool showing the cell's correlation ('@value' reads the
# 'value' column of the ColumnDataSource). It is bound to the rectangle
# renderer only, so the text labels drawn on top do not trigger their own
# tooltip.
hover = HoverTool(tooltips=[('Correlation', '@value')], renderers=[cells])
p.add_tools(hover)

# Print each correlation value centred in its cell, in a small font.
text_props = {"source": source, "text_align": "center", "text_baseline": "middle"}
p.text(x="x", y="y", text="value", text_color="black", text_font_size="5pt", **text_props)

# Rotate x-axis labels by 45 degrees. Bokeh interprets a numeric orientation
# as an angle in radians, so the value must be pi/4 rather than 45.
p.xaxis.major_label_orientation = math.pi / 4

# Legend for the colour scale: shares the mapper used for the cells and is
# drawn without a border or tick marks, with small tick labels.
color_bar_options = dict(
    color_mapper=mapper,
    location=(0, 0),
    label_standoff=6,
    major_label_text_font_size="7px",
    major_tick_line_color=None,
    border_line_color=None,
)
color_bar = ColorBar(**color_bar_options)

# Attach the legend to the right-hand side of the plot.
p.add_layout(color_bar, 'right')

# Render the finished figure.
show(p)

CPU times: user 2.99 s, sys: 532 ms, total: 3.53 s
Wall time: 7.02 s
