In [1]:
%%time
# import necessary libraries
import numpy as np
from bokeh.plotting import figure, show
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.io import output_notebook
import pandas as pd

# Load the penguins dataset from a CSV file (path is relative to the
# notebook's working directory).
df = pd.read_csv("data/penguins.csv")

# Send Bokeh output to the notebook.
output_notebook()

# Names of the numeric columns; each one becomes a row and a column of the
# plot matrix. Computed before the helper 'color' column is added below.
numeric_columns = df.select_dtypes(include=np.number).columns.tolist()

# Fixed color per species, stored as a column so the glyphs can reference it.
# NOTE(review): species values missing from this palette (including NaN)
# map to NaN colors -- confirm the data only contains these three species.
species_palette = {'Adelie': 'blue', 'Chinstrap': 'green', 'Gentoo': 'red'}
df['color'] = df['species'].map(species_palette)

# One shared data source for every scatter panel.
source = ColumnDataSource(df)

# Options common to every panel, hoisted out of the loops.
panel_options = dict(background_fill_color="#fafafa", width=275, height=200)

# Build the matrix: histograms on the diagonal, scatter plots elsewhere.
plots = []
for col1 in numeric_columns:
    row_plots = []
    for col2 in numeric_columns:
        if col1 == col2:
            # Diagonal: distribution of a single column. NaN values are
            # dropped before binning.
            hist, edges = np.histogram(df[col1].dropna(), bins=20)
            p = figure(title=f"{col1} Histogram", **panel_options)
            p.quad(
                top=hist,
                bottom=0,
                left=edges[:-1],
                right=edges[1:],
                fill_color="navy",
                line_color="white",
                alpha=0.5,
            )
            p.xaxis.axis_label = col1
            p.yaxis.axis_label = 'Frequency'
        else:
            # Off-diagonal: col1 on the y axis against col2 on the x axis,
            # colored by species.
            p = figure(title=f"{col1} vs {col2}", **panel_options)
            # scatter() draws circle markers by default; it replaces the
            # marker-style use of circle(), which newer Bokeh deprecates.
            p.scatter(
                x=col2,
                y=col1,
                color='color',
                legend_field='species',
                source=source,
            )
            p.xaxis.axis_label = col2
            p.yaxis.axis_label = col1

            # Hover tool. Field names are wrapped in braces (@{name}) so
            # column names containing spaces or punctuation still resolve.
            hover = HoverTool(
                tooltips=[
                    (col2, f"@{{{col2}}}"),
                    (col1, f"@{{{col1}}}"),
                    ("species", "@species"),
                ]
            )
            p.add_tools(hover)
        row_plots.append(p)
    plots.append(row_plots)

# Arrange the panels into a grid.
grid = gridplot(plots)

# Render the grid.
show(grid)

CPU times: user 4.85 s, sys: 454 ms, total: 5.31 s
Wall time: 12.6 s
