# Exam: Time Series Visualization with Bokeh

This exam tests your ability to visualize time series data using the Bokeh library.
You will be working with the "Daily Minimum Temperatures in Melbourne" dataset.
For each question, provide the Python code using Bokeh to generate the requested visualization.

**Dataset:** "daily-minimum-temperatures-in-melbourne.csv"


In [4]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    DatetimeTickFormatter,
    NumeralTickFormatter,
)
from bokeh.layouts import row, column
from bokeh.transform import factor_cmap

output_notebook()  # Enable Bokeh output in Jupyter Notebook

# Load the Dataset
df = pd.read_csv(".\datasets\daily-minimum-temperatures-in-melbourne.csv")

# Rename columns for clarity
df.columns = ['Date', 'Temperature']

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' from the 'Temperature' column and convert to numeric
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

  df = pd.read_csv(".\datasets\daily-minimum-temperatures-in-melbourne.csv")


Question 1: Basic Time Series Line Plot
1.  Create a basic line plot showing the daily minimum temperature over time.

    * Use the 'Date' column on the x-axis and the 'Temperature' column on the y-axis.
    * Set the plot title to "Daily Minimum Temperatures".
    * Label the x-axis as "Date" and the y-axis as "Temperature (°C)".
    * Add tooltips to display the date and temperature when hovering over the line.
    * Enable pan, wheel zoom, and reset tools.


In [14]:
# Answer 1: (Provide code here)
df.head()
output_notebook()

source = ColumnDataSource(df)
p = figure( title="Daily Minimum Temperatures", x_axis_type="datetime", width=900, height=400, tools="pan,wheel_zoom,reset",toolbar_location="above")
p.line(x='Date', y='Temperature', source=source, line_width=2, color="navy", legend_label="Temperature")

hover = HoverTool(tooltips=[("Date", "@Date{%F}"), ("Temperature", "@Temperature °C")], formatters = {'@Date':'datetime'}, mode = 'vline')

p.add_tools(hover)

p.xaxis.axis_label = "Date"
p.yaxis.axis_label = "Temperature (°C)"
p.xaxis.formatter.days = '%b %Y'

show(p)

Question 2: Rolling Average
2.  Calculate the 30-day rolling average of the daily minimum temperature and plot it
    alongside the original temperature data.

    * Create a new column 'Rolling_Avg' in the DataFrame containing the 30-day rolling average.
    * Plot both the original 'Temperature' and the 'Rolling_Avg' on the same plot.
    * Use different colors and line styles to distinguish between the two.
    * Add a legend to the plot to label the lines.
    * Add tooltips to display the date, original temperature, and rolling average.

In [18]:
# Answer 2: (Provide code here)
df['Rolling_Avg'] = df['Temperature'].rolling(window=30).mean()
source = ColumnDataSource(df)

p2 = figure(title="Daily Minimum Temperatures with 30-Day Rolling Average", x_axis_type="datetime", width=900, height=400, tools="pan,wheel_zoom,reset", toolbar_location="above")
p2.line('Date', 'Temperature', source=source, line_width=2, color='navy', legend_label='Daily Temperature')
# Ligne de la moyenne mobile
p2.line('Date', 'Rolling_Avg', source=source, line_width=2, color='orange', line_dash='dashed', legend_label='30-Day Rolling Avg')

hover = HoverTool( tooltips=[ ("Date", "@Date{%F}"),("Temperature", "@Temperature °C"),("Rolling Avg", "@Rolling_Avg{0.2f} °C"),],formatters={'@Date': 'datetime'},mode='vline')
p2.add_tools(hover)

# Configuration des axes
p2.xaxis.axis_label = "Date"
p2.yaxis.axis_label = "Temperature (°C)"
p2.xaxis.formatter.days = '%b %Y'

# Configuration de la légende
p2.legend.location = "top_left"
p2.legend.click_policy = "hide"  # Permet de masquer/afficher les courbes

# Affichage
show(p2)

Question 3: Monthly Box Plots
3.  Create box plots to visualize the distribution of temperatures for each month.

    * Extract the month from the 'Date' column and create a new 'Month' column.
    * Group the data by 'Month' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution.
    * Label the x-axis with month names and the y-axis with "Temperature (°C)".
    * Add tooltips to display the month and relevant statistical values (min, max, media

In [22]:
# Answer 3: (Provide code here)
import numpy as np
from bokeh.models import ColumnDataSource, Whisker, HoverTool

# Extraire le mois en format texte
df['Month'] = df['Date'].dt.strftime('%B')  
df['Month_Num'] = df['Date'].dt.month      

# Ordonner les mois correctement
month_order = ['January', 'February', 'March', 'April', 'May', 'June','July', 'August', 'September', 'October', 'November', 'December']

# Groupement et statistiques
stats = df.groupby('Month').agg(
    q1=('Temperature', lambda x: np.percentile(x, 25)),
    q2=('Temperature', lambda x: np.percentile(x, 50)),  # médiane
    q3=('Temperature', lambda x: np.percentile(x, 75)),
    min_val=('Temperature', 'min'),
    max_val=('Temperature', 'max')
).reindex(month_order)

# Calcul de l'écart interquartile
stats['iqr'] = stats.q3 - stats.q1

# Source de données pour Bokeh
source = ColumnDataSource(data=dict(
    months=month_order,
    q1=stats.q1,
    q2=stats.q2,
    q3=stats.q3,
    upper=stats.q3,
    lower=stats.q1,
    min_val=stats.min_val,
    max_val=stats.max_val
))

# Création de la figure
p3 = figure(x_range=month_order, width=950, height=500, title="Monthly Temperature Distribution (Box Plot)", tools="pan,reset,wheel_zoom")

# Ajout des boxes
p3.segment('months', 'max_val', 'months', 'q3', source=source, line_color="black")
p3.segment('months', 'min_val', 'months', 'q1', source=source, line_color="black")

p3.vbar('months', 0.7, 'q2', 'q3', source=source, fill_color="#E08E79", line_color="black")
p3.vbar('months', 0.7, 'q1', 'q2', source=source, fill_color="#3B8686", line_color="black")

# Ajouter des Whiskers
p3.add_layout(Whisker(source=source, base="months", upper="max_val", lower="min_val"))

# Tooltips
hover = HoverTool(
    tooltips=[
        ("Month", "@months"),
        ("Min", "@min_val{0.0} °C"),
        ("Q1", "@q1{0.0} °C"),
        ("Median", "@q2{0.0} °C"),
        ("Q3", "@q3{0.0} °C"),
        ("Max", "@max_val{0.0} °C"),
    ]
)
p3.add_tools(hover)

# Axes
p3.xaxis.axis_label = "Month"
p3.yaxis.axis_label = "Temperature (°C)"
p3.xaxis.major_label_orientation = 1

# Affichage
show(p3)

In [23]:
from bokeh.plotting import output_notebook, show
import pandas as pd

output_notebook()  # Enable Bokeh output in Jupyter Notebook

# Load the Dataset
df = pd.read_csv("./datasets/daily-minimum-temperatures-in-melbourne.csv")

# Now you can proceed with your Bokeh plotting code!
print(df.head()) # Just to see if the dataframe loaded correctly


# Rename columns for clarity
df.columns = ['Date', 'Temperature']

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' from the 'Temperature' column and convert to numeric
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

df

         Date DailyTemperature
0  1981-01-01             20.7
1  1981-01-02             17.9
2  1981-01-03             18.8
3  1981-01-04             14.6
4  1981-01-05             15.8


Unnamed: 0,Date,Temperature
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8
...,...,...
3645,1990-12-27,14.0
3646,1990-12-28,13.6
3647,1990-12-29,13.5
3648,1990-12-30,15.7


4.  Create box plots to visualize the distribution of temperatures for each year,
    and use color mapping to highlight temperature variations.

    * Extract the year from the 'Date' column and create a new 'Year' column.
    * Group the data by 'Year' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution for each year.
    * Label the x-axis with the 'Year' and the y-axis with "Temperature (°C)".
    * Use `factor_cmap` to color the boxes based on the median temperature of each year.
    * Add tooltips to display the year and relevant statistical values (min, max, median, etc.).
    * Enable pan, wheel zoom, and reset tools.

In [27]:
# Answer 4: (Provide code here)

from bokeh.palettes import Viridis256

df['Year']=df['Date'].dt.year
# Calcul des statistiques nécessaires par année
stats_yearly = df.groupby('Year').agg(
    q1=('Temperature', lambda x: np.percentile(x, 25)),
    q2=('Temperature', lambda x: np.percentile(x, 50)),  # médiane
    q3=('Temperature', lambda x: np.percentile(x, 75)),
    min_val=('Temperature', 'min'),
    max_val=('Temperature', 'max')
).reset_index()

# Préparer les données pour Bokeh
stats_yearly['Year'] = stats_yearly['Year'].astype(str)  
source = ColumnDataSource(stats_yearly)

# Création de la figure
p = figure(x_range=stats_yearly['Year'], title="Yearly Temperature Distribution (Box Plot)", width=1000, height=500, tools="pan,wheel_zoom,reset", toolbar_location="above")

# Appliquer une palette en fonction de la médiane
p.vbar(x='Year',width=0.7, bottom='q2', top='q3', source=source, fill_color=factor_cmap('Year', palette=Viridis256, factors=stats_yearly['Year'], start=1), line_color="black", legend_label="Q2 to Q3")
p.vbar(x='Year', width=0.7, bottom='q1', top='q2', source=source, fill_color=factor_cmap('Year', palette=Viridis256, factors=stats_yearly['Year'], start=1),line_color="black",legend_label="Q1 to Q2")

# Ajout des moustaches
p.segment('Year', 'max_val', 'Year', 'q3', source=source, line_color="black")
p.segment('Year', 'min_val', 'Year', 'q1', source=source, line_color="black")
p.add_layout(Whisker(source=source, base='Year', upper='max_val', lower='min_val'))

# Tooltips
hover = HoverTool(tooltips=[("Year", "@Year"),("Min", "@min_val{0.0} °C"),("Q1", "@q1{0.0} °C"),("Median", "@q2{0.0} °C"),("Q3", "@q3{0.0} °C"),("Max", "@max_val{0.0} °C"),])
p.add_tools(hover)

# Configuration des axes
p.xaxis.axis_label = "Year"
p.yaxis.axis_label = "Temperature (°C)"
p.xaxis.major_label_orientation = 1

# Affichage
show(p)

Question 5: Interactive Time Range Selection

5.  Create an interactive line plot where the user can select a specific time range
    to view using a date range slider.

    * Create a basic line plot of 'Temperature' over 'Date'.
    * Implement a date range slider using Bokeh widgets to allow users to select a start and end date.
    * Update the plot dynamically based on the selected date range.
    * Add tooltips to display the date and temperature.
    * Enable pan, wheel zoom, and reset tools.

In [31]:
# Answer 5: (Provide code here)
from bokeh.models import DateRangeSlider, CustomJS


full_source = ColumnDataSource(df)
filtered_source = ColumnDataSource(df.copy())

# Création de la figure
p = figure(x_axis_type="datetime", width=950, height=400,
           title="Daily Minimum Temperatures - Select Date Range",
           tools="pan,wheel_zoom,reset")

p.line('Date', 'Temperature', source=filtered_source, line_width=2, color='navy')

# Tooltips
p.add_tools(HoverTool(
    tooltips=[
        ("Date", "@Date{%F}"),
        ("Temp", "@Temperature °C")
    ],
    formatters={'@Date': 'datetime'},
    mode='vline'
))

# Date Range Slider
slider = DateRangeSlider(
    title="Select Date Range:",
    start=df['Date'].min(),
    end=df['Date'].max(),
    value=(df['Date'].min(), df['Date'].max()),
    step=1,
    width=900
)

# Callback JavaScript
callback = CustomJS(args=dict(source=full_source, filtered=filtered_source, slider=slider), code="""
    const data = source.data;
    const fdata = filtered.data;
    const start = new Date(slider.value[0]);
    const end = new Date(slider.value[1]);
    
    const dates = data['Date'];
    const temps = data['Temperature'];
    
    fdata['Date'] = [];
    fdata['Temperature'] = [];
    
    for (let i = 0; i < dates.length; i++) {
        let date = new Date(dates[i]);
        if (start <= date && date <= end) {
            fdata['Date'].push(dates[i]);
            fdata['Temperature'].push(temps[i]);
        }
    }
    filtered.change.emit();
""")

slider.js_on_change('value', callback)

# Affichage final
show(column(p, slider))


Question 6: Time Series Decomposition Visualization

6.  Perform a simple time series decomposition to visualize the trend and seasonality
    components of the temperature data.

    * Resample the data to monthly frequency and calculate the monthly average temperature.
    * Use a simple moving average to estimate the trend component.
    * Calculate the seasonal component by subtracting the trend from the original monthly data.
    * Create three separate Bokeh plots: one for the original monthly data, one for the trend,
        and one for the seasonal component.
    * Ensure the plots are aligned and share the same x-axis (Date).
    * Add tooltips to each plot to display the date and corresponding value.
    * Enable pan, wheel zoom, and reset tools for each plot.

In [32]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, HoverTool

output_notebook()

# Chargement et préparation
df = pd.read_csv("./datasets/daily-minimum-temperatures-in-melbourne.csv")
df.columns = ['Date', 'Temperature']
df['Date'] = pd.to_datetime(df['Date'])
df['Temperature'] = pd.to_numeric(df['Temperature'].astype(str).str.replace('?', '', regex=False))

# 1. Résample mensuellement (moyenne)
monthly = df.resample('M', on='Date').mean().reset_index()

# 2. Tendance = moyenne mobile (rolling window de 12 mois)
monthly['Trend'] = monthly['Temperature'].rolling(window=12, center=True).mean()

# 3. Saisonnalité = température - tendance
monthly['Seasonality'] = monthly['Temperature'] - monthly['Trend']

# Source unique (pour synchroniser les plots)
source = ColumnDataSource(monthly)

# OUTIL commun
hover = HoverTool(
    tooltips=[
        ("Date", "@Date{%b %Y}"),
        ("Temp", "@Temperature{0.0} °C"),
        ("Trend", "@Trend{0.0} °C"),
        ("Seasonal", "@Seasonality{0.0} °C")
    ],
    formatters={"@Date": "datetime"},
    mode="vline"
)

TOOLS = "pan,wheel_zoom,reset"

# 4. Plot original
p1 = figure(title="Monthly Average Temperature", x_axis_type="datetime", tools=TOOLS, height=250, width=900)
p1.line('Date', 'Temperature', source=source, color="steelblue", line_width=2)
p1.add_tools(hover)
p1.yaxis.axis_label = "Temperature (°C)"

# 5. Plot Trend
p2 = figure(title="Trend (12-Month Moving Average)", x_axis_type="datetime", tools=TOOLS, height=250, width=900, x_range=p1.x_range)
p2.line('Date', 'Trend', source=source, color="orange", line_width=2)
p2.add_tools(hover)
p2.yaxis.axis_label = "Trend (°C)"

# 6. Plot Seasonality
p3 = figure(title="Seasonal Component", x_axis_type="datetime", tools=TOOLS, height=250, width=900, x_range=p1.x_range)
p3.line('Date', 'Seasonality', source=source, color="green", line_width=2)
p3.add_tools(hover)
p3.yaxis.axis_label = "Seasonality (°C)"

# Affichage empilé
show(column(p1, p2, p3))


  monthly = df.resample('M', on='Date').mean().reset_index()
