# Exam: Time Series Visualization with Bokeh

This exam tests your ability to visualize time series data using the Bokeh library.
You will be working with the "Daily Minimum Temperatures in Melbourne" dataset.
For each question, provide the Python code using Bokeh to generate the requested visualization.

**Dataset:** "daily-minimum-temperatures-in-melbourne.csv"

In [1]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    DatetimeTickFormatter,
    NumeralTickFormatter,
    CustomJS,
    DateRangeSlider,

)
from bokeh.layouts import row, column
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20_20
from pathlib import Path
import calendar
import numpy as np

# Base directory that the dataset path below is resolved against.
DATA_PATH = Path(".")

# NOTE(review): the exam statement names the Melbourne daily-minimum-temperature
# dataset, but this cell loads the AirPassengers file - confirm which CSV is intended.
df = pd.read_csv(DATA_PATH / "datasets" / "AirPassengersDates.csv")

# Rename columns for clarity (this assignment requires the CSV to have exactly two columns).
df.columns = ['Date', 'Temperature']

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Strip literal '?' characters from the 'Temperature' column, then convert to numeric.
# pd.to_numeric raises if any value is still not parseable after the cleanup.
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

# Render Bokeh output inline in the notebook.
output_notebook()

# Shared data source used by the first plot.
source = ColumnDataSource(df)

In [2]:
# Sanity check: show the first rows, then the dtype of each column.
for summary in (df.head(), df.dtypes):
    print(summary)

        Date  Temperature
0 1949-01-12          112
1 1949-02-24          118
2 1949-03-22          132
3 1949-04-05          129
4 1949-05-24          121
Date           datetime64[ns]
Temperature             int64
dtype: object


Question 1: Basic Time Series Line Plot
1.  Create a basic line plot showing the daily minimum temperature over time.

    * Use the 'Date' column on the x-axis and the 'Temperature' column on the y-axis.
    * Set the plot title to "Daily Minimum Temperatures".
    * Label the x-axis as "Date" and the y-axis as "Temperature (°C)".
    * Add tooltips to display the date and temperature when hovering over the line.
    * Enable pan, wheel zoom, and reset tools.


In [3]:
# Question 1: plot the raw series as a single line on a datetime x-axis.
figure_options = dict(
    title="Daily Minimum Temperatures",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Temperature (°C)",
    tools="pan,wheel_zoom,reset",
)
p = figure(**figure_options)

p.line(
    'Date',
    'Temperature',
    source=source,
    line_width=2,
    legend_label="Température journalière",
)

# Tooltip rows: (label, template). The {...} suffix is the per-field format.
tooltip_rows = [
    ("Date", "@Date{%Y-%m-%d}"),
    ("Température", "@Temperature{0.0} °C"),
]
# Tell Bokeh how to interpret each format spec used above.
tooltip_formatters = {
    '@Date': 'datetime',        # format the date with strftime-style codes
    '@Temperature': 'numeral',  # format the numeric value
}
hover = HoverTool(
    tooltips=tooltip_rows,
    formatters=tooltip_formatters,
    mode='vline',
)
p.add_tools(hover)

show(p)

Question 2: Rolling Average
2.  Calculate the 30-day rolling average of the daily minimum temperature and plot it
    alongside the original temperature data.

    * Create a new column 'Rolling_Avg' in the DataFrame containing the 30-day rolling average.
    * Plot both the original 'Temperature' and the 'Rolling_Avg' on the same plot.
    * Use different colors and line styles to distinguish between the two.
    * Add a legend to the plot to label the lines.
    * Add tooltips to display the date, original temperature, and rolling average.

In [4]:
# Question 2: overlay a 30-day rolling average on the raw series.
#
# The window is time-based ('30D' measured on the 'Date' column) rather than a
# fixed count of 30 rows, so it really spans 30 calendar days even when the
# series has missing dates or is not sampled daily. Offset windows require
# monotonic dates, so the average is computed on a date-sorted copy; the
# assignment back into df aligns on the index, leaving df's row order untouched.
rolling_avg = (
    df.sort_values('Date')
    .rolling('30D', on='Date')['Temperature']
    .mean()
)
df['Rolling_Avg'] = rolling_avg

# Rebuild the source so it carries the new 'Rolling_Avg' column.
source = ColumnDataSource(df)

p = figure(
    title="Daily Minimum Temperatures with 30-Day Rolling Average",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Temperature (°C)",
    tools="pan,wheel_zoom,reset"
)
# Raw observations: solid navy line.
p.line(
    x='Date',
    y='Temperature',
    source=source,
    line_width=2,
    color='navy',
    legend_label="Original Temperature"
)
# Smoothed series: dashed orange line so the two are easy to tell apart.
p.line(
    x='Date',
    y='Rolling_Avg',
    source=source,
    line_width=2,
    line_dash='dashed',
    color='orange',
    legend_label="30-Day Rolling Avg"
)
hover = HoverTool(
    tooltips=[
        ("Date", "@Date{%Y-%m-%d}"),
        ("Temp", "@Temperature{0.0} °C"),
        ("Rolling Avg", "@Rolling_Avg{0.0} °C")
    ],
    formatters={
        '@Date': 'datetime',
        '@Temperature': 'numeral',
        '@Rolling_Avg': 'numeral'
    },
    mode='vline'
)
p.add_tools(hover)

show(p)

Question 3: Monthly Box Plots
3.  Create box plots to visualize the distribution of temperatures for each month.

    * Extract the month from the 'Date' column and create a new 'Month' column.
    * Group the data by 'Month' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution.
    * Label the x-axis with month names and the y-axis with "Temperature (°C)".
    * Add tooltips to display the month and relevant statistical values (min, max, median, etc.).

In [5]:
# Question 3: one box per calendar month, built from per-month quartiles.
df['Month'] = df['Date'].dt.month

group = df.groupby('Month')['Temperature']
q1 = group.quantile(0.25)
q2 = group.quantile(0.50)
q3 = group.quantile(0.75)
iqr = q3 - q1
# Whiskers extend 1.5 * IQR beyond the box, clipped to the observed min/max.
upper_whisker = np.minimum(q3 + 1.5 * iqr, group.max())
lower_whisker = np.maximum(q1 - 1.5 * iqr, group.min())

# Month numbers present in the data, mapped to abbreviated names for the
# categorical x-axis.
months = q1.index.tolist()
month_names = [calendar.month_abbr[m] for m in months]
source_box = ColumnDataSource(data=dict(
    month=month_names,
    q1=q1.values,
    q2=q2.values,
    q3=q3.values,
    upper=upper_whisker.values,
    lower=lower_whisker.values
))

p = figure(
    title="Distribution mensuelle des températures",
    x_range=month_names,
    x_axis_label="Mois",
    y_axis_label="Temperature (°C)",
    tools="pan,wheel_zoom,reset"
)

# Whisker stems: from each whisker end to the nearest box edge.
p.segment('month', 'upper', 'month', 'q3', source=source_box, line_color="black")
p.segment('month', 'lower', 'month', 'q1', source=source_box, line_color="black")

# The box is drawn as two bars that meet at the median (q2-q3 above, q1-q2 below).
upper_box = p.vbar(x='month', width=0.7, top='q3', bottom='q2', source=source_box,
                   fill_color="#E08E79", line_color="black")
p.vbar(x='month', width=0.7, top='q2', bottom='q1', source=source_box,
       fill_color="#3B8686", line_color="black")

# Thin marker on the median.
p.rect('month', 'q2', width=0.2, height=0.01, source=source_box, line_color="black")

# Attach the hover tool to a single renderer. Left on the default (all
# renderers), every glyph drawn for a month would open its own copy of the
# same tooltip. In 'vline' mode one bar is enough to cover the whole column.
hover = HoverTool(
    renderers=[upper_box],
    tooltips=[
        ("Mois", "@month"),
        ("Min Whisker", "@lower{0.0} °C"),
        ("Q1", "@q1{0.0} °C"),
        ("Médiane", "@q2{0.0} °C"),
        ("Q3", "@q3{0.0} °C"),
        ("Max Whisker", "@upper{0.0} °C")
    ],
    formatters={
        '@lower': 'numeral',
        '@q1': 'numeral',
        '@q2': 'numeral',
        '@q3': 'numeral',
        '@upper': 'numeral'
    },
    mode='vline'
)
p.add_tools(hover)

show(p)

Question 4: Yearly Box Plots with Color Mapping
4.  Create box plots to visualize the distribution of temperatures for each year,
    and use color mapping to highlight temperature variations.

    * Extract the year from the 'Date' column and create a new 'Year' column.
    * Group the data by 'Year' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution for each year.
    * Label the x-axis with the 'Year' and the y-axis with "Temperature (°C)".
    * Use `factor_cmap` to color the boxes based on the median temperature of each year.
    * Add tooltips to display the year and relevant statistical values (min, max, median, etc.).
    * Enable pan, wheel zoom, and reset tools.

In [6]:
# Question 4: one box per year, coloured through factor_cmap.
df['Year'] = df['Date'].dt.year

group_year = df.groupby('Year')['Temperature']
q1 = group_year.quantile(0.25)
q2 = group_year.quantile(0.50)
q3 = group_year.quantile(0.75)
iqr = q3 - q1
# Whiskers extend 1.5 * IQR beyond the box, clipped to the observed min/max.
upper_whisker = np.minimum(q3 + 1.5 * iqr, group_year.max())
lower_whisker = np.maximum(q1 - 1.5 * iqr, group_year.min())

# Order the years by ascending median so that palette position (and x position)
# follows the median temperature.
medians = q2.sort_values()
years_sorted = medians.index.tolist()
palette = Category20_20[: len(years_sorted)]

# Categorical axes need string factors; build the list once and reuse it for
# the data source, the x-range and the colour mapping.
year_factors = [str(y) for y in years_sorted]

source_year = ColumnDataSource(data=dict(
    Year=year_factors,
    q1=q1.loc[years_sorted].values,
    q2=q2.loc[years_sorted].values,
    q3=q3.loc[years_sorted].values,
    upper=upper_whisker.loc[years_sorted].values,
    lower=lower_whisker.loc[years_sorted].values
))

p = figure(
    title="Distribution annuelle des températures",
    x_range=year_factors,
    x_axis_label="Année",
    y_axis_label="Temperature (°C)",
    tools="pan,wheel_zoom,reset"
)

# Whisker stems: from each whisker end to the nearest box edge.
p.segment('Year', 'upper', 'Year', 'q3', source=source_year, line_color="black")
p.segment('Year', 'lower', 'Year', 'q1', source=source_year, line_color="black")

# Both halves of a box share the same per-year colour.
year_fill = factor_cmap('Year', palette=palette, factors=year_factors)
upper_box = p.vbar(x='Year', width=0.7, top='q3', bottom='q2', source=source_year,
                   fill_color=year_fill, line_color="black")
p.vbar(x='Year', width=0.7, top='q2', bottom='q1', source=source_year,
       fill_color=year_fill, line_color="black")

# Thin marker on the median.
p.rect('Year', 'q2', width=0.2, height=0.01, source=source_year, line_color="black")

# Attach the hover tool to a single renderer. Left on the default (all
# renderers), every glyph drawn for a year would open its own copy of the
# same tooltip. In 'vline' mode one bar is enough to cover the whole column.
hover = HoverTool(
    renderers=[upper_box],
    tooltips=[
        ("Année", "@Year"),
        ("Min Whisker", "@lower{0.0} °C"),
        ("Q1", "@q1{0.0} °C"),
        ("Médiane", "@q2{0.0} °C"),
        ("Q3", "@q3{0.0} °C"),
        ("Max Whisker", "@upper{0.0} °C")
    ],
    formatters={
        '@lower': 'numeral',
        '@q1': 'numeral',
        '@q2': 'numeral',
        '@q3': 'numeral',
        '@upper': 'numeral'
    },
    mode='vline'
)
p.add_tools(hover)

show(p)


Question 5: Interactive Time Range Selection

5.  Create an interactive line plot where the user can select a specific time range
    to view using a date range slider.

    * Create a basic line plot of 'Temperature' over 'Date'.
    * Implement a date range slider using Bokeh widgets to allow users to select a start and end date.
    * Update the plot dynamically based on the selected date range.
    * Add tooltips to display the date and temperature.
    * Enable pan, wheel zoom, and reset tools.

In [7]:
# Question 5: line plot whose visible data is filtered by a date range slider.
#
# Two sources: the full data (read-only, used by the JS callback) and the
# filtered copy that the line glyph actually draws from.
source_complet = ColumnDataSource(df)
source_filtrée = ColumnDataSource(df)

p = figure(
    title="Températures minimales journalières - plage dynamique",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Température (°C)",
    tools="pan,wheel_zoom,reset"
)

p.line('Date', 'Temperature', source=source_filtrée, line_width=2, legend_label="Température")

hover = HoverTool(
    tooltips=[("Date", "@Date{%Y-%m-%d}"), ("Température", "@Temperature{0.0} °C")],
    formatters={'@Date': 'datetime', '@Temperature': 'numeral'},
    mode='vline'
)
p.add_tools(hover)

date_min = df['Date'].min()
date_max = df['Date'].max()
curseur_plage = DateRangeSlider(
    title="Sélection de la plage de dates",
    start=date_min,
    end=date_max,
    value=(date_min, date_max),
    # DateRangeSlider expresses `step` in days, not milliseconds. Passing
    # 24*60*60*1000 here requested a step of 86.4 million days, which leaves
    # no selectable position between the two ends of the slider.
    step=1
)

# Browser-side filter: copy the rows whose date lies inside the selected range
# from the full source into the filtered one. Only the two columns used by the
# glyph and the tooltips are copied.
callback = CustomJS(args=dict(source_complet=source_complet, source_filtrée=source_filtrée), code="""
    const data_complet = source_complet.data;
    const new_data = { Date: [], Temperature: [] };
    const [start, end] = cb_obj.value;
    for (let i = 0; i < data_complet['Date'].length; i++) {
        const d = data_complet['Date'][i];
        if (d >= start && d <= end) {
            new_data['Date'].push(d);
            new_data['Temperature'].push(data_complet['Temperature'][i]);
        }
    }
    source_filtrée.data = new_data;
    source_filtrée.change.emit();
""")
curseur_plage.js_on_change('value', callback)

# Slider on top, plot underneath.
layout = column(curseur_plage, p)
show(layout)

Question 6: Time Series Decomposition Visualization

6.  Perform a simple time series decomposition to visualize the trend and seasonality
    components of the temperature data.

    * Resample the data to monthly frequency and calculate the monthly average temperature.
    * Use a simple moving average to estimate the trend component.
    * Calculate the seasonal component by subtracting the trend from the original monthly data.
    * Create three separate Bokeh plots: one for the original monthly data, one for the trend,
        and one for the seasonal component.
    * Ensure the plots are aligned and share the same x-axis (Date).
    * Add tooltips to each plot to display the date and corresponding value.
    * Enable pan, wheel zoom, and reset tools for each plot.

In [8]:
# Question 6: simple additive decomposition (monthly mean, trend, remainder).
#
# Resample to month-end bins using an explicit offset object. The string alias
# 'M' is deprecated in recent pandas and emits a FutureWarning, whereas
# pd.offsets.MonthEnd() yields the same bins on every pandas version.
df_mensuel = df.set_index('Date').resample(pd.offsets.MonthEnd()).mean().reset_index()

# Trend: centred 12-month moving average. min_periods=1 keeps values at both
# ends of the series, where the window is only partially filled.
df_mensuel['Tendance'] = df_mensuel['Temperature'].rolling(window=12, center=True, min_periods=1).mean()

# Seasonal component: what is left after removing the trend.
df_mensuel['seasonal'] = df_mensuel['Temperature'] - df_mensuel['Tendance']

# One source per plot, each holding only the columns that plot needs.
source_mensuel = ColumnDataSource(df_mensuel[['Date', 'Temperature']])
source_tendance = ColumnDataSource(df_mensuel[['Date', 'Tendance']])
source_seasonal = ColumnDataSource(df_mensuel[['Date', 'seasonal']])

p_orig = figure(
    title="Données mensuelles originales",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Température (°C)",
    tools="pan,wheel_zoom,reset"
)
# The trend and seasonal plots reuse p_orig's x_range so that panning or
# zooming any one plot moves all three together.
p_trend = figure(
    title="Composante Tendance",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Tendance (°C)",
    x_range=p_orig.x_range,
    tools="pan,wheel_zoom,reset"
)
p_saison = figure(
    title="Composante Saisonnière",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Anomalie (°C)",
    x_range=p_orig.x_range,
    tools="pan,wheel_zoom,reset"
)

p_orig.line('Date', 'Temperature', source=source_mensuel, line_width=2, legend_label="Original")
p_trend.line('Date', 'Tendance', source=source_tendance, line_width=2, line_dash='dashed', legend_label="Tendance")
p_saison.line('Date', 'seasonal', source=source_seasonal, line_width=2, line_dash='dotted', legend_label="Saisonnalité")

# Each figure gets its own hover tool bound to its own value column.
hover_orig = HoverTool(
    tooltips=[("Date", "@Date{%Y-%m}"), ("Température", "@Temperature{0.0} °C")],
    formatters={'@Date': 'datetime', '@Temperature': 'numeral'},
    mode='vline'
)
hover_trend = HoverTool(
    tooltips=[("Date", "@Date{%Y-%m}"), ("Tendance", "@Tendance{0.0} °C")],
    formatters={'@Date': 'datetime', '@Tendance': 'numeral'},
    mode='vline'
)
hover_saison = HoverTool(
    tooltips=[("Date", "@Date{%Y-%m}"), ("Saisonnalité", "@seasonal{0.0} °C")],
    formatters={'@Date': 'datetime', '@seasonal': 'numeral'},
    mode='vline'
)
p_orig.add_tools(hover_orig)
p_trend.add_tools(hover_trend)
p_saison.add_tools(hover_saison)

# Stack the three plots vertically.
layout = column(p_orig, p_trend, p_saison)
show(layout)


  df_mensuel = df.set_index('Date').resample('M').mean().reset_index()
