# Exam: Time Series Visualization with Bokeh

This exam tests your ability to visualize time series data using the Bokeh library.
You will be working with the "Daily Minimum Temperatures in Melbourne" dataset.
For each question, provide the Python code using Bokeh to generate the requested visualization.

**Dataset:** "daily-minimum-temperatures-in-melbourne.csv"

```python
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    DatetimeTickFormatter,
    NumeralTickFormatter,
)
from bokeh.layouts import row, column
from bokeh.transform import factor_cmap

output_notebook()  # Enable Bokeh output in Jupyter Notebook

# Load the Dataset
df = pd.read_csv("daily-minimum-temperatures-in-melbourne.csv")

# Rename columns for clarity
df.columns = ['Date', 'Temperature']

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' from the 'Temperature' column and convert to numeric
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

Question 1: Basic Time Series Line Plot
1.  Create a basic line plot showing the daily minimum temperature over time.

    * Use the 'Date' column on the x-axis and the 'Temperature' column on the y-axis.
    * Set the plot title to "Daily Minimum Temperatures".
    * Label the x-axis as "Date" and the y-axis as "Temperature (°C)".
    * Add tooltips to display the date and temperature when hovering over the line.
    * Enable pan, wheel zoom, and reset tools.


In [2]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.io import output_notebook

# Affichage dans un notebook (à commenter si script .py)
output_notebook()

df = pd.read_csv('datasets/daily-minimum-temperatures-in-melbourne.csv')

# Vérifie les noms de colonnes
print(df.columns)

df['Date'] = pd.to_datetime(df['Date'], errors='coerce')  # 'coerce' évite les crash si erreurs

df = df.dropna(subset=['Date', 'DailyTemperature'])

source = ColumnDataSource(df)

p = figure(title="Daily Minimum Temperatures",
           x_axis_label='Date',
           y_axis_label='Temperature (°C)',
           x_axis_type='datetime',
           width=800, height=400,
           tools="pan,wheel_zoom,reset")

p.line(x='Date', y='DailyTemperature', source=source,
       line_width=2, color="tomato", legend_label="Température")

hover = HoverTool(
    tooltips=[
        ("Date", "@Date{%F}"),
        ("Température", "@DailyTemperature °C")
    ],
    formatters={'@Date': 'datetime'},
    mode='vline'
)
p.add_tools(hover)

p.legend.location = "top_left"

show(p)


Index(['Date', 'DailyTemperature'], dtype='object')


Question 2: Rolling Average
2.  Calculate the 30-day rolling average of the daily minimum temperature and plot it
    alongside the original temperature data.

    * Create a new column 'Rolling_Avg' in the DataFrame containing the 30-day rolling average.
    * Plot both the original 'Temperature' and the 'Rolling_Avg' on the same plot.
    * Use different colors and line styles to distinguish between the two.
    * Add a legend to the plot to label the lines.
    * Add tooltips to display the date, original temperature, and rolling average.

In [3]:
# Answer 2: (Provide code here)

import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.io import output_notebook

# Affichage en notebook
output_notebook()

# === Lecture et préparation des données ===
df = pd.read_csv('datasets/daily-minimum-temperatures-in-melbourne.csv')  # Assure-toi du bon nom

df['Date'] = pd.to_datetime(df['Date'])

# Convertir la colonne DailyTemperature en float, invalider les mauvaises valeurs
df['DailyTemperature'] = pd.to_numeric(df['DailyTemperature'], errors='coerce')

# Supprimer les lignes avec des températures manquantes ou invalides
df = df.dropna(subset=['DailyTemperature'])


# === Calcul de la moyenne mobile sur 30 jours ===
df['Rolling_Avg'] = df['DailyTemperature'].rolling(window=30).mean()


# === Création du ColumnDataSource ===
source = ColumnDataSource(df)

# === Création du graphique ===
p = figure(title="Daily Temperatures and 30-Day Rolling Average",
           x_axis_label='Date',
           y_axis_label='Temperature (°C)',
           x_axis_type='datetime',
           width=800, height=400,
           tools="pan,wheel_zoom,reset")

# === Tracés des lignes ===
p.line(x='Date', y='DailyTemperature', source=source,
       line_width=2, color="tomato", legend_label="Température")

p.line(x='Date', y='Rolling_Avg', source=source,
       line_width=2, color="navy", line_dash="dashed", legend_label="Moyenne mobile (30j)")

# === Tooltips dynamiques ===
hover = HoverTool(
    tooltips=[
        ("Date", "@Date{%F}"),
        ("Température", "@DailyTemperature °C"),
        ("Moyenne mobile", "@Rolling_Avg{0.0} °C")
    ],
    formatters={'@Date': 'datetime'},
    mode='vline'
)
p.add_tools(hover)

p.legend.location = "top_left"

# === Affichage ===
show(p)


Question 3: Monthly Box Plots
3.  Create box plots to visualize the distribution of temperatures for each month.

    * Extract the month from the 'Date' column and create a new 'Month' column.
    * Group the data by 'Month' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution.
    * Label the x-axis with month names and the y-axis with "Temperature (°C)".
    * Add tooltips to display the month and relevant statistical values (min, max, media

In [4]:
# Answer 3: (Provide code here)

import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.io import output_notebook
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10

output_notebook()

# === Lecture et préparation des données ===
df = pd.read_csv('datasets/daily-minimum-temperatures-in-melbourne.csv')
df['Date'] = pd.to_datetime(df['Date'])
df['DailyTemperature'] = pd.to_numeric(df['DailyTemperature'], errors='coerce')
df = df.dropna(subset=['DailyTemperature'])

# === Extraire le mois et nom du mois ===
df['Month'] = df['Date'].dt.month
df['MonthName'] = df['Date'].dt.strftime('%B')  # Pour noms comme 'January'

# Ordre des mois
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']

# === Calcul des stats pour box plot ===
groups = df.groupby('MonthName')
box_data = []

for month in month_order:
    month_data = df[df['MonthName'] == month]['DailyTemperature']
    q1 = month_data.quantile(0.25)
    q2 = month_data.quantile(0.50)
    q3 = month_data.quantile(0.75)
    iqr = q3 - q1
    upper = min(month_data.max(), q3 + 1.5 * iqr)
    lower = max(month_data.min(), q1 - 1.5 * iqr)
    
    box_data.append({
        'month': month,
        'q1': q1,
        'q2': q2,
        'q3': q3,
        'upper': upper,
        'lower': lower
    })

box_df = pd.DataFrame(box_data)
source = ColumnDataSource(box_df)

# === Création du graphique box plot ===
p = figure(x_range=month_order, width=900, height=400,
           title="Distribution des températures par mois",
           y_axis_label="Temperature (°C)",
           tools="pan,box_zoom,reset")

# Box (IQR)
p.vbar(x='month', width=0.7, top='q3', bottom='q1', source=source,
       fill_color=factor_cmap('month', palette=Category10[10], factors=month_order),
       line_color="black")

# Median
p.segment(x0='month', x1='month', y0='q2', y1='q2', source=source, color="black", line_width=2)

# Whiskers
p.segment(x0='month', x1='month', y0='upper', y1='q3', source=source, color="black")
p.segment(x0='month', x1='month', y0='lower', y1='q1', source=source, color="black")

# Tooltips
hover = HoverTool(tooltips=[
    ("Mois", "@month"),
    ("Min", "@lower{0.0} °C"),
    ("Q1", "@q1{0.0} °C"),
    ("Médiane", "@q2{0.0} °C"),
    ("Q3", "@q3{0.0} °C"),
    ("Max", "@upper{0.0} °C")
])
p.add_tools(hover)

# Affichage
show(p)




In [5]:
from bokeh.plotting import output_notebook, show
import pandas as pd

output_notebook()  # Enable Bokeh output in Jupyter Notebook

# Load the Dataset
df = pd.read_csv("./datasets/daily-minimum-temperatures-in-melbourne.csv")

# Now you can proceed with your Bokeh plotting code!
print(df.head()) # Just to see if the dataframe loaded correctly


# Rename columns for clarity
df.columns = ['Date', 'Temperature']

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' from the 'Temperature' column and convert to numeric
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

df

         Date DailyTemperature
0  1981-01-01             20.7
1  1981-01-02             17.9
2  1981-01-03             18.8
3  1981-01-04             14.6
4  1981-01-05             15.8


Unnamed: 0,Date,Temperature
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8
...,...,...
3645,1990-12-27,14.0
3646,1990-12-28,13.6
3647,1990-12-29,13.5
3648,1990-12-30,15.7


4.  Create box plots to visualize the distribution of temperatures for each year,
    and use color mapping to highlight temperature variations.

    * Extract the year from the 'Date' column and create a new 'Year' column.
    * Group the data by 'Year' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution for each year.
    * Label the x-axis with the 'Year' and the y-axis with "Temperature (°C)".
    * Use `factor_cmap` to color the boxes based on the median temperature of each year.
    * Add tooltips to display the year and relevant statistical values (min, max, median, etc.).
    * Enable pan, wheel zoom, and reset tools.

In [6]:
# Answer 4: (Provide code here)

import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.transform import factor_cmap
from bokeh.palettes import Viridis256
from bokeh.io import output_notebook

output_notebook()

# === Chargement et nettoyage des données ===
df = pd.read_csv('datasets/daily-minimum-temperatures-in-melbourne.csv')
df['Date'] = pd.to_datetime(df['Date'])
df['DailyTemperature'] = pd.to_numeric(df['DailyTemperature'], errors='coerce')
df = df.dropna(subset=['DailyTemperature'])

# === Extraire l'année ===
df['Year'] = df['Date'].dt.year.astype(str)  # en str pour l’axe catégorique

# === Calcul des statistiques par année ===
year_stats = []
for year, group in df.groupby('Year'):
    temps = group['DailyTemperature']
    q1 = temps.quantile(0.25)
    q2 = temps.quantile(0.5)  # médiane
    q3 = temps.quantile(0.75)
    iqr = q3 - q1
    upper = min(temps.max(), q3 + 1.5 * iqr)
    lower = max(temps.min(), q1 - 1.5 * iqr)
    year_stats.append({
        'Year': year,
        'q1': q1,
        'q2': q2,
        'q3': q3,
        'upper': upper,
        'lower': lower
    })

stats_df = pd.DataFrame(year_stats)
stats_df = stats_df.sort_values(by='Year')
source = ColumnDataSource(stats_df)

# === Génération du box plot avec Bokeh ===
p = figure(x_range=stats_df['Year'], width=950, height=400,
           title="Distribution des températures par année",
           y_axis_label="Temperature (°C)",
           tools="pan,wheel_zoom,reset")

# === Color mapping basé sur la médiane ===
color_map = factor_cmap('Year', palette=Viridis256, factors=stats_df['Year'].tolist(), 
                        start=0, end=len(stats_df['Year']))

# === Boîtes (Q1–Q3) ===
p.vbar(x='Year', width=0.7, top='q3', bottom='q1', source=source,
       fill_color=color_map, line_color="black")

# === Médiane ===
p.segment(x0='Year', x1='Year', y0='q2', y1='q2', source=source, color="black", line_width=2)

# === Moustaches ===
p.segment(x0='Year', x1='Year', y0='upper', y1='q3', source=source, color="black")
p.segment(x0='Year', x1='Year', y0='lower', y1='q1', source=source, color="black")

# === Tooltips dynamiques ===
hover = HoverTool(tooltips=[
    ("Année", "@Year"),
    ("Min", "@lower{0.0} °C"),
    ("Q1", "@q1{0.0} °C"),
    ("Médiane", "@q2{0.0} °C"),
    ("Q3", "@q3{0.0} °C"),
    ("Max", "@upper{0.0} °C")
])
p.add_tools(hover)

# === Affichage ===
show(p)


Question 5: Interactive Time Range Selection

5.  Create an interactive line plot where the user can select a specific time range
    to view using a date range slider.

    * Create a basic line plot of 'Temperature' over 'Date'.
    * Implement a date range slider using Bokeh widgets to allow users to select a start and end date.
    * Update the plot dynamically based on the selected date range.
    * Add tooltips to display the date and temperature.
    * Enable pan, wheel zoom, and reset tools.

In [7]:
# Answer 5: (Provide code here)

import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool, DateRangeSlider
from bokeh.layouts import column
from bokeh.io import output_notebook, curdoc
from bokeh.models.callbacks import CustomJS

output_notebook()

# === Lecture et préparation des données ===
df = pd.read_csv('datasets/daily-minimum-temperatures-in-melbourne.csv')
df['Date'] = pd.to_datetime(df['Date'])
df['DailyTemperature'] = pd.to_numeric(df['DailyTemperature'], errors='coerce')
df = df.dropna(subset=['DailyTemperature'])

# === Sources ===
source_all = ColumnDataSource(df)
source_filtered = ColumnDataSource(df.copy())  # Initial copy

# === Création de la figure ===
p = figure(title="Daily Minimum Temperatures - Interactive",
           x_axis_label='Date',
           y_axis_label='Temperature (°C)',
           x_axis_type='datetime',
           width=900, height=400,
           tools="pan,wheel_zoom,reset")

p.line(x='Date', y='DailyTemperature', source=source_filtered,
       line_width=2, color="tomato")

# === Tooltips ===
hover = HoverTool(tooltips=[
    ("Date", "@Date{%F}"),
    ("Température", "@DailyTemperature °C")
], formatters={'@Date': 'datetime'})
p.add_tools(hover)

# === Slider interactif ===
date_slider = DateRangeSlider(title="Sélection de la plage de dates",
                              start=df['Date'].min(), end=df['Date'].max(),
                              value=(df['Date'].min(), df['Date'].max()),
                              step=1)

# === Callback Python pour filtrer les données ===
def update(attr, old, new):
    start, end = date_slider.value_as_datetime
    filtered = df[(df['Date'] >= start) & (df['Date'] <= end)]
    source_filtered.data = ColumnDataSource.from_df(filtered)

date_slider.on_change('value', update)

# === Mise en page ===
layout = column(date_slider, p)
show(layout)


You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/js_callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



Question 6: Time Series Decomposition Visualization

6.  Perform a simple time series decomposition to visualize the trend and seasonality
    components of the temperature data.

    * Resample the data to monthly frequency and calculate the monthly average temperature.
    * Use a simple moving average to estimate the trend component.
    * Calculate the seasonal component by subtracting the trend from the original monthly data.
    * Create three separate Bokeh plots: one for the original monthly data, one for the trend,
        and one for the seasonal component.
    * Ensure the plots are aligned and share the same x-axis (Date).
    * Add tooltips to each plot to display the date and corresponding value.
    * Enable pan, wheel zoom, and reset tools for each plot.

In [8]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from bokeh.io import output_notebook

output_notebook()

# === Lecture et prétraitement ===
df = pd.read_csv('datasets/daily-minimum-temperatures-in-melbourne.csv')
df['Date'] = pd.to_datetime(df['Date'])
df['DailyTemperature'] = pd.to_numeric(df['DailyTemperature'], errors='coerce')
df = df.dropna(subset=['DailyTemperature'])

# === Resample mensuel ===
monthly_df = df.resample('M', on='Date').mean().reset_index()

# === Tendance par moyenne mobile (simple) sur 12 mois ===
monthly_df['Trend'] = monthly_df['DailyTemperature'].rolling(window=12, center=True).mean()

# === Saison (approximation) = Donnée - Tendance ===
monthly_df['Seasonality'] = monthly_df['DailyTemperature'] - monthly_df['Trend']

# === Sources Bokeh ===
source = ColumnDataSource(monthly_df)

# === Fonction pour créer un graphe avec HoverTool partagé ===
def make_plot(y, title, color):
    p = figure(title=title,
               x_axis_type='datetime',
               width=900, height=250,
               tools="pan,wheel_zoom,reset")
    
    p.line(x='Date', y=y, source=source, color=color, line_width=2)

    hover = HoverTool(tooltips=[
        ("Date", "@Date{%F}"),
        (title, f"@{y}{{0.00}} °C")
    ], formatters={'@Date': 'datetime'})
    
    p.add_tools(hover)
    p.yaxis.axis_label = "Température (°C)"
    return p

# === Création des 3 graphiques ===
p1 = make_plot('DailyTemperature', "Température moyenne mensuelle", "tomato")
p2 = make_plot('Trend', "Composante de tendance (Moyenne mobile 12 mois)", "navy")
p3 = make_plot('Seasonality', "Composante saisonnière (approx.)", "seagreen")

# === Affichage en colonne alignée ===
show(column(p1, p2, p3))


  monthly_df = df.resample('M', on='Date').mean().reset_index()
