# Exam: Time Series Visualization with Bokeh

This exam tests your ability to visualize time series data using the Bokeh library.
You will be working with the "Daily Minimum Temperatures in Melbourne" dataset.
For each question, provide the Python code using Bokeh to generate the requested visualization.

**Dataset:** "daily-minimum-temperatures-in-melbourne.csv"

```python
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    DatetimeTickFormatter,
    NumeralTickFormatter,
)
from bokeh.layouts import row, column
from bokeh.transform import factor_cmap

output_notebook()  # Enable Bokeh output in Jupyter Notebook

# Load the Dataset
df = pd.read_csv("daily-minimum-temperatures-in-melbourne.csv")

# Rename columns for clarity
df.columns = ['Date', 'Temperature']

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' from the 'Temperature' column and convert to numeric
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

In [1]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    DatetimeTickFormatter,
    NumeralTickFormatter,
)
from bokeh.layouts import row, column
from bokeh.transform import factor_cmap

output_notebook()  # Enable Bokeh output in Jupyter Notebook

# Load the Dataset
df = pd.read_csv('datasets/daily-minimum-temperatures-in-melbourne.csv')

# Rename columns for clarity
df.columns = ['Date', 'Temperature']

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' from the 'Temperature' column and convert to numeric
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

Question 1: Basic Time Series Line Plot
1.  Create a basic line plot showing the daily minimum temperature over time.

    * Use the 'Date' column on the x-axis and the 'Temperature' column on the y-axis.
    * Set the plot title to "Daily Minimum Temperatures".
    * Label the x-axis as "Date" and the y-axis as "Temperature (°C)".
    * Add tooltips to display the date and temperature when hovering over the line.
    * Enable pan, wheel zoom, and reset tools.


In [2]:
# Réponse 1
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool

source = ColumnDataSource(df)

p = figure(
    title="Daily Minimum Temperatures",
    x_axis_label="Date",
    y_axis_label="Temperature (°C)",
    x_axis_type="datetime",
    tools="pan,wheel_zoom,reset,save",
    width=800,
    height=400
)

line = p.line(x='Date', y='Temperature', source=source, 
              line_width=1.5, color='navy', legend_label='Température quotidienne')

hover = HoverTool(
    tooltips=[
        ("Date", "@Date{%F}"),
        ("Température", "@Temperature{0.0} °C")
    ],
    formatters={'@Date': 'datetime'},
    mode='vline'
)
p.add_tools(hover)

p.xaxis.formatter = DatetimeTickFormatter(months="%b %Y", days="%d %b")
p.yaxis.formatter = NumeralTickFormatter(format="0.0")

p.legend.location = "top_left"
show(p)

Question 2: Rolling Average
2.  Calculate the 30-day rolling average of the daily minimum temperature and plot it
    alongside the original temperature data.

    * Create a new column 'Rolling_Avg' in the DataFrame containing the 30-day rolling average.
    * Plot both the original 'Temperature' and the 'Rolling_Avg' on the same plot.
    * Use different colors and line styles to distinguish between the two.
    * Add a legend to the plot to label the lines.
    * Add tooltips to display the date, original temperature, and rolling average.

In [3]:
# Réponse 2
df['Rolling_Avg'] = df['Temperature'].rolling(window=30, min_periods=1).mean()

source_avg = ColumnDataSource(df)

p = figure(
    title="Températures avec moyenne mobile (30 jours)",
    x_axis_type="datetime",
    width=800,
    height=400,
    tools="pan,wheel_zoom,reset,save"
)

p.line('Date', 'Temperature', source=source_avg, 
       color='navy', alpha=0.4, legend_label='Quotidienne')

p.line('Date', 'Rolling_Avg', source=source_avg, 
       color='red', line_width=2, legend_label='Moyenne mobile (30j)')

hover = HoverTool(
    tooltips=[
        ("Date", "@Date{%F}"),
        ("Température", "@Temperature{0.0} °C"),
        ("Moyenne", "@Rolling_Avg{0.0} °C")
    ],
    formatters={'@Date': 'datetime'}
)
p.add_tools(hover)

p.yaxis.axis_label = "Température (°C)"
p.xaxis.axis_label = "Date"
p.xaxis.formatter = DatetimeTickFormatter(months="%b %Y")

show(p)


Question 3: Monthly Box Plots
3.  Create box plots to visualize the distribution of temperatures for each month.

    * Extract the month from the 'Date' column and create a new 'Month' column.
    * Group the data by 'Month' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution.
    * Label the x-axis with month names and the y-axis with "Temperature (°C)".
    * Add tooltips to display the month and relevant statistical values (min, max, media

In [4]:
# Réponse 3
from bokeh.models import Whisker
import pandas as pd

df['Month'] = df['Date'].dt.month_name()
df['Month_num'] = df['Date'].dt.month  # Pour le tri
df = df.sort_values('Month_num')

groups = df.groupby('Month')['Temperature']
q = groups.quantile([0.25, 0.5, 0.75]).unstack()
q = q.rename(columns={0.25: 'q1', 0.5: 'median', 0.75: 'q3'}).reset_index()

q['upper'] = groups.max()
q['lower'] = groups.min()

source_box = ColumnDataSource(q)

p = figure(
    x_range=q['Month'].tolist(),  # Ordre chronologique garanti
    title="Distribution mensuelle des températures",
    width=800,
    height=400,
    tools=""
)

p.vbar(
    x='Month', 
    top='q3', 
    bottom='q1', 
    source=source_box, 
    width=0.7, 
    alpha=0.5,
    line_color="black", 
    fill_color="navy"
)

p.segment(
    x0='Month', y0='median', 
    x1='Month', y1='median',
    source=source_box, 
    line_width=2, 
    color='red'
)

whisker = Whisker(
    base='Month',
    upper='upper',
    lower='lower',
    source=source_box,
    line_color="black"
)
p.add_layout(whisker)

p.xaxis.axis_label = "Mois"
p.yaxis.axis_label = "Température (°C)"
p.xgrid.grid_line_color = None

show(p)

In [5]:
from bokeh.plotting import output_notebook, show
import pandas as pd

output_notebook()  # Enable Bokeh output in Jupyter Notebook

# Load the Dataset
df = pd.read_csv("./datasets/daily-minimum-temperatures-in-melbourne.csv")

# Now you can proceed with your Bokeh plotting code!
print(df.head()) # Just to see if the dataframe loaded correctly


# Rename columns for clarity
df.columns = ['Date', 'Temperature']

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' from the 'Temperature' column and convert to numeric
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

df

         Date DailyTemperature
0  1981-01-01             20.7
1  1981-01-02             17.9
2  1981-01-03             18.8
3  1981-01-04             14.6
4  1981-01-05             15.8


Unnamed: 0,Date,Temperature
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8
...,...,...
3645,1990-12-27,14.0
3646,1990-12-28,13.6
3647,1990-12-29,13.5
3648,1990-12-30,15.7


4.  Create box plots to visualize the distribution of temperatures for each year,
    and use color mapping to highlight temperature variations.

    * Extract the year from the 'Date' column and create a new 'Year' column.
    * Group the data by 'Year' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution for each year.
    * Label the x-axis with the 'Year' and the y-axis with "Temperature (°C)".
    * Use `factor_cmap` to color the boxes based on the median temperature of each year.
    * Add tooltips to display the year and relevant statistical values (min, max, median, etc.).
    * Enable pan, wheel zoom, and reset tools.

In [6]:
# Réponse 4
from bokeh.transform import factor_cmap
from bokeh.palettes import Plasma256

df['Year'] = df['Date'].dt.year.astype(str)
years = sorted(df['Year'].unique())

groups = df.groupby('Year')['Temperature']
stats = groups.agg(['min', 'max', 'median', lambda x: x.quantile(0.25), lambda x: x.quantile(0.75)])
stats = stats.rename(columns={
    'median': 'q2',
    '<lambda_0>': 'q1',
    '<lambda_1>': 'q3'
}).reset_index()

colors = factor_cmap(
    'Year', 
    palette=Plasma256[::len(Plasma256)//len(years)], 
    factors=years
)

source = ColumnDataSource(stats)

p = figure(
    x_range=years,
    title="Distribution annuelle avec coloration",
    width=800,
    height=400,
    tools="pan,wheel_zoom,reset"
)

p.vbar(
    x='Year', 
    top='q3', 
    bottom='q1', 
    source=source,
    width=0.7, 
    line_color="black", 
    fill_color=colors,
    legend_field='Year'
)

p.segment(
    x0='Year', y0='q2',
    x1='Year', y1='q2',
    source=source,
    line_width=3,
    color='white'
)

whisker = Whisker(
    base='Year',
    upper='max',
    lower='min',
    source=source,
    line_color="black"
)
p.add_layout(whisker)

p.xaxis.axis_label = "Année"
p.yaxis.axis_label = "Température (°C)"
p.xgrid.grid_line_color = None
p.legend.orientation = "horizontal"

show(p)

Question 5: Interactive Time Range Selection

5.  Create an interactive line plot where the user can select a specific time range
    to view using a date range slider.

    * Create a basic line plot of 'Temperature' over 'Date'.
    * Implement a date range slider using Bokeh widgets to allow users to select a start and end date.
    * Update the plot dynamically based on the selected date range.
    * Add tooltips to display the date and temperature.
    * Enable pan, wheel zoom, and reset tools.

In [7]:
# Réponse 5
from bokeh.models import DateRangeSlider, CustomJS
from bokeh.layouts import column
from bokeh.plotting import figure

p = figure(
    title="Températures avec sélection de plage temporelle",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Température (°C)",
    tools="pan,wheel_zoom,reset,save",
    width=800,
    height=400
)

p.line(x='Date', y='Temperature', source=ColumnDataSource(df), 
       line_width=1, color='navy', legend_label='Température quotidienne')

date_slider = DateRangeSlider(
    title="Sélectionnez la plage temporelle",
    start=df['Date'].min(),
    end=df['Date'].max(),
    value=(df['Date'].min(), df['Date'].max()),
    step=1,
    width=800
)

callback = CustomJS(args=dict(x_range=p.x_range), code="""
    x_range.start = cb_obj.value[0]
    x_range.end = cb_obj.value[1]
""")
date_slider.js_on_change('value', callback)

show(column(date_slider, p))

Question 6: Time Series Decomposition Visualization

6.  Perform a simple time series decomposition to visualize the trend and seasonality
    components of the temperature data.

    * Resample the data to monthly frequency and calculate the monthly average temperature.
    * Use a simple moving average to estimate the trend component.
    * Calculate the seasonal component by subtracting the trend from the original monthly data.
    * Create three separate Bokeh plots: one for the original monthly data, one for the trend,
        and one for the seasonal component.
    * Ensure the plots are aligned and share the same x-axis (Date).
    * Add tooltips to each plot to display the date and corresponding value.
    * Enable pan, wheel zoom, and reset tools for each plot.

In [8]:
# Réponse 6
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool, DatetimeTickFormatter
from bokeh.layouts import column
import pandas as pd

df = pd.read_csv('datasets/daily-minimum-temperatures-in-melbourne.csv')
df.columns = ['Date', 'Temperature']
df['Date'] = pd.to_datetime(df['Date'])
df['Temperature'] = df['Temperature'].astype(str).str.replace('?', '', regex=False)
df['Temperature'] = pd.to_numeric(df['Temperature'])

monthly_df = df.resample('M', on='Date').mean().reset_index()

monthly_df['Trend'] = monthly_df['Temperature'].rolling(window=12, center=True, min_periods=1).mean()

monthly_df['Seasonal'] = monthly_df['Temperature'] - monthly_df['Trend']

source_orig = ColumnDataSource(monthly_df)
source_trend = ColumnDataSource(monthly_df)
source_seasonal = ColumnDataSource(monthly_df)

p1 = figure(title="Température mensuelle moyenne", x_axis_type="datetime",
            width=800, height=250, tools="pan,wheel_zoom,reset", toolbar_location="above")
p1.line(x='Date', y='Temperature', source=source_orig, color='navy')
p1.add_tools(HoverTool(tooltips=[("Date", "@Date{%F}"), ("Température", "@Temperature{0.0} °C")],
                       formatters={'@Date': 'datetime'}))
p1.yaxis.axis_label = "Température (°C)"

p2 = figure(title="Composante de tendance", x_axis_type="datetime",
            width=800, height=250, tools="pan,wheel_zoom,reset", toolbar_location="above",
            x_range=p1.x_range)
p2.line(x='Date', y='Trend', source=source_trend, color='green')
p2.add_tools(HoverTool(tooltips=[("Date", "@Date{%F}"), ("Tendance", "@Trend{0.0} °C")],
                       formatters={'@Date': 'datetime'}))
p2.yaxis.axis_label = "Tendance (°C)"

p3 = figure(title="Composante saisonnière", x_axis_type="datetime",
            width=800, height=250, tools="pan,wheel_zoom,reset", toolbar_location="above",
            x_range=p1.x_range)
p3.line(x='Date', y='Seasonal', source=source_seasonal, color='orange')
p3.add_tools(HoverTool(tooltips=[("Date", "@Date{%F}"), ("Saisonnalité", "@Seasonal{0.0} °C")],
                       formatters={'@Date': 'datetime'}))
p3.yaxis.axis_label = "Saisonnalité (°C)"

show(column(p1, p2, p3))

  monthly_df = df.resample('M', on='Date').mean().reset_index()
