# Exam: Time Series Visualization with Bokeh

This exam tests your ability to visualize time series data using the Bokeh library.
You will be working with the "Daily Minimum Temperatures in Melbourne" dataset.
For each question, provide the Python code using Bokeh to generate the requested visualization.

**Dataset:** "daily-minimum-temperatures-in-melbourne.csv"

```python
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    DatetimeTickFormatter,
    NumeralTickFormatter,
)
from bokeh.layouts import row, column
from bokeh.transform import factor_cmap

# Render Bokeh figures inline in the Jupyter notebook
output_notebook()

# Read the raw CSV and give its two columns short, descriptive names
df = pd.read_csv("daily-minimum-temperatures-in-melbourne.csv")
df.columns = ['Date', 'Temperature']

# Strip any '?' characters from the readings so they can be parsed as numbers
cleaned_temperature = df['Temperature'].astype(str).str.replace('?', '', regex=False)

# Replace both columns with their typed versions (datetime / numeric)
df = df.assign(
    Date=pd.to_datetime(df['Date']),
    Temperature=pd.to_numeric(cleaned_temperature),
)

```

In [None]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    DatetimeTickFormatter,
    NumeralTickFormatter,
)
from bokeh.layouts import row, column
from bokeh.transform import factor_cmap
# Show Bokeh output inline in the notebook
output_notebook()

# Load the dataset and normalise the column names
df = pd.read_csv("datasets/daily-minimum-temperatures-in-melbourne.csv")
df.columns = ['Date', 'Temperature']

# Parse the dates
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' characters from the readings, then convert them to numbers
df['Temperature'] = pd.to_numeric(
    df['Temperature'].astype(str).str.replace('?', '', regex=False)
)

# Quick sanity check of the first 15 rows
print(df.head(15))

         Date  Temperature
0  1981-01-01         20.7
1  1981-01-02         17.9
2  1981-01-03         18.8
3  1981-01-04         14.6
4  1981-01-05         15.8
5  1981-01-06         15.8
6  1981-01-07         15.8
7  1981-01-08         17.4
8  1981-01-09         21.8
9  1981-01-10         20.0
10 1981-01-11         16.2
11 1981-01-12         13.3
12 1981-01-13         16.7
13 1981-01-14         21.5
14 1981-01-15         25.0


Question 1: Basic Time Series Line Plot
1.  Create a basic line plot showing the daily minimum temperature over time.

    * Use the 'Date' column on the x-axis and the 'Temperature' column on the y-axis.
    * Set the plot title to "Daily Minimum Temperatures".
    * Label the x-axis as "Date" and the y-axis as "Temperature (°C)".
    * Add tooltips to display the date and temperature when hovering over the line.
    * Enable pan, wheel zoom, and reset tools.


In [None]:
output_notebook()

# Expose the whole DataFrame to Bokeh so glyphs and tooltips can refer to columns by name
source = ColumnDataSource(df)

# Datetime x-axis; only the pan, wheel-zoom and reset tools are enabled
p = figure(
    width=800,
    height=400,
    x_axis_type="datetime",
    title="Daily Minimum Temperatures",
    tools="pan,wheel_zoom,reset",
)
p.xaxis.axis_label = "Date"
p.yaxis.axis_label = "Temperature (°C)"

p.line(x='Date', y='Temperature', source=source, line_width=2, color='navy', legend_label="Temperature")

# 'vline' mode reports the point under the cursor's vertical position;
# the "datetime" formatter makes {%F} render the date as YYYY-MM-DD
date_tooltip = ("Date", "@Date{%F}")
temperature_tooltip = ("Temperature (°C)", "@Temperature")
hover = HoverTool(
    tooltips=[date_tooltip, temperature_tooltip],
    formatters={"@Date": "datetime"},
    mode='vline',
)
p.add_tools(hover)

show(p)

Question 2: Rolling Average
2.  Calculate the 30-day rolling average of the daily minimum temperature and plot it
    alongside the original temperature data.

    * Create a new column 'Rolling_Avg' in the DataFrame containing the 30-day rolling average.
    * Plot both the original 'Temperature' and the 'Rolling_Avg' on the same plot.
    * Use different colors and line styles to distinguish between the two.
    * Add a legend to the plot to label the lines.
    * Add tooltips to display the date, original temperature, and rolling average.

In [None]:
# 30-day rolling mean of the daily minimum temperature.
# The first 29 rows are NaN because the window is not yet full.
df['Rolling_Avg'] = df['Temperature'].rolling(window=30).mean()

# Both lines read from the same source, so one tooltip can show both values
source = ColumnDataSource(df)

p = figure(
    title="Daily Minimum Temperatures with 30-Day Rolling Average",
    x_axis_type="datetime",
    width=900,
    height=400,
    tools="pan,wheel_zoom,reset",
)

# Solid navy line for the raw data, thicker dashed pink line for the smoothed series
daily_line = p.line(
    'Date', 'Temperature', source=source,
    line_width=2, color='navy', legend_label="Daily Temperature",
)
p.line(
    'Date', 'Rolling_Avg', source=source,
    line_width=3, color='pink', line_dash='dashed', legend_label="30-Day Rolling Avg",
)

# Attach the hover tool to ONE renderer only. Without `renderers`, the tool
# inspects every glyph and, in 'vline' mode, draws the same tooltip twice
# (once per line). The shared source still provides all three columns.
hover = HoverTool(
    renderers=[daily_line],
    tooltips=[
        ("Date", "@Date{%F}"),
        ("Temperature (°C)", "@Temperature{0.0}"),
        ("30-Day Avg (°C)", "@Rolling_Avg{0.0}"),
    ],
    formatters={"@Date": "datetime"},
    mode='vline',
)
p.add_tools(hover)

p.xaxis.axis_label = "Date"
p.yaxis.axis_label = "Temperature (°C)"
p.legend.location = "top_left"
p.legend.click_policy = "hide"  # clicking a legend entry toggles that line

show(p)

Question 3: Monthly Box Plots
3.  Create box plots to visualize the distribution of temperatures for each month.

    * Extract the month from the 'Date' column and create a new 'Month' column.
    * Group the data by 'Month' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution.
    * Label the x-axis with month names and the y-axis with "Temperature (°C)".
    * Add tooltips to display the month and relevant statistical values (min, max, median, etc.).

In [None]:
import pandas as pd
from bokeh.models import FactorRange
from bokeh.transform import factor_cmap
import numpy as np
output_notebook()

# Month name is used for labelling, month number for calendar ordering
df['Month'] = df['Date'].dt.strftime('%B')
df['Month_num'] = df['Date'].dt.month

# Month names in calendar order (January .. December) for the categorical x-axis.
# Only this small lookup is sorted: `df` itself keeps its chronological row
# order, which later time-based operations (rolling windows, resampling) rely on.
month_order = (
    df[['Month', 'Month_num']]
    .drop_duplicates()
    .sort_values('Month_num')['Month']
    .tolist()
)

# Per-month quartiles of the daily minimum temperature
grouped = df.groupby('Month')
monthly_temps = grouped['Temperature']
q1 = monthly_temps.quantile(0.25)
q2 = monthly_temps.quantile(0.50)
q3 = monthly_temps.quantile(0.75)
iqr = q3 - q1

# Whiskers reach 1.5 * IQR beyond the box, but never past the observed extremes
upper = np.minimum(q3 + 1.5 * iqr, monthly_temps.max())
lower = np.maximum(q1 - 1.5 * iqr, monthly_temps.min())

# groupby sorts the months alphabetically; the x_range below restores calendar order
source = ColumnDataSource(data=dict(
    month=q1.index.tolist(),
    q1=q1.values,
    q2=q2.values,
    q3=q3.values,
    upper=upper.values,
    lower=lower.values,
))

p = figure(
    x_range=month_order,
    title="Monthly Distribution of Daily Minimum Temperatures",
    width=900,
    height=400,
    tools="pan,box_zoom,reset",
)

# Whiskers first, so the boxes are drawn on top of them
p.segment('month', 'upper', 'month', 'q3', source=source, line_color="black")
p.segment('month', 'lower', 'month', 'q1', source=source, line_color="black")

# Draw each box as two stacked bars (Q2..Q3 and Q1..Q2). Their shared edge is
# the median line. A segment from (month, q2) to (month, q2) has zero length
# and would not be visible.
upper_box = p.vbar(x='month', top='q3', bottom='q2', width=0.7, source=source,
                   fill_color="skyblue", line_color="black")
lower_box = p.vbar(x='month', top='q2', bottom='q1', width=0.7, source=source,
                   fill_color="skyblue", line_color="black")

# "Min"/"Max" are the whisker ends (capped at the observed extremes).
# Restricting the tool to the boxes avoids duplicate tooltips from the whiskers.
hover = HoverTool(
    renderers=[upper_box, lower_box],
    tooltips=[
        ("Month", "@month"),
        ("Min", "@lower{0.0}"),
        ("Q1", "@q1{0.0}"),
        ("Median", "@q2{0.0}"),
        ("Q3", "@q3{0.0}"),
        ("Max", "@upper{0.0}"),
    ],
)
p.add_tools(hover)

p.xaxis.axis_label = "Month"
p.yaxis.axis_label = "Temperature (°C)"
p.xgrid.grid_line_color = None
show(p)

In [None]:
from bokeh.plotting import output_notebook, show
import pandas as pd
# Show Bokeh output inline in the notebook
output_notebook()

# Reload the raw file and peek at it before any cleaning is applied
df = pd.read_csv("./datasets/daily-minimum-temperatures-in-melbourne.csv")
print(df.head())

# Normalise the column names, then parse the dates
df.columns = ['Date', 'Temperature']
df['Date'] = pd.to_datetime(df['Date'])

# Remove '?' characters from the readings, then convert them to numbers
df['Temperature'] = pd.to_numeric(
    df['Temperature'].astype(str).str.replace('?', '', regex=False)
)

# Display the cleaned DataFrame as the cell output
df

         Date DailyTemperature
0  1981-01-01             20.7
1  1981-01-02             17.9
2  1981-01-03             18.8
3  1981-01-04             14.6
4  1981-01-05             15.8


Unnamed: 0,Date,Temperature
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8
...,...,...
3645,1990-12-27,14.0
3646,1990-12-28,13.6
3647,1990-12-29,13.5
3648,1990-12-30,15.7


Question 4: Yearly Box Plots with Color Mapping

4.  Create box plots to visualize the distribution of temperatures for each year,
    and use color mapping to highlight temperature variations.

    * Extract the year from the 'Date' column and create a new 'Year' column.
    * Group the data by 'Year' and prepare it for plotting.
    * Use Bokeh's box plot elements to visualize the distribution for each year.
    * Label the x-axis with the 'Year' and the y-axis with "Temperature (°C)".
    * Use `factor_cmap` to color the boxes based on the median temperature of each year.
    * Add tooltips to display the year and relevant statistical values (min, max, median, etc.).
    * Enable pan, wheel zoom, and reset tools.

In [None]:
import numpy as np  # imported here so the cell does not depend on an earlier cell
from bokeh.palettes import Plasma256, linear_palette

output_notebook()

# Per-year quartiles of the daily minimum temperature
df['Year'] = df['Date'].dt.year
grouped = df.groupby('Year')
yearly_temps = grouped['Temperature']
q1 = yearly_temps.quantile(0.25)
q2 = yearly_temps.quantile(0.50)
q3 = yearly_temps.quantile(0.75)
iqr = q3 - q1

# Whiskers reach 1.5 * IQR beyond the box, but never past the observed extremes
upper = np.minimum(q3 + 1.5 * iqr, yearly_temps.max())
lower = np.maximum(q1 - 1.5 * iqr, yearly_temps.min())

# Categorical axes need string factors; these stay in chronological order
years = q1.index.astype(str).tolist()
source = ColumnDataSource(data=dict(
    year=years,
    q1=q1.values,
    q2=q2.values,
    q3=q3.values,
    upper=upper.values,
    lower=lower.values,
    median=q2.values,  # same values as q2, kept under an explicit name
))

# Colour each box by its median temperature: years are ranked from lowest to
# highest median and matched against the same number of colours sampled evenly
# across Plasma256. Passing all 256 colours would use only the first few,
# nearly identical ones.
years_by_median = q2.sort_values().index.astype(str).tolist()
color_map = factor_cmap(
    'year',
    palette=linear_palette(Plasma256, len(years_by_median)),
    factors=years_by_median,
)

p = figure(
    title="Yearly Distribution of Daily Minimum Temperatures",
    x_range=years,
    x_axis_label="Year",
    y_axis_label="Temperature (°C)",
    width=1000,
    height=450,
    tools="pan,wheel_zoom,reset",
)

# Whiskers first, so the boxes are drawn on top of them
p.segment('year', 'upper', 'year', 'q3', source=source, line_color="black")
p.segment('year', 'lower', 'year', 'q1', source=source, line_color="black")

# Draw each box as two stacked bars (Q2..Q3 and Q1..Q2). Their shared edge is
# the median line. A segment from (year, q2) to (year, q2) has zero length
# and would not be visible.
upper_box = p.vbar(x='year', top='q3', bottom='q2', width=0.7, source=source,
                   fill_color=color_map, line_color="black")
lower_box = p.vbar(x='year', top='q2', bottom='q1', width=0.7, source=source,
                   fill_color=color_map, line_color="black")

# "Min"/"Max" are the whisker ends (capped at the observed extremes).
# Restricting the tool to the boxes avoids duplicate tooltips from the whiskers.
hover = HoverTool(
    renderers=[upper_box, lower_box],
    tooltips=[
        ("Year", "@year"),
        ("Min", "@lower{0.0}"),
        ("Q1", "@q1{0.0}"),
        ("Median", "@q2{0.0}"),
        ("Q3", "@q3{0.0}"),
        ("Max", "@upper{0.0}"),
    ],
)
p.add_tools(hover)

p.xaxis.major_label_orientation = 1.0  # label rotation, in radians
p.xgrid.grid_line_color = None
show(p)


Question 5: Interactive Time Range Selection

5.  Create an interactive line plot where the user can select a specific time range
    to view using a date range slider.

    * Create a basic line plot of 'Temperature' over 'Date'.
    * Implement a date range slider using Bokeh widgets to allow users to select a start and end date.
    * Update the plot dynamically based on the selected date range.
    * Add tooltips to display the date and temperature.
    * Enable pan, wheel zoom, and reset tools.

In [None]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool, DateRangeSlider, CustomJS
from bokeh.layouts import column
output_notebook()

# Chronologically ordered copy, plus the dates as integer milliseconds for JS
df_sorted = df.sort_values("Date").copy()
df_sorted["Date_ms"] = df_sorted["Date"].astype("int64") // 1_000_000

# `source_all` always holds the full series; `source_filtered` is what the
# line glyph actually draws and is rewritten by the slider callback.
source_all = ColumnDataSource(data={
    "Date": df_sorted["Date"],
    "Date_ms": df_sorted["Date_ms"],
    "Temperature": df_sorted["Temperature"],
})
source_filtered = ColumnDataSource(data=source_all.data.copy())

p = figure(
    width=950,
    height=400,
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Temperature (°C)",
    title="Interactive Daily Minimum Temperatures",
    tools="pan,wheel_zoom,reset",
)
p.line("Date", "Temperature", source=source_filtered, line_width=2, color="orange")

hover = HoverTool(
    formatters={"@Date": "datetime"},
    tooltips=[("Date", "@Date{%F}"), ("Temp", "@Temperature{0.0} °C")],
)
p.add_tools(hover)

# The slider spans the whole dataset and starts with everything selected
first_date = df_sorted["Date"].min()
last_date = df_sorted["Date"].max()
slider = DateRangeSlider(
    title="Sélectionne une plage de dates",
    start=first_date,
    end=last_date,
    value=(first_date, last_date),
    step=1,
    width=900,
)

# Browser-side filter: rebuild the filtered columns from the rows of
# `source_all` whose Date_ms lies within the slider's [start, end] values.
filter_code = """
    const data_all = source_all.data;
    const data_filtered = source_filtered.data;

    const start = slider.value[0];
    const end = slider.value[1];

    const dates = data_all['Date'];
    const dates_ms = data_all['Date_ms'];
    const temps = data_all['Temperature'];

    data_filtered['Date'] = [];
    data_filtered['Date_ms'] = [];
    data_filtered['Temperature'] = [];

    for (let i = 0; i < dates.length; i++) {
        if (dates_ms[i] >= start && dates_ms[i] <= end) {
            data_filtered['Date'].push(dates[i]);
            data_filtered['Date_ms'].push(dates_ms[i]);
            data_filtered['Temperature'].push(temps[i]);
        }
    }

    source_filtered.change.emit();
    """
callback = CustomJS(
    args=dict(source_all=source_all, source_filtered=source_filtered, slider=slider),
    code=filter_code,
)
slider.js_on_change("value", callback)

# Slider above the plot
show(column(slider, p))

Question 6: Time Series Decomposition Visualization

6.  Perform a simple time series decomposition to visualize the trend and seasonality
    components of the temperature data.

    * Resample the data to monthly frequency and calculate the monthly average temperature.
    * Use a simple moving average to estimate the trend component.
    * Calculate the seasonal component by subtracting the trend from the original monthly data.
    * Create three separate Bokeh plots: one for the original monthly data, one for the trend,
        and one for the seasonal component.
    * Ensure the plots are aligned and share the same x-axis (Date).
    * Add tooltips to each plot to display the date and corresponding value.
    * Enable pan, wheel zoom, and reset tools for each plot.

In [None]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    DatetimeTickFormatter,
    NumeralTickFormatter,
)
from bokeh.layouts import row, column
from bokeh.transform import factor_cmap
output_notebook()

# Keep only the two original columns. Earlier cells add helper columns to `df`
# (e.g. 'Year'), and assigning a two-name list to `df.columns` would then raise
# a length-mismatch ValueError. Selecting by name is safe to re-run.
df = df[['Date', 'Temperature']].copy()

# The cleaning steps are idempotent, so re-applying them to clean data is harmless
df['Date'] = pd.to_datetime(df['Date'])
df['Temperature'] = pd.to_numeric(
    df['Temperature'].astype(str).str.replace('?', '', regex=False)
)

# Display the DataFrame as the cell output
df

Unnamed: 0,Date,Temperature
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8
...,...,...
3645,1990-12-27,14.0
3646,1990-12-28,13.6
3647,1990-12-29,13.5
3648,1990-12-30,15.7


In [None]:
output_notebook()

# Work on a two-column copy. Earlier cells add helper columns to `df`
# (e.g. 'Year'), so assigning a two-name list to `df.columns` would raise a
# length-mismatch ValueError, and resample().mean() would average those
# helper columns as well.
daily = df[['Date', 'Temperature']].copy()
daily['Date'] = pd.to_datetime(daily['Date'])
daily['Temperature'] = pd.to_numeric(
    daily['Temperature'].astype(str).str.replace('?', '', regex=False)
)

# Monthly mean temperature ('ME' = month-end frequency)
monthly_avg = daily.resample('ME', on='Date').mean().reset_index()

# Trend: centred 12-month moving average (NaN where the window is incomplete)
monthly_avg['Trend'] = monthly_avg['Temperature'].rolling(window=12, center=True).mean()

# Seasonal component: what is left after removing the trend
monthly_avg['Seasonality'] = monthly_avg['Temperature'] - monthly_avg['Trend']

# One source is enough: the three plots read different columns of the same table
source_monthly = ColumnDataSource(monthly_avg)
TOOLS = "pan,wheel_zoom,reset"


def _monthly_component_figure(column_name, title, y_label, color, hover_label, **figure_kwargs):
    """Build one datetime line plot of `column_name` with a date/value hover tool.

    Extra keyword arguments (e.g. ``x_range``) are passed straight to ``figure``.
    """
    fig = figure(
        title=title,
        x_axis_type="datetime",
        width=950,
        height=250,
        tools=TOOLS,
        x_axis_label="Date",
        y_axis_label=y_label,
        **figure_kwargs,
    )
    fig.line("Date", column_name, source=source_monthly, color=color, line_width=2)
    fig.add_tools(HoverTool(
        tooltips=[("Date", "@Date{%F}"), (hover_label, "@" + column_name + "{0.0} °C")],
        formatters={"@Date": "datetime"},
    ))
    return fig


p1 = _monthly_component_figure(
    "Temperature", "Original Monthly Average Temperature",
    "Temperature (°C)", "navy", "Temp",
)
# Reusing p1's x_range keeps pan/zoom in sync across the three plots
p2 = _monthly_component_figure(
    "Trend", "Trend (12-month Moving Average)",
    "Trend (°C)", "green", "Trend",
    x_range=p1.x_range,
)
p3 = _monthly_component_figure(
    "Seasonality", "Seasonal Component",
    "Seasonality (°C)", "orange", "Seasonality",
    x_range=p1.x_range,
)

# Stack the plots vertically so their shared date axis lines up
show(column(p1, p2, p3))

  monthly_avg = df.resample('M', on='Date').mean().reset_index()
