
# Bokeh for Time Series Analysis
<hr style="border: 2px solid black;">


<img src="./images/bokeh.png" alt="bokeh Logo" width="1000"/>
<hr style="border: 2px solid black;">

<img src="./images/bokeh_at_ag_glance.png" alt="Bokeh at a glance" width="1000"/>
<hr style="border: 2px solid black;">

**Introduction to Bokeh**

Bokeh is an interactive visualization library for Python that targets modern web browsers for presentation.
Unlike Matplotlib, which is primarily designed for static plots, Bokeh excels at creating
interactive plots and dashboards. It can handle large datasets and streaming data,
making it suitable for real-time applications.

**Key Features of Bokeh:**

* **Interactivity:** Built-in support for zooming, panning, hovering, and other interactive tools.
* **Web-Focused:** Generates HTML and JavaScript, making it easy to embed plots in web pages.
* **High Performance:** Can handle large datasets efficiently.
* **Versatility:** Supports a wide range of plot types (lines, bars, scatter plots, etc.).

<hr style="border: 2px solid black;">


**Documentation:**

For comprehensive documentation, please refer to the official Bokeh website: [https://docs.bokeh.org/en/latest/](https://docs.bokeh.org/en/latest/)


<hr style="border: 2px solid black;">


**Lab Exercise:**

Your task is to recreate the time series analysis lab we previously conducted using Pandas,
Matplotlib, and Seaborn, but this time, utilize the Bokeh library for visualization.
This will involve:

1.  Loading and preprocessing the "AirPassengersDates.csv" dataset.
2.  Creating interactive Bokeh plots for:
    * Time series line plots
    * Bar plots of aggregated data
    * Visualizing mean and standard deviation
    * Outlier detection
    * Resampling (upsampling and downsampling)
    * Lag analysis
    * Autocorrelation

Pay close attention to Bokeh's features for interactivity (tools, hover effects) and
its handling of data sources. Aim to replicate the insights and visualizations
from the previous lab while leveraging Bokeh's strengths.

Good luck!
<hr style="border: 2px solid black;">

In [10]:
import pandas as pd

# Read the raw CSV; at this point 'Date' still holds plain strings.
df = pd.read_csv("./datasets/AirPassengersDates.csv")

# Parse 'Date' into datetimes (values that cannot be parsed become NaT)
# and promote the parsed column to the DataFrame index.
df = df.assign(Date=pd.to_datetime(df['Date'], errors='coerce')).set_index('Date')

# Quick preview of the first rows
print(df.head())


            #Passengers
Date                   
1949-01-12          112
1949-02-24          118
1949-03-22          132
1949-04-05          129
1949-05-24          121


In [4]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from bokeh.io import curdoc
output_notebook()  # for display inside Jupyter


In [5]:
from bokeh.models import DatetimeTickFormatter

# Data source for the raw series: datetime index on x, passenger counts on y.
source = ColumnDataSource(data={
    'x': df.index,
    'y': df['#Passengers']
})

p = figure(title="Nombre de passagers par mois",
           x_axis_type='datetime', width=800, height=300,
           tools="pan,wheel_zoom,box_zoom,reset,save,hover")

p.line('x', 'y', source=source, line_width=2, color="navy", legend_label="Passagers")
# scatter() replaces circle(size=...), which is deprecated since Bokeh 3.4;
# the default scatter marker is a circle, so the rendering is unchanged.
p.scatter('x', 'y', source=source, size=5, color="red", alpha=0.5)

# Hover: "{%F}" formats the timestamp as YYYY-MM-DD, which requires the
# "datetime" formatter to be registered for that field.
p.hover.tooltips = [("Date", "@x{%F}"), ("Passagers", "@y")]
p.hover.formatters = {"@x": "datetime"}

p.xaxis.axis_label = "Date"
p.yaxis.axis_label = "Nombre de passagers"
p.legend.location = "top_left"

show(p)




In [None]:
# Add a 'Year' column derived from the datetime index
df['Year'] = df.index.year

# Yearly mean of the passenger counts
annual_avg = df.groupby('Year')['#Passengers'].mean().reset_index()

# A categorical x_range needs string factors, so keep a string copy of the year
annual_avg['Year_str'] = annual_avg['Year'].astype(str)
source_bar = ColumnDataSource(annual_avg)

# Build the bar plot
p_bar = figure(x_range=annual_avg['Year_str'].tolist(),
               title="Moyenne annuelle des passagers",
               x_axis_label="Année", y_axis_label="Passagers",
               width=800, height=350,
               tools="hover,pan,wheel_zoom,reset,save")

p_bar.vbar(x='Year_str', top='#Passengers', width=0.8, source=source_bar, color='skyblue')

# Hover tool. The column name contains '#', which is not a word character, so
# it must be wrapped in braces; a bare "@#Passengers" is never substituted
# and the tooltip would show the literal text instead of the value.
p_bar.hover.tooltips = [("Année", "@Year_str"), ("Moyenne", "@{#Passengers}{0.0}")]
p_bar.xaxis.major_label_orientation = 0.8

show(p_bar)

In [16]:
# Rolling statistics over a 12-observation window
rolling_mean = df['#Passengers'].rolling(window=12).mean()
rolling_std = df['#Passengers'].rolling(window=12).std()

# Mean plus the +/- one standard deviation envelope. Rows from the warm-up
# period (incomplete window -> NaN) are dropped, then the 'Date' index is
# turned back into a regular column for plotting.
df_rolling = (
    pd.DataFrame({'mean': rolling_mean})
    .assign(
        std_plus=rolling_mean + rolling_std,
        std_minus=rolling_mean - rolling_std,
    )
    .dropna()
    .reset_index()
)

# Wrap the frame for Bokeh
source_stats = ColumnDataSource(df_rolling)

# Figure with a datetime x axis
p_rolling = figure(
    title="Moyenne mobile (12 mois) et écart-type",
    x_axis_type="datetime",
    width=800,
    height=350,
    x_axis_label="Date",
    y_axis_label="Passagers",
    tools="pan,wheel_zoom,box_zoom,reset,save,hover",
)

# Rolling mean (solid) and the two dashed envelope lines
p_rolling.line(
    'Date', 'mean',
    source=source_stats, color='green', legend_label="Moyenne mobile", line_width=2,
)
p_rolling.line(
    'Date', 'std_plus',
    source=source_stats, color='orange', line_dash='dashed', legend_label="+1σ",
)
p_rolling.line(
    'Date', 'std_minus',
    source=source_stats, color='orange', line_dash='dashed', legend_label="-1σ",
)

# Hover content: ISO date plus the three values with one decimal
p_rolling.hover.tooltips = [
    ("Date", "@Date{%F}"),
    ("Moyenne", "@mean{0.0}"),
    ("+1σ", "@std_plus{0.0}"),
    ("-1σ", "@std_minus{0.0}"),
]
p_rolling.hover.formatters = {"@Date": "datetime"}

# Legend placement
p_rolling.legend.location = "top_left"

# Render
show(p_rolling)


In [20]:
import numpy as np
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# 1. Rolling statistics over a 12-observation window
rolling_mean = df['#Passengers'].rolling(window=12).mean()
rolling_std = df['#Passengers'].rolling(window=12).std()

# 2. Working DataFrame; rows with an incomplete window (NaN stats) are dropped
df_outliers = pd.DataFrame({
    'Passengers': df['#Passengers'],
    'mean': rolling_mean,
    'std': rolling_std
}).dropna()

# 3. Flag points lying more than 2 rolling standard deviations from the rolling mean
df_outliers['is_outlier'] = np.abs(df_outliers['Passengers'] - df_outliers['mean']) > 2 * df_outliers['std']

# 4. Separate the outlier points so they can be drawn as their own glyph
outliers_only = df_outliers[df_outliers['is_outlier']]
source_all = ColumnDataSource(df_outliers)
source_outliers = ColumnDataSource(outliers_only)

# 5. Build the figure
p_outliers = figure(title="Détection des outliers dans le trafic aérien",
                    x_axis_type="datetime", width=800, height=350,
                    tools="pan,wheel_zoom,box_zoom,reset,save,hover")

# Main series
p_outliers.line('Date', 'Passengers', source=source_all, color='gray', line_width=2, legend_label="Passagers")

# Outlier markers. scatter() replaces circle(size=...), which is deprecated
# since Bokeh 3.4; the default scatter marker is a circle.
p_outliers.scatter('Date', 'Passengers', source=source_outliers,
                   size=8, color='red', alpha=0.8, legend_label="Outliers")

# Hover
p_outliers.hover.tooltips = [
    ("Date", "@Date{%F}"),
    ("Passagers", "@Passengers"),
    ("Outlier", "@is_outlier")
]
p_outliers.hover.formatters = {"@Date": "datetime"}

p_outliers.legend.location = "top_left"
p_outliers.xaxis.axis_label = "Date"
p_outliers.yaxis.axis_label = "Passagers"

# Render
show(p_outliers)



In [21]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# Downsampling: quarterly mean.
# 'QE' (quarter end) is the current alias; the bare 'Q' alias is deprecated
# since pandas 2.2 and emits a FutureWarning.
df_down = df['#Passengers'].resample('QE').mean().reset_index()

source_down = ColumnDataSource(df_down)

p_down = figure(title="Downsampling - Moyenne trimestrielle des passagers",
                x_axis_type="datetime", width=800, height=350,
                tools="pan,wheel_zoom,box_zoom,reset,save,hover")

p_down.line('Date', '#Passengers', source=source_down, line_width=2, color='green')
# scatter() replaces circle(size=...), which is deprecated since Bokeh 3.4.
p_down.scatter('Date', '#Passengers', source=source_down, size=6, color='green', alpha=0.7)

p_down.xaxis.axis_label = "Date"
p_down.yaxis.axis_label = "Passagers"
# The column name contains '#', so it must be wrapped in braces; a bare
# "@#Passengers" is not recognised as a field and is displayed literally.
p_down.hover.tooltips = [("Date", "@Date{%F}"), ("Passagers", "@{#Passengers}{0.0}")]
p_down.hover.formatters = {"@Date": "datetime"}

show(p_down)


  df_down = df['#Passengers'].resample('Q').mean().reset_index()


In [22]:
# Upsampling: weekly series with linear interpolation.
# Resampler.interpolate() first reindexes to the weekly (Sunday) timestamps,
# which discards every observation that does not fall exactly on a Sunday
# before interpolating. Aggregating each weekly bin with mean() first keeps
# all observations; the empty weeks are then filled by linear interpolation.
df_up = (
    df['#Passengers']
    .resample('W')
    .mean()
    .interpolate(method='linear')
    .reset_index()
)

source_up = ColumnDataSource(df_up)

p_up = figure(title="Upsampling - Passagers interpolés hebdomadairement",
              x_axis_type="datetime", width=800, height=350,
              tools="pan,wheel_zoom,box_zoom,reset,save,hover")

p_up.line('Date', '#Passengers', source=source_up, line_width=2, color='purple')

p_up.xaxis.axis_label = "Date"
p_up.yaxis.axis_label = "Passagers"
# The column name contains '#', so it must be wrapped in braces; a bare
# "@#Passengers" is not recognised as a field and is displayed literally.
p_up.hover.tooltips = [("Date", "@Date{%F}"), ("Passagers", "@{#Passengers}{0.0}")]
p_up.hover.formatters = {"@Date": "datetime"}

show(p_up)


In [23]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# 1. Pair each observation with the previous one (lag of 1 row)
df_lag = pd.DataFrame()
df_lag['y'] = df['#Passengers']
df_lag['y_lag1'] = df['#Passengers'].shift(1)  # lag of 1
df_lag = df_lag.dropna()  # the first row has no predecessor

source_lag = ColumnDataSource(df_lag)

# 2. Plot y(t) against y(t-1)
p_lag = figure(title="Lag plot (décalage de 1 mois)", 
               x_axis_label="Passagers à t-1", 
               y_axis_label="Passagers à t",
               width=600, height=400,
               tools="pan,box_zoom,reset,save,hover")

# scatter() replaces circle(size=...), which is deprecated since Bokeh 3.4;
# the default scatter marker is a circle, so the rendering is unchanged.
p_lag.scatter('y_lag1', 'y', source=source_lag, size=6, color="teal", alpha=0.6)
p_lag.hover.tooltips = [("t-1", "@y_lag1"), ("t", "@y")]

show(p_lag)




In [24]:
from statsmodels.tsa.stattools import acf
import numpy as np

# Autocorrelation coefficients of the passenger series, up to lag 20
acf_values = acf(df['#Passengers'], nlags=20)
lags = [*range(len(acf_values))]

# One row per lag, wrapped for Bokeh
acf_data = pd.DataFrame({'lag': lags}).assign(acf=acf_values)
source_acf = ColumnDataSource(acf_data)

# Bar chart of the coefficients
p_acf = figure(
    title="Autocorrelation Function (ACF)",
    x_axis_label="Lag",
    y_axis_label="ACF",
    width=800,
    height=350,
    tools="pan,wheel_zoom,box_zoom,reset,save",
)
p_acf.vbar(x='lag', top='acf', width=0.5, source=source_acf, color='steelblue')

show(p_acf)
