In [134]:
import yfinance as yf  
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

import plotly.express as px
import plotly.graph_objects as go

from statsmodels.tsa.holtwinters import SimpleExpSmoothing, ExponentialSmoothing

In [126]:
# Download the AAPL price history for the date window given below
data = yf.download(
    "AAPL",
    start="2019-01-01",
    end="2020-01-01",
)
data

[*********************100%***********************]  1 of 1 completed


Price,Adj Close,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2019-01-02,37.708591,39.480000,39.712502,38.557499,38.722500,148158800
2019-01-03,33.952545,35.547501,36.430000,35.500000,35.994999,365248800
2019-01-04,35.401951,37.064999,37.137501,35.950001,36.132500,234428400
2019-01-07,35.323151,36.982498,37.207500,36.474998,37.174999,219111200
2019-01-08,35.996525,37.687500,37.955002,37.130001,37.389999,164101200
...,...,...,...,...,...,...
2019-12-24,68.898682,71.067497,71.222504,70.730003,71.172501,48478800
2019-12-26,70.265671,72.477501,72.495003,71.175003,71.205002,93121200
2019-12-27,70.239006,72.449997,73.492500,72.029999,72.779999,146266000
2019-12-30,70.655884,72.879997,73.172501,71.305000,72.364998,144114400


Let's compute the average level of the `Open` variable, plot it alongside the original series on the same graph, and calculate the MSE between the two.

In [127]:
# Inspect the column index before touching it
print("Colonnes originales :")
print(data.columns)
print()

# Flatten the two-level (Price, Ticker) columns into single "Price_Ticker" labels.
# The MultiIndex guard keeps this cell idempotent: on a second run the labels are
# already plain strings, and '_'.join would split them into characters
# ("Open_AAPL" -> "O_p_e_n___A_A_P_L"), breaking every later column lookup.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = ['_'.join(col).strip() for col in data.columns]
print("Colonnes simplifiées :")
print(data.columns)
print()

# Sanity check: the flattened opening-price column must be reachable by name
print("Premières valeurs de Open_AAPL :")
print(data["Open_AAPL"].head())
print()

Colonnes originales :
MultiIndex([('Adj Close', 'AAPL'),
            (    'Close', 'AAPL'),
            (     'High', 'AAPL'),
            (      'Low', 'AAPL'),
            (     'Open', 'AAPL'),
            (   'Volume', 'AAPL')],
           names=['Price', 'Ticker'])

Colonnes simplifiées :
Index(['Adj Close_AAPL', 'Close_AAPL', 'High_AAPL', 'Low_AAPL', 'Open_AAPL',
       'Volume_AAPL'],
      dtype='object')

Premières valeurs de Open_AAPL :
Date
2019-01-02    38.722500
2019-01-03    35.994999
2019-01-04    36.132500
2019-01-07    37.174999
2019-01-08    37.389999
Name: Open_AAPL, dtype: float64



# Averaging

In [128]:
# Overall mean of the opening price, stored once for both the plot and the error metric
open_mean = data["Open_AAPL"].mean()
print(f"Moyenne globale (open_mean) : {open_mean}")

# Observed opening prices (solid blue line)
observed_trace = go.Scatter(
    x=data.index,
    y=data["Open_AAPL"],
    mode="lines",
    name="Open_AAPL",
    line={"color": "blue", "width": 2},
)

# Horizontal reference at the global mean (dotted red line), one value per date
mean_trace = go.Scatter(
    x=data.index,
    y=[open_mean for _ in range(len(data))],
    mode="lines",
    name="Mean Open",
    line={"color": "red", "dash": "dot", "width": 3},
)

# Assemble the figure from the two traces, then apply titles and theme
fig = go.Figure(data=[observed_trace, mean_trace])
fig.update_layout(
    title="Open Prices (AAPL) with Mean Open",
    xaxis_title="Date",
    yaxis_title="Price",
    template="plotly_white",
)
fig.show()

# Mean squared error between the series and its constant mean
squared_errors = (data["Open_AAPL"] - open_mean) ** 2
mse = np.mean(squared_errors)
print(f"The MSE for this global average smoothing is: {mse:.2f}")

Moyenne globale (open_mean) : 51.96727168370807


The MSE for this global average smoothing is: 73.55


Of course, global average smoothing is a rather naive technique for estimating a time series' characteristics or for forecasting future values. In the present case, where the data shows a positive growth trend, the further away from the middle of the period we look, the larger the errors committed by this smoothing method.

# Single moving average

In [129]:
# 7-observation simple moving average (SMA) of the opening price
sma_7 = data["Open_AAPL"].rolling(7).mean()

# Peek at the first values: the window is incomplete for the first six rows
print("Premières valeurs de la SMA sur 7 jours :")
print(sma_7.head(10))
print()

# One trace for the raw series, one for its moving average
sma_traces = (
    go.Scatter(
        x=data.index,
        y=data["Open_AAPL"],
        mode="lines",
        name="Open_AAPL",
        line={"color": "blue", "width": 2},  # blue line
    ),
    go.Scatter(
        x=data.index,
        y=sma_7,
        mode="lines",
        name="7-Day SMA",
        line={"color": "#4BE8E0", "dash": "solid", "width": 3},  # cyan line
    ),
)

fig = go.Figure()
for sma_trace in sma_traces:
    fig.add_trace(sma_trace)

# Titles and theme
layout_options = {
    "title": "Open Prices (AAPL) with 7-Day Moving Average",
    "xaxis_title": "Date",
    "yaxis_title": "Price",
    "template": "plotly_white",
}
fig.update_layout(**layout_options)

# Render
fig.show()

Premières valeurs de la SMA sur 7 jours :
Date
2019-01-02          NaN
2019-01-03          NaN
2019-01-04          NaN
2019-01-07          NaN
2019-01-08          NaN
2019-01-09          NaN
2019-01-10    37.337499
2019-01-11    37.265714
2019-01-14    37.511071
2019-01-15    37.716072
Name: Open_AAPL, dtype: float64



As you can see, moving averages are much better at following the general trend of the data, depending on the order selected (you can try different orders and observe the consequences on the visualization). However, moving averages present a major drawback: since the moving average is calculated over the $T$ previous dates (current date included), it always lags behind the actual time series, and the higher the order, the longer the lag.

# Exponential Smoothing

In [132]:
# Show the first rows, then the frequency currently attached to the index
print(data.head(), data.index.freq, sep="\n")
print()

# Re-index on the business-day calendar so the index carries an explicit frequency
# (the original note targets the case where data.index.freq is None)
data = data.asfreq("B")

            Adj Close_AAPL  Close_AAPL  High_AAPL   Low_AAPL  Open_AAPL  \
Date                                                                      
2019-01-02       37.708591   39.480000  39.712502  38.557499  38.722500   
2019-01-03       33.952545   35.547501  36.430000  35.500000  35.994999   
2019-01-04       35.401951   37.064999  37.137501  35.950001  36.132500   
2019-01-07       35.323151   36.982498  37.207500  36.474998  37.174999   
2019-01-08       35.996525   37.687500  37.955002  37.130001  37.389999   

            Volume_AAPL  
Date                     
2019-01-02  148158800.0  
2019-01-03  365248800.0  
2019-01-04  234428400.0  
2019-01-07  219111200.0  
2019-01-08  164101200.0  
<BusinessDay>



In [133]:
# Forward-fill the missing opening prices (rows added by the business-day
# re-indexing, presumably non-trading days).
# The result is assigned back to the column: `fillna(method=..., inplace=True)` on a
# column selection is chained in-place assignment with a deprecated argument, and
# recent pandas versions do not guarantee that it updates `data`.
data["Open_AAPL"] = data["Open_AAPL"].ffill()

# Simple Exponential Smoothing with a fixed smoothing level.
# optimized=False keeps alpha at the chosen value instead of estimating it.
alpha = 0.3
ses_model = SimpleExpSmoothing(data["Open_AAPL"], initialization_method="heuristic").fit(
    smoothing_level=alpha, optimized=False
)

# Out-of-sample forecast: 30 business days starting right after the last observation.
# Positions len(data) .. len(data) + 29 are the first 30 steps past the sample.
future_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=30, freq='B')
ses_forecast = ses_model.predict(start=len(data), end=len(data) + len(future_dates) - 1)
ses_forecast.index = future_dates

# Quick look at the forecast (SES produces a flat forecast line)
print("Prévisions SES :")
print(ses_forecast.head())

# Build the figure
fig = go.Figure()

# Observed data
fig.add_trace(go.Scatter(
    x=data.index,
    y=data["Open_AAPL"],
    mode="lines",
    name="Observed Open_AAPL",
    line=dict(color="blue", width=2)
))

# SES forecast
fig.add_trace(go.Scatter(
    x=ses_forecast.index,
    y=ses_forecast,
    mode="lines",
    name="SES Forecast",
    line=dict(color="#4BE8E0", dash="dot", width=3)
))

# Layout; the Y range is pinned so it spans both the observations and the forecast
fig.update_layout(
    title="Simple Exponential Smoothing (SES) Forecast",
    xaxis_title="Date",
    yaxis_title="Open Price",
    yaxis=dict(range=[
        min(data["Open_AAPL"].min(), ses_forecast.min()),
        max(data["Open_AAPL"].max(), ses_forecast.max())
    ]),
    template="plotly_white"
)

# Render
fig.show()

Prévisions SES :
2020-01-01    71.888933
2020-01-02    71.888933
2020-01-03    71.888933
2020-01-06    71.888933
2020-01-07    71.888933
Freq: B, dtype: float64


# Double exponential smoothing

In [135]:
# Data preparation: datetime index on a business-day grid, gaps forward-filled.
# The fill is assigned back to the column: `fillna(method=..., inplace=True)` on a
# column selection is chained in-place assignment with a deprecated argument, and
# recent pandas versions do not guarantee that it updates `data`.
data.index = pd.to_datetime(data.index)
data = data.asfreq('B')  # business-day frequency
data["Open_AAPL"] = data["Open_AAPL"].ffill()

# Double Exponential Smoothing (additive trend, no seasonality) with fixed
# level/trend smoothing parameters. `smoothing_trend` is the current name of the
# argument formerly called `smoothing_slope`; the old name is deprecated.
des_model = ExponentialSmoothing(data["Open_AAPL"], trend="add", seasonal=None).fit(
    smoothing_level=0.3,
    smoothing_trend=0.2,
)

# In-sample fitted values plus a forecast reaching 30 calendar days past the
# last observation
des_forecast = des_model.predict(start=data.index[0], end=data.index[-1] + pd.Timedelta(days=30))

# Quick look at the predictions
print("Prévisions DES :")
print(des_forecast.head())

# Build the figure
fig = go.Figure()

# Observed data
fig.add_trace(go.Scatter(
    x=data.index,
    y=data["Open_AAPL"],
    mode="lines",
    name="Observed Open_AAPL",
    line=dict(color="blue", width=2)
))

# DES fitted values and forecast
fig.add_trace(go.Scatter(
    x=des_forecast.index,
    y=des_forecast,
    mode="lines",
    name="DES Forecast",
    line=dict(color="#4BE8E0", dash="dot", width=3)
))

# Titles and theme
fig.update_layout(
    title="Double Exponential Smoothing (DES) Forecast",
    xaxis_title="Date",
    yaxis_title="Open Price",
    template="plotly_white"
)

# Render
fig.show()

Prévisions DES :
2019-01-02    36.979123
2019-01-03    37.692809
2019-01-04    37.272270
2019-01-07    36.950757
2019-01-08    37.051902
Freq: B, dtype: float64


# Triple exponential smoothing

In [136]:
# Data preparation: datetime index on a business-day grid, gaps forward-filled.
# The fill is assigned back to the column: `fillna(method=..., inplace=True)` on a
# column selection is chained in-place assignment with a deprecated argument, and
# recent pandas versions do not guarantee that it updates `data`.
data.index = pd.to_datetime(data.index)
data = data.asfreq('B')  # business-day frequency
data["Open_AAPL"] = data["Open_AAPL"].ffill()

# Triple Exponential Smoothing (Holt-Winters); all smoothing parameters are
# estimated by fit()
tes_model = ExponentialSmoothing(
    data["Open_AAPL"],
    trend="add",  # additive trend
    seasonal="add",  # additive seasonality
    seasonal_periods=20  # season length in observations (tune to the data)
).fit()

# Forecast 60 periods beyond the sample
tes_forecast = tes_model.forecast(steps=60)

# Quick look at the forecast
print("Prévisions Holt-Winters :")
print(tes_forecast.head())

# Build the figure
fig = go.Figure()

# Observed data
fig.add_trace(go.Scatter(
    x=data.index,
    y=data["Open_AAPL"],
    mode="lines",
    name="Observed Open_AAPL",
    line=dict(color="blue", width=2)  # blue line for the observations
))

# Forecast, plotted against the business days that follow the sample.
# The number of dates comes from the forecast itself, so the horizon is set in one place.
future_dates = pd.date_range(
    start=data.index[-1] + pd.Timedelta(days=1),
    periods=len(tes_forecast),
    freq='B',
)
fig.add_trace(go.Scatter(
    x=future_dates,
    y=tes_forecast,
    mode="lines",
    name="Holt-Winters Forecast",
    line=dict(color="#4BE8E0", dash="dot", width=3)  # dotted cyan line for the forecast
))

# Titles and theme
fig.update_layout(
    title="Triple Exponential Smoothing (Holt-Winters) Forecast",
    xaxis_title="Date",
    yaxis_title="Open Price",
    template="plotly_white"
)

# Render
fig.show()

Prévisions Holt-Winters :
2020-01-01    72.880197
2020-01-02    72.901925
2020-01-03    73.009812
2020-01-06    73.261553
2020-01-07    73.529242
Freq: B, dtype: float64


# Univariate time series modeling

Stationarity

In [None]:
# Here's an example of a stationary time series: 200 draws of standard normal noise.
# A seeded generator makes the sample, and therefore the plot, identical on every run.
rng = np.random.default_rng(42)
sample = rng.standard_normal(200)
px.line(x=range(len(sample)), y=sample)