In [73]:
import pandas as pd
import plotly.graph_objs as go 
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from tqdm import tqdm 
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Import des données

In [74]:
apple = pd.read_csv("../data/AAPL.csv")
amazon = pd.read_csv("../data/AMZN.csv")
berkshire = pd.read_csv("../data/BRK-B.csv")
google = pd.read_csv("../data/GOOG.csv")
meta = pd.read_csv("../data/META.csv")
microsoft = pd.read_csv("../data/MSFT.csv")
nvidia = pd.read_csv("../data/NVDA.csv")
tesla = pd.read_csv("../data/TSLA.csv")
unitedhealth = pd.read_csv("../data/UNH.csv")
exxon = pd.read_csv("../data/XOM.csv")

In [75]:
titres = {"Apple" : apple, 
          "Amazon" : amazon,
          "Berkshire" : berkshire,
          "Google" : google,
          "Meta" : meta,
          "Microsoft" : microsoft,
          "Nvidia" : nvidia,
          "Tesla" : tesla,
          "UnitedHealth" : unitedhealth,
          "Exxon" : exxon}

# Calcul des prédictions

In [76]:
def stock_volatility_forecast(stock):
    stock['Date'] = pd.to_datetime(stock['Date'])
    stock['Volatility_HighLow_10days'] = stock['High'].rolling(window=10).std() / stock['Low'].rolling(window=10).mean()

    stock['Return'] = stock['Close'].pct_change()
    stock = stock.dropna()
    stock = stock.drop(columns=["Open", 'Adj Close'])

    # Calcul de la volatilité sur une fenêtre mobile
    stock['Volatility'] = stock['Return'].rolling(window=10).std()

    # Définition de la cible comme la volatilité future
    stock['Target'] = stock['Volatility'].shift(-10)
    stock = stock.dropna()

    window_size = 90
    step_size = 7
    prediction_days = 7

    prediction_dates = []
    predictions = []
    for start in tqdm(range(0, len(stock) - window_size - prediction_days, step_size)):
        end = start + window_size
        train = stock.iloc[start:end]
        test = stock.iloc[end:end + prediction_days]

        model = XGBRegressor(n_estimators=400, max_depth=35, learning_rate=0.2, verbosity=0)
        model.fit(train.drop(['Date', 'Target', 'Volume'], axis=1), train['Target'])

        pred = model.predict(test.drop(['Date', 'Target', 'Volume'], axis=1))
        predictions.append(pred)

        prediction_date = stock.iloc[end + prediction_days - 1]['Date']
        prediction_dates.append(prediction_date)

    predictions = [value[6] for value in predictions]

    df_predictions = pd.DataFrame({
        'Date': prediction_dates,
        'Prediction': predictions
    })

    merged_df = stock.merge(df_predictions, on='Date', how='inner')

    merged_df = merged_df.drop(columns = ["Target"])

    return(merged_df)


In [77]:
predictions_dic = {}

In [78]:
for key in titres :
    predictions_dic[key] = stock_volatility_forecast(titres[key])

100%|██████████| 378/378 [01:02<00:00,  6.08it/s]
100%|██████████| 378/378 [03:50<00:00,  1.64it/s]
100%|██████████| 378/378 [2:14:25<00:00, 21.34s/it]     
100%|██████████| 378/378 [7:21:40<00:00, 70.11s/it]     
100%|██████████| 378/378 [7:42:55<00:00, 73.48s/it]     
100%|██████████| 378/378 [6:42:31<00:00, 63.89s/it]     
100%|██████████| 378/378 [2:44:55<00:00, 26.18s/it]    
100%|██████████| 378/378 [02:20<00:00,  2.69it/s]
100%|██████████| 378/378 [01:55<00:00,  3.28it/s]
100%|██████████| 378/378 [01:53<00:00,  3.32it/s]


In [79]:
predictions_dic

{'Apple':           Date        High         Low       Close     Volume  \
 0   2013-06-17   15.560714   15.370000   15.428571  259414400   
 1   2013-06-26   14.456786   14.130714   14.216786  367724000   
 2   2013-07-08   15.035714   14.666071   14.823214  298138400   
 3   2013-07-17   15.436429   15.293571   15.368214  198990400   
 4   2013-07-26   15.751429   15.512143   15.749643  200152400   
 ..         ...         ...         ...         ...        ...   
 373 2023-10-30  171.169998  168.869995  170.289993   51131000   
 374 2023-11-08  183.449997  181.589996  182.889999   49340300   
 375 2023-11-17  190.380005  188.570007  189.690002   50922700   
 376 2023-11-29  192.089996  188.970001  189.369995   43014200   
 377 2023-12-08  195.990005  193.669998  195.710007   53377300   
 
      Volatility_HighLow_10days    Return  Volatility  Prediction  
 0                     0.014814  0.004534    0.008744    0.024781  
 1                     0.031309 -0.011326    0.011250    0.01

In [80]:
dfs_to_concat = []

for name, df in predictions_dic.items():
    df['Date'] = pd.to_datetime(df['Date'])
    dfs_to_concat.append(df[['Date', 'Prediction', "Volatility"]])

portfolio_predictions = pd.concat(dfs_to_concat)
portfolio_avg_predictions = portfolio_predictions.groupby('Date').mean().reset_index()

print(portfolio_avg_predictions.head())

        Date  Prediction  Volatility
0 2013-06-17    0.020319    0.014440
1 2013-06-26    0.017279    0.014275
2 2013-07-08    0.018986    0.012786
3 2013-07-17    0.018454    0.014141
4 2013-07-26    0.016022    0.027492


In [81]:
fig = go.Figure()

# Ajout de la série financière
fig.add_trace(go.Scatter(x = portfolio_avg_predictions.Date, 
                         y = portfolio_avg_predictions['Prediction'],
                         mode = 'lines',
                         name ='Prédiction', 
                         fill = "tozeroy"))

fig.add_trace(go.Scatter(x = portfolio_avg_predictions.Date, 
                         y = portfolio_avg_predictions['Volatility'],
                         mode = 'lines',
                         name ='Réel', 
                         fill = "tozeroy"))

# Mise à jour des titres
fig.update_layout(title="Évolution de la volatilité du ptf",
                  xaxis_title='Date',
                  yaxis_title='Valeur')

# Affichage du graphique
fig.show()