In [132]:
import pandas as pd
import plotly.graph_objs as go 
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from tqdm import tqdm 
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Import des données

In [74]:
apple = pd.read_csv("../data/AAPL.csv")

# Feature engineering

In [75]:
apple['Date'] = pd.to_datetime(apple['Date'])

apple['Volatility_HighLow_10days'] = apple['High'].rolling(window=10).std() / apple['Low'].rolling(window=10).mean()    

#apple['Delta_HighLow_10days'] = apple['High'].diff(periods=10) / apple['Low'].diff(periods=10)
#apple['Delta_OpenClose_10days'] = apple['Open'].diff(periods=10) / apple['Close'].diff(periods=10)
#apple[f'Delta_Volume_10days'] = apple[f'Volume'].diff(periods=10)

In [76]:
apple['Return'] = apple['Close'].pct_change()

In [77]:
apple = apple.dropna()

In [78]:
apple = apple.drop(columns = ["Open", 'Adj Close'])

# Création de la variable cible

In [79]:
apple['Target'] = apple['Return'].shift(-7)
apple = apple.dropna()

In [80]:
fig = go.Figure()

# Ajout de la série financière
fig.add_trace(go.Scatter(x = apple.Date, 
                         y = apple['Return'],
                         mode = 'lines',
                         name ='Return', 
                         fill = "tozeroy"))

# Mise à jour des titres
fig.update_layout(title="Évolution de la valeur de ptf",
                  xaxis_title='Date',
                  yaxis_title='Valeur')

# Affichage du graphique
fig.show()

# Modélisation

In [107]:
window_size = 90  
step_size = 7  
prediction_days = 7

prediction_dates = []
predictions = []
for start in tqdm(range(0, len(apple) - window_size - prediction_days, step_size)):
    end = start + window_size
    train = apple.iloc[start:end]
    test = apple.iloc[end:end + prediction_days]

    # Entraînement du modèle
    model = XGBRegressor()
    model.fit(train.drop(['Date', 'Target', 'Volume'], axis=1), train['Target'])

    # Prédiction
    pred = model.predict(test.drop(['Date', 'Target', 'Volume'], axis=1))
    predictions.append(pred)

    # Récupération du timestamp pour end + 7 jours
    prediction_date = apple.iloc[end + prediction_days - 1]['Date']
    prediction_dates.append(prediction_date)


100%|██████████| 380/380 [00:13<00:00, 28.25it/s]


In [None]:
predictions = [value[6] for value in predictions]

In [116]:
df_predictions = pd.DataFrame({
    'Date': prediction_dates,
    'Prediction': predictions
})

In [117]:
df_predictions

Unnamed: 0,Date,Prediction
0,2013-06-04,-0.003572
1,2013-06-13,0.008898
2,2013-06-24,0.026835
3,2013-07-03,0.018711
4,2013-07-15,-0.020562
...,...,...
375,2023-11-06,-0.006177
376,2023-11-15,0.001616
377,2023-11-27,-0.003009
378,2023-12-06,0.003740


In [121]:
fig = go.Figure()

# Ajout de la série financière
fig.add_trace(go.Scatter(x = df_predictions.Date, 
                         y = df_predictions.Prediction,
                         mode = 'lines',
                         name ='Prédiction', 
                         fill = "tozeroy"))

fig.add_trace(go.Scatter(x = df_predictions.Date, 
                         y = apple['Return'],
                         mode = 'lines',
                         name ='Réel', 
                         fill = "tozeroy"))

# Mise à jour des titres
fig.update_layout(title="Évolution de la valeur de ptf",
                  xaxis_title='Date',
                  yaxis_title='Valeur')

# Affichage du graphique
fig.show()

In [127]:
merged_df = apple.merge(df_predictions, on='Date', how='inner')

In [128]:
merged_df

Unnamed: 0,Date,High,Low,Close,Volume,Volatility_HighLow_10days,Return,Target,Prediction
0,2013-06-04,16.229643,15.978214,16.046785,292728800,0.009536,-0.003128,0.008723,-0.003572
1,2013-06-13,15.612143,15.312500,15.570000,285832400,0.014517,0.008723,-0.026505,0.008898
2,2013-06-24,14.595000,14.216071,14.376429,480746000,0.024815,-0.026505,0.005520,0.026835
3,2013-07-03,15.106429,14.908929,15.028571,240928800,0.023424,0.005520,0.002180,0.018711
4,2013-07-15,15.409286,15.171429,15.265714,241917200,0.012989,0.002180,0.051362,-0.020562
...,...,...,...,...,...,...,...,...,...
375,2023-11-06,179.429993,176.210007,179.229996,63841300,0.019725,0.014605,0.003041,-0.006177
376,2023-11-15,189.500000,187.779999,188.009995,53790500,0.023912,0.003041,-0.000948,0.001616
377,2023-11-27,190.669998,188.899994,189.789993,40552600,0.010557,-0.000948,-0.005687,-0.003009
378,2023-12-06,194.759995,192.110001,192.320007,41089700,0.008752,-0.005687,-0.002726,0.003740


In [133]:
np.sqrt(mean_squared_error(merged_df["Return"], merged_df["Prediction"]))

0.020495679519965757

In [137]:
def stock_forecast(stock):
    stock['Date'] = pd.to_datetime(stock['Date'])
    stock['Volatility_HighLow_10days'] = stock['High'].rolling(window=10).std() / stock['Low'].rolling(window=10).mean()
    stock['Return'] = stock['Close'].pct_change()
    stock = stock.dropna()
    stock = stock.drop(columns=["Open", 'Adj Close'])
    
    stock['Target'] = stock['Return'].shift(-7)
    stock = stock.dropna()
    
    window_size = 90  
    step_size = 7  
    prediction_days = 7
    
    prediction_dates = []
    predictions = []
    for start in tqdm(range(0, len(apple) - window_size - prediction_days, step_size)):
        end = start + window_size
        train = apple.iloc[start:end]
        test = apple.iloc[end:end + prediction_days]
    
        model = XGBRegressor()
        model.fit(train.drop(['Date', 'Target', 'Volume'], axis=1), train['Target'])
    
        pred = model.predict(test.drop(['Date', 'Target', 'Volume'], axis=1))
        predictions.append(pred)
    
        prediction_date = apple.iloc[end + prediction_days - 1]['Date']
        prediction_dates.append(prediction_date)

    predictions = [value[6] for value in predictions]


    df_predictions = pd.DataFrame({
        'Date': prediction_dates,
        'Prediction': predictions
    })
    
    merged_df = stock.merge(df_predictions, on='Date', how='inner')
    
    merged_df = merged_df.drop(columns = ["Target"])
    
    return(merged_df)