In [297]:
import pandas as pd 
import numpy as np
import xgboost as xgb

import plotly.graph_objs as go 
import matplotlib.pyplot as plt

In [298]:
# Load the AAPL daily price history; the file is comma-separated, which is
# already read_csv's default delimiter.
apple = pd.read_csv("../data/AAPL.csv")

In [299]:
# Preview the first rows to sanity-check the loaded columns.
apple.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2013-01-02,19.779285,19.821428,19.343929,19.608213,16.769093,560518000
1,2013-01-03,19.567142,19.631071,19.321428,19.360714,16.557426,352965200
2,2013-01-04,19.1775,19.236786,18.779642,18.821428,16.096228,594333600
3,2013-01-07,18.642857,18.90357,18.4,18.710714,16.001543,484156400
4,2013-01-08,18.900356,18.996071,18.616072,18.76107,16.044611,458707200


In [301]:
# Parse the Date strings into datetimes; the rolling features computed later
# use Date as the index with day-based ("...D") windows.
apple["Date"] = pd.to_datetime(apple["Date"])

# XGBOOST

In [303]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error
import plotly.express as px

def create_rolling_features(data, window_size=2):
    """Return a Date-indexed copy of ``data`` with rolling Close statistics.

    The input frame is left untouched, so the calling cell can be re-run
    safely (the previous in-place version consumed the ``Date`` column and
    raised ``KeyError`` on a second run).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Close`` column and datetime values either in a
        ``Date`` column or in an index named ``Date``.
    window_size : int, default 2
        Window length in units of 30 days (``2`` -> a ``'60D'`` window).

    Returns
    -------
    pandas.DataFrame
        New frame sorted chronologically, indexed by ``Date``, with two extra
        columns: ``Rolling_Close_Mean`` and ``Rolling_Close_Std``.

    Raises
    ------
    KeyError
        If ``Date`` is neither a column nor the index name.
    """
    if 'Date' in data.columns:
        indexed = data.sort_values('Date').set_index('Date')
    elif data.index.name == 'Date':
        # Already indexed by Date (e.g. the output of a previous call).
        indexed = data.sort_index()
    else:
        raise KeyError("'Date' must be a column or the index name")

    # Time-based window: every row aggregates the Close values of the trailing
    # `window_size * 30` days, current row included.
    rolling_close = indexed['Close'].rolling(f'{window_size * 30}D')

    return indexed.assign(
        Rolling_Close_Mean=rolling_close.mean(),
        Rolling_Close_Std=rolling_close.std(),
    )


# Date-indexed, chronologically sorted frame with the rolling Close statistics.
apple = create_rolling_features(apple)

# "Unnamed: 0" is the row counter that was written into the CSV as an index
# column; it is a file artifact, not a market signal, so it is excluded from
# the features (errors="ignore" keeps the cell working if it is absent).
# NOTE(review): the remaining features (Open/High/Low/Adj Close and the rolling
# statistics) are taken from the same day as the target Close — confirm this
# is intended, or lag the features to avoid look-ahead leakage.
X = apple.drop(columns=["Close", "Unnamed: 0"], errors="ignore")
y = apple["Close"]

# Time-ordered cross-validation: each fold trains on earlier rows and tests on
# the rows that follow them.
tscv = TimeSeriesSplit()

# Out-of-fold predictions and their targets, accumulated in fold order.
predictions_list = []
actual_values_list = []

for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # A fresh model per fold so no fitted state carries over between folds.
    model = XGBRegressor(n_estimators=500, max_depth=30, learning_rate=0.1)
    model.fit(X_train, y_train)

    predictions_list.extend(model.predict(X_test))
    actual_values_list.extend(y_test)

# Single MAE over all out-of-fold predictions.
mae_final = mean_absolute_error(actual_values_list, predictions_list)
print(f'MAE Finale: {mae_final}')

# Pair every out-of-fold prediction with its date. This relies on the test
# folds produced by TimeSeriesSplit being consecutive and ending at the last
# row, so together they cover exactly the final len(predictions_list) dates.
results_df = pd.DataFrame({
    'Date': apple.index[-len(predictions_list):],
    'Actual Values': actual_values_list,
    'Predictions': predictions_list
})

# Wide-form line chart: one trace per column, named after the column.
fig = px.line(results_df, x='Date', y=['Actual Values', 'Predictions'],
              labels={'value': 'Valeur', 'variable': 'Type'},
              title='Prédictions vs Valeurs Réelles')

# The selector must match the trace name exactly (case-sensitive); the former
# 'Actual values' matched no trace, so the blue colour was silently skipped.
fig.update_traces(line=dict(color='blue'), selector=dict(name='Actual Values'))
fig.update_traces(line=dict(color='red'), selector=dict(name='Predictions'))

fig.show()


MAE Finale: 13.859924334189008
