In [9]:
import pandas as pd
import plotly.graph_objs as go 
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from tqdm import tqdm 
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import GridSearchCV

# Import des données

In [10]:
# Directory (relative to the notebook) that holds one price CSV per ticker.
DATA_DIR = "../data"


def _load_prices(ticker):
    """Read ``<DATA_DIR>/<ticker>.csv`` into a DataFrame.

    Parameters
    ----------
    ticker : str
        File stem of the CSV to read, e.g. ``"AAPL"``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as returned by ``pd.read_csv``.
    """
    return pd.read_csv(f"{DATA_DIR}/{ticker}.csv")


# One frame per ticker; the variable names are reused by the cells below.
apple = _load_prices("AAPL")
amazon = _load_prices("AMZN")
berkshire = _load_prices("BRK-B")
google = _load_prices("GOOG")
meta = _load_prices("META")
microsoft = _load_prices("MSFT")
nvidia = _load_prices("NVDA")
tesla = _load_prices("TSLA")
unitedhealth = _load_prices("UNH")
exxon = _load_prices("XOM")

In [11]:
# Display name -> price frame, in the order the tickers are processed later.
titres = dict(
    Apple=apple,
    Amazon=amazon,
    Berkshire=berkshire,
    Google=google,
    Meta=meta,
    Microsoft=microsoft,
    Nvidia=nvidia,
    Tesla=tesla,
    UnitedHealth=unitedhealth,
    Exxon=exxon,
)

In [12]:
# Show which columns the Apple frame contains.
apple.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

## Test de feature engineering (FE)

# Calcul des prédictions

In [13]:

def _engineer_features(stock, target_horizon=10):
    """Build the feature columns and the ``Target`` column on a copy of ``stock``.

    The caller's frame is never modified. Rows containing NaN or +/-inf after
    the transformations are dropped, as are the last ``target_horizon`` rows,
    which have no target.
    """
    features = stock.copy()
    features['Date'] = pd.to_datetime(features['Date'])

    # These four columns are replaced by their row-to-row percentage change.
    features['Adj Close'] = features['Adj Close'].pct_change()
    features['High'] = features['High'].pct_change()
    features['Low'] = features['Low'].pct_change()
    features['Volume'] = features['Volume'].pct_change()

    # NOTE(review): 'Adj Close' is already a percentage change at this point
    # while 'Open' is still a price level -- confirm this ratio is intended.
    features['dailyChange'] = (features['Adj Close'] - features['Open']) / features['Open']
    features['VolMA10'] = features['Volume'].rolling(window=10).mean()
    # NOTE(review): shift(-1) reads the NEXT row, and this column is kept as a
    # model input, so every row carries information from one step ahead.
    features['priceDirection'] = (features['Adj Close'].shift(-1) - features['Adj Close'])
    features['Std_dev'] = features['Adj Close'].rolling(10).std()
    # NOTE(review): max()/min() are taken over the whole series (not a rolling
    # look-back), so each row depends on values from later rows too.
    features['Williams%R'] = (
        (features['High'].max() - features['Adj Close'])
        / (features['High'].max() - features['Low'].min())
        * -100
    )
    features['Volatility_HighLow_10days'] = (
        features['High'].rolling(window=10).std() / features['Low'].rolling(window=10).mean()
    )
    features['Return'] = features['Close'].pct_change()

    features = features.replace([np.inf, -np.inf], np.nan).dropna()
    # Target = the 'Return' observed `target_horizon` rows later.
    features = features.assign(Target=features['Return'].shift(-target_horizon))
    return features.dropna()


def stock_forecast(stock, window_size=90, prediction_days=7, target_horizon=10):
    """Fit rolling-window XGBoost models on the first 80 % of a price history.

    For every window of ``window_size`` consecutive training rows a fresh
    ``XGBRegressor`` is fitted, then asked to predict the row located
    ``prediction_days`` rows after the start of the out-of-window data. That
    single prediction is stored together with the date of the predicted row.

    Parameters
    ----------
    stock : pandas.DataFrame
        Price history with the columns 'Date', 'Open', 'High', 'Low', 'Close',
        'Adj Close' and 'Volume'. It is copied, not modified.
    window_size : int, default 90
        Number of rows used to fit each model.
    prediction_days : int, default 7
        Offset (in rows, 1-based) of the predicted row after the window.
    target_horizon : int, default 10
        How many rows ahead the 'Return' used as target lies.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``merged_train_df``: the training rows that received a prediction, with
        a 'Prediction' column added and 'Target' removed.
        ``test_stock``: the last 20 % of the engineered rows, never seen by any
        model.

    Raises
    ------
    ValueError
        If ``window_size`` or ``prediction_days`` is smaller than 1.
    """
    if window_size < 1 or prediction_days < 1:
        raise ValueError("window_size and prediction_days must both be >= 1")

    prepared = _engineer_features(stock, target_horizon)

    # Chronological 80/20 split (rows keep their original order).
    train_size = int(len(prepared) * 0.8)
    train_stock = prepared.iloc[:train_size]
    test_stock = prepared.iloc[train_size:]

    features_to_drop = ['Date', 'Target', 'Volume']
    prediction_dates = []
    predictions = []
    for start in tqdm(range(0, len(train_stock) - window_size - prediction_days)):
        end = start + window_size
        train = train_stock.iloc[start:end]
        # Only the last row of the horizon is kept, so only that row is scored.
        # NOTE(review): the target is the return `target_horizon` rows ahead,
        # yet the prediction is stamped with this row's own date -- confirm the
        # alignment before comparing 'Prediction' with 'Return'.
        horizon_row = train_stock.iloc[[end + prediction_days - 1]]

        model = XGBRegressor(n_estimators=400, max_depth=30, learning_rate=0.01, verbosity=0)
        model.fit(train.drop(features_to_drop, axis=1), train['Target'])

        pred = model.predict(horizon_row.drop(features_to_drop, axis=1))
        predictions.append(pred[0])
        prediction_dates.append(horizon_row['Date'].iloc[0])

    df_predictions = pd.DataFrame({'Date': prediction_dates, 'Prediction': predictions})
    merged_train_df = train_stock.merge(df_predictions, on='Date', how='inner').drop(columns=["Target"])

    return merged_train_df, test_stock

In [14]:
# Empty container for the per-ticker forecast frames.
predictions_dic = dict()

In [15]:
# Run the rolling-window forecast for every ticker. The dict is filled one
# ticker at a time so finished results survive an interrupted run.
predictions_dic = {}
for key, prices in titres.items():
    # Hand over a copy so the raw frames in `titres` stay untouched whatever
    # stock_forecast does to its argument; this keeps the cell safe to re-run.
    # The held-out part is not used here; a named throwaway avoids rebinding
    # `_`, which IPython uses for the last cell output.
    forecast, _held_out = stock_forecast(prices.copy())
    predictions_dic[key] = forecast


  0%|          | 0/2100 [00:00<?, ?it/s]

100%|██████████| 2100/2100 [1:57:48<00:00,  3.37s/it]      
100%|██████████| 2100/2100 [27:55<00:00,  1.25it/s]
100%|██████████| 2100/2100 [10:38<00:00,  3.29it/s]
100%|██████████| 2100/2100 [10:58<00:00,  3.19it/s]
100%|██████████| 2100/2100 [11:40<00:00,  3.00it/s]
 82%|████████▏ | 1726/2100 [18:51:25<11:39,  1.87s/it]      

In [None]:
# Keep only the columns needed for the portfolio view. `assign` works on a new
# frame, so the cached per-ticker frames in `predictions_dic` are not modified.
prediction_columns = ['Date', 'Prediction', "Return"]
dfs_to_concat = [
    df[prediction_columns].assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    for df in predictions_dic.values()
]

# Stack all tickers, then average 'Prediction' and 'Return' date by date.
portfolio_predictions = pd.concat(dfs_to_concat)
portfolio_avg_predictions = portfolio_predictions.groupby('Date').mean().reset_index()

portfolio_avg_predictions.head()

In [None]:
# Compound the averaged returns from a starting value of 1000: once with the
# 'Return' column, once with the model's 'Prediction' column.
for value_column, return_column in (
    ('Real_Portfolio_Value', 'Return'),
    ('Predicted_Portfolio_Value', 'Prediction'),
):
    growth_factors = 1 + portfolio_avg_predictions[return_column]
    portfolio_avg_predictions[value_column] = 1000 * growth_factors.cumprod()

In [None]:
fig = go.Figure()

# One filled line per portfolio trajectory: predicted first, then 'Return'-based.
for value_column, trace_name in (
    ("Predicted_Portfolio_Value", "Return_predict"),
    ("Real_Portfolio_Value", "Return_reel"),
):
    trace = go.Scatter(
        x=portfolio_avg_predictions["Date"],
        y=portfolio_avg_predictions[value_column],
        mode="lines",
        name=trace_name,
        fill="tozeroy",
    )
    fig.add_trace(trace)

# Figure title and axis labels.
fig.update_layout(
    title="Évolution de la valeur de ptf",
    xaxis_title="Date",
    yaxis_title="Valeur",
)

# Render the chart.
fig.show()