In [33]:
import pandas as pd
import plotly.graph_objs as go 
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from tqdm import tqdm 
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import GridSearchCV

# Import des données

In [34]:
apple = pd.read_csv("../data/AAPL.csv")
amazon = pd.read_csv("../data/AMZN.csv")
berkshire = pd.read_csv("../data/BRK-B.csv")
google = pd.read_csv("../data/GOOG.csv")
meta = pd.read_csv("../data/META.csv")
microsoft = pd.read_csv("../data/MSFT.csv")
nvidia = pd.read_csv("../data/NVDA.csv")
tesla = pd.read_csv("../data/TSLA.csv")
unitedhealth = pd.read_csv("../data/UNH.csv")
exxon = pd.read_csv("../data/XOM.csv")

In [35]:
titres = {"Apple" : apple, 
          "Amazon" : amazon,
          "Berkshire" : berkshire,
          "Google" : google,
          "Meta" : meta,
          "Microsoft" : microsoft,
          "Nvidia" : nvidia,
          "Tesla" : tesla,
          "UnitedHealth" : unitedhealth,
          "Exxon" : exxon}

In [36]:
apple.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

## test de FE

# Calcul des prédictions

In [37]:
def stock_forecast(stock):

    stock['Date'] = pd.to_datetime(stock['Date'])
    stock['Adj Close'] = stock['Adj Close'].pct_change()
    stock['High'] = stock['High'].pct_change()
    stock['Low'] = stock['Low'].pct_change()
    stock['Volume'] = stock['Volume'].pct_change()
    stock['dailyChange'] = (stock['Adj Close'] - stock['Open']) / stock['Open']

    # stock['Volatility10'] = stock['Adj Close'].rolling(window=10).std()
    stock['VolMA10'] = stock['Volume'].rolling(window=10).mean()

    stock['priceDirection'] = (stock['Adj Close'].shift(-1) - stock['Adj Close'])
    # rsi_period = 10
    # chg = stock['Adj Close'].diff(1)
    # gain = chg.mask(chg<0,0)
    # loss = chg.mask(chg>0,0)
    # avg_gain = gain.ewm(com = rsi_period - 1, min_periods = rsi_period).mean()
    # avg_loss = loss.ewm(com = rsi_period - 1, min_periods = rsi_period).mean()
    # rs = abs(avg_gain/avg_loss)
    # stock['rsi'] = 100-(100/(1+ rs))
    stock['Std_dev']=  stock['Adj Close'].rolling(10).std()

    stock['Williams%R'] = (stock['High'].max() - stock['Adj Close'])/(stock['High'] - stock['Low'].min()) * -100
    stock = stock.replace([np.inf, -np.inf], np.nan)
    # stock.dropna(inplace=True)

    stock.drop(columns =['Open','priceDirection' ], axis=1, inplace=True)
    stock['Volatility_HighLow_10days'] = stock['High'].rolling(window=10).std() / stock['Low'].rolling(window=10).mean()

    stock['Return'] = stock['Close'].pct_change()
    stock = stock.dropna()
    stock = stock.drop(columns=['Adj Close'])

    stock['Target'] = stock['Return'].shift(-10)
    stock = stock.dropna()
    
    window_size = 90
    step_size = 7  
    prediction_days = 7
    
    prediction_dates = []
    predictions = []
  
    features_to_drop = ['Date', 'Target', 'Volume']
    for start in tqdm(range(0, len(stock) - window_size - prediction_days, step_size)):
        end = start + window_size
        train = stock.iloc[start:end]
        test = stock.iloc[end:end + prediction_days]
    
        model = XGBRegressor(n_estimators=400, max_depth=30, learning_rate=0.01, verbosity=0)

        model.fit(train.drop(features_to_drop, axis=1), train['Target'])

    
        pred = model.predict(test.drop(['Date', 'Target', 'Volume'], axis=1))
        predictions.append(pred)
    
        prediction_date = stock.iloc[end + prediction_days - 1]['Date']
        prediction_dates.append(prediction_date)

    predictions = [value[6] for value in predictions]


    df_predictions = pd.DataFrame({
        'Date': prediction_dates,
        'Prediction': predictions
    })
    
    merged_df = stock.merge(df_predictions, on='Date', how='inner')
    
    merged_df = merged_df.drop(columns = ["Target"])
    
    return merged_df

In [38]:
predictions_dic = {}

In [39]:
for key in titres :
    predictions_dic[key] = stock_forecast(titres[key])

  0%|          | 0/379 [00:00<?, ?it/s]

100%|██████████| 379/379 [09:04<00:00,  1.44s/it]
100%|██████████| 379/379 [24:23<00:00,  3.86s/it]   
100%|██████████| 379/379 [12:11<00:00,  1.93s/it] 
100%|██████████| 379/379 [08:19<00:00,  1.32s/it]
100%|██████████| 379/379 [09:00<00:00,  1.43s/it]
100%|██████████| 379/379 [10:46<00:00,  1.71s/it]
100%|██████████| 379/379 [08:50<00:00,  1.40s/it]
100%|██████████| 379/379 [07:29<00:00,  1.19s/it]
100%|██████████| 379/379 [06:13<00:00,  1.01it/s]
100%|██████████| 379/379 [5:24:15<00:00, 51.33s/it]     


In [40]:
dfs_to_concat = []

# Boucle sur chaque DataFrame dans le dictionnaire
for name, df in predictions_dic.items():
    # Assurez-vous que la colonne 'Date' est de type datetime
    df['Date'] = pd.to_datetime(df['Date'])
    print(df.columns)
    # Ajout du DataFrame à la liste
    dfs_to_concat.append(df[['Date', 'Prediction', "Return"]])

# Concaténation des DataFrames
portfolio_predictions = pd.concat(dfs_to_concat)

# Calcul de la prédiction moyenne pour chaque date
portfolio_avg_predictions = portfolio_predictions.groupby('Date').mean().reset_index()

# Affichage des premières lignes pour vérifier
print(portfolio_avg_predictions.head())

Index(['Date', 'High', 'Low', 'Close', 'Volume', 'dailyChange', 'VolMA10',
       'rsi', 'Std_dev', 'Williams%R', 'Volatility_HighLow_10days', 'Return',
       'Prediction'],
      dtype='object')
Index(['Date', 'High', 'Low', 'Close', 'Volume', 'dailyChange', 'VolMA10',
       'rsi', 'Std_dev', 'Williams%R', 'Volatility_HighLow_10days', 'Return',
       'Prediction'],
      dtype='object')
Index(['Date', 'High', 'Low', 'Close', 'Volume', 'dailyChange', 'VolMA10',
       'rsi', 'Std_dev', 'Williams%R', 'Volatility_HighLow_10days', 'Return',
       'Prediction'],
      dtype='object')
Index(['Date', 'High', 'Low', 'Close', 'Volume', 'dailyChange', 'VolMA10',
       'rsi', 'Std_dev', 'Williams%R', 'Volatility_HighLow_10days', 'Return',
       'Prediction'],
      dtype='object')
Index(['Date', 'High', 'Low', 'Close', 'Volume', 'dailyChange', 'VolMA10',
       'rsi', 'Std_dev', 'Williams%R', 'Volatility_HighLow_10days', 'Return',
       'Prediction'],
      dtype='object')
Index(['Date', 

In [41]:
portfolio_avg_predictions['Real_Portfolio_Value'] = 1000 * (1 + portfolio_avg_predictions['Return']).cumprod()
portfolio_avg_predictions['Predicted_Portfolio_Value'] = 1000 * (1 + portfolio_avg_predictions['Prediction']).cumprod()


In [42]:
fig = go.Figure()

# Ajout de la série financière
fig.add_trace(go.Scatter(x = portfolio_avg_predictions.Date, 
                         y = portfolio_avg_predictions['Predicted_Portfolio_Value'],
                         mode = 'lines',
                         name ='Return_predict', 
                         fill = "tozeroy"))

fig.add_trace(go.Scatter(x = portfolio_avg_predictions.Date, 
                         y = portfolio_avg_predictions["Real_Portfolio_Value"],
                         mode = 'lines',
                         name ='Return_reel', 
                         fill = "tozeroy"))
# Mise à jour des titres
fig.update_layout(title="Évolution de la valeur de ptf",
                  xaxis_title='Date',
                  yaxis_title='Valeur')

# Affichage du graphique
fig.show()