In [None]:
# We import the relevant modules
import pandas as pd
import sklearn
import xgboost
import yfinance as yf
from xgboost import XGBRegressor

# We define the 'Dataset' function, which will output a DataFrame and the list of features which will be used to train the model
def Dataset(ticker, start, end, days_list):
  """Download daily prices and build a lagged-feature table for next-day forecasting.

  Parameters
  ----------
  ticker : passed unchanged to ``yf.download``.
  start, end : date bounds, passed unchanged to ``yf.download``.
  days_list : iterable of int
      Lags, in rows. For every ``day`` a column ``'adjclose_<day>d'`` is added
      holding ``adjclose`` shifted by ``day`` rows. May be empty, in which case
      ``'adjclose'`` is the only feature.

  Returns
  -------
  (df, features)
      ``df`` is an independent DataFrame containing the downloaded columns
      (with ``'Close'`` renamed to ``'adjclose'``), the lag columns and a
      ``'target'`` column equal to the next row's ``adjclose``. Rows whose
      target or longest-lag feature is missing are removed.
      ``features`` is ``['adjclose', 'adjclose_<d1>d', ...]`` in ``days_list`` order.
  """
  # We materialise the lags once, so any iterable (including a generator) is accepted
  lags = list(days_list)

  df = yf.download(ticker, start=start, end=end)

  # We expose the downloaded 'Close' column as 'adjclose'
  # NOTE(review): whether 'Close' is already dividend/split adjusted depends on the
  # yfinance version and its auto_adjust setting - confirm for the installed version.
  df = df.rename(columns={'Close': 'adjclose'})

  # We initialize the 'features' list, which will contain 'adjclose' and the lagged columns
  features = ['adjclose']

  for day in lags:
    column = 'adjclose_' + str(day) + 'd'
    # Each lag column holds the 'adjclose' observed 'day' rows earlier
    df[column] = df.adjclose.shift(day)
    features.append(column)

  # We compute the label on the full, unfiltered series, so that 'target' is always
  # the value of the very next row, whatever rows are dropped afterwards
  df['target'] = df.adjclose.shift(-1)

  # We keep the rows that have a label and, when lags were requested, a value for the longest lag
  keep = df['target'].notna()
  if lags:
    keep &= df['adjclose_' + str(max(lags)) + 'd'].notna()

  # We return an independent copy, so that later column assignments by the caller
  # do not operate on a slice of the intermediate frame
  df = df[keep].copy()

  # We output both the dataset and the list of features
  return df, features

# We build the training dataset and its feature list; every argument below can be changed freely
df, features = Dataset(
  ticker='AAPL',
  start='2010-01-01',
  end='2020-01-01',
  days_list=[1, 2, 3, 5, 10],
)

# We define the 'XGBoostTimeSeriesForecastingModel' function, which trains the model, saves it to disk and returns it
def XGBoostTimeSeriesForecastingModel(df, features, label, perc, n_estimators, learning_rate,
                                      model_path='XGBoostTimeSeriesForecastingModel.json',
                                      verbose=True):
  """Train an XGBRegressor on the first part of 'df', evaluate on the rest, save it and return it.

  Parameters
  ----------
  df : DataFrame containing the 'features' columns and the 'label' column.
      The split is positional, so rows are used in the order given (no shuffling).
  features : list of column names used as independent variables.
  label : name of the column used as dependent variable.
  perc : float, strictly between 0 and 1
      Fraction of the rows (taken from the end of 'df') held out as the test subset.
  n_estimators, learning_rate : forwarded to ``XGBRegressor``.
  model_path : file the fitted model is saved to (defaults to the original hard-coded name).
  verbose : forwarded to ``XGBRegressor.fit``; controls the per-round evaluation log.

  Returns
  -------
  The fitted ``XGBRegressor``. Its per-round metrics on the train and test subsets
  are available through ``model.evals_result()``.

  Raises
  ------
  ValueError
      If 'perc' is not strictly between 0 and 1, or if the resulting train or
      test subset would be empty.
  """
  if not 0 < perc < 1:
    raise ValueError("'perc' must be strictly between 0 and 1, got " + repr(perc))

  # We define the number threshold to split the 'train' and 'test' subsets
  n = int(len(df)*(1-perc))
  if n == 0 or n == len(df):
    raise ValueError("the split leaves an empty train or test subset "
                     "(" + str(len(df)) + " rows, perc=" + repr(perc) + ")")

  # We split the DataFrame by position: the first 'n' rows train, the remaining rows test
  train = df.iloc[:n]
  test = df.iloc[n:]

  # We separate each subset into independent variables (X) and dependent variable (y)
  X_train, y_train = train[features], train[label]
  X_test, y_test = test[features], test[label]

  # We declare the model we will be employing and set its hyperparameters
  model = XGBRegressor(n_estimators=n_estimators, learning_rate=learning_rate)

  # We fit the model; the evaluation set holds the training subset first and the testing subset second
  model.fit(X_train, y_train,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            verbose=verbose)

  # We save the trained model to 'model_path' ('save_model' has no useful return value)
  model.save_model(model_path)

  # We return the fitted model
  return model

# We train the model on the dataset built above; the hyperparameter values can be changed freely
model = XGBoostTimeSeriesForecastingModel(
  df=df,
  features=features,
  label='target',
  perc=0.2,
  n_estimators=2000,
  learning_rate=0.1,
)



[*********************100%***********************]  1 of 1 completed

[0]	validation_0-rmse:7.76324	validation_1-rmse:26.82702
[1]	validation_0-rmse:6.99907	validation_1-rmse:25.20412
[2]	validation_0-rmse:6.31003	validation_1-rmse:23.64470
[3]	validation_0-rmse:5.68964	validation_1-rmse:22.24674
[4]	validation_0-rmse:5.13065	validation_1-rmse:21.05218
[5]	validation_0-rmse:4.62757	validation_1-rmse:20.00398
[6]	validation_0-rmse:4.17375	validation_1-rmse:18.97117
[7]	validation_0-rmse:3.76490	validation_1-rmse:18.07504
[8]	validation_0-rmse:3.39651	validation_1-rmse:17.27370
[9]	validation_0-rmse:3.06472	validation_1-rmse:16.52830
[10]	validation_0-rmse:2.76602	validation_1-rmse:15.86172
[11]	validation_0-rmse:2.49718	validation_1-rmse:15.31093





[12]	validation_0-rmse:2.25476	validation_1-rmse:14.77403
[13]	validation_0-rmse:2.03664	validation_1-rmse:14.30324
[14]	validation_0-rmse:1.84056	validation_1-rmse:13.87375
[15]	validation_0-rmse:1.66395	validation_1-rmse:13.49004
[16]	validation_0-rmse:1.50532	validation_1-rmse:13.14709
[17]	validation_0-rmse:1.36287	validation_1-rmse:12.85765
[18]	validation_0-rmse:1.23482	validation_1-rmse:12.58203
[19]	validation_0-rmse:1.11998	validation_1-rmse:12.32099
[20]	validation_0-rmse:1.01707	validation_1-rmse:12.09947
[21]	validation_0-rmse:0.92492	validation_1-rmse:11.90131
[22]	validation_0-rmse:0.84247	validation_1-rmse:11.71100
[23]	validation_0-rmse:0.76894	validation_1-rmse:11.53305
[24]	validation_0-rmse:0.70323	validation_1-rmse:11.37940
[25]	validation_0-rmse:0.64472	validation_1-rmse:11.23497
[26]	validation_0-rmse:0.59284	validation_1-rmse:11.14236
[27]	validation_0-rmse:0.54697	validation_1-rmse:11.04023
[28]	validation_0-rmse:0.50635	validation_1-rmse:10.96707
[29]	validatio

In [16]:
# We rebuild the dataset for a later period; this rebinds 'df' and 'features' from the training cell
df, features = Dataset(
  ticker='AAPL',
  start='2020-01-01',
  end='2024-01-01',
  days_list=[1, 2, 3, 5, 10],
)
# We predict the next-day value for every row of the new dataset with the trained model
pred = model.predict(df[features])

# We display the predictions
pred

[*********************100%***********************]  1 of 1 completed


array([ 6.36887  ,  6.2635274,  5.9545827, ..., 40.12327  , 40.12327  ,
       40.12327  ], shape=(2505,), dtype=float32)

In [11]:
# We display the per-round evaluation metrics recorded during fit:
# 'validation_0' is the first eval_set entry (training subset), 'validation_1' the second (testing subset)
model.evals_result()

{'validation_0': OrderedDict([('rmse',
               [7.7632448896665265,
                6.999068150170133,
                6.310027848433646,
                5.689636395296905,
                5.130654064773812,
                4.627567881109855,
                4.173754986791033,
                3.7649001357590226,
                3.3965110016155755,
                3.064720533735228,
                2.766015680876418,
                2.497177083126409,
                2.2547567446040375,
                2.0366429637083012,
                1.8405613152768916,
                1.6639511247368546,
                1.5053172516976177,
                1.3628714645650646,
                1.2348210731597136,
                1.1199840932028784,
                1.0170661571092596,
                0.9249185956762184,
                0.8424650570868132,
                0.7689431022806315,
                0.7032343166724193,
                0.644718859496137,
                0.5928366005848426,