In [4]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import pmdarima as pm

# Function to download data for a list of tickers
def download_and_adjust_data(tickers, start_date, end_date):
    dataframes = {}
    for ticker in tickers:
        data = yf.download(ticker, start=start_date, end=end_date)
        data["Adjustment Multiplier"] = data["Adj Close"] / data["Close"]
        data["Adj Open"] = data["Open"] * data["Adjustment Multiplier"]
        data["returns"] = ((data["Adj Open"] - data["Adj Close"].shift(1)) / data["Adj Close"].shift(1)).fillna(0)
        dataframes[ticker] = data
    return dataframes

# Function to merge and prepare returns
def prepare_merged_data(dataframes):
    Merged = pd.concat([dataframes[ticker].add_prefix(f"{ticker}_") for ticker in dataframes], axis=1).dropna()
    returns_columns = [f"{ticker}_returns" for ticker in dataframes]

    if "SPY" in dataframes:
        returns_columns.append("SP500_returns")
        Merged["SP500_returns"] = Merged["SPY_returns"].shift(-1).dropna()

    Merged_Returns = Merged[returns_columns].copy().dropna()
    Merged_Returns = round(Merged_Returns * 100, 2)
    return Merged_Returns.tail(30)

# Function to train the ARIMAX model
def train_arimax_model(X, Y):
    model = pm.arima.auto_arima(y=Y, X=X)
    return model

# Function to make a prediction
def make_prediction(model, tickers, production_start_date, production_end_date):
    production_dataframes = download_and_adjust_data(tickers, production_start_date, production_end_date)
    
    Production_Merged = pd.concat(
        [production_dataframes[ticker].add_prefix(f"{ticker}_") for ticker in production_dataframes],
        axis=1
    ).dropna()

    # Ensure all necessary returns columns are included
    returns_columns = [f"{ticker}_returns" for ticker in production_dataframes]
    Production_Merged_Returns = Production_Merged[returns_columns].copy().dropna()
    Production_Shifted_Merged_Returns = round(Production_Merged_Returns.tail(1) * 100, 2)
    SP500_Production_Dataset = Production_Shifted_Merged_Returns.copy()
    print(f"{SP500_Production_Dataset}")
    # Ensure the shape matches the model's expected exogenous input shape
    prediction = model.predict(X=SP500_Production_Dataset, n_periods=1)
    next_day = (SP500_Production_Dataset.index[0] + timedelta(days=1)).strftime("%Y-%m-%d")
    print(f"The model predicts the S&P 500 to return {prediction[0]:.2f}% on the open of {next_day}")


In [5]:
start_date = "2024-09-01"
end_date = (datetime.today() + timedelta(days=1)).strftime("%Y-%m-%d")
tickers = ["SPY", "XLC", "XLY", "XLP", "XLE", "XLF", "XLV", "XLI", "XLB", "XLRE", "XLK", "XLU", "^VIX"]

# Step 1: Download and process the data
dataframes = download_and_adjust_data(tickers, start_date, end_date)

# Step 2: Prepare merged returns
One_Month_Merged_Returns = prepare_merged_data(dataframes)

# Step 3: Train the ARIMAX model
X = One_Month_Merged_Returns.drop(["SPY_returns", "SP500_returns"], axis=1)
Y = One_Month_Merged_Returns["SP500_returns"]

print(X.columns)
print(Y)
model = train_arimax_model(X, Y)

# Step 4: Make a production prediction
production_start_date = (datetime.today() - timedelta(days=4)).strftime("%Y-%m-%d")
production_end_date = (datetime.today() + timedelta(days=1)).strftime("%Y-%m-%d")
make_prediction(model, tickers[1:], production_start_date, production_end_date)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Index(['XLC_returns', 'XLY_returns', 'XLP_returns', 'XLE_returns',
       'XLF_returns', 'XLV_returns', 'XLI_returns', 'XLB_returns',
       'XLRE_returns', 'XLK_returns', 'XLU_returns', '^VIX_returns'],
      dtype='object')
Date
2024-10-04   -0.29
2024-10-07    0.46
2024-10-08   -0.00
2024-10-09   -0.24
2024-10-10   -0.01
2024-10-11    0.28
2024-10-14    0.05
2024-10-15    0.00
2024-10-16    0.62
2024-10-17    0.30
2024-10-18   -0.13
2024-10-21   -0.44
2024-10-22   -0.35
2024-10-23    0.34
2024-10-24    0.39
2024-10-25    0.61
2024-10-28   -0.17
2024-10-29   -0.08
2024-10-30   -0.77
2024-10-31    0.47
2024-11-01    0.02
2024-11-04    0.16
2024-11-05    2.17
2024-11-06    0.35
2024-11-07    0.09
2024-11-08    0.27
2024-11-11   -0.01
2024-11-12    0.08
2024-11-13    0.02
2024-11-14   -0.61
Name: SP500_returns, dtype: float64


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

            XLC_returns  XLY_returns  XLP_returns  XLE_returns  XLF_returns  \
Date                                                                          
2024-11-15         -0.5        -0.56        -0.09        -0.09          0.0   

            XLV_returns  XLI_returns  XLB_returns  XLRE_returns  XLK_returns  \
Date                                                                           
2024-11-15        -0.55        -0.35        -0.04         -0.47        -1.24   

            XLU_returns  ^VIX_returns  
Date                                   
2024-11-15         0.08          4.96  
The model predicts the S&P 500 to return -0.23% on the open of 2024-11-16



  print(f"The model predicts the S&P 500 to return {prediction[0]:.2f}% on the open of {next_day}")
