In [1]:
import pandas as pd

from sklearn.tree import DecisionTreeRegressor
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing

import os
import warnings

# Blanket filter: silences every warning raised from here on, including any
# diagnostics emitted while the models below are being fitted.
warnings.filterwarnings("ignore")

# Location of the input workbook. Set the STOCK_DATA_PATH environment variable
# to run the notebook on another machine; otherwise the original path is used.
DATA_PATH = os.environ.get(
    "STOCK_DATA_PATH",
    "C://Users//sar81//Desktop//sample_stock_data.xlsx",
)

data = pd.read_excel(DATA_PATH)

def forecast_model(data, target_variable, exogenous_variables, forecast_steps, *,
                   order=(1, 1, 1), seasonal_order=(1, 1, 1, 12), seasonal_periods=12):
    """Forecast a target column with SARIMAX, driven by extrapolated exogenous columns.

    The SARIMAX model is fitted on ``data[target_variable]`` with the exogenous
    columns as regressors. Each exogenous column is then extrapolated
    ``forecast_steps`` ahead with additive-trend / additive-seasonal Exponential
    Smoothing, and those extrapolations are fed to the SARIMAX forecast.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the target and exogenous columns.
    target_variable : hashable
        Name of the column to forecast.
    exogenous_variables : str or iterable
        Name(s) of the regressor columns. A single name is accepted as a string.
    forecast_steps : int
        Number of steps to forecast; must be at least 1.
    order : tuple, keyword-only
        Non-seasonal ``order`` passed to SARIMAX. Defaults to ``(1, 1, 1)``.
    seasonal_order : tuple, keyword-only
        ``seasonal_order`` passed to SARIMAX. Defaults to ``(1, 1, 1, 12)``.
    seasonal_periods : int, keyword-only
        ``seasonal_periods`` passed to Exponential Smoothing. Defaults to ``12``.

    Returns
    -------
    tuple
        ``(forecast, exogenous_forecast)``: the SARIMAX forecast of the target and
        the DataFrame of extrapolated exogenous columns (one column per regressor,
        in the order given).

    Raises
    ------
    ValueError
        If ``forecast_steps`` is smaller than 1.
    KeyError
        If the target or any exogenous column is not present in ``data``.
    """
    # Accept a lone column name as well as any iterable of names.
    if isinstance(exogenous_variables, str):
        exogenous_variables = [exogenous_variables]
    else:
        exogenous_variables = list(exogenous_variables)

    if forecast_steps < 1:
        raise ValueError(f"forecast_steps must be at least 1, got {forecast_steps!r}")

    # Fail early, before any (potentially slow) model fitting.
    missing = [name for name in [target_variable, *exogenous_variables]
               if name not in data.columns]
    if missing:
        raise KeyError(f"Columns not found in data: {missing}")

    target = data[target_variable]
    exogenous = data[exogenous_variables]

    # Fit the SARIMAX model.
    # NOTE(review): the run recorded in this notebook printed forecasts around
    # 1e35, which looks like a fit that did not converge; regressors on very
    # different scales are one plausible cause -- confirm before trusting output.
    model = SARIMAX(target, exog=exogenous, order=order, seasonal_order=seasonal_order)
    model_fit = model.fit(disp=False)

    # Extrapolate every exogenous column with Exponential Smoothing and assemble
    # the results in a single construction (column order follows `exogenous`).
    exogenous_forecast = pd.DataFrame({
        column: ExponentialSmoothing(
            exogenous[column],
            trend='add',
            seasonal='add',
            seasonal_periods=seasonal_periods,
        ).fit().forecast(steps=forecast_steps)
        for column in exogenous.columns
    })

    # Forecast the target using the extrapolated exogenous values as regressors.
    forecast = model_fit.forecast(steps=forecast_steps, exog=exogenous_forecast)

    return forecast, exogenous_forecast

def explain_forecast_with_decision_tree(exogenous_forecast, forecast_df):
    """Print a decision-tree feature importance for every exogenous column.

    A ``DecisionTreeRegressor`` (``random_state=42``) is fitted with
    ``exogenous_forecast`` as the feature matrix and ``forecast_df`` as the
    target. One ``Feature: ..., Importance: ...`` line is printed per column of
    ``exogenous_forecast``. Nothing is returned.
    """
    surrogate = DecisionTreeRegressor(random_state=42)
    surrogate.fit(exogenous_forecast, forecast_df)

    # Pair each feature column with the importance the fitted tree assigned to it.
    for name, score in zip(exogenous_forecast.columns, surrogate.feature_importances_):
        print(f"Feature: {name}, Importance: {score}")

# Column to forecast, and the columns supplied to the model as exogenous regressors
target = 'Close'
exogenous = ['Open', 'High', 'Low', 'Volume']

# Produce a 30-step forecast of the target together with the extrapolated regressors
forecast, exogenous_forecast = forecast_model(
    data,
    target_variable=target,
    exogenous_variables=exogenous,
    forecast_steps=30,
)

# Show the forecasted target values
print(forecast)

# Report which extrapolated regressors the decision tree ties most closely to the forecast
explain_forecast_with_decision_tree(exogenous_forecast, forecast)


2201    6.313257e+35
2202    6.516299e+35
2203    6.719270e+35
2204    6.922295e+35
2205    7.125315e+35
2206    7.328207e+35
2207    7.531295e+35
2208    7.734223e+35
2209    7.937112e+35
2210    8.140277e+35
2211    8.343307e+35
2212    8.546259e+35
2213    8.749284e+35
2214    8.952326e+35
2215    9.155297e+35
2216    9.358323e+35
2217    9.561342e+35
2218    9.764234e+35
2219    9.967322e+35
2220    1.017025e+36
2221    1.037314e+36
2222    1.057630e+36
2223    1.077933e+36
2224    1.098229e+36
2225    1.118531e+36
2226    1.138835e+36
2227    1.159132e+36
2228    1.179435e+36
2229    1.199737e+36
2230    1.220026e+36
Name: predicted_mean, dtype: float64
Feature: Open, Importance: 0.8571735269628639
Feature: High, Importance: 0.13771019314673108
Feature: Low, Importance: 0.0024468978537955737
Feature: Volume, Importance: 0.0026693820366095345


In [2]:
# Scenario planning: a post-hoc linear regression approximates how the forecasted
# target responds to the forecasted exogenous variables.
from sklearn.linear_model import LinearRegression

# The regression is fitted on the forecast horizon (extrapolated regressors vs.
# forecasted target), not on the historical data.
linear_model = LinearRegression().fit(exogenous_forecast, forecast)

# Scenario: 'Open' is 10% higher across the whole horizon; every other column is unchanged.
exogenous_scenario_linear = exogenous_forecast.assign(
    Open=lambda frame: frame['Open'] * 1.10
)
scenario_prediction_linear = linear_model.predict(exogenous_scenario_linear)

print("Scenario Prediction with 10% increase in Open using Linear Regression:")
print(scenario_prediction_linear)


Scenario Prediction with 10% increase in Open using Linear Regression:
[6.51565336e+35 6.72397651e+35 6.93222576e+35 7.14052970e+35
 7.34882728e+35 7.55699811e+35 7.76536320e+35 7.97356959e+35
 8.18173486e+35 8.39018190e+35 8.59849446e+35 8.80672219e+35
 9.01502658e+35 9.22334972e+35 9.43159897e+35 9.63990291e+35
 9.84820050e+35 1.00563713e+36 1.02647364e+36 1.04729428e+36
 1.06811081e+36 1.08895551e+36 1.10978677e+36 1.13060954e+36
 1.15143998e+36 1.17227229e+36 1.19309722e+36 1.21392761e+36
 1.23475737e+36 1.25557445e+36]
