## **Finding Best Windowing Mechanism for ARIMA**

In [7]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

warnings.simplefilter('ignore', ConvergenceWarning)
warnings.simplefilter('ignore', UserWarning)

# Read the dataset from disk; only the "Close" column is modelled below
df = pd.read_csv("Microsoft_Stock.csv")
close_series = df["Close"]

########################### Single Split ###########################

# Chronological 80/20 hold-out: the leading 80% of the rows are used for
# fitting, the remaining rows are forecast in a single shot
n_obs = len(close_series)
train_size = round(0.8 * n_obs)
train_data, test_data = close_series[:train_size], close_series[train_size:]

# ARIMA with a fixed (p, d, q) order of (0, 1, 0)
arima = ARIMA(endog=train_data, order=(0,1,0))
arima_trained_model = arima.fit()

# Forecast every held-out row. The training sample starts at row 0, so the
# zero-based positions train_size .. n_obs - 1 are exactly the test rows.
last_position = n_obs - 1
pred_arima = arima_trained_model.predict(start=train_size, end=last_position)

# Mean squared error between the held-out prices and the forecasts
pred_err_arima = mean_squared_error(y_true=test_data, y_pred=pred_arima)

# Report the single-split error
print(f"ARIMA's prediction error (MSE) for 80-20 Single Split is: {pred_err_arima}")

########################### Sliding Window ###########################

# Sliding window strategy parameters. Position k of the three lists describes
# one configuration: training-window length, forecast horizon and the step
# by which the window is moved between fits.
train_size_list = [7, 30, 100, 30, 100]
pred_size_list = [1, 1, 1, 7, 7]
slide_size_list = [1, 1, 1, 7, 7]


# The fine-tuning loop: one pass over the series per configuration
for train_size, pred_size, slide_size in zip(train_size_list, pred_size_list, slide_size_list):
    # Per-window MSE values for the current configuration
    error_list = []

    # i is the row of the first value to forecast; the train_size rows right
    # before it form the training window. The upper bound guarantees that
    # every window has a full pred_size rows to be scored against.
    for i in tqdm(range(train_size, len(close_series) - pred_size + 1, slide_size)):
        # Specifying Train and Test data
        train_data = close_series[(i-train_size):i]
        test_data = close_series[i:(i+pred_size)]

        # Training ARIMA on the train data with fixed order
        arima = ARIMA(endog=train_data, order=(0,1,0))
        arima_trained_model = arima.fit()

        # Forecast the pred_size steps that immediately follow the window.
        # forecast() is anchored at the end of the fitted sample. The previous
        # predict(start=i, ...) call was not: statsmodels reads an integer
        # start as a zero-based position inside the training sample, which
        # only equals the row number in close_series when the window begins
        # at row 0 -- for a sliding window it asked for a forecast
        # (i - train_size) steps too far ahead.
        # NOTE(review): the recorded sliding and expanding results coincide,
        # which suggests order (0,1,0) yields a flat forecast that masked the
        # issue; an order with AR/MA terms would be affected.
        pred_arima = arima_trained_model.forecast(steps=pred_size)

        # Calculating the Error of ARIMA's prediction
        pred_err_arima = mean_squared_error(test_data, pred_arima)
        error_list.append(pred_err_arima)

    # Report the mean of the per-window errors. Adjacent string literals are
    # used instead of a backslash continuation inside the literal, which
    # embedded the next line's indentation in the printed message.
    print(
        f"Average ARIMA's prediction error (MSE) for Sliding Window with size "
        f"{train_size}, Prediction Size {pred_size}, and Slide Size {slide_size}: {np.mean(error_list)}"
    )

########################### Expanding Window ###########################

# Expanding window strategy parameters. Position k of the three lists
# describes one configuration: initial training length, forecast horizon and
# the number of rows added to the training sample between fits.
train_size_list = [30, 100, 100]
pred_size_list = [1, 1, 7]
slide_size_list = [1, 1, 7]


# The fine-tuning loop: one pass over the series per configuration
for train_size, pred_size, slide_size in zip(train_size_list, pred_size_list, slide_size_list):
    # Per-window MSE values for the current configuration
    error_list = []

    # i is the row of the first value to forecast; every row before it is
    # used for fitting, so the training sample grows as i advances.
    for i in tqdm(range(train_size, len(close_series) - pred_size + 1, slide_size)):
        # Specifying Train and Test data
        train_data = close_series[:i]
        test_data = close_series[i:(i+pred_size)]

        # Training ARIMA on the train data with fixed order
        arima = ARIMA(endog=train_data, order=(0,1,0))
        arima_trained_model = arima.fit()

        # Forecast the pred_size steps that immediately follow the training
        # sample. forecast() is anchored at the end of the fitted data, so it
        # does not rely on the training slice starting at row 0 the way an
        # integer predict(start=i, ...) does.
        pred_arima = arima_trained_model.forecast(steps=pred_size)

        # Calculating the Error of ARIMA's prediction
        pred_err_arima = mean_squared_error(test_data, pred_arima)
        error_list.append(pred_err_arima)

    # Report the mean of the per-window errors. Adjacent string literals are
    # used instead of a backslash continuation inside the literal, which
    # embedded the next line's indentation in the printed message; the
    # "intitial" typo in the message is corrected as well.
    print(
        f"Average ARIMA's prediction error (MSE) for Expanding Window with initial size "
        f"{train_size}, Prediction Size {pred_size}, and Expand Size {slide_size}: {np.mean(error_list)}"
    )

ARIMA's prediction error (MSE) for 80-20 Single Split is: 1870.9150672185433


100%|██████████| 1504/1504 [00:30<00:00, 49.82it/s]


Average ARIMA's prediction error (MSE) for Sliding Window with size     7, Prediction Size 1, and Slide Size 1: 5.586767752659575


100%|██████████| 1481/1481 [00:28<00:00, 52.18it/s]


Average ARIMA's prediction error (MSE) for Sliding Window with size     30, Prediction Size 1, and Slide Size 1: 5.654279945982443


100%|██████████| 1411/1411 [00:27<00:00, 51.54it/s]


Average ARIMA's prediction error (MSE) for Sliding Window with size     100, Prediction Size 1, and Slide Size 1: 5.916390503189229


100%|██████████| 211/211 [00:04<00:00, 49.51it/s]


Average ARIMA's prediction error (MSE) for Sliding Window with size     30, Prediction Size 7, and Slide Size 7: 17.115529451591065


100%|██████████| 201/201 [00:03<00:00, 50.98it/s]


Average ARIMA's prediction error (MSE) for Sliding Window with size     100, Prediction Size 7, and Slide Size 7: 17.93261336176262


100%|██████████| 1481/1481 [00:23<00:00, 61.76it/s]


Average ARIMA's prediction error (MSE) for Expanding Window with intitial size     30, Prediction Size 1, and Expand Size 1: 5.654279945982445


100%|██████████| 1411/1411 [00:23<00:00, 60.62it/s]


Average ARIMA's prediction error (MSE) for Expanding Window with intitial size     100, Prediction Size 1, and Expand Size 1: 5.916390503189229


100%|██████████| 201/201 [00:03<00:00, 61.84it/s]

Average ARIMA's prediction error (MSE) for Expanding Window with intitial size     100, Prediction Size 7, and Expand Size 7: 17.93261336176262





## **Finding Best Windowing Mechanism for Exponential Smoothing**

In [8]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import SimpleExpSmoothing
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

warnings.simplefilter('ignore', ConvergenceWarning)
warnings.simplefilter('ignore', UserWarning)

# Read the dataset from disk; only the "Close" column is modelled below
df = pd.read_csv("Microsoft_Stock.csv")
close_series = df["Close"]

########################### Single Split ###########################

# Chronological 80/20 hold-out: the leading 80% of the rows are used for
# fitting, the remaining rows are forecast in a single shot
n_obs = len(close_series)
train_size = round(0.8 * n_obs)
train_data, test_data = close_series[:train_size], close_series[train_size:]

# Simple exponential smoothing with the smoothing level pinned to 0.8
exp_smoothing = SimpleExpSmoothing(train_data)
exp_trained_model = exp_smoothing.fit(smoothing_level=0.8)

# One forecast per held-out row
horizon = len(test_data)
pred_exp = exp_trained_model.forecast(horizon)

# Mean squared error between the held-out prices and the forecasts
pred_err_exp = mean_squared_error(y_true=test_data, y_pred=pred_exp)

# Report the single-split error
print(f"Exponential Smoothing's prediction error (MSE) for 80-20 Single Split is: {pred_err_exp}")

########################### Sliding Window ###########################

# Sliding window strategy parameters. Position k of the three lists describes
# one configuration: training-window length, forecast horizon and the step
# by which the window is moved between fits.
train_size_list = [7, 30, 100, 30, 100]
pred_size_list = [1, 1, 1, 7, 7]
slide_size_list = [1, 1, 1, 7, 7]


# The fine-tuning loop: one pass over the series per configuration
for train_size, pred_size, slide_size in zip(train_size_list, pred_size_list, slide_size_list):
    # Per-window MSE values for the current configuration
    error_list = []

    # i is the row of the first value to forecast; the train_size rows right
    # before it form the training window. The upper bound guarantees that
    # every window has a full pred_size rows to be scored against.
    for i in tqdm(range(train_size, len(close_series) - pred_size + 1, slide_size)):
        # Specifying Train and Test data
        train_data = close_series[(i-train_size):i]
        test_data = close_series[i:(i+pred_size)]

        # Training Exponential Smoothing on the train data with fixed smoothing_level
        exp_smoothing = SimpleExpSmoothing(train_data)
        exp_trained_model = exp_smoothing.fit(smoothing_level=0.8)

        # Forecast as many steps past the window as there are test rows
        pred_exp = exp_trained_model.forecast(len(test_data))

        # Calculating the Error of Exponential Smoothing's prediction
        pred_err_exp = mean_squared_error(test_data, pred_exp)
        error_list.append(pred_err_exp)

    # Report the mean of the per-window errors. Adjacent string literals are
    # used instead of a backslash continuation inside the literal, which
    # embedded the next line's indentation in the printed message.
    print(
        f"Average Exponential Smoothing's prediction error (MSE) for Sliding Window with size "
        f"{train_size}, Prediction Size {pred_size}, and Slide Size {slide_size}: {np.mean(error_list)}"
    )

########################### Expanding Window ###########################

# Expanding window strategy parameters. Position k of the three lists
# describes one configuration: initial training length, forecast horizon and
# the number of rows added to the training sample between fits.
train_size_list = [30, 100, 100]
pred_size_list = [1, 1, 7]
slide_size_list = [1, 1, 7]


# The fine-tuning loop: one pass over the series per configuration
for train_size, pred_size, slide_size in zip(train_size_list, pred_size_list, slide_size_list):
    # Per-window MSE values for the current configuration
    error_list = []

    # i is the row of the first value to forecast; every row before it is
    # used for fitting, so the training sample grows as i advances.
    for i in tqdm(range(train_size, len(close_series) - pred_size + 1, slide_size)):
        # Specifying Train and Test data
        train_data = close_series[:i]
        test_data = close_series[i:(i+pred_size)]

        # Training Exponential Smoothing on the train data with fixed smoothing_level
        exp_smoothing = SimpleExpSmoothing(train_data)
        exp_trained_model = exp_smoothing.fit(smoothing_level=0.8)

        # Forecast as many steps past the training sample as there are test rows
        pred_exp = exp_trained_model.forecast(len(test_data))

        # Calculating the Error of Exponential Smoothing's prediction
        pred_err_exp = mean_squared_error(test_data, pred_exp)
        error_list.append(pred_err_exp)

    # Report the mean of the per-window errors. Adjacent string literals are
    # used instead of a backslash continuation inside the literal, which
    # embedded the next line's indentation in the printed message.
    print(
        f"Average Exponential Smoothing's prediction error (MSE) for Expanding Window with initial size "
        f"{train_size}, Prediction Size {pred_size}, and Expand Size {slide_size}: {np.mean(error_list)}"
    )

Exponential Smoothing's prediction error (MSE) for 80-20 Single Split is: 1892.9343169631973


100%|██████████| 1504/1504 [00:02<00:00, 601.23it/s]


Average Exponential Smoothing's prediction error (MSE) for Sliding Window with size     7, Prediction Size 1, and Slide Size 1: 5.227991150111377


100%|██████████| 1481/1481 [00:02<00:00, 610.97it/s]


Average Exponential Smoothing's prediction error (MSE) for Sliding Window with size     30, Prediction Size 1, and Slide Size 1: 5.288308559748616


100%|██████████| 1411/1411 [00:02<00:00, 601.76it/s]


Average Exponential Smoothing's prediction error (MSE) for Sliding Window with size     100, Prediction Size 1, and Slide Size 1: 5.531357621364127


100%|██████████| 211/211 [00:00<00:00, 624.11it/s]


Average Exponential Smoothing's prediction error (MSE) for Sliding Window with size     30, Prediction Size 7, and Slide Size 7: 16.642436933242326


100%|██████████| 201/201 [00:00<00:00, 597.02it/s]


Average Exponential Smoothing's prediction error (MSE) for Sliding Window with size     100, Prediction Size 7, and Slide Size 7: 17.43480790075782


100%|██████████| 1481/1481 [00:03<00:00, 433.35it/s]


Average Exponential Smoothing's prediction error (MSE) for Expanding Window with initial size     30, Prediction Size 1, and Expand Size 1: 5.288308559748616


100%|██████████| 1411/1411 [00:03<00:00, 444.88it/s]


Average Exponential Smoothing's prediction error (MSE) for Expanding Window with initial size     100, Prediction Size 1, and Expand Size 1: 5.531357621364127


100%|██████████| 201/201 [00:00<00:00, 436.35it/s]

Average Exponential Smoothing's prediction error (MSE) for Expanding Window with initial size     100, Prediction Size 7, and Expand Size 7: 17.43480790075782





## **Finding Best Windowing Mechanism for Random Forest**

In [9]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

# Read the dataset from disk; only the "Close" column is modelled below
df = pd.read_csv("Microsoft_Stock.csv")
close_series = df["Close"]

########################### Single Split ###########################

# Chronological 80/20 hold-out: the leading 80% of the rows are used for
# fitting, the remaining rows are predicted in a single shot
n_obs = len(close_series)
train_size = round(0.8 * n_obs)

# The only feature is the zero-based row position of each observation
train_input = np.arange(train_size)
test_input = np.arange(train_size, n_obs)

# Targets are the closing prices at those positions
train_data, test_data = close_series[:train_size], close_series[train_size:]

# Random Forest with a fixed number of trees and a fixed seed; the 1-D
# position arrays are turned into single-column matrices for scikit-learn
rf = RandomForestRegressor(n_estimators=150, random_state=42)
rf.fit(train_input[:, np.newaxis], train_data)

# Predict the held-out positions
pred_rf = rf.predict(test_input[:, np.newaxis])

# Mean squared error between the held-out prices and the predictions
pred_err_rf = mean_squared_error(y_true=test_data, y_pred=pred_rf)

# Report the single-split error
print(f"Random Forest's prediction error (MSE) for 80-20 Single Split is: {pred_err_rf}")

########################### Sliding Window ###########################

# Sliding window strategy parameters. Position k of the three lists describes
# one configuration: training-window length, forecast horizon and the step
# by which the window is moved between fits.
train_size_list = [7, 30, 100, 30, 100]
pred_size_list = [1, 1, 1, 7, 7]
slide_size_list = [1, 1, 1, 7, 7]


# The fine-tuning loop: one pass over the series per configuration
for train_size, pred_size, slide_size in zip(train_size_list, pred_size_list, slide_size_list):
    # Per-window MSE values for the current configuration
    error_list = []

    # i is the row of the first value to predict; the train_size rows right
    # before it form the training window. The upper bound guarantees that
    # every window has a full pred_size rows to be scored against.
    for i in tqdm(range(train_size, len(close_series) - pred_size + 1, slide_size)):
        # The only feature is the row position of each observation.
        # NOTE(review): the test positions always lie beyond the largest
        # training position, and tree ensembles do not extrapolate, so the
        # predictions are presumably driven by the most recent training
        # targets -- confirm this is the intended baseline.
        train_input = np.arange((i-train_size),i)
        test_input = np.arange(i,(i+pred_size))

        # Specifying Train and Test targets
        train_data = close_series[(i-train_size):i]
        test_data = close_series[i:(i+pred_size)]

        # Training Random Forest on the train data with fixed n_estimators and random_state
        rf = RandomForestRegressor(n_estimators=150, random_state=42)
        rf.fit(train_input.reshape(-1,1), train_data)

        # Predicting the future prices, using trained Random Forest model
        pred_rf = rf.predict(test_input.reshape(-1, 1))

        # Calculating the Error of Random Forest's prediction
        pred_err_rf = mean_squared_error(test_data, pred_rf)
        error_list.append(pred_err_rf)

    # Report the mean of the per-window errors. Adjacent string literals are
    # used instead of a backslash continuation inside the literal, which
    # embedded the next line's indentation in the printed message.
    print(
        f"Average Random Forest's prediction error (MSE) for Sliding Window with size "
        f"{train_size}, Prediction Size {pred_size}, and Slide Size {slide_size}: {np.mean(error_list)}"
    )

########################### Expanding Window ###########################

# Expanding window strategy parameters. Position k of the three lists
# describes one configuration: initial training length, forecast horizon and
# the number of rows added to the training sample between fits.
train_size_list = [30, 100, 100]
pred_size_list = [1, 1, 7]
slide_size_list = [1, 1, 7]


# The fine-tuning loop: one pass over the series per configuration
for train_size, pred_size, slide_size in zip(train_size_list, pred_size_list, slide_size_list):
    # Per-window MSE values for the current configuration
    error_list = []

    # i is the row of the first value to predict; every row before it is
    # used for fitting, so the training sample grows as i advances.
    for i in tqdm(range(train_size, len(close_series) - pred_size + 1, slide_size)):
        # The only feature is the row position of each observation.
        # NOTE(review): the test positions always lie beyond the largest
        # training position, and tree ensembles do not extrapolate, so the
        # predictions are presumably driven by the most recent training
        # targets -- confirm this is the intended baseline.
        train_input = np.arange(i)
        test_input = np.arange(i,(i+pred_size))

        # Specifying Train and Test targets
        train_data = close_series[:i]
        test_data = close_series[i:(i+pred_size)]

        # Training Random Forest on the train data with fixed n_estimators and random_state
        rf = RandomForestRegressor(n_estimators=150, random_state=42)
        rf.fit(train_input.reshape(-1,1), train_data)

        # Predicting the future prices, using trained Random Forest model
        pred_rf = rf.predict(test_input.reshape(-1, 1))

        # Calculating the Error of Random Forest's prediction
        pred_err_rf = mean_squared_error(test_data, pred_rf)
        error_list.append(pred_err_rf)

    # Report the mean of the per-window errors. Adjacent string literals are
    # used instead of a backslash continuation inside the literal, which
    # embedded the next line's indentation in the printed message; the
    # "intitial" typo in the message is corrected as well.
    print(
        f"Average Random Forest's prediction error (MSE) for Expanding Window with initial size "
        f"{train_size}, Prediction Size {pred_size}, and Expand Size {slide_size}: {np.mean(error_list)}"
    )

Random Forest's prediction error (MSE) for 80-20 Single Split is: 1920.0768251471586


100%|██████████| 1504/1504 [03:05<00:00,  8.10it/s]


Average Random Forest's prediction error (MSE) for Sliding Window with size     7, Prediction Size 1, and Slide Size 1: 5.208270037644769


100%|██████████| 1481/1481 [03:04<00:00,  8.02it/s]


Average Random Forest's prediction error (MSE) for Sliding Window with size     30, Prediction Size 1, and Slide Size 1: 5.312137955081355


100%|██████████| 1411/1411 [03:01<00:00,  7.79it/s]


Average Random Forest's prediction error (MSE) for Sliding Window with size     100, Prediction Size 1, and Slide Size 1: 5.627801259733792


100%|██████████| 211/211 [00:26<00:00,  8.10it/s]


Average Random Forest's prediction error (MSE) for Sliding Window with size     30, Prediction Size 7, and Slide Size 7: 16.513817194216642


100%|██████████| 201/201 [00:25<00:00,  7.82it/s]


Average Random Forest's prediction error (MSE) for Sliding Window with size     100, Prediction Size 7, and Slide Size 7: 17.30115693926888


100%|██████████| 1481/1481 [04:42<00:00,  5.25it/s]


Average Random Forest's prediction error (MSE) for Expanding Window with intitial size     30, Prediction Size 1, and Expand Size 1: 5.390008503467574


100%|██████████| 1411/1411 [04:35<00:00,  5.12it/s]


Average Random Forest's prediction error (MSE) for Expanding Window with intitial size     100, Prediction Size 1, and Expand Size 1: 5.636422542981283


100%|██████████| 201/201 [00:38<00:00,  5.15it/s]

Average Random Forest's prediction error (MSE) for Expanding Window with intitial size     100, Prediction Size 7, and Expand Size 7: 17.312980438237467



