In [11]:
#@title Packages

import random
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy import stats
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score  
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, GRU, Dense, LeakyReLU, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adagrad, Adadelta
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError, MeanAbsolutePercentageError
from tensorflow.keras.backend import sqrt, mean, square
import tensorflow.keras.backend as K

In [12]:
#@title Read and Prepare Data

def read_prepare_data(symbol, data_dir='/Users/pedroalexleite/Desktop/Tese/Dados'):
    """Load one symbol's closing prices and min-max scale them.

    Reads ``dataset4.csv``, ``train.csv`` and ``test.csv`` from ``data_dir``,
    keeps the rows whose ``Symbol`` equals ``symbol`` and reduces each file to
    a single ``Close`` column indexed by ``Date``.

    Args:
        symbol: value matched against the ``Symbol`` column.
        data_dir: folder containing the three CSV files. Defaults to the
            location the notebook was originally written against.

    Returns:
        ``(scaler, data, train, test)``: the ``MinMaxScaler`` fitted on the
        training split, followed by the scaled full, train and test frames.

    Raises:
        ValueError: if one of the files contains no rows for ``symbol``.
    """
    from pathlib import Path

    folder = Path(data_dir)

    def load(filename):
        raw = pd.read_csv(folder / filename)
        #we're going to use only one symbol and only the price variable
        selected = raw.loc[raw['Symbol'] == symbol, ['Date', 'Close']]
        if selected.empty:
            raise ValueError(f"No rows for symbol {symbol!r} in {filename}")
        #set date as index
        return selected.set_index('Date')

    def as_frame(values, template):
        return pd.DataFrame(values, columns=template.columns, index=template.index)

    data = load('dataset4.csv')
    train = load('train.csv')
    test = load('test.csv')

    #normalize: the scaler is fitted on the training split only and then
    #applied unchanged to the other two frames
    scaler = MinMaxScaler(feature_range=(0, 1))
    train = as_frame(scaler.fit_transform(train), train)
    test = as_frame(scaler.transform(test), test)
    data = as_frame(scaler.transform(data), data)

    return scaler, data, train, test

#module-level AAPL frames and fitted scaler: predict_forecast_plot reads `scaler`
#from this global scope and the sliding-window cell passes data/train/test to model()
scaler, data, train, test = read_prepare_data('AAPL')

#verify
#print(data.head())
#print(data.index)   
#print(data.columns)

In [13]:
#@title Create Dataset

def create_dataset(dataframe, look_back):
    """Cut a series into supervised (window, next value) pairs.

    Sample ``i`` uses rows ``i .. i+look_back-1`` as input and row
    ``i+look_back`` as target. The number of samples is
    ``len(dataframe) - look_back - 1``, i.e. the last possible window of the
    series is not emitted.

    Args:
        dataframe: object exposing ``.values`` (rows = time steps).
        look_back: number of consecutive rows per input window.

    Returns:
        ``(X, Y)`` as numpy arrays built from the windows and their targets.
    """
    values = dataframe.values
    n_samples = len(values) - look_back - 1
    windows = [values[start:start + look_back] for start in range(n_samples)]
    targets = [values[start + look_back] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

In [14]:
#@title Reshape

def reshape(train, test, look_back):
    """Build the windowed train and test arrays fed to the network.

    Both splits go through ``create_dataset``; the inputs are then reshaped to
    their own first three dimensions (sample, window step, column), which
    requires them to be three-dimensional.

    Returns:
        ``(trainX, trainY, testX, testY)``.
    """
    x_train, y_train = create_dataset(train, look_back)
    x_test, y_test = create_dataset(test, look_back)

    x_train = np.reshape(x_train, tuple(x_train.shape[axis] for axis in range(3)))
    x_test = np.reshape(x_test, tuple(x_test.shape[axis] for axis in range(3)))

    return x_train, y_train, x_test, y_test

In [15]:
#@title Forecast

def forecast_values(testY, look_back, horizon, model):
    """Roll the model forward ``horizon`` steps past the end of ``testY``.

    Every step feeds the most recent ``look_back`` values (observed values
    first, earlier forecasts once they exist) to ``model.predict`` and appends
    the prediction to the history. The input array is not modified.

    Args:
        testY: series of scaled values, one value per step; each window is
            reshaped to ``(1, look_back, 1)`` before prediction.
        look_back: window length the model expects.
        horizon: number of future steps to produce.
        model: object exposing ``predict(x, verbose=0)``.

    Returns:
        Array with the ``horizon`` forecast rows, in chronological order.

    Raises:
        ValueError: if ``look_back`` is not positive or ``testY`` holds fewer
            than ``look_back`` values.
    """
    history = np.array(testY)
    n_observed = len(history)
    if look_back < 1 or n_observed < look_back:
        raise ValueError(
            f"Need at least look_back={look_back} observed values to forecast, got {n_observed}"
        )

    for _ in range(horizon):
        #the window must end at the newest value; leaving it out would feed
        #the model a window that is one step stale
        window = np.reshape(history[-look_back:], (1, look_back, 1))
        next_value = np.reshape(model.predict(window, verbose=0)[0], (1, 1))
        history = np.concatenate((history, next_value), axis=0)

    return history[n_observed:]

In [16]:
#@title Auxiliary Function

def predict_forecast_plot(data, train, test, trainX, trainY, testX, testY, nepochs, look_back, horizon, plot_predictions, model, scaler=None):
    """Score a fitted model on the train/test windows, forecast ahead and optionally plot.

    Args:
        data: scaled full series (DataFrame); only used for plotting.
        train, test, nepochs: accepted for call compatibility; not used here.
        trainX, testX: inputs handed to ``model.predict``.
        trainY, testY: scaled targets matching ``trainX`` / ``testX``.
        look_back: window length; used by the forecast and the plot offsets.
        horizon: forwarded to ``forecast_values``.
        plot_predictions: when truthy, plot real series, predictions and forecast.
        model: fitted object exposing ``predict``.
        scaler: fitted scaler whose ``inverse_transform`` undoes the scaling of
            ``data``/``trainY``/``testY``. When omitted, the module-level
            ``scaler`` is used (the previous implicit behaviour).

    Returns:
        Test RMSE computed after inverse-transforming targets and predictions.

    Raises:
        ValueError: if no scaler is passed and no module-level ``scaler`` exists.
    """
    if scaler is None:
        scaler = globals().get('scaler')
        if scaler is None:
            raise ValueError("No scaler given and no module-level 'scaler' is defined")

    #make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    #forecast
    forecast = forecast_values(testY, look_back, horizon, model)

    #invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform(trainY)
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform(testY)
    forecast = scaler.inverse_transform(forecast)

    #calculate root mean squared error
    trainScore = np.sqrt(mean_squared_error(trainY, trainPredict))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = np.sqrt(mean_squared_error(testY, testPredict))
    print('Test Score: %.2f RMSE' % (testScore))

    #plot predictions
    if plot_predictions:
        #shift train predictions for plotting
        trainPredictPlot = np.full(data.shape, np.nan)
        trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

        #shift test predictions for plotting; these offsets assume that data is
        #the train rows followed by the test rows
        testPredictPlot = np.full(data.shape, np.nan)
        testPredictPlot[len(trainPredict)+(look_back*2)+1:len(data)-1, :] = testPredict

        #shift forecast for plotting: a single column that starts right after the data
        forecastPlot = np.full((len(data) + len(forecast), 1), np.nan)
        forecastPlot[len(data):, :] = forecast

        #plot baseline, predictions and forecast
        plt.figure(figsize=(15,7))
        plt.plot(scaler.inverse_transform(data), label='real')
        plt.plot(trainPredictPlot, label='train set prediction')
        plt.plot(testPredictPlot, label='test set prediction')
        plt.plot(forecastPlot, label='forecast')
        plt.legend()
        plt.show()

    return testScore

In [23]:
#@title Train and Predict

def rmse_loss(y_true, y_pred):
    """Root-mean-squared error between targets and predictions, built from backend ops."""
    residual = y_pred - y_true
    mean_squared_error_value = K.mean(K.square(residual))
    return K.sqrt(mean_squared_error_value)
    
def model(data, train, test, look_back=30, nepochs=50, horizon=7, plot_predictions=False,
          batch_size=1, learning_rate=0.001, optimizer='adam', activation='relu', loss='mse'):
    """Train a single-layer LSTM on the training windows and return its test RMSE.

    Args:
        data, train, test: scaled frames; ``train``/``test`` are windowed with
            ``reshape`` and ``data`` is only forwarded for plotting.
        look_back: window length.
        nepochs: number of training epochs.
        horizon: forecast horizon forwarded to ``predict_forecast_plot``.
        plot_predictions: forwarded to ``predict_forecast_plot``.
        batch_size: batch size used by ``fit``.
        learning_rate: learning rate of the selected optimizer.
        optimizer: one of 'adam', 'sgd', 'rmsprop', 'adagrad', 'adadelta'
            (case-insensitive).
        activation: LSTM activation name, or 'leaky_relu' to apply a
            ``LeakyReLU`` layer after a default LSTM.
        loss: one of 'mse', 'mean_squared_error', 'rmse', 'mae',
            'mean_absolute_error', 'mape', 'mean_absolute_percentage_error'
            (case-insensitive).

    Returns:
        The test score returned by ``predict_forecast_plot``.

    Raises:
        ValueError: for an unsupported optimizer or loss name.
    """
    #resolve optimizer and loss names first so a typo fails before any data or
    #network work; only the selected ones are instantiated further down
    optimizer_classes = {
        'adam': Adam,
        'sgd': SGD,
        'rmsprop': RMSprop,
        'adagrad': Adagrad,
        'adadelta': Adadelta
    }
    optimizer = optimizer.lower()
    if optimizer not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer: {optimizer}")

    loss_factories = {
        'mse': MeanSquaredError,
        'mean_squared_error': MeanSquaredError,
        'rmse': lambda: rmse_loss,
        'mae': MeanAbsoluteError,
        'mean_absolute_error': MeanAbsoluteError,
        'mape': MeanAbsolutePercentageError,
        'mean_absolute_percentage_error': MeanAbsolutePercentageError
    }
    loss = loss.lower()
    if loss not in loss_factories:
        raise ValueError(f"Unsupported loss function: {loss}")

    #this function is called many times in a row by the experiment loops;
    #release the Keras state left behind by previously built models
    K.clear_session()

    #reshape
    trainX, trainY, testX, testY = reshape(train, test, look_back)

    #build model
    input_layer = Input(shape=(trainX.shape[1], trainX.shape[2]))

    if activation == 'leaky_relu':
        x = LSTM(16)(input_layer)
        x = LeakyReLU(alpha=0.01)(x)
    else:
        x = LSTM(16, activation=activation)(input_layer)

    output = Dense(1, activation='linear')(x)
    model_instance = Model(inputs=input_layer, outputs=output)

    #optimizer and loss function
    opt = optimizer_classes[optimizer](learning_rate=learning_rate)
    loss_fn = loss_factories[loss]()

    #compile model
    model_instance.compile(loss=loss_fn, optimizer=opt)

    #train
    model_instance.fit(trainX, trainY, epochs=nepochs, batch_size=batch_size, verbose=1)

    #predict and evaluate
    testScore = predict_forecast_plot(data, train, test, trainX, trainY, testX, testY, nepochs, look_back, horizon, plot_predictions, model_instance)

    return testScore
    
#model(data, train, test, look_back=30, nepochs=50, horizon=7, plot_predictions=False, batch_size=1, learning_rate=0.001, optimizer='adam', activation='relu', loss='mean_squared_error')

In [24]:
#@title Optimize Outlier Handling Techniques - PART 1

def winsorization(data, lower_percentile=5, upper_percentile=95):
    """Return a copy of ``data`` with every column capped at its own percentiles.

    Values below the ``lower_percentile`` of a column are raised to it and
    values above the ``upper_percentile`` are lowered to it. ``data`` itself is
    left untouched.
    """
    capped = data.copy()
    for name in data.columns:
        series = data[name]
        floor = np.percentile(series, lower_percentile)
        ceiling = np.percentile(series, upper_percentile)
        capped[name] = np.clip(series, floor, ceiling)
    return capped

def clipping(data, lower_percentile=1, upper_percentile=99):
    """Return a copy of ``data`` with every column clipped to its own percentile range.

    Same procedure as percentile capping, with 1%/99% as the default bounds.
    ``data`` itself is left untouched.
    """
    clipped = data.copy()
    for name in data.columns:
        series = data[name]
        low = np.percentile(series, lower_percentile)
        high = np.percentile(series, upper_percentile)
        clipped[name] = np.clip(series, low, high)
    return clipped

def read_prepare_data_with_outliers(symbol, outlier_technique=None,
                                    data_dir='/Users/pedroalexleite/Desktop/Tese/Dados', **kwargs):
    """Load one symbol's closing prices, treat outliers in the training split and scale.

    Args:
        symbol: value matched against the ``Symbol`` column.
        outlier_technique: ``None`` (no treatment), ``'winsorization'`` or
            ``'clipping'``. The treatment is applied to the training split only.
        data_dir: folder containing ``dataset4.csv``, ``train.csv`` and ``test.csv``.
        **kwargs: forwarded to the selected outlier function.

    Returns:
        ``(scaler, data_normalized, train_normalized, test_normalized)``; the
        ``MinMaxScaler`` is fitted on the (treated) training split.

    Raises:
        ValueError: for an unknown technique, or when a file has no rows for
            ``symbol``.
    """
    from pathlib import Path

    techniques = {
        'winsorization': (winsorization, 'winsorization'),
        'clipping': (clipping, 'clipping'),
    }
    #a misspelt technique used to be ignored silently, which made the run
    #indistinguishable from "no outlier handling"
    if outlier_technique is not None and outlier_technique not in techniques:
        raise ValueError(f"Unsupported outlier technique: {outlier_technique}")

    folder = Path(data_dir)

    def load(filename):
        raw = pd.read_csv(folder / filename)
        selected = raw.loc[raw['Symbol'] == symbol, ['Date', 'Close']]
        if selected.empty:
            raise ValueError(f"No rows for symbol {symbol!r} in {filename}")
        return selected.set_index('Date')

    def as_frame(values, template):
        return pd.DataFrame(values, columns=template.columns, index=template.index)

    data = load('dataset4.csv')
    train = load('train.csv')
    test = load('test.csv')

    if outlier_technique is not None:
        handler, label = techniques[outlier_technique]
        train = handler(train, **kwargs)
        print(f"Applied {label} to training data with parameters: {kwargs}")

    scaler = MinMaxScaler(feature_range=(0, 1))
    train_normalized = as_frame(scaler.fit_transform(train), train)
    test_normalized = as_frame(scaler.transform(test), test)
    data_normalized = as_frame(scaler.transform(data), data)

    return scaler, data_normalized, train_normalized, test_normalized

In [26]:
#@title Optimize Outlier Handling Techniques - PART 2

def run_outlier_experiments(symbol='AAPL', n_runs=5, look_back=30, nepochs=50, horizon=7):
    """Train the model ``n_runs`` times per outlier technique and collect test RMSEs.

    Args:
        symbol: ticker forwarded to ``read_prepare_data_with_outliers``.
        n_runs: repetitions per configuration.
        look_back, nepochs, horizon: forwarded to ``model``.

    Returns:
        Dict mapping configuration name to ``{'scores': [...], 'mean': float}``.
        Configurations without a single successful run are omitted.
    """
    #predict_forecast_plot undoes the scaling with the module-level `scaler`.
    #Each run fits its own scaler, so that global has to point at it while the
    #run is scored; otherwise the RMSE is inverted with an unrelated scaler and
    #the configurations are not comparable. The previous global is restored on exit.
    global scaler
    had_scaler = 'scaler' in globals()
    previous_scaler = globals().get('scaler')

    configurations = [
        {'name': 'Winsorization (5%-95%)', 'technique': 'winsorization',
         'params': {'lower_percentile': 5, 'upper_percentile': 95}},
        {'name': 'Clipping (1%-99%)', 'technique': 'clipping',
         'params': {'lower_percentile': 1, 'upper_percentile': 99}}
    ]

    results = {}

    try:
        for config in configurations:
            config_scores = []

            for run in range(n_runs):
                print(f"\nRun {run + 1}/{n_runs} for {config['name']}")
                try:
                    scaler, data_normalized, train_normalized, test_normalized = read_prepare_data_with_outliers(
                        symbol=symbol, outlier_technique=config['technique'], **config['params']
                    )
                    score = model(
                        data_normalized, train_normalized, test_normalized,
                        look_back=look_back,
                        nepochs=nepochs,
                        horizon=horizon,
                        plot_predictions=False,
                        batch_size=1,
                        learning_rate=0.001,
                        optimizer='adam',
                        activation='relu',
                        loss='mse'
                    )
                    config_scores.append(score)

                except Exception as e:
                    #best effort: a failed run is reported and skipped
                    print(f"Error in run {run + 1}: {str(e)}")
                    continue

            if config_scores:
                results[config['name']] = {
                    'scores': config_scores,
                    'mean': np.mean(config_scores)
                }
                print(f"\n{config['name']} Results:")
                print(f"Mean RMSE: {results[config['name']]['mean']:.2f}\n")
            else:
                print(f"No successful runs for {config['name']}")
    finally:
        if had_scaler:
            scaler = previous_scaler
        else:
            globals().pop('scaler', None)

    return results

#the function defined above is run_outlier_experiments (singular "outlier")
results = run_outlier_experiments(symbol='AAPL', n_runs=5)
print("Final results for Outliers Techniques:")
for config_name, config_results in results.items():
    print(f"{config_name}: Mean RMSE = {config_results['mean']:.2f}")
#results is empty when every run failed, and min() raises on an empty sequence
if results:
    best_config = min(results.items(), key=lambda x: x[1]['mean'])
    print(f"\nBest Configuration: {best_config[0]}")
    print(f"Best Mean RMSE: {best_config[1]['mean']:.2f}")
else:
    print("\nNo configuration completed a successful run.")

Testing Configuration: No Outlier Handling

Run 1/1 for No Outlier Handling
Epoch 1/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 0.0186
Epoch 2/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0011
Epoch 3/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 9.2172e-04
Epoch 4/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 8.4277e-04
Epoch 5/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 6.4017e-04
Epoch 6/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 6.4921e-04
Epoch 7/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 5.4543e-04
Epoch 8/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 5.2642e-04
Epoch 9/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[

In [27]:
#@title Optimize Smoothing Techniques - PART 1

def simple_moving_average(data, window=7):
    """Return a copy of ``data`` smoothed with a centred rolling mean.

    Positions where the centred window is incomplete (the rolling mean is NaN)
    keep their original value. ``data`` itself is left untouched.
    """
    smoothed = data.copy()
    for name in data.columns:
        original = data[name]
        centred_mean = original.rolling(window=window, center=True).mean()
        smoothed[name] = centred_mean.fillna(original)
    return smoothed

def rolling_median(data, window=7):
    """Return a copy of ``data`` smoothed with a centred rolling median.

    Positions where the centred window is incomplete (the rolling median is
    NaN) keep their original value. ``data`` itself is left untouched.
    """
    smoothed = data.copy()
    for name in data.columns:
        original = data[name]
        centred_median = original.rolling(window=window, center=True).median()
        smoothed[name] = centred_median.fillna(original)
    return smoothed

def gaussian_filter(data, sigma=1):
    """Return a copy of ``data`` with each column passed through a 1-D Gaussian filter.

    ``sigma`` is handed to ``scipy.ndimage.gaussian_filter1d`` unchanged.
    ``data`` itself is left untouched.
    """
    from scipy.ndimage import gaussian_filter1d

    blurred = data.copy()
    for name in data.columns:
        column_values = data[name].values
        blurred[name] = gaussian_filter1d(column_values, sigma=sigma)
    return blurred

def savitzky_golay_filter(data, window_length=7, polyorder=2):
    """Return a copy of ``data`` with each column smoothed by a Savitzky-Golay filter.

    The requested window is normalised once, before any column is filtered:
    it is first raised above ``polyorder`` and then bumped to the next odd
    number, so every column is filtered with the same odd window.

    Args:
        data: DataFrame whose columns are filtered independently.
        window_length: requested filter window.
        polyorder: order of the fitted polynomial.

    Returns:
        A new DataFrame; ``data`` itself is left untouched.
    """
    from scipy.signal import savgol_filter

    #order matters: enlarging the window after the parity fix could make it
    #even again, so the size check comes first
    if window_length <= polyorder:
        window_length = polyorder + 1
    if window_length % 2 == 0:
        window_length += 1

    data_savgol = data.copy()
    for column in data.columns:
        data_savgol[column] = savgol_filter(data[column].values, window_length, polyorder)
    return data_savgol

def read_prepare_data_with_smoothing(symbol, smoothing_technique=None,
                                     data_dir='/Users/pedroalexleite/Desktop/Tese/Dados', **kwargs):
    """Load one symbol's closing prices, smooth the training split and scale.

    Args:
        symbol: value matched against the ``Symbol`` column.
        smoothing_technique: ``None`` (no smoothing), ``'sma'``,
            ``'rolling_median'``, ``'gaussian'`` or ``'savgol'``. Smoothing is
            applied to the training split only.
        data_dir: folder containing ``dataset4.csv``, ``train.csv`` and ``test.csv``.
        **kwargs: forwarded to the selected smoothing function.

    Returns:
        ``(scaler, data_normalized, train_normalized, test_normalized)``; the
        ``MinMaxScaler`` is fitted on the (smoothed) training split.

    Raises:
        ValueError: for an unknown technique, or when a file has no rows for
            ``symbol``.
    """
    from pathlib import Path

    techniques = {
        'sma': (simple_moving_average, 'Simple Moving Average'),
        'rolling_median': (rolling_median, 'Rolling Median'),
        'gaussian': (gaussian_filter, 'Gaussian Filter'),
        'savgol': (savitzky_golay_filter, 'Savitzky-Golay Filter'),
    }
    #a misspelt technique used to be ignored silently, which made the run
    #indistinguishable from "no smoothing"
    if smoothing_technique is not None and smoothing_technique not in techniques:
        raise ValueError(f"Unsupported smoothing technique: {smoothing_technique}")

    folder = Path(data_dir)

    def load(filename):
        raw = pd.read_csv(folder / filename)
        selected = raw.loc[raw['Symbol'] == symbol, ['Date', 'Close']]
        if selected.empty:
            raise ValueError(f"No rows for symbol {symbol!r} in {filename}")
        return selected.set_index('Date')

    def as_frame(values, template):
        return pd.DataFrame(values, columns=template.columns, index=template.index)

    data = load('dataset4.csv')
    train = load('train.csv')
    test = load('test.csv')

    if smoothing_technique is not None:
        smoother, label = techniques[smoothing_technique]
        train = smoother(train, **kwargs)
        print(f"Applied {label} to training data with parameters: {kwargs}")

    scaler = MinMaxScaler(feature_range=(0, 1))
    train_normalized = as_frame(scaler.fit_transform(train), train)
    test_normalized = as_frame(scaler.transform(test), test)
    data_normalized = as_frame(scaler.transform(data), data)

    return scaler, data_normalized, train_normalized, test_normalized

In [30]:
#@title Optimize Smoothing Techniques - PART 2

def run_smoothing_experiments(symbol='AAPL', n_runs=5, look_back=30, nepochs=50, horizon=7):
    """Train the model ``n_runs`` times per smoothing technique and collect test RMSEs.

    Args:
        symbol: ticker forwarded to ``read_prepare_data_with_smoothing``.
        n_runs: repetitions per configuration.
        look_back, nepochs, horizon: forwarded to ``model``.

    Returns:
        Dict mapping configuration name to ``{'scores': [...], 'mean': float}``.
        Configurations without a single successful run are omitted.
    """
    #predict_forecast_plot undoes the scaling with the module-level `scaler`.
    #Each run fits its own scaler, so that global has to point at it while the
    #run is scored; otherwise the RMSE is inverted with an unrelated scaler and
    #the configurations are not comparable. The previous global is restored on exit.
    global scaler
    had_scaler = 'scaler' in globals()
    previous_scaler = globals().get('scaler')

    configurations = [
        {'name': 'Simple Moving Average', 'technique': 'sma', 'params': {'window': 7}},
        {'name': 'Rolling Median', 'technique': 'rolling_median', 'params': {'window': 7}},
        {'name': 'Gaussian Filter', 'technique': 'gaussian', 'params': {'sigma': 1.5}},
        {'name': 'Savitzky-Golay', 'technique': 'savgol', 'params': {'window_length': 7, 'polyorder': 2}}
    ]

    results = {}

    try:
        for config in configurations:
            config_scores = []

            for run in range(n_runs):
                print(f"\nRun {run + 1}/{n_runs} for {config['name']}")
                try:
                    scaler, data_smoothed, train_smoothed, test_smoothed = read_prepare_data_with_smoothing(
                        symbol=symbol,
                        smoothing_technique=config['technique'],
                        **config['params']
                    )
                    score = model(
                        data_smoothed, train_smoothed, test_smoothed,
                        look_back=look_back,
                        nepochs=nepochs,
                        horizon=horizon,
                        plot_predictions=False,
                        batch_size=1,
                        learning_rate=0.001,
                        optimizer='adam',
                        activation='relu',
                        loss='mse'
                    )
                    config_scores.append(score)

                except Exception as e:
                    #best effort: a failed run is reported and skipped
                    print(f"Error in run {run + 1}: {str(e)}")
                    continue

            if config_scores:
                results[config['name']] = {
                    'scores': config_scores,
                    'mean': np.mean(config_scores)
                }
                print(f"\n{config['name']} Results:")
                print(f"Mean RMSE: {results[config['name']]['mean']:.2f}\n")
            else:
                print(f"No successful runs for {config['name']}")
    finally:
        if had_scaler:
            scaler = previous_scaler
        else:
            globals().pop('scaler', None)

    return results

results = run_smoothing_experiments(symbol='AAPL', n_runs=5)
print("Final results for Smoothing Techniques:")
for config_name, config_results in results.items():
    print(f"{config_name}: Mean RMSE = {config_results['mean']:.2f}")
#results is empty when every run failed, and min() raises on an empty sequence
if results:
    best_config = min(results.items(), key=lambda x: x[1]['mean'])
    print(f"\nBest Configuration: {best_config[0]}")
    print(f"Best Mean RMSE: {best_config[1]['mean']:.2f}")
else:
    print("\nNo configuration completed a successful run.")

Testing Configuration: No Smoothing

Run 1/1 for No Smoothing
Epoch 1/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - loss: 0.0520
Epoch 2/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0014
Epoch 3/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 8.4435e-04
Epoch 4/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 8.9760e-04
Epoch 5/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 7.1012e-04
Epoch 6/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 6.7807e-04
Epoch 7/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 5.9436e-04
Epoch 8/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 6.1037e-04
Epoch 9/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - 

In [34]:
#@title Optimize Normalization Techniques - PART 1

def zscore_normalization(train, test, data):
    """Standardise the three frames with a ``StandardScaler`` fitted on ``train``.

    Returns:
        ``(scaler, data_scaled, train_scaled, test_scaled)``; each scaled frame
        keeps the columns and index of its input.
    """
    scaler = StandardScaler()

    def as_frame(values, template):
        return pd.DataFrame(values, columns=template.columns, index=template.index)

    #fit on the training split first, then reuse the fitted statistics
    train_scaled = as_frame(scaler.fit_transform(train), train)
    test_scaled = as_frame(scaler.transform(test), test)
    data_scaled = as_frame(scaler.transform(data), data)
    return scaler, data_scaled, train_scaled, test_scaled

def log_normalization(train, test, data):
    """Log-transform the three frames and min-max scale them (scaler fitted on ``train``).

    When a column contains non-positive values it is shifted before taking the
    logarithm. The shift is derived from the smallest value of that column
    across all three frames, so the same transform is applied everywhere and
    the logarithm is defined for every row.

    Note: the returned scaler only undoes the min-max step, so
    ``scaler.inverse_transform`` yields log-transformed values, not the
    original ones.

    Returns:
        ``(scaler, data_scaled, train_scaled, test_scaled)``.
    """
    frames = (train, test, data)

    def log_transform(df):
        df_log = df.copy()
        for column in df.columns:
            #one shared offset per column; a per-frame offset would map the
            #same raw value to different outputs in train, test and data
            min_val = min(frame[column].min() for frame in frames)
            if min_val <= 0:
                df_log[column] = np.log(df[column] - min_val + 1)
            else:
                df_log[column] = np.log(df[column])
        return df_log

    train_log = log_transform(train)
    test_log = log_transform(test)
    data_log = log_transform(data)

    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = pd.DataFrame(scaler.fit_transform(train_log), columns=train_log.columns, index=train_log.index)
    test_scaled = pd.DataFrame(scaler.transform(test_log), columns=test_log.columns, index=test_log.index)
    data_scaled = pd.DataFrame(scaler.transform(data_log), columns=data_log.columns, index=data_log.index)
    return scaler, data_scaled, train_scaled, test_scaled


def read_prepare_data_with_normalization(symbol, normalization_technique='minmax',
                                         data_dir='/Users/pedroalexleite/Desktop/Tese/Dados', **kwargs):
    """Load one symbol's closing prices and normalise them with the chosen technique.

    Args:
        symbol: value matched against the ``Symbol`` column.
        normalization_technique: ``'minmax'``, ``'zscore'`` or ``'log'``.
        data_dir: folder containing ``dataset4.csv``, ``train.csv`` and ``test.csv``.
        **kwargs: ``feature_range`` is honoured for ``'minmax'`` (default
            ``(0, 1)``); other keys are ignored.

    Returns:
        ``(scaler, data_normalized, train_normalized, test_normalized)``. In
        every case the scaler is fitted on the training split only. For
        ``'log'`` the scaler inverts only the min-max step, so values it
        restores are still log-transformed.

    Raises:
        ValueError: for an unknown technique, or when a file has no rows for
            ``symbol``.
    """
    from pathlib import Path

    if normalization_technique not in ('minmax', 'zscore', 'log'):
        raise ValueError(f"Unsupported normalization technique: {normalization_technique}")

    folder = Path(data_dir)

    def load(filename):
        #read data, filter by symbol, select price variable, set date as index
        raw = pd.read_csv(folder / filename)
        selected = raw.loc[raw['Symbol'] == symbol, ['Date', 'Close']]
        if selected.empty:
            raise ValueError(f"No rows for symbol {symbol!r} in {filename}")
        return selected.set_index('Date')

    data = load('dataset4.csv')
    train = load('train.csv')
    test = load('test.csv')

    if normalization_technique == 'minmax':
        scaler = MinMaxScaler(feature_range=kwargs.get('feature_range', (0, 1)))
        train_normalized = pd.DataFrame(scaler.fit_transform(train), columns=train.columns, index=train.index)
        test_normalized = pd.DataFrame(scaler.transform(test), columns=test.columns, index=test.index)
        data_normalized = pd.DataFrame(scaler.transform(data), columns=data.columns, index=data.index)
        print("Applied Min-Max Normalization (fitted on training data)")
    elif normalization_technique == 'zscore':
        #the helpers take (train, test, data) and return the scaler first
        scaler, data_normalized, train_normalized, test_normalized = zscore_normalization(train, test, data)
        print("Applied Z-Score Normalization (fitted on training data)")
    else:
        scaler, data_normalized, train_normalized, test_normalized = log_normalization(train, test, data)
        print("Applied Log Normalization (fitted on training data)")

    return scaler, data_normalized, train_normalized, test_normalized

In [37]:
#@title Optimize Normalization Techniques - PART 2

def run_normalization_experiments(symbol='AAPL', n_runs=5, look_back=30, nepochs=50, horizon=7):
    """Train the model ``n_runs`` times per normalization technique and collect test RMSEs.

    Args:
        symbol: ticker forwarded to ``read_prepare_data_with_normalization``.
        n_runs: repetitions per configuration.
        look_back, nepochs, horizon: forwarded to ``model``.

    Returns:
        Dict mapping configuration name to ``{'scores': [...], 'mean': float}``.
        Configurations without a single successful run are omitted.
    """
    #predict_forecast_plot undoes the scaling with the module-level `scaler`.
    #Each run fits its own scaler (of a different kind per configuration), so
    #that global has to point at it while the run is scored; otherwise the RMSE
    #is inverted with an unrelated scaler. The previous global is restored on exit.
    global scaler
    had_scaler = 'scaler' in globals()
    previous_scaler = globals().get('scaler')

    configurations = [
        {'name': 'MinMax', 'technique': 'minmax', 'params': {'feature_range': (0, 1)}},
        {'name': 'Z-Score', 'technique': 'zscore', 'params': {}}
    ]

    results = {}

    try:
        for config in configurations:
            config_scores = []

            for run in range(n_runs):
                print(f"\nRun {run + 1}/{n_runs} for {config['name']}")
                try:
                    scaler, data_normalized, train_normalized, test_normalized = read_prepare_data_with_normalization(
                        symbol=symbol,
                        normalization_technique=config['technique'],
                        **config['params']
                    )
                    score = model(
                        data_normalized, train_normalized, test_normalized,
                        look_back=look_back,
                        nepochs=nepochs,
                        #use the caller's horizon rather than a fixed 7
                        horizon=horizon,
                        plot_predictions=False,
                        batch_size=1,
                        learning_rate=0.001,
                        optimizer='adam',
                        activation='relu',
                        loss='mse')
                    config_scores.append(score)

                except Exception as e:
                    #best effort: a failed run is reported and skipped
                    print(f"Error in run {run + 1}: {str(e)}")
                    continue

            if config_scores:
                results[config['name']] = {
                    'scores': config_scores,
                    'mean': np.mean(config_scores)
                }
                print(f"\n{config['name']} Results:")
                print(f"Mean RMSE: {results[config['name']]['mean']:.2f}\n")
            else:
                print(f"No successful runs for {config['name']}")
    finally:
        if had_scaler:
            scaler = previous_scaler
        else:
            globals().pop('scaler', None)

    return results

results = run_normalization_experiments(symbol='AAPL', n_runs=5)
print("Final results for Normalization Techniques:")
for config_name, config_results in results.items():
    print(f"{config_name}: Mean RMSE = {config_results['mean']:.2f}")
#results is empty when every run failed, and min() raises on an empty sequence
if results:
    best_config = min(results.items(), key=lambda x: x[1]['mean'])
    print(f"\nBest Configuration: {best_config[0]}")
    print(f"Best Mean RMSE: {best_config[1]['mean']:.2f}")
else:
    print("\nNo configuration completed a successful run.")

Testing Configuration: MinMax

Run 1/1 for MinMax
Epoch 1/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 0.0793
Epoch 2/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.0022
Epoch 3/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0016
Epoch 4/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0014
Epoch 5/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0013
Epoch 6/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0011
Epoch 7/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0011
Epoch 8/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 9.5839e-04
Epoch 9/10
[1m1428/1428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 8.0118e-04
Epoch 10/10
[1

In [39]:
#@title Optimize Sliding Window Sizes

#candidate look_back values; every candidate is trained n_runs times from scratch
window_sizes = [1, 2, 3, 4, 5, 6, 7, 15, 60, 90]
n_runs = 5
#note: rebinds the module-level `results` used by the earlier experiment cells
results = {}

for look_back in window_sizes:
    print(f"\nTesting Sliding Window Size: {look_back}\n")
    rmse_list = []
    for run in range(1, n_runs + 1):
        print(f"Run {run}/{n_runs} for Sliding Window Size {look_back}")
        #data/train/test are the module-level frames; the score is inverse-transformed
        #inside model() -> predict_forecast_plot with the module-level `scaler`
        rmse = model(
            data, train, test,
            look_back=look_back,
            nepochs=50,
            horizon=7,
            plot_predictions=False,
            batch_size=1,
            learning_rate=0.001,
            optimizer='adam',
            activation='relu',
            loss='mean_squared_error'
        )
        rmse_list.append(rmse)
        #running mean over the runs completed so far, printed as progress feedback
        mean_rmse_so_far = sum(rmse_list) / len(rmse_list)
        print(f"Sliding Window Size {look_back}: Mean = {mean_rmse_so_far:.2f} RMSE\n")

    #no run is skipped in this loop, so len(rmse_list) == n_runs here
    mean_rmse = sum(rmse_list) / n_runs
    results[f"Sliding Window = {look_back}"] = {'mean': mean_rmse}

print("\nFinal results for Sliding Window Sizes:")
for config_name, config_results in results.items():
    print(f"{config_name}: Mean RMSE = {config_results['mean']:.2f}")
#lowest mean RMSE wins
best_config = min(results.items(), key=lambda x: x[1]['mean'])
print(f"\nBest Configuration: {best_config[0]}")
print(f"Best Mean RMSE: {best_config[1]['mean']:.2f}")

Running tests for Sliding Window Size = 1
Run 1/1 for Sliding Window Size 1
Epoch 1/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.1266
Epoch 2/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0025
Epoch 3/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 7.0771e-04
Epoch 4/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 6.9293e-04
Epoch 5/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 6.8137e-04
Epoch 6/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 6.2953e-04
Epoch 7/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 5.8759e-04
Epoch 8/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 5.7719e-04
Epoch 9/10
[1m1457/1457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[