In [1]:
from sklearn.preprocessing import MinMaxScaler



In [2]:
import numpy as np
import pandas as pd

In [3]:
mm_scaler = MinMaxScaler().set_output(transform="pandas")

In [4]:
df_data = pd.read_csv("raw_data/AAPL.csv")

In [5]:
df_macroeconomic = pd.read_csv("raw_data/macro_economic_indicators.csv")

In [6]:
df_technical = pd.read_csv("raw_data/AAPL_technical_analysis.csv")

# Supporting functions

In [7]:
def rename_macroeconomic(df_macroeconomic):
    columns_macroeconomic = ["datetime", "interest_rate", "GDP", "inflation"]
    df_macroeconomic.columns = columns_macroeconomic
    return df_macroeconomic

In [8]:
def merge_macroeconomic(df_technical, df_macroeconomic):
    df_merged = df_technical.merge(df_macroeconomic, on="datetime", how="inner")
    return df_merged

In [9]:
def rename_columns(df):
    columns = ["datetime", "open", "high", "low", "close", "volume"]
    df.columns = columns
    return df

In [20]:
def rename_technical(df_technical):
    df_technical = df_technical.rename(columns = {"Unnamed: 0": "datetime"})
    return df_technical

In [21]:
def convert_datetime(df):
    data = df.copy()
    data["datetime"] = pd.to_datetime(data["datetime"])
    return data

In [22]:
def merge_columns(df_values, df_technical):
    df_merged = df_values.merge(df_technical, how="inner", on="datetime")
    return df_merged

In [23]:
def clean_data(df):
    df_inter = df.interpolate(method='linear')
    df_clean = df_inter.dropna()
    return df_clean

In [24]:
def create_target(df, column_name, new_column_name='Target'):   
    result = []
    for i in range(len(df) - 1):
        if df[column_name].iloc[i] > df[column_name].iloc[i-1]:
            result.append(1)
        else:
            result.append(0)
    result.append(float('nan'))
    df[new_column_name] = pd.Series(result, index=df.index)
    return df

In [25]:
def target_drop(df_final):
    df_final = df_final.dropna()
    return df_final

In [26]:
def convert_time_sin_cos(df):
    data = df.copy()
    data['day_of_year'] = data['datetime'].dt.dayofyear
    data['time_of_day'] = data['datetime'].dt.hour * 3600 + data['datetime'].dt.minute * 60 + data['datetime'].dt.second
    data['day_of_year_norm'] = data['day_of_year'] / 365.0
    data['time_of_day_norm'] = data['time_of_day'] / 86400.0
    data['cos_time_of_day'] = np.cos(2 * np.pi * data['time_of_day_norm'])
    data['sin_time_of_day'] = np.sin(2 * np.pi * data['time_of_day_norm'])
    data["cos_day_of_year"] = np.cos(2 * np.pi * data['day_of_year_norm'])
    data["sin_day_of_year"] = np.sin(2 * np.pi * data['day_of_year_norm'])
    df_converted = data.drop(columns = ["day_of_year", "time_of_day", "day_of_year_norm", "time_of_day_norm", "datetime"])
    return df_converted

In [27]:
def log_divide_next(df):
    log_returns = np.log(df_data['close'] / df_data['close'].shift(1))
    return log_returns

# Final function that prepares all the data

In [28]:
def create_x_y(df_data, df_technical, df_macroeconomic):
    df_macroeconomic = rename_macroeconomic(df_macroeconomic)
    df_technical = rename_technical(df_technical)
    df_data = rename_columns(df_data)
    df_technical = clean_data(df_technical)
    df_data = convert_datetime(df_data)
    df_technical = convert_datetime(df_technical)
    df_macroeconomic = convert_datetime(df_macroeconomic)
    df_technical = merge_macroeconomic(df_technical, df_macroeconomic)
    df_merged = merge_columns(df_data, df_technical)
    df_target = create_target(df_merged, "close")
    df_final = target_drop(df_target)
    df_time = convert_time_sin_cos(df_target)
    X = df_time.drop(columns = "Target")
    y = df_time[["Target"]]
    log_df = log_divide_next(df_data["close"]).dropna()
    return X, y, log_df

In [109]:
X, y, log_df = create_x_y(df_data, df_technical, df_macroeconomic)

In [48]:
X.shape

(139310, 27)

In [134]:
def create_lstm_windows(X, y, window_size):
    
    X_windows = []
    y_windows = []
    
#     mm_scaler = MinMaxScaler().set_output(transform="pandas")
#     X_scaled = mm_scaler.fit_transform(X)
    
    for i in range(len(X) - window_size):
        X_windows.append(X.iloc[i:i + window_size].values)
        y_windows.append(y.iloc[i + window_size])
    
    return np.array(X_windows), np.array(y_windows)


In [135]:
X_windows, y_windows = create_lstm_windows(X, y, 10)

In [128]:
y_windows.shape

(139300, 1)

In [112]:
X_windows.shape

(139300, 10, 27)

In [124]:
y_2 = y_windows.flatten()

In [55]:
from tensorflow.keras import models
from tensorflow.keras import Sequential, layers
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras.layers import Dense, Reshape
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Normalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import text_to_word_sequence
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

2024-08-27 15:38:52.004795: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-27 15:38:52.026463: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-27 15:38:52.150188: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-27 15:38:52.349471: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-27 15:38:52.480881: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been 

In [139]:
model = Sequential()
model.add(layers.LSTM(units = 128, return_sequences = True))
model.add(layers.Dropout(0.2))
model.add(layers.LSTM(units = 64, return_sequences = True))
model.add(layers.Dropout(0.2))
model.add(layers.LSTM(units = 64, return_sequences = True))
model.add(layers.Dropout(0.2))
model.add(layers.LSTM(units = 32, return_sequences = True))
model.add(layers.Dropout(0.2))
model.add(layers.LSTM(units = 8))
model.add(layers.Dense(1, activation="sigmoid"))

In [140]:
model.compile(loss="binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

In [141]:
model.fit(X_windows, y_windows, epochs = 5, batch_size = 64)

Epoch 1/5


InaccessibleTensorError: <tf.Tensor 'sequential_21/normalization_1/Reshape:0' shape=(1, 1, 27) dtype=float32> is out of scope and cannot be used here. Use return values, explicit Python locals or TensorFlow collections to access it.
Please see https://www.tensorflow.org/guide/function#all_outputs_of_a_tffunction_must_be_return_values for more information.

<tf.Tensor 'sequential_21/normalization_1/Reshape:0' shape=(1, 1, 27) dtype=float32> was defined here:
    File "/home/yurko/.pyenv/versions/3.10.6/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    File "/home/yurko/.pyenv/versions/3.10.6/lib/python3.10/runpy.py", line 86, in _run_code
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/traitlets/config/application.py", line 982, in launch_instance
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 712, in start
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
    File "/home/yurko/.pyenv/versions/3.10.6/lib/python3.10/asyncio/base_events.py", line 600, in run_forever
    File "/home/yurko/.pyenv/versions/3.10.6/lib/python3.10/asyncio/base_events.py", line 1896, in _run_once
    File "/home/yurko/.pyenv/versions/3.10.6/lib/python3.10/asyncio/events.py", line 80, in _run
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 499, in process_one
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2885, in run_cell
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2940, in _run_cell
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3139, in run_cell_async
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3318, in run_ast_nodes
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3378, in run_code
    File "/tmp/ipykernel_176839/3393239127.py", line 1, in <module>
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 320, in fit
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 51, in train_step
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/layer.py", line 826, in __call__
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/layer.py", line 1367, in _maybe_build
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/layer.py", line 226, in build_wrapper
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/models/sequential.py", line 186, in build
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/preprocessing/tf_data_layer.py", line 49, in __call__
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/layer.py", line 826, in __call__
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/layer.py", line 1367, in _maybe_build
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/layer.py", line 226, in build_wrapper
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/preprocessing/normalization.py", line 187, in build
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/layers/preprocessing/normalization.py", line 291, in finalize_state
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/ops/numpy.py", line 4440, in reshape
    File "/home/yurko/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/src/backend/tensorflow/numpy.py", line 1789, in reshape

The tensor <tf.Tensor 'sequential_21/normalization_1/Reshape:0' shape=(1, 1, 27) dtype=float32> cannot be accessed from FuncGraph(name=one_step_on_iterator, id=139964062410432), because it was defined in FuncGraph(name=one_step_on_data, id=139964062412608), which is out of scope.