In [1]:
def rename_macroeconomic(df_macroeconomic):
    columns_macroeconomic = ["datetime", "interest_rate", "GDP", "inflation"]
    df_macroeconomic.columns = columns_macroeconomic
    return df_macroeconomic

def merge_macroeconomic(df_technical, df_macroeconomic):
    df_merged = df_technical.merge(df_macroeconomic, on="datetime", how="inner")
    return df_merged

def rename_columns(df):
    columns = ["datetime", "open", "high", "low", "close", "volume"]
    df.columns = columns
    return df

def rename_technical(df_technical):
    df_technical = df_technical.rename(columns = {"Unnamed: 0": "datetime"})
    return df_technical

def convert_datetime(df):
    data = df.copy()
    data["datetime"] = pd.to_datetime(data["datetime"])
    return data

def merge_columns(df_values, df_technical):
    df_merged = df_values.merge(df_technical, how="inner", on="datetime")
    return df_merged

def clean_data(df):
    df_inter = df.interpolate(method='linear')
    df_clean = df_inter.dropna()
    return df_clean

def create_target(df, column_name, new_column_name='Target'):
    result = []
    for i in range(1, len(df)):
        change_percent = (df[column_name].iloc[i] / df[column_name].iloc[i-1] - 1) * 100
        if change_percent > 0.07:
            result.append(1)
        elif change_percent < -0.07:
            result.append(-1)
        else:
            result.append(0)
    
    result.append(float('nan'))

    df[new_column_name] = pd.Series(result, index=df.index)

    return df

def target_drop(df_final):
    df_final = df_final.dropna()
    return df_final

def convert_time_sin_cos(df):
    data = df.copy()
    data['day_of_year'] = data['datetime'].dt.dayofyear
    data['time_of_day'] = data['datetime'].dt.hour * 3600 + data['datetime'].dt.minute * 60 + data['datetime'].dt.second
    data['day_of_year_norm'] = data['day_of_year'] / 365.0
    data['time_of_day_norm'] = data['time_of_day'] / 86400.0
    data['cos_time_of_day'] = np.cos(2 * np.pi * data['time_of_day_norm'])
    data['sin_time_of_day'] = np.sin(2 * np.pi * data['time_of_day_norm'])
    data["cos_day_of_year"] = np.cos(2 * np.pi * data['day_of_year_norm'])
    data["sin_day_of_year"] = np.sin(2 * np.pi * data['day_of_year_norm'])
    df_converted = data.drop(columns = ["day_of_year", "time_of_day", "day_of_year_norm", "time_of_day_norm", "datetime"])
    return df_converted

def log_divide_next(df):
    log_returns = np.log(df['close'] / df['close'].shift(1))
    return log_returns

def split_into_categories(df, column_name, prefix='Category'):

    df[f'{prefix}_1'] = (df[column_name] == 1).astype(int)
    df[f'{prefix}_0'] = (df[column_name] == 0).astype(int)
    df[f'{prefix}_-1'] = (df[column_name] == -1).astype(int)
    
    return df

In [2]:
def create_x_y(df_data, df_technical, df_macroeconomic):
    df_macroeconomic = rename_macroeconomic(df_macroeconomic)
    df_technical = rename_technical(df_technical)
    df_data = rename_columns(df_data)
    df_technical = clean_data(df_technical)
    df_data = convert_datetime(df_data)
    df_technical = convert_datetime(df_technical)
    df_macroeconomic = convert_datetime(df_macroeconomic)
    df_technical = merge_macroeconomic(df_technical, df_macroeconomic)
    df_merged = merge_columns(df_data, df_technical)
    df_target = create_target(df_merged, "close")
    df_final = target_drop(df_target)
    df_time = convert_time_sin_cos(df_final)
    X = df_time.drop(columns = "Target")
    y_initial = df_time[["Target"]]
    y = split_into_categories(y_initial, "Target")
    y = y.drop(columns = "Target")
    log_df = log_divide_next(df_data).dropna()
    return X, y, log_df

In [3]:
from logic.getting import *

In [4]:
from logic.scaler import *

In [5]:
from logic.windows_preproc import *

In [6]:
from tensorflow.keras.layers import Normalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Flatten, LSTM, GRU, Embedding
import tensorflow.keras.layers as layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import hinge, Recall, Precision, PrecisionAtRecall
from tensorflow.keras import models, layers, regularizers
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.utils as utils
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [15]:
price = get_price_data_raw('JNJ')

In [16]:
price = price.sort_values(by='timestamp_field_0')

In [17]:
tech = get_technical_analysis('JNJ')

In [18]:
tech = tech.sort_values(by='timestamp_field_0')

In [19]:
macro = get_macro_data()

In [20]:
macro = macro.sort_values(by='timestamp')

In [24]:
macro.drop(columns = 'int64_field_0', inplace=True)

In [31]:
tech.columns = ['datetime', 'EMA', 'SlowK', 'SlowD', 'Real Upper Band',
       'Real Middle Band', 'Real Lower Band', 'WILLR', 'ATR', 'CMO', 'MACD',
       'MACD_Signal', 'MACD_Hist', 'RSI', 'OBV', 'SAR']

In [32]:
X, y, log_df = create_x_y(price, tech, macro)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{prefix}_1'] = (df[column_name] == 1).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{prefix}_0'] = (df[column_name] == 0).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{prefix}_-1'] = (df[column_name] == -1).astype(int)


In [33]:
X.shape

(95357, 27)

In [34]:
X = X.drop(columns = ["interest_rate", "GDP", "inflation", "cos_time_of_day", "sin_time_of_day", "cos_day_of_year", "sin_day_of_year"])

In [35]:
X_train = X[:70000]

In [36]:
X_val = X[70000:80000]

In [37]:
X_test = X[80000:]

In [38]:
y_train = y[:70000]

In [39]:
y_val = y[70000:80000]

In [40]:
y_test = y[80000:]

In [41]:
scaler = initialize_scaler(X_train)

In [42]:
X_train_sc = transform_scaler(scaler, X_train)

In [43]:
X_val_sc = transform_scaler(scaler, X_val)

In [44]:
X_test_sc = transform_scaler(scaler, X_test)

In [45]:
X_tr_w, y_tr_w = create_lstm_windows(X_train_sc, y_train, 20)

In [46]:
X_val_w, y_val_w = create_lstm_windows(X_val_sc, y_val, 20)

In [48]:
X_te_w, y_te_w = create_lstm_windows(X_test_sc, y_test, 20)

In [127]:
price

Unnamed: 0,datetime,open,high,low,close,volume
33256,2005-01-03 09:30:00+00:00,36.738,36.981,36.460,36.730,909200
33255,2005-01-03 10:00:00+00:00,36.738,36.865,36.373,36.521,941700
7779,2005-01-03 10:30:00+00:00,36.535,36.661,36.327,36.515,435700
40536,2005-01-03 11:00:00+00:00,36.523,36.667,36.292,36.469,551800
86311,2005-01-03 11:30:00+00:00,36.483,36.615,36.298,36.446,443200
...,...,...,...,...,...,...
91592,2023-08-31 17:30:00+00:00,159.466,159.492,159.002,159.167,10146
77034,2023-08-31 18:00:00+00:00,159.150,159.501,158.992,159.315,764
92327,2023-08-31 18:30:00+00:00,159.180,159.472,159.140,159.197,2252469
92326,2023-08-31 19:00:00+00:00,159.180,159.442,159.140,159.305,2252570


In [131]:
for elem in price:
    print(elem)
    

datetime
open
high
low
close
volume


In [132]:
df = price.copy()

In [133]:
df['next_open'] = df['open'].shift(-1)

In [134]:
df

Unnamed: 0,datetime,open,high,low,close,volume,next_open
33256,2005-01-03 09:30:00+00:00,36.738,36.981,36.460,36.730,909200,36.738
33255,2005-01-03 10:00:00+00:00,36.738,36.865,36.373,36.521,941700,36.535
7779,2005-01-03 10:30:00+00:00,36.535,36.661,36.327,36.515,435700,36.523
40536,2005-01-03 11:00:00+00:00,36.523,36.667,36.292,36.469,551800,36.483
86311,2005-01-03 11:30:00+00:00,36.483,36.615,36.298,36.446,443200,36.448
...,...,...,...,...,...,...,...
91592,2023-08-31 17:30:00+00:00,159.466,159.492,159.002,159.167,10146,159.150
77034,2023-08-31 18:00:00+00:00,159.150,159.501,158.992,159.315,764,159.180
92327,2023-08-31 18:30:00+00:00,159.180,159.472,159.140,159.197,2252469,159.180
92326,2023-08-31 19:00:00+00:00,159.180,159.442,159.140,159.305,2252570,159.377


In [135]:
def classify_movement(current, next_val):
    if next_val > current:
        return 'up'
    elif next_val < current:
        return 'down'
    else:
        return 'neutral'


In [136]:
df['up_down_neutral'] = df.apply(lambda row: classify_movement(row['open'], row['next_open']), axis=1)

In [137]:
df = df.dropna(subset=['next_open'])


In [138]:
df = df.drop(columns=['next_open'])


In [141]:
df_small = df[['datetime', 'open', 'up_down_neutral']]

In [142]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split

In [143]:
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['up_down_neutral'])


In [144]:
X = df[['open', 'high', 'low', 'close', 'volume']].values
y = df['label'].values

In [145]:
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)

In [146]:
X = X.reshape((X.shape[0], 1, X.shape[1]))


In [147]:
X_windows = []
y_windows = []

In [150]:
window_size =30

In [151]:
for i in range(len(X) - window_size):
    X_windows.append(X[i:i + window_size])
    y_windows.append(y[i + window_size])

X_windows = np.array(X_windows)
y_windows = np.array(y_windows)

In [152]:
X_windows = np.array(X_windows)
y_windows = np.array(y_windows)

In [158]:
X_windows = X_windows.reshape((95387,30,5))

In [163]:
y_windows = y_windows.reshape((95387,1))

In [164]:
X_train, X_test, y_train, y_test = train_test_split(X_windows, y_windows, test_size=0.2, shuffle=False)


In [165]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(window_size, X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25, activation='relu'))
model.add(Dense(units=3, activation='softmax'))  # 3 classes: up, down, neutral


  super().__init__(**kwargs)


In [166]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [167]:
model.summary()

In [168]:

model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
[1m2385/2385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 9ms/step - accuracy: 0.4653 - loss: 0.8913 - val_accuracy: 0.4762 - val_loss: 0.8605
Epoch 2/20
[1m2385/2385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.4693 - loss: 0.8806 - val_accuracy: 0.4727 - val_loss: 0.8603
Epoch 3/20
[1m2385/2385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.4687 - loss: 0.8827 - val_accuracy: 0.4727 - val_loss: 0.8596
Epoch 4/20
[1m2385/2385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.4720 - loss: 0.8815 - val_accuracy: 0.4762 - val_loss: 0.8611
Epoch 5/20
[1m2385/2385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.4681 - loss: 0.8806 - val_accuracy: 0.4727 - val_loss: 0.8595
Epoch 6/20
[1m2385/2385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.4700 - loss: 0.8806 - val_accuracy: 0.4762 - val_loss: 0.8597
Epoch 7/20

<keras.src.callbacks.history.History at 0x16b4ea9b0>