In [24]:
import pandas as pd
import yfinance as yf
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Valid Time Periods: 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max

data_ticker = yf.download("XOM", period="5y")
data_SP500 = yf.download("SPY", period="5y")
data_TM = yf.download("VTI", period="5y")

data_ticker["Percent Change"] = data_ticker["Close"].pct_change() * 100
data_ticker = data_ticker.dropna(subset=["Percent Change"])

data_SP500['Percent Change'] = data_SP500['Close'].pct_change() * 100
data_SP500 = data_SP500.dropna(subset=['Percent Change'])

data_TM['Percent Change'] = data_TM['Close'].pct_change() * 100
data_TM = data_TM.dropna(subset=['Percent Change'])

n_days = 5
candlestick_data_10_days = pd.DataFrame()

for i in range(1, n_days + 1):
    candlestick_data_10_days[f"Day_{i} Ticker Open"] = data_ticker["Open"].shift(i)
    candlestick_data_10_days[f"Day_{i} Ticker High"] = data_ticker["High"].shift(i)
    candlestick_data_10_days[f"Day_{i} Ticker Low"] = data_ticker["Low"].shift(i)
    candlestick_data_10_days[f"Day_{i} Ticker Adj Close"] = data_ticker["Adj Close"].shift(i)
    candlestick_data_10_days[f"Day_{i} Ticker Volume"] = data_ticker["Volume"].shift(i)
    candlestick_data_10_days[f"Day_{i} Ticker Percent Change"] = data_ticker["Percent Change"].shift(i)

    candlestick_data_10_days[f"Day_{i} SP500 Open"] = data_SP500["Open"].shift(i)
    candlestick_data_10_days[f"Day_{i} SP500 High"] = data_SP500["High"].shift(i)
    candlestick_data_10_days[f"Day_{i} SP500 Low"] = data_SP500["Low"].shift(i)
    candlestick_data_10_days[f"Day_{i} SP500 Adj Close"] = data_SP500["Adj Close"].shift(i)
    candlestick_data_10_days[f"Day_{i} SP500 Volume"] = data_SP500["Volume"].shift(i)
    candlestick_data_10_days[f"Day_{i} SP500 Percent Change"] = data_SP500["Percent Change"].shift(i)

    candlestick_data_10_days[f"Day_{i} TM Open"] = data_TM["Open"].shift(i)
    candlestick_data_10_days[f"Day_{i} TM High"] = data_TM["High"].shift(i)
    candlestick_data_10_days[f"Day_{i} TM Low"] = data_TM["Low"].shift(i)
    candlestick_data_10_days[f"Day_{i} TM Adj Close"] = data_TM["Adj Close"].shift(i)
    candlestick_data_10_days[f"Day_{i} TM Volume"] = data_TM["Volume"].shift(i)
    candlestick_data_10_days[f"Day_{i} TM Percent Change"] = data_TM["Percent Change"].shift(i)

def categorize_movement(change):
    if change <= -5:
        return "Large Downward"
    elif change <= -2.5:
        return "Mid-Sized Downward"
    elif change >= 5:
        return "Large Upward"
    elif change >= 2.5:
        return "Mid-Sized Upward"
    else:
        return "No Significance"

candlestick_data_10_days["Movement"] = data_ticker["Percent Change"].apply(categorize_movement)
candlestick_data_10_days = candlestick_data_10_days.dropna().reset_index(drop=True)

frequency = candlestick_data_10_days["Movement"].value_counts()
print(frequency)

candlestick_data_10_days_encoded = pd.get_dummies(candlestick_data_10_days, columns=["Movement"])
candlestick_data_10_days_encoded = candlestick_data_10_days_encoded.astype(int)

print(candlestick_data_10_days_encoded.columns)
classification_columns = ['Movement_Large Downward', 
                          'Movement_Large Upward', 
                          'Movement_Mid-Sized Downward',
                          'Movement_Mid-Sized Upward', 
                          'Movement_No Significance']

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Movement
No Significance       1013
Mid-Sized Upward       111
Mid-Sized Downward      94
Large Upward            20
Large Downward          14
Name: count, dtype: int64
Index(['Day_1 Ticker Open', 'Day_1 Ticker High', 'Day_1 Ticker Low',
       'Day_1 Ticker Adj Close', 'Day_1 Ticker Volume',
       'Day_1 Ticker Percent Change', 'Day_1 SP500 Open', 'Day_1 SP500 High',
       'Day_1 SP500 Low', 'Day_1 SP500 Adj Close', 'Day_1 SP500 Volume',
       'Day_1 SP500 Percent Change', 'Day_1 TM Open', 'Day_1 TM High',
       'Day_1 TM Low', 'Day_1 TM Adj Close', 'Day_1 TM Volume',
       'Day_1 TM Percent Change', 'Day_2 Ticker Open', 'Day_2 Ticker High',
       'Day_2 Ticker Low', 'Day_2 Ticker Adj Close', 'Day_2 Ticker Volume',
       'Day_2 Ticker Percent Change', 'Day_2 SP500 Open', 'Day_2 SP500 High',
       'Day_2 SP500 Low', 'Day_2 SP500 Adj Close', 'Day_2 SP500 Volume',
       'Day_2 SP500 Percent Change', 'Day_2 TM Open', 'Day_2 TM High',
       'Day_2 TM Low', 'Day_2 TM Adj Close', 




In [25]:
test_size = 200
train_df = candlestick_data_10_days_encoded.iloc[:-test_size]
test_df = candlestick_data_10_days_encoded.iloc[-test_size:]

x_train = train_df.drop(columns=classification_columns)
y_train = train_df[classification_columns]
x_test = test_df.drop(columns=classification_columns)
y_test = test_df[classification_columns]

feature_number = x_train.shape[1]
print(feature_number)

90


In [26]:
import keras
from scikeras.wrappers import KerasClassifier

def my_model(num_neurons, num_features):
    # Create a Sequential Neutral Network
    model = keras.models.Sequential()
    model.add(keras.Input(shape=(num_features,)))
    model.add(keras.layers.Dense(num_neurons, activation="relu"))
    model.add(keras.layers.Dense(30, activation="relu"))
    model.add(keras.layers.Dense(50, activation="relu"))
    model.add(keras.layers.Dense(5, activation="sigmoid"))

    optimizer = keras.optimizers.Adam()
    model.compile(optimizer, loss="categorical_crossentropy", metrics=[""])
    return model

model = my_model(60, num_features=feature_number)
model.fit(x_train, y_train, epochs = 100, batch_size=50, verbose = 1)
scores = model.evaluate(x_test, y_test)

print(scores)

Epoch 1/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 526us/step - accuracy: 0.6260 - loss: 2084418.2500
Epoch 2/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 501us/step - accuracy: 0.6626 - loss: 840404.3125
Epoch 3/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 501us/step - accuracy: 0.6892 - loss: 729874.8125
Epoch 4/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 495us/step - accuracy: 0.5021 - loss: 530913.0625
Epoch 5/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 467us/step - accuracy: 0.6141 - loss: 342590.2188
Epoch 6/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 454us/step - accuracy: 0.6105 - loss: 263765.2188
Epoch 7/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464us/step - accuracy: 0.6425 - loss: 261222.1719
Epoch 8/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468us/step - accuracy: 0.6525 - loss: 257

In [27]:
from sklearn.metrics import confusion_matrix

y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)
# y_true_indices = [classification_columns.index(label) for label in y_true]
cm = confusion_matrix(y_true, y_pred_classes)

TP = {}
FP = {}
FN = {}
TN = {}

# Calculate the metrics for each class
for i in range(len(cm)):
    TP[i] = cm[i, i]
    FP[i] = sum(cm[:, i]) - cm[i, i]
    FN[i] = sum(cm[i, :]) - cm[i, i]
    TN[i] = cm.sum() - (FP[i] + FN[i] + TP[i])

# Output the results
for i in range(len(cm)):
    print(f"Class {i}:")
    print(f"TP: {TP[i]}, FP: {FP[i]}, TN: {TN[i]}, FN: {FN[i]}\n")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Class 0:
TP: 0, FP: 190, TN: 10, FN: 0

Class 1:
TP: 0, FP: 1, TN: 199, FN: 0

Class 2:
TP: 0, FP: 5, TN: 193, FN: 2

Class 3:
TP: 0, FP: 4, TN: 191, FN: 5

Class 4:
TP: 0, FP: 0, TN: 7, FN: 193

