In [11]:
import keras
import numpy as np
import requests
from datetime import datetime
from typing import List, Dict, Any

from keras.src.backend.config import keras_home
from prompt_toolkit.input import Input
from stock_indicators.indicators.common.quote import Quote
import pandas as pd

In [12]:
class StockResponse:
    stock_data: List[Quote]

    def __init__(self, stock_data: List[Quote]):
        self.stock_data = stock_data

    @classmethod
    def from_json(cls, data: List[Dict[str, Any]]) -> 'StockResponse':
        stock_data = [
            Quote(datetime.strptime(item['date'], '%Y-%m-%d'), item['open'], item['high'], item['low'], item['close'],
                  item['volume'])
            for item in data
        ]
        return cls(stock_data=stock_data)

    def to_dataframe(self) -> pd.DataFrame:
        data = [[
            stock.date,
            stock.open,
            stock.high,
            stock.low,
            stock.close,
            stock.volume
        ] for stock in self.stock_data]
        return pd.DataFrame(data, columns=['date', 'open', 'high', 'low', 'close', 'volume'])

    def to_numpy(self) -> np.ndarray:
        data = [[
            stock.date,
            stock.open,
            stock.high,
            stock.low,
            stock.close,
            stock.volume
        ] for stock in self.stock_data]
        return np.array(data)

In [13]:
stock_url = "https://exodus.stockbit.com/chartbit/BBCA/price/daily?from=2024-12-31&to=2020-1-1&limit=0"
bearer_token = "eyJhbGciOiJSUzI1NiIsImtpZCI6IjU3MDc0NjI3LTg4MWItNDQzZC04OTcyLTdmMmMzOTNlMzYyOSIsInR5cCI6IkpXVCJ9.eyJkYXRhIjp7InVzZSI6InJpVkZlcmQiLCJlbWEiOiJ2aXJneWZlcmRpYW4yMTNAZ21haWwuY29tIiwiZnVsIjoiVmlyZ3kgRmVyZGlhbiBTdXJ5YSBGaXJtYW5zeWFoIiwic2VzIjoiSVduQ0xLRDEyOEhQY0VITiIsImR2YyI6IiIsInVpZCI6MjA1MjI0NiwiY291IjoiU0cifSwiZXhwIjoxNzQ0MjM1NTQ5LCJpYXQiOjE3NDQxNDkxNDksImlzcyI6IlNUT0NLQklUIiwianRpIjoiMjYyNTFjYTYtOTdiNi00MzEwLTgwYWEtZGU0OGY1NmE2ZTVjIiwibmJmIjoxNzQ0MTQ5MTQ5LCJ2ZXIiOiJ2MSJ9.kQkHDaOj1VsMdWcGad1hL5jZZ2n3JVVsyOC1UbEKG6ULBF2LhlksiiqKmbKFAA1YqikYIXW2JiJJhll6kC-5vReiAyDXMRCHo5IH9CBrTu_rx7KnslcB3VbKjwtC3HJZy9DgA-qxDMXhuQaVtf4QNbHVXIo3AIYYL94qC3SHrYQS0DeT9Ao3uEV0PSxRTdAteP6oRxuCwddOv-7bAtYpJwN59sccsWiCp9djDoR6_6424E65x8-x4tT3Z7bAh-ko8edXa_olyOrKabhugp6q05P-BfuXob3R3u_RPOt6xiH8Y-6kCvwlu7DQlZr6ml2pF0yNby5LVVzuFsSUHTY1oQ"
headers = {
    "Authorization": f"Bearer {bearer_token}",
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}

r = requests.get(stock_url, headers=headers)
r

<Response [200]>

# Checkpoint to restart from

In [38]:
json_data = r.json()['data']['chartbit']
stock_response = StockResponse.from_json(json_data)
df   = stock_response.to_dataframe()
df

Unnamed: 0,date,open,high,low,close,volume
0,2024-12-30,9800,9825,9675,9675,56350100
1,2024-12-27,9800,9825,9725,9800,24016700
2,2024-12-24,9850,9900,9750,9750,32415700
3,2024-12-23,9700,9850,9700,9775,43292100
4,2024-12-20,9650,9750,9625,9650,91576900
...,...,...,...,...,...,...
1206,2020-01-08,6670,6745,6670,6680,53691500
1207,2020-01-07,6735,6770,6730,6735,45022500
1208,2020-01-06,6720,6750,6690,6735,27300000
1209,2020-01-03,6750,6800,6725,6800,47755500


In [39]:
# init variable for dataset to be used
# remove all column except for date and move the close price to y
df = stock_response.to_dataframe()
df = df.drop(columns=['open', 'high', 'low', 'volume'])

# Add column for the next day close price
df['next_day_price_move'] = df['close'].shift(-1)

# Drop the last row as it will have NaN in 'next_day_price_move'
df = df.drop(df.index[-1])

# Create the label with 3 classes
df['next_day_price_move'] = np.where(df['close'] < df['next_day_price_move'], 2, 
                                     np.where(df['close'] == df['next_day_price_move'], 1, 0))

# Ensure there are no invalid labels
assert df['next_day_price_move'].isin([0, 1, 2]).all(), "Invalid label found"

df

Unnamed: 0,date,close,next_day_price_move
0,2024-12-30,9675,2
1,2024-12-27,9800,0
2,2024-12-24,9750,2
3,2024-12-23,9775,0
4,2024-12-20,9650,2
...,...,...,...
1205,2020-01-09,6740,0
1206,2020-01-08,6680,2
1207,2020-01-07,6735,1
1208,2020-01-06,6735,2


# Define the Technical Indicators

In [40]:
from stock_indicators import indicators
stock_data = stock_response.stock_data # Holds the data in Quote object to be used for indicators
sma = indicators.get_sma(stock_data, 14)
rsi = indicators.get_rsi(stock_data, 14)
macd = indicators.get_macd(stock_data)
bollinger = indicators.get_bollinger_bands(stock_data, 20)
atr = indicators.get_atr(stock_data, 14)
wma = indicators.get_wma(stock_data, 14)
tr = indicators.get_tr(stock_data)
stoch_oscillator = indicators.get_stoch(stock_data)
william = indicators.get_williams_r(stock_data)
ema = indicators.get_ema(stock_data, 14)
obv = indicators.get_obv(stock_data)
ichimoku = indicators.get_ichimoku(stock_data)
vwap = indicators.get_vwap(stock_data)
smi = indicators.get_smi(stock_data)
dema = indicators.get_dema(stock_data, 14)
mfi = indicators.get_mfi(stock_data)
cci = indicators.get_cci(stock_data)
cmo = indicators.get_cmo(stock_data, 14)

In [41]:
# add above indicators to the dataset
for i in range(0, len(df)):
    df.loc[i, 'SMA'] = sma[i].sma
    df.loc[i, 'RSI'] = rsi[i].rsi
    df.loc[i, 'MACD'] = macd[i].macd
    df.loc[i, 'bollinger_upper'] = bollinger[i].upper_band
    df.loc[i, 'bollinger_lower'] = bollinger[i].lower_band
    df.loc[i, 'ATR'] = atr[i].atr
    df.loc[i, 'WMA'] = wma[i].wma
    df.loc[i, 'TR'] = tr[i].tr
    df.loc[i, '%K'] = stoch_oscillator[i].k
    df.loc[i, '%D'] = stoch_oscillator[i].d
    df.loc[i, '%R'] = william[i].williams_r
    df.loc[i, 'EMA'] = ema[i].ema
    df.loc[i, 'OBV'] = obv[i].obv
    df.loc[i, 'Ichimoku'] = ichimoku[i].kijun_sen
    df.loc[i, 'VWAP'] = vwap[i].vwap
    df.loc[i, 'SMI'] = smi[i].smi
    df.loc[i, 'DEMA'] = dema[i].dema
    df.loc[i, 'MFI'] = mfi[i].mfi
    df.loc[i, 'CCI'] = cci[i].cci
    df.loc[i, 'CMO'] = cmo[i].cmo

df

Unnamed: 0,date,close,next_day_price_move,SMA,RSI,MACD,bollinger_upper,bollinger_lower,ATR,WMA,...,%R,EMA,OBV,Ichimoku,VWAP,SMI,DEMA,MFI,CCI,CMO
0,2024-12-30,9675,2,,,,,,,,...,,,0.0,,6716.666667,,,,,
1,2024-12-27,9800,0,,,,,,,,...,,,47755500.0,,6745.326370,,,,,
2,2024-12-24,9750,2,,,,,,,,...,,,20455500.0,,6740.869280,,,,,
3,2024-12-23,9775,0,,,,,,,,...,,,20455500.0,,6741.966331,,,,,
4,2024-12-20,9650,2,,,,,,,,...,,,-33236000.0,,6731.470951,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1205,2020-01-09,6740,0,10092.857143,37.424092,-84.331735,10469.016682,9648.483318,210.981941,10046.666667,...,-85.714286,10028.208032,-174579572.0,9987.5,7733.318980,-7.961127,9932.660208,44.421088,-192.664248,-15.294118
1206,2020-01-08,6680,2,10085.714286,36.879751,-106.950887,10487.633527,9609.866473,204.840374,9987.619048,...,-88.571429,9977.780294,-266156472.0,9987.5,7734.997905,-14.491946,9851.268141,52.276306,-166.85022,-5.263158
1207,2020-01-07,6735,1,10055.357143,41.464286,-113.48212,10491.822112,9598.177888,204.494633,9946.190476,...,-74.285714,9950.742922,-222864372.0,9987.5,7735.831450,-18.633865,9817.666666,48.336484,-115.452306,-26.984127
1208,2020-01-06,6735,2,10023.214286,40.825619,-119.300242,10484.042027,9563.457973,200.602159,9905.47619,...,-77.142857,9923.977199,-255280072.0,9987.5,7736.462782,-22.130776,9785.447484,44.704262,-91.914475,-28.125


## Handle dtypes

In [42]:
# Fill in the missing values or drop rows with NaN values
df = df.dropna()

# Convert columns to their appropriate data types
df = df.astype({
    'close': 'float64',
    'SMA': 'float64',
    'RSI': 'float64',
    'MACD': 'float64',
    'bollinger_upper': 'float64',
    'bollinger_lower': 'float64',
    'ATR': 'float64',
    'WMA': 'float64',
    'TR': 'float64',
    '%K': 'float64',
    '%D': 'float64',
    '%R': 'float64',
    'EMA': 'float64',
    'OBV': 'float64',
    'Ichimoku': 'float64',
    'VWAP': 'float64',
    'SMI': 'float64',
    'DEMA': 'float64',
    'MFI': 'float64',
    'CCI': 'float64',
    'CMO': 'float64',
    'next_day_price_move': 'int64'
})

# Verify the data types
print(df.dtypes)
df

date                   datetime64[ns]
close                         float64
next_day_price_move             int64
SMA                           float64
RSI                           float64
MACD                          float64
bollinger_upper               float64
bollinger_lower               float64
ATR                           float64
WMA                           float64
TR                            float64
%K                            float64
%D                            float64
%R                            float64
EMA                           float64
OBV                           float64
Ichimoku                      float64
VWAP                          float64
SMI                           float64
DEMA                          float64
MFI                           float64
CCI                           float64
CMO                           float64
dtype: object


Unnamed: 0,date,close,next_day_price_move,SMA,RSI,MACD,bollinger_upper,bollinger_lower,ATR,WMA,...,%R,EMA,OBV,Ichimoku,VWAP,SMI,DEMA,MFI,CCI,CMO
25,2024-11-20,10075.0,0,6735.714286,52.768361,-54.808806,6997.634173,6522.365827,127.769813,6691.666667,...,-38.842975,6704.490053,-106788488.0,6715.0,6746.214270,-29.020779,6659.708577,47.823049,-33.594260,-16.564417
26,2024-11-19,9925.0,2,6730.357143,53.940043,-45.824414,6998.842282,6524.657718,124.000541,6694.904762,...,-31.578947,6711.891379,-24179488.0,6715.0,6746.294752,-23.172825,6679.495250,47.337977,-21.580863,-9.433962
27,2024-11-18,10000.0,2,6727.142857,55.428474,-36.268842,7000.918189,6526.581811,126.214788,6702.190476,...,-27.192982,6721.639195,42399012.0,6715.0,6745.909790,-17.085685,6702.010657,48.725579,-32.457496,-5.521472
28,2024-11-15,10175.0,0,6722.857143,55.045349,-28.767824,6991.548838,6526.951162,120.056589,6709.238095,...,-28.070175,6729.420636,-35127484.0,6715.0,6746.715088,-11.486588,6719.153151,48.022587,-1.785183,-7.407407
29,2024-11-14,10100.0,2,6720.000000,56.345173,-20.967678,6988.010303,6526.989697,114.338261,6719.523810,...,-24.561404,6738.831218,34844020.0,6715.0,6747.885158,-6.000627,6738.088569,54.138490,16.701461,-4.819277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1205,2020-01-09,6740.0,0,10092.857143,37.424092,-84.331735,10469.016682,9648.483318,210.981941,10046.666667,...,-85.714286,10028.208032,-174579572.0,9987.5,7733.318980,-7.961127,9932.660208,44.421088,-192.664248,-15.294118
1206,2020-01-08,6680.0,2,10085.714286,36.879751,-106.950887,10487.633527,9609.866473,204.840374,9987.619048,...,-88.571429,9977.780294,-266156472.0,9987.5,7734.997905,-14.491946,9851.268141,52.276306,-166.850220,-5.263158
1207,2020-01-07,6735.0,1,10055.357143,41.464286,-113.482120,10491.822112,9598.177888,204.494633,9946.190476,...,-74.285714,9950.742922,-222864372.0,9987.5,7735.831450,-18.633865,9817.666666,48.336484,-115.452306,-26.984127
1208,2020-01-06,6735.0,2,10023.214286,40.825619,-119.300242,10484.042027,9563.457973,200.602159,9905.476190,...,-77.142857,9923.977199,-255280072.0,9987.5,7736.462782,-22.130776,9785.447484,44.704262,-91.914475,-28.125000


# Feature Selection with Recursive Feature Elimination (RFE)

In [43]:
import statsmodels.api as sm
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

X = df.loc[:, (df.columns != 'next_day_price_move') & (df.columns != 'date') & (df.columns != 'close')]
y = df['next_day_price_move']

model = LinearRegression()
selector = RFE(estimator=model, n_features_to_select=10)  # Pilih 5 fitur terbaik
selector = selector.fit(X, y)

selected_features = X.columns[selector.support_]
print("Fitur yang terpilih:", selected_features.tolist())

X = df[selected_features]

Fitur yang terpilih: ['SMA', 'RSI', 'MACD', 'ATR', 'WMA', '%D', '%R', 'DEMA', 'MFI', 'CMO']


# Prepare the Data for Training and Testing

In [44]:
# X = df.loc[:, (df.columns != 'next_day_price_move') & (df.columns != 'date') & (df.columns != 'close')]
# y = df['next_day_price_move']
# 
# Normalize the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# 
# X = scaler.fit_transform(X)
# 
# X

# Exclude the 'date' and 'next_day_price_move' columns before normalization (CHANGE BASED ON USAGE OF RFE)
# df_features = df.drop(columns=['date', 'next_day_price_move'])
df_features = df[selected_features]

# Normalize the DataFrame without the 'date' column
df_normalized = pd.DataFrame(scaler.fit_transform(df_features), columns=df_features.columns)

# Add the 'date' column back to the normalized DataFrame
df_normalized.insert(0, 'date', df['date'].values)

# Move 'next_day_price_move' to the last column
df_normalized['next_day_price_move'] = df['next_day_price_move'].values


print(df_normalized.head())

        date       SMA       RSI      MACD       ATR       WMA        %D  \
0 2024-11-20 -0.791110  0.047612 -0.726755 -0.676870 -0.822840 -0.691449   
1 2024-11-19 -0.794690  0.163279 -0.638829 -0.763245 -0.820680 -0.100762   
2 2024-11-18 -0.796839  0.310215 -0.545314 -0.712504 -0.815820  0.296266   
3 2024-11-15 -0.799703  0.272394 -0.471906 -0.853623 -0.811119  0.524626   
4 2024-11-14 -0.801613  0.400711 -0.395570 -0.984661 -0.804258  0.654957   

         %R      DEMA       MFI       CMO  next_day_price_move  
0  0.259676 -0.844993 -0.193749 -0.726091                    0  
1  0.498161 -0.831905 -0.225183 -0.479103                    2  
2  0.642156 -0.817011 -0.135262 -0.343580                    2  
3  0.613357 -0.805672 -0.180818 -0.408906                    0  
4  0.728554 -0.793146  0.215513 -0.319257                    2  


In [45]:
df_normalized.dtypes # just checking the dtypes

date                   datetime64[ns]
SMA                           float64
RSI                           float64
MACD                          float64
ATR                           float64
WMA                           float64
%D                            float64
%R                            float64
DEMA                          float64
MFI                           float64
CMO                           float64
next_day_price_move             int64
dtype: object

In [46]:
# from sklearn.model_selection import train_test_split
# 
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

def create_sliding_window(data, window_size):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:i + window_size, :-1])  # Include all features except the target
        y.append(data[i + window_size, -1])  # The target is the last column
    return np.array(X), np.array(y)

# Assuming df is your DataFrame and 'next_day_price_move' is the target column
window_size = 60  # Define the size of the sliding window

# Convert the DataFrame to a NumPy array (CHANGE BASED ON USAGE OF RFE)
# data = df_normalized.drop(columns=['date', 'close']).values
data = df_normalized.drop(columns=['date']).values

# Create the sliding window data
X, y = create_sliding_window(data, window_size)

# Split the data into training and testing sets
split_ratio = 0.8
split_index = int(len(X) * split_ratio)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (900, 60, 10), y_train shape: (900,)
X_test shape: (225, 60, 10), y_test shape: (225,)


# Train and Evaluate the Model with LSTM

In [47]:
import tensorflow as tf
from keras import Sequential
from keras.src.layers import LSTM, Dropout, Dense

# Define the LSTM model
# model = Sequential([
#     LSTM(50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),  # First LSTM layer
#     Dropout(0.2),
#     LSTM(50),  # Second LSTM layer
#     Dropout(0.2),
#     Dense(3, activation='softmax')  # Output layer with 3 classes
# ])
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))  # First LSTM layer
model.add(LSTM(64, return_sequences=False))
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(3, activation="softmax"))

# Compile the model
# model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

history = model.fit(X_train, y_train, epochs=50, batch_size=32)

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

  super().__init__(**kwargs)


Epoch 1/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - accuracy: 0.4069 - loss: 1.0635
Epoch 2/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.4629 - loss: 0.9743
Epoch 3/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.4292 - loss: 0.9899
Epoch 4/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.4827 - loss: 0.9586
Epoch 5/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.4415 - loss: 0.9698
Epoch 6/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.4339 - loss: 0.9490
Epoch 7/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.4214 - loss: 0.9546
Epoch 8/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.4794 - loss: 0.9386
Epoch 9/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━

# Evaluate the model

In [48]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# Predict the classes for the test set
y_pred = model.predict(X_test)
y_pred_classes = np.where(y_pred > 0.5, 1, 0)  # Assuming binary classification

# Compute the confusion matrix
cm = confusion_matrix(y_test, y_pred_classes)

# Print the classification report
print(classification_report(y_test, y_pred_classes))

# Visualize the confusion matrix
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


ValueError: Classification metrics can't handle a mix of multiclass and multilabel-indicator targets

In [49]:
y_pred = np.argmax(model.predict(X_test), axis=1)

# Compare predictions with actual values
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred, target_names=['Down', 'Same', 'Up']))

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
              precision    recall  f1-score   support

        Down       0.47      0.16      0.23       103
        Same       0.00      0.00      0.00         8
          Up       0.51      0.85      0.64       114

    accuracy                           0.50       225
   macro avg       0.33      0.34      0.29       225
weighted avg       0.47      0.50      0.43       225



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
