In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import time
from datetime import datetime

# Scikit-learn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import ParameterSampler

# TensorFlow/Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

# Settings: silence library warnings and apply one plot theme notebook-wide.
warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Seed every random number generator the notebook depends on.
# tf.keras.utils.set_random_seed seeds Python's `random`, NumPy and TensorFlow
# in one call, so weight initialisation, dropout and batch shuffling start
# from the same state on every Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

print("\n✓ All libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")



✓ All libraries imported successfully!
TensorFlow version: 2.15.0


In [2]:
# Download daily price data for the configured ticker and date range.
ticker = 'TLKM.JK'
start_date = '2015-01-01'
end_date = '2025-11-04'  # yfinance treats `end` as exclusive

print(f"\nDownloading {ticker} data from {start_date} to {end_date}...")
# auto_adjust is pinned explicitly: its default differs between yfinance
# versions, and the recorded output (no 'Adj Close' column) corresponds to
# adjusted prices. Pinning keeps the price series stable across upgrades.
df_raw = yf.download(
    ticker,
    start=start_date,
    end=end_date,
    auto_adjust=True,
    progress=False,
)

# An empty frame here would only surface as an obscure indexing error in a
# later cell, so stop right away with a clear message.
if df_raw.empty:
    raise RuntimeError(
        f"No data returned for {ticker} between {start_date} and {end_date}; "
        "check the ticker symbol and the network connection."
    )

df_raw


Downloading TLKM.JK data from 2015-01-01 to 2025-11-04...


Price,Close,High,Low,Open,Volume
Ticker,TLKM.JK,TLKM.JK,TLKM.JK,TLKM.JK,TLKM.JK
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2015-01-02,1755.473389,1770.818436,1749.335370,1770.818436,18992100
2015-01-05,1740.128296,1749.335324,1730.921268,1749.335324,49940700
2015-01-06,1727.852295,1740.128333,1718.645267,1724.783286,47892100
2015-01-07,1724.783447,1761.611564,1715.576418,1758.542554,70076600
2015-01-08,1740.128296,1746.266315,1733.990277,1743.197305,56582500
...,...,...,...,...,...
2025-10-28,3400.000000,3480.000000,3270.000000,3330.000000,143082100
2025-10-29,3290.000000,3390.000000,3270.000000,3390.000000,93073800
2025-10-30,3250.000000,3310.000000,3200.000000,3290.000000,83523100
2025-10-31,3210.000000,3250.000000,3150.000000,3200.000000,126790400


In [3]:
# The download has two column levels (price field, ticker); drop the ticker
# level so that columns can be addressed by the price field name alone.
if isinstance(df_raw.columns, pd.MultiIndex):
    df_raw.columns = df_raw.columns.droplevel(1)

# Rebuild a plain DataFrame in a fixed column order, squeezing each column so
# that every entry is a one-dimensional Series.
ohlcv_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
df = pd.DataFrame(
    {column: df_raw[column].squeeze() for column in ohlcv_columns},
    index=df_raw.index,
)

n_records = len(df)
first_day, last_day = df.index[0], df.index[-1]

print("\n✓ Data downloaded successfully!")
print(f"Total records: {n_records}")
print(f"Date range: {first_day} to {last_day}")
print(f"Trading days: {n_records} days")

# Sanity check: Volume must be a 1-D Series, not a single-column frame.
volume = df['Volume']
print("\nData structure verification:")
print(f"Volume type: {type(volume)}")
print(f"Volume shape: {volume.shape}")


✓ Data downloaded successfully!
Total records: 2671
Date range: 2015-01-02 00:00:00 to 2025-11-03 00:00:00
Trading days: 2671 days

Data structure verification:
Volume type: <class 'pandas.core.series.Series'>
Volume shape: (2671,)


In [5]:
# Model inputs are the four OHLC price columns; Volume is left out.
features = ['Open', 'High', 'Low', 'Close']
data = df.loc[:, features].copy()

print(f"\nSelected features: {features}")
print("Target variable: Close (for prediction)")
print(f"Data shape: {data.shape}")


Selected features: ['Open', 'High', 'Low', 'Close']
Target variable: Close (for prediction)
Data shape: (2671, 4)


In [6]:
# The dates already form the index of the downloaded frame, so no set_index
# step is needed; just report the index name and type.
date_index = data.index
print(f"Date is already set as index: {date_index.name}")
print(f"Index type: {type(date_index)}")

Date is already set as index: Date
Index type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [8]:
# Chronological 70 / 15 / 15 split. The test set takes whatever remains after
# the integer truncation of the first two sizes.
n_total = len(data)
train_size = int(n_total * 0.70)
val_size = int(n_total * 0.15)
test_size = n_total - train_size - val_size

# Slice in time order (no shuffling) so later periods never leak into training.
val_end = train_size + val_size
train_data = data.iloc[:train_size]
val_data = data.iloc[train_size:val_end]
test_data = data.iloc[val_end:]

print(f"\nTotal data: {n_total} days")
for split_name, split in (
    ("Train", train_data),
    ("Validation", val_data),
    ("Test", test_data),
):
    print(f"\n{split_name} set: {len(split)} days ({len(split)/n_total*100:.1f}%)")
    print(f"  Period: {split.index[0]} to {split.index[-1]}")


Total data: 2671 days

Train set: 1869 days (70.0%)
  Period: 2015-01-02 00:00:00 to 2022-07-01 00:00:00

Validation set: 400 days (15.0%)
  Period: 2022-07-04 00:00:00 to 2024-02-19 00:00:00

Test set: 402 days (15.1%)
  Period: 2024-02-20 00:00:00 to 2025-11-03 00:00:00


In [9]:
# Min-max scaler mapping each feature to [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))

# The scaler learns its min/max from the training split only; validation and
# test are transformed with those same statistics, so their values may fall
# outside [0, 1].
train_scaled = scaler.fit_transform(train_data)
val_scaled, test_scaled = (
    scaler.transform(split) for split in (val_data, test_data)
)

train_min = train_data.min().values
train_max = train_data.max().values

print("\n✓ Scaler fitted on TRAINING data only!")
print("\nOriginal value range:")
print(f"  Min: {train_min}")
print(f"  Max: {train_max}")
print("\nScaled value range: [0, 1]")
print("\nScaled data shapes:")
for split_label, scaled in (
    ("Train", train_scaled),
    ("Val", val_scaled),
    ("Test", test_scaled),
):
    print(f"  {split_label}: {scaled.shape}")


✓ Scaler fitted on TRAINING data only!

Original value range:
  Min: [1614.92643214 1649.75817871 1573.76164073 1646.59179688]
  Max: [3922.42499281 3922.42499281 3817.28796924 3857.7253418 ]

Scaled value range: [0, 1]

Scaled data shapes:
  Train: (1869, 4)
  Val: (400, 4)
  Test: (402, 4)


In [10]:
def create_sequences(data, window_size, horizon=3, target_idx=3):
    """
    Create sliding-window sequences for multi-horizon forecasting.

    Parameters:
    -----------
    data : array-like
        Scaled data with shape (samples, features)
    window_size : int
        Number of time steps to look back
    horizon : int
        Number of time steps to forecast (default: 3)
    target_idx : int
        Column index of the variable to forecast. The default 3 selects
        Close when the columns are ordered [Open, High, Low, Close].

    Returns:
    --------
    X : numpy array
        Input sequences with shape (n, window_size, features)
    y : numpy array
        Target values with shape (n, horizon)

    where n = samples - window_size - horizon + 1. When the input is too
    short to produce a single sequence, n is 0 and both arrays are empty but
    keep their trailing dimensions, so downstream shape handling still works.
    """
    # Accept any array-like (e.g. a DataFrame) by working on an ndarray view.
    data = np.asarray(data)
    n_sequences = len(data) - window_size - horizon + 1

    if n_sequences <= 0:
        # np.array([]) would collapse to shape (0,); return well-formed empties.
        feature_dims = data.shape[1:]
        return (
            np.empty((0, window_size) + feature_dims, dtype=data.dtype),
            np.empty((0, horizon), dtype=data.dtype),
        )

    # Input: window_size consecutive time steps of all features.
    X = np.array([data[i:i + window_size] for i in range(n_sequences)])

    # Output: the next `horizon` values of the target column.
    y = np.array([
        data[i + window_size:i + window_size + horizon, target_idx]
        for i in range(n_sequences)
    ])

    return X, y

In [12]:
# Build the supervised (X, y) sequences for every split with the chosen window.
best_window_size = 30
horizon = 3
print(f"\nUsing best window size: {best_window_size}")

# Each split is windowed independently, so no window straddles two splits.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    create_sequences(split_scaled, best_window_size, horizon)
    for split_scaled in (train_scaled, val_scaled, test_scaled)
)

print("\nSequences created:")
for split_label, inputs, targets in (
    ("Train:", X_train, y_train),
    ("Val:", X_val, y_val),
    ("Test:", X_test, y_test),
):
    # Pad the label to a fixed width so the shapes line up in the printout.
    print(f"  {split_label:<6} X={inputs.shape}, y={targets.shape}")


Using best window size: 30

Sequences created:
  Train: X=(1837, 30, 4), y=(1837, 3)
  Val:   X=(368, 30, 4), y=(368, 3)
  Test:  X=(370, 30, 4), y=(370, 3)


In [14]:
# Fixed configuration used for the final models.
FINAL_WINDOW_SIZE, FINAL_HORIZON = 30, 3  # look-back steps, forecast steps
FINAL_UNITS = 128    # units per recurrent layer
FINAL_LR = 1e-3      # Adam learning rate
FINAL_BATCH = 16     # mini-batch size passed to fit()
N_FEATURES = 4       # Open, High, Low, Close

def build_final_model(model_type: str,
                      window_size: int = FINAL_WINDOW_SIZE,
                      n_features: int = N_FEATURES,
                      units: int = FINAL_UNITS,
                      lr: float = FINAL_LR,
                      horizon: int = FINAL_HORIZON,
                      dropout: float = 0.2):
    """
    Build and compile a two-layer stacked recurrent forecaster.

    Architecture: RNN(units, return_sequences=True) -> Dropout ->
    RNN(units) -> Dropout -> Dense(horizon), where RNN is LSTM or GRU.
    The model is compiled with Adam, MSE loss and an MAE metric.

    Parameters:
    -----------
    model_type : str
        'LSTM' or 'GRU' (case-insensitive, surrounding whitespace ignored)
    window_size : int
        Number of time steps in each input sequence
    n_features : int
        Number of features per time step
    units : int
        Units in each recurrent layer
    lr : float
        Learning rate for the Adam optimizer
    horizon : int
        Number of forecast steps, i.e. the size of the output layer
    dropout : float
        Dropout rate applied after each recurrent layer (default: 0.2)

    Returns:
    --------
    model : compiled Keras Sequential model

    Raises:
    -------
    ValueError
        If model_type is neither 'LSTM' nor 'GRU'.
    """
    model_type = model_type.upper().strip()
    if model_type not in ("LSTM", "GRU"):
        raise ValueError("model_type harus 'LSTM' atau 'GRU'")

    RNN = LSTM if model_type == "LSTM" else GRU

    model = Sequential([
        # First recurrent layer emits the full sequence for the second one.
        RNN(units, return_sequences=True, input_shape=(window_size, n_features)),
        Dropout(dropout),
        # Second recurrent layer returns only its final state.
        RNN(units),
        Dropout(dropout),
        # One linear output per forecast step.
        Dense(horizon)
    ])

    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss="mse",
        metrics=["mae"]
    )
    return model

In [15]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

def train_final_fixed(X_train, y_train, X_val, y_val, model_type: str,
                      epochs: int = 150, patience: int = 15, batch_size=None):
    """
    Train one final model with early stopping and best-epoch checkpointing.

    The network dimensions are taken from the training arrays themselves, so
    the model always matches the sequences it is fitted on.

    Parameters:
    -----------
    X_train, X_val : arrays with shape (samples, window_size, features)
    y_train, y_val : arrays with shape (samples, horizon)
    model_type : str
        'LSTM' or 'GRU' (case-insensitive)
    epochs : int
        Maximum number of training epochs (default: 150)
    patience : int
        Epochs without val_loss improvement before stopping (default: 15)
    batch_size : int or None
        Mini-batch size; None falls back to FINAL_BATCH

    Returns:
    --------
    model : the trained model, holding the weights of the best val_loss epoch
    history : the History object returned by fit()
    ckpt_path : path of the checkpoint file holding the best model
    """
    if batch_size is None:
        batch_size = FINAL_BATCH

    # Size the network from the data rather than from module-level constants.
    _, window_size, n_features = np.shape(X_train)
    horizon = np.shape(y_train)[1]
    model = build_final_model(
        model_type=model_type,
        window_size=window_size,
        n_features=n_features,
        horizon=horizon,
    )

    early_stop = EarlyStopping(
        monitor="val_loss",
        patience=patience,
        restore_best_weights=True,
        verbose=1
    )

    # Strip as well as lower-case, mirroring how build_final_model normalises
    # model_type, so padded input cannot leak whitespace into the file name.
    ckpt_path = f"best_model_{model_type.strip().lower()}.keras"
    checkpoint = ModelCheckpoint(
        ckpt_path,
        monitor="val_loss",
        save_best_only=True,
        verbose=1
    )

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stop, checkpoint],
        verbose=1
    )
    return model, history, ckpt_path

# Train the final LSTM and GRU models with the fixed configuration
# (no hyper-parameter tuning of any kind).
fit_arrays = (X_train, y_train, X_val, y_val)
final_model_lstm, hist_lstm, path_lstm = train_final_fixed(*fit_arrays, "LSTM")
final_model_gru, hist_gru, path_gru = train_final_fixed(*fit_arrays, "GRU")

print(f"Saved: {path_lstm} and {path_gru}")


Epoch 1/150


Epoch 1: val_loss improved from inf to 0.00250, saving model to best_model_lstm.keras
Epoch 2/150
Epoch 2: val_loss improved from 0.00250 to 0.00193, saving model to best_model_lstm.keras
Epoch 3/150
Epoch 3: val_loss did not improve from 0.00193
Epoch 4/150
Epoch 4: val_loss did not improve from 0.00193
Epoch 5/150
Epoch 5: val_loss improved from 0.00193 to 0.00172, saving model to best_model_lstm.keras
Epoch 6/150
Epoch 6: val_loss improved from 0.00172 to 0.00120, saving model to best_model_lstm.keras
Epoch 7/150
Epoch 7: val_loss did not improve from 0.00120
Epoch 8/150
Epoch 8: val_loss did not improve from 0.00120
Epoch 9/150
Epoch 9: val_loss did not improve from 0.00120
Epoch 10/150
Epoch 10: val_loss did not improve from 0.00120
Epoch 11/150
Epoch 11: val_loss improved from 0.00120 to 0.00095, saving model to best_model_lstm.keras
Epoch 12/150
Epoch 12: val_loss did not improve from 0.00095
Epoch 13/150
Epoch 13: val_loss did not improve from 0.00095
Epoch 14/15

Epoch 28/150
Epoch 28: val_loss did not improve from 0.00079
Epoch 29/150
Epoch 29: val_loss did not improve from 0.00079
Epoch 30/150
Epoch 30: val_loss improved from 0.00079 to 0.00077, saving model to best_model_lstm.keras
Epoch 31/150
Epoch 31: val_loss did not improve from 0.00077
Epoch 32/150
Epoch 32: val_loss did not improve from 0.00077
Epoch 33/150
Epoch 33: val_loss did not improve from 0.00077
Epoch 34/150
Epoch 34: val_loss improved from 0.00077 to 0.00074, saving model to best_model_lstm.keras
Epoch 35/150
Epoch 35: val_loss did not improve from 0.00074
Epoch 36/150
Epoch 36: val_loss did not improve from 0.00074
Epoch 37/150
Epoch 37: val_loss did not improve from 0.00074
Epoch 38/150
Epoch 38: val_loss did not improve from 0.00074
Epoch 39/150
Epoch 39: val_loss did not improve from 0.00074
Epoch 40/150
Epoch 40: val_loss did not improve from 0.00074
Epoch 41/150
Epoch 41: val_loss did not improve from 0.00074
Epoch 42/150
Epoch 42: val_loss did not improve from 0.00074

Epoch 58/150
Epoch 58: val_loss did not improve from 0.00072
Epoch 59/150
Epoch 59: val_loss did not improve from 0.00072
Epoch 60/150
Epoch 60: val_loss did not improve from 0.00072
Epoch 61/150
Epoch 61: val_loss did not improve from 0.00072
Epoch 62/150
Epoch 62: val_loss did not improve from 0.00072
Epoch 63/150

Epoch 63: val_loss did not improve from 0.00072
Epoch 63: early stopping
Epoch 1/150
Epoch 1: val_loss improved from inf to 0.00163, saving model to best_model_gru.keras
Epoch 2/150
Epoch 2: val_loss improved from 0.00163 to 0.00140, saving model to best_model_gru.keras
Epoch 3/150
Epoch 3: val_loss did not improve from 0.00140
Epoch 4/150
Epoch 4: val_loss improved from 0.00140 to 0.00107, saving model to best_model_gru.keras
Epoch 5/150
Epoch 5: val_loss did not improve from 0.00107
Epoch 6/150
Epoch 6: val_loss did not improve from 0.00107
Epoch 7/150
Epoch 7: val_loss did not improve from 0.00107
Epoch 8/150
Epoch 8: val_loss did not improve from 0.00107
Epoch 9/150
Ep

Epoch 25/150
Epoch 25: val_loss did not improve from 0.00077
Epoch 26/150
Epoch 26: val_loss did not improve from 0.00077
Epoch 27/150
Epoch 27: val_loss improved from 0.00077 to 0.00075, saving model to best_model_gru.keras
Epoch 28/150
Epoch 28: val_loss did not improve from 0.00075
Epoch 29/150
Epoch 29: val_loss did not improve from 0.00075
Epoch 30/150
Epoch 30: val_loss improved from 0.00075 to 0.00071, saving model to best_model_gru.keras
Epoch 31/150
Epoch 31: val_loss did not improve from 0.00071
Epoch 32/150
Epoch 32: val_loss did not improve from 0.00071
Epoch 33/150
Epoch 33: val_loss did not improve from 0.00071
Epoch 34/150
Epoch 34: val_loss did not improve from 0.00071
Epoch 35/150
Epoch 35: val_loss did not improve from 0.00071
Epoch 36/150
Epoch 36: val_loss did not improve from 0.00071
Epoch 37/150
Epoch 37: val_loss did not improve from 0.00071
Epoch 38/150
Epoch 38: val_loss did not improve from 0.00071
Epoch 39/150
Epoch 39: val_loss did not improve from 0.00071
E

In [16]:
import json, joblib

# Persist the MinMaxScaler that was fitted on the TRAIN split (not val/test),
# so inference can apply exactly the scaling used during training.
joblib.dump(scaler, "scaler.pkl")

# The target is read from column index 3 when the sequences are built; make
# sure the recorded metadata agrees with that layout before writing it.
target_col = "Close"
assert features[3] == target_col, "target column must sit at index 3 of `features`"

meta = {
    # Taken from the list the scaler was fitted on, not re-typed by hand, so
    # the metadata cannot drift from the actual feature order.
    "features": list(features),
    "target_col": target_col,
    "window_size": FINAL_WINDOW_SIZE,
    "horizon": FINAL_HORIZON,
    "units": FINAL_UNITS,
    "lr": FINAL_LR,
    "batch_size": FINAL_BATCH
}
with open("meta.json", "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2)

print("Saved: scaler.pkl and meta.json")


Saved: scaler.pkl and meta.json
