# UJIAN AKHIR SEMESTER 
## TIMESERIES

### NAMA : Paul Wijaya Verda Kusuma
### NIM  : 215314051

### LSTM untuk Klasifikasi Trend Saham dengan Evolutionary Algorithm

In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam, Adamax

from deap import base, creator, tools, algorithms
import joblib


In [2]:
# ===========================
# === STEP 1: Load Data ====
# ===========================
# Read the preprocessed table; the 'Date' column becomes the (parsed) index.
df = pd.read_csv(
    'usd_idr_processed_data.csv',
    index_col='Date',
    parse_dates=True,
)

# Technical indicators, all derived from the closing price:
# 5- and 10-row moving averages plus the 5-row rate of change.
close_price = df['Close']
df['MA5'] = close_price.rolling(window=5).mean()
df['MA10'] = close_price.rolling(window=10).mean()
df['ROC5'] = close_price.pct_change(periods=5)
def calculate_rsi(series, period=14):
    """Return a Relative Strength Index built from simple rolling means.

    Args:
        series: pandas Series of prices.
        period: number of rows in each rolling window.

    Returns:
        A Series aligned with *series*. Rows whose rolling window is not yet
        full are NaN, and so are windows with neither gains nor losses
        (0 / 0).
    """
    change = series.diff()

    # Split the moves by sign. Everything that does not match the condition,
    # including the leading NaN produced by diff(), is replaced by 0.
    upward = change.where(change > 0, 0)
    downward = change.where(change < 0, 0)

    avg_gain = upward.rolling(window=period).mean()
    # The negation is applied to the rolling mean, giving a non-negative loss.
    avg_loss = -downward.rolling(window=period).mean()

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))
# Add the RSI indicator computed from the closing price.
df['RSI'] = calculate_rsi(df['Close'])

# Remove every row that still holds a NaN in any column (the indicator
# warm-up rows among them); the frame is modified in place.
df.dropna(inplace=True)

# Columns fed to the model. 'Close' is first, so it ends up at index 0 of
# each row of the selected data.
features = [
    'Close', 'High', 'Low', 'Open',
    'MA5', 'MA10', 'ROC5', 'RSI',
]
data = df.loc[:, features]


In [3]:
# ===========================
# === STEP 2: Normalisasi ===
# ===========================
# Min-max scale every feature column with MinMaxScaler's default settings.
# NOTE(review): the scaler is fitted on the complete series, so the rows that
# later land in the test split also shape the scaling - confirm this is
# acceptable for the evaluation.
scaler = MinMaxScaler()
data_scaled = scaler.fit(data).transform(data)

In [4]:
# ===========================
# === STEP 3: Dataset ===
# ===========================
def generate_dataset(data_scaled, window_size=20, threshold=0.0035, target_col=0):
    """Build sliding-window samples and one-hot trend labels.

    Sample ``i`` is ``data_scaled[i:i + window_size]``. Its label compares
    column ``target_col`` of the row right after the window with the last row
    inside the window:

    * difference >  ``threshold``  -> ``[0, 0, 1]`` (up / "Naik")
    * difference < ``-threshold``  -> ``[1, 0, 0]`` (down / "Turun")
    * otherwise                    -> ``[0, 1, 0]`` (stationary / "Stasioner")

    Every row that has a successor-labelled window is used, including the
    last one (the previous loop bound stopped one sample early).

    Args:
        data_scaled: 2-D array-like of shape (rows, features).
        window_size: number of consecutive rows per sample; must be >= 1.
        threshold: minimum absolute difference counted as a move.
        target_col: column index the label is derived from.

    Returns:
        ``(X, y)`` with ``X`` of shape (samples, window_size, features) and
        ``y`` of shape (samples, 3). When the input is too short for a single
        sample both arrays have zero rows but keep those trailing dimensions.

    Raises:
        ValueError: if ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    data = np.asarray(data_scaled)
    n_samples = len(data) - window_size
    if n_samples <= 0:
        empty_X = np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)
        return empty_X, np.empty((0, 3), dtype=int)

    X = np.stack([data[start:start + window_size] for start in range(n_samples)])

    # target[k] is the row following window k, previous[k] the window's last row.
    target = data[window_size:, target_col]
    previous = data[window_size - 1:-1, target_col]
    diff = target - previous

    # Class index: 0 = down, 1 = stationary, 2 = up. A NaN difference fails
    # both comparisons and therefore counts as stationary.
    classes = np.where(diff > threshold, 2, np.where(diff < -threshold, 0, 1))
    y = np.eye(3, dtype=int)[classes]
    return X, y

def split_dataset(X, y, ratio=0.8):
    """Cut X and y at the same position, keeping their order.

    The cut index is ``int(len(X) * ratio)``; the leading part is the training
    portion and the remainder the test portion.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.
    """
    cut = int(len(X) * ratio)
    X_head, X_tail = X[:cut], X[cut:]
    y_head, y_tail = y[:cut], y[cut:]
    return X_head, X_tail, y_head, y_tail


In [5]:
# ================================
# === STEP 4: Model Creator ===
# ================================
def create_model(units, optimizer_name, lr, input_shape):
    """Build and compile the two-layer LSTM classifier.

    Args:
        units: width of the first LSTM layer; the second uses ``units // 2``.
        optimizer_name: identifier resolved through ``tf.keras.optimizers.get``.
        lr: learning rate assigned to the resolved optimizer.
        input_shape: shape passed to the ``Input`` layer.

    Returns:
        A compiled ``Sequential`` model with a 3-way softmax output, trained
        with categorical cross-entropy and reporting accuracy.
    """
    def layer_sequence():
        # Generator: each layer is constructed only when the loop below asks
        # for it, i.e. right before it is added to the model.
        yield Input(shape=input_shape)
        yield LSTM(units, return_sequences=True, activation='tanh')
        yield BatchNormalization()
        yield Dropout(0.2)
        yield LSTM(units // 2, activation='tanh')
        yield BatchNormalization()
        yield Dropout(0.2)
        yield Dense(3, activation='softmax')

    network = Sequential()
    for layer in layer_sequence():
        network.add(layer)

    # Resolve the optimizer by name, then override its learning rate.
    chosen_optimizer = tf.keras.optimizers.get(optimizer_name)
    chosen_optimizer.learning_rate = lr

    network.compile(
        optimizer=chosen_optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


In [6]:
def get_class_weight(y_train):
    """Return 'balanced' class weights keyed by class index.

    Args:
        y_train: one-hot label array of shape (samples, classes).

    Returns:
        ``{class_index: weight}`` for every class that occurs in ``y_train``.
        The keys are the actual class labels, so the mapping stays correct
        when some class is absent from the training labels (keying by
        position would then hand a weight to the wrong class).
    """
    y_int = np.argmax(y_train, axis=1)
    classes = np.unique(y_int)
    weights = compute_class_weight(class_weight='balanced',
                                   classes=classes,
                                   y=y_int)
    # weights[i] belongs to classes[i]; pair them explicitly.
    return {int(label): float(weight) for label, weight in zip(classes, weights)}

In [7]:
# ================================
# === STEP 5: Genetic Fitness ===
# ================================
def eval_fitness(ind, X_train, y_train, X_test, y_test, window_size):
    """Train one candidate model and return its accuracy together with the model.

    Gene layout of ``ind``:
        ``ind[0]`` LSTM units, ``ind[1]`` optimizer index,
        ``ind[2]`` learning-rate exponent (``lr = 10 ** -ind[2]``),
        ``ind[3]`` maximum number of epochs.

    ``window_size`` is accepted but not used inside this function.

    NOTE(review): ``(X_test, y_test)`` serves both as the validation data that
    drives early stopping and as the data for the returned accuracy, so the
    score is not measured on untouched data.

    Returns:
        ``(accuracy, trained_model)``.
    """
    optimizer_names = ('adam', 'rmsprop', 'nadam', 'adamax')

    n_units = int(ind[0])
    n_epochs = int(ind[3])
    lr_exponent = ind[2]

    # Round the optimizer gene and force it into the valid index range 0..3.
    opt_position = min(max(int(round(ind[1])), 0), 3)
    opt_name = optimizer_names[opt_position]

    learning_rate = 10 ** (-lr_exponent)
    input_shape = (X_train.shape[1], X_train.shape[2])

    model = create_model(n_units, opt_name, learning_rate, input_shape)
    class_weight = get_class_weight(y_train)
    stopper = EarlyStopping(patience=3, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        epochs=n_epochs,
        batch_size=16,
        verbose=0,
        validation_data=(X_test, y_test),
        callbacks=[stopper],
        class_weight=class_weight,
    )

    # evaluate() yields (loss, accuracy); only the accuracy is the fitness.
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)

    # The model is handed back so the caller can keep it.
    return accuracy, model


In [8]:
# ================================
# === STEP 6: Setup GA Toolbox ===
# ================================
# Single-objective fitness that is maximised (weight +1.0); an individual is
# a plain list carrying such a fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Gene generators: the sampling function and its bounds are registered
# directly instead of being wrapped in a lambda.
toolbox.register("attr_units", random.randint, 50, 150)   # LSTM units
toolbox.register("attr_opt", random.randint, 0, 3)        # optimizer index: 0=adam, 1=rmsprop, 2=nadam, 3=adamax
toolbox.register("attr_lr", random.uniform, 2, 4)         # learning-rate exponent
toolbox.register("attr_epochs", random.randint, 10, 50)   # epochs

# One individual = one pass over the four generators, in this order.
gene_generators = (
    toolbox.attr_units,
    toolbox.attr_opt,
    toolbox.attr_lr,
    toolbox.attr_epochs,
)
toolbox.register("individual", tools.initCycle, creator.Individual, gene_generators, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Variation and selection operators.
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("select", tools.selTournament, tournsize=3)

In [9]:
# ================================
# === Custom Mutate Function ===
# ================================
def custom_mutate(ind, indpb=0.2, units_bounds=(50, 150), n_optimizers=4,
                  lr_exp_bounds=(2, 4), epochs_bounds=(10, 50)):
    """Mutate an individual in place, one independent coin flip per gene.

    Gene layout: ``[units, optimizer_index, lr_exponent, epochs]``.

    The default bounds mirror the ranges the genes are initially sampled
    from, so a mutation can no longer squeeze a value into a narrower range
    than the initial population uses. The optimizer gene is replaced by a
    different valid index in ``0 .. n_optimizers - 1``; the former
    ``1 - index`` toggle produced negative indices for optimizers 2 and 3.

    Args:
        ind: the individual (mutable sequence of four genes).
        indpb: per-gene mutation probability.
        units_bounds: inclusive (low, high) for the units gene.
        n_optimizers: number of selectable optimizers.
        lr_exp_bounds: inclusive (low, high) for the learning-rate exponent.
        epochs_bounds: inclusive (low, high) for the epochs gene.

    Returns:
        A one-element tuple holding the same (mutated) individual, which is
        the return shape the original operator used.
    """
    if random.random() < indpb:
        ind[0] = int(np.clip(ind[0] + random.gauss(0, 5), *units_bounds))
    if random.random() < indpb:
        current = int(round(ind[1]))
        alternatives = [idx for idx in range(n_optimizers) if idx != current]
        if alternatives:  # nothing to switch to when only one optimizer exists
            ind[1] = random.choice(alternatives)
    if random.random() < indpb:
        # float() keeps the gene a plain Python float instead of np.float64.
        ind[2] = float(np.clip(ind[2] + random.gauss(0, 0.3), *lr_exp_bounds))
    if random.random() < indpb:
        ind[3] = int(np.clip(ind[3] + random.gauss(0, 2), *epochs_bounds))
    return ind,

# Make custom_mutate available on the toolbox under the alias "mutate"
# (presumably the operator algorithms.varAnd applies to offspring - confirm
# against the DEAP documentation).
toolbox.register("mutate", custom_mutate)


In [None]:
# ================================
# === STEP 6: GA + Window Loop ===
# ================================
# Optimizer names in gene order; the same list eval_fitness indexes into.
OPTIMIZER_NAMES = ['adam', 'rmsprop', 'nadam', 'adamax']
POP_SIZE = 20
NGEN = 10

window_sizes = [10, 20, 30, 40, 50, 60, 70]
# One tuple per window: (window_size, acc, units, opt_name, lr, epochs, model)
results = []

for window_size in window_sizes:
    X, y = generate_dataset(data_scaled, window_size)
    X_train, X_test, y_train, y_test = split_dataset(X, y)

    # Best candidate evaluated so far for this window. Tracking it here means
    # (a) a strong candidate that later drops out of the population is not
    # lost, and (b) no Keras model is attached to an individual, so the
    # cloning of individuals done during variation never has to copy a model.
    best_seen = {"acc": None, "genes": None, "model": None}

    def fitness_wrapper(ind):
        acc, model = eval_fitness(ind, X_train, y_train, X_test, y_test, window_size)
        if best_seen["model"] is None or acc > best_seen["acc"]:
            best_seen.update(acc=acc, genes=list(ind), model=model)
        return (acc,)

    # Registering again under the same alias replaces the previous window's
    # evaluator.
    toolbox.register("evaluate", fitness_wrapper)

    pop = toolbox.population(n=POP_SIZE)

    for gen in range(NGEN):
        offspring = algorithms.varAnd(pop, toolbox, cxpb=0.5, mutpb=0.2)

        # Only individuals without a valid fitness (new or modified ones)
        # need training; untouched clones keep the score they already have.
        for ind in offspring:
            if not ind.fitness.valid:
                ind.fitness.values = toolbox.evaluate(ind)

        pop = toolbox.select(offspring, k=len(pop))

    units, opt_idx, lr_exp, epochs = best_seen["genes"]
    units = int(units)
    # Clamp exactly like eval_fitness does, so the reported optimizer is the
    # one that was actually trained (a negative gene must not wrap around).
    opt_idx = max(0, min(3, int(round(opt_idx))))
    opt_name = OPTIMIZER_NAMES[opt_idx]
    lr = 10 ** (-lr_exp)
    acc = best_seen["acc"]
    window_model = best_seen["model"]

    results.append((window_size, acc, units, opt_name, lr, int(epochs), window_model))
    print(f"Window: {window_size}, Akurasi: {acc:.4f}, Units: {units}, Opt: {opt_name}, "
          f"LR: {lr:.5f}, Epochs: {int(epochs)}")

    # Classification report of the best model on the test split.
    y_pred = window_model.predict(X_test)
    y_pred_class = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    # labels= pins the three classes so the report still works when one of
    # them is missing from this split; zero_division=0 reports such rows as 0.
    print(classification_report(y_true, y_pred_class, labels=[0, 1, 2],
                                target_names=['Turun', 'Stasioner', 'Naik'],
                                zero_division=0))


In [None]:
# ================================
# === STEP 7: Save Model & Scaler
# ================================
import joblib

# Choose the window whose entry has the highest accuracy (tuple element 1)
# and unpack its configuration into module-level names.
best_result = max(results, key=lambda entry: entry[1])
(window_size, acc, units, opt_name,
 lr, epochs, best_model) = best_result

# Persist the winning model (.keras format) and the fitted scaler.
best_model.save("model_lstm_tren_terbaik.keras")
joblib.dump(scaler, "scaler_lstm_tren.save")

print("Model dan Scaler berhasil disimpan.")


In [None]:
# ================================
# === STEP 8: Prediksi Data Baru
# ================================
label_map = {0: 'Turun', 1: 'Stasioner', 2: 'Naik'}

# The most recent `window_size` scaled rows, with a leading batch axis added
# so the array has the (1, window, features) layout passed to predict().
last_window = data_scaled[-window_size:][np.newaxis, ...]

pred = best_model.predict(last_window)
predicted_class = np.argmax(pred)

print("Prediksi tren data terbaru:", label_map[predicted_class])

In [None]:
# Back-test on the last step whose outcome is known.
#
# The forecast stored in `predicted_class` refers to the step AFTER the final
# row, for which no actual value exists yet. Comparing it with the move
# between the last two rows would pair a prediction with the wrong step, so a
# separate prediction is made from the window that ends one row earlier.
backtest_window = np.expand_dims(data_scaled[-window_size - 1:-1], axis=0)
backtest_class = int(np.argmax(best_model.predict(backtest_window)))

# Label the realised move with the same rule used for the training labels:
# difference of the scaled 'Close' column (index 0) against the default
# threshold of generate_dataset.
LABEL_THRESHOLD = 0.0035
delta = data_scaled[-1][0] - data_scaled[-2][0]

if delta > LABEL_THRESHOLD:
    actual_class = 2  # up
elif delta < -LABEL_THRESHOLD:
    actual_class = 0  # down
else:
    actual_class = 1  # stationary

print("Prediksi untuk langkah terakhir:", label_map[backtest_class])
print("Label aktual:", label_map[actual_class])
print("Prediksi benar?", actual_class == backtest_class)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

label_map = {0: 'Turun', 1: 'Stasioner', 2: 'Naik'}

# Label the whole scaled series with a 20-row window; only the labels are
# used below, converted from one-hot rows to class indices.
X_all, y_all = generate_dataset(data_scaled, window_size=20)
y_labels = np.argmax(y_all, axis=1)

# Count each class and print the totals in first-seen order.
label_counts = Counter(y_labels)
for label in label_counts:
    print(f"{label_map[label]}: {label_counts[label]} data")

# Histogram with one unit-wide bin centred on each class index.
plt.hist(y_labels, bins=[-0.5, 0.5, 1.5, 2.5], edgecolor='black')
plt.xticks(list(label_map), list(label_map.values()))
plt.title('Distribusi Label')
plt.xlabel('Label')
plt.ylabel('Jumlah')
plt.show()


In [None]:
# ================================
# === Ringkasan Hasil Terbaik ===
# ================================
# Print the winning configuration. window_size, acc, units, opt_name, lr and
# epochs are the module-level names unpacked from `best_result` in the save
# step, so that cell has to run before this one.
print("\n=== Ringkasan Hasil Terbaik ===")
print(f"Window Size Optimal: {window_size}")
print(f"Akurasi Tertinggi: {acc:.4f}")
print(f"Units: {units}, Optimizer: {opt_name}, Learning Rate: {lr:.5f}, Epochs: {epochs}")