### FIX !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

In [None]:
import numpy as np
import pandas as pd
import random

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, accuracy_score
from deap import base, creator, tools, algorithms

# 1. LOAD and PREPROCESS DATA
#    - Read the CSV, using the 'Date' column as a datetime index
#    - Add technical features: MA5, MA10 (moving averages of Close) and
#      ROC5 (5-row rate of change of Close)
#    - Drop every row that contains NaN (the rolling / pct_change warm-up rows)
df = pd.read_csv('usd_idr_processed_data.csv', parse_dates=['Date'], index_col='Date')
df['MA5'] = df['Close'].rolling(5).mean()
df['MA10'] = df['Close'].rolling(10).mean()
df['ROC5'] = df['Close'].pct_change(5)
df.dropna(inplace=True)

# Feature columns fed to the model (column order matters: the scaler and the
# model both see the columns in exactly this order)
features = ['Open','High','Low','Close','MA5','MA10','ROC5']
data = df[features].values

# Scale every feature with min/max statistics.
# The scaler is fitted ONLY on the chronologically first 80 % of the rows so
# that the minima/maxima of the later rows (which end up in the 20 % test
# split created further down) do not leak into training. The fitted scaler is
# then applied to all rows; later rows may therefore fall slightly outside
# the [0, 1] range.
scaler = MinMaxScaler()
n_fit_rows = int(len(data) * 0.8)
scaler.fit(data[:n_fit_rows])
data_scaled = scaler.transform(data)

# 2. BUILD THE DATASET with a sliding window
#    - Each sample is a sequence of `window_size` consecutive scaled rows
#    - Each label is the percentage change of Close between the two rows that
#      follow the window (row i+window_size -> row i+window_size+1)
window_size = 20
threshold_pct = 0.002  # ±0.2%

# Labels must be derived from the RAW closing price. A min-max scaled Close is
# 0 at the column minimum (division by zero -> inf/NaN), and a relative change
# of scaled values is not a price percentage, so the ±0.2 % threshold would be
# meaningless. `df` and `data_scaled` have the same row order and length.
close_raw = df['Close'].values

X, y = [], []
for i in range(len(data_scaled) - window_size - 1):
    X.append(data_scaled[i:i+window_size])
    c0 = close_raw[i+window_size]
    c1 = close_raw[i+window_size+1]
    pct = (c1 - c0) / c0
    if pct > threshold_pct:
        y.append(2)  # up
    elif pct < -threshold_pct:
        y.append(0)  # down
    else:
        y.append(1)  # flat


# Convert to numpy arrays; labels become one-hot vectors of length 3
X = np.array(X)
y = to_categorical(y, num_classes=3)

# Chronological split: first 80 % of the samples for training, rest for testing
split = int(len(X)*0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# 3. STACKED LSTM MODEL with Dropout
def create_model(units, optimizer_name, lr):
    """Build and compile the three-layer LSTM trend classifier.

    Args:
        units: Width of the first LSTM layer. The second and third LSTM
            layers use ``units // 2`` and ``units // 4`` units.
        optimizer_name: ``'adam'`` selects Adam; any other value selects
            RMSprop.
        lr: Learning rate handed to the optimizer.

    Returns:
        A compiled ``Sequential`` model whose input is
        ``(window_size, len(features))`` and whose output is a 3-way softmax,
        compiled with categorical cross-entropy and the accuracy metric.
    """
    # Local import keeps this block self-contained. An explicit Input layer
    # replaces the `input_shape=` layer argument, which makes recent Keras
    # versions emit a UserWarning on every model construction.
    from tensorflow.keras import Input

    model = Sequential()
    model.add(Input(shape=(window_size, len(features))))
    model.add(LSTM(units, return_sequences=True, activation='relu'))
    model.add(Dropout(0.2))  # dropout regularisation
    model.add(LSTM(units//2, return_sequences=True, activation='relu'))
    model.add(Dropout(0.2))  # dropout regularisation
    # Last recurrent layer returns only its final state for the classifier head
    model.add(LSTM(units//4, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    # Pass the learning rate by keyword so the intent does not depend on the
    # optimizer's positional parameter order.
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=lr)
    else:
        optimizer = RMSprop(learning_rate=lr)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# 4. FITNESS FUNCTION for the GA
def eval_fitness(ind):
    """Train one candidate network and score it for the genetic algorithm.

    Args:
        ind: Sequence ``(units, opt_idx, lr_exp, epochs)`` where ``opt_idx``
            indexes ``['adam', 'rmsprop']`` and the learning rate is
            ``10 ** -lr_exp``.

    Returns:
        A one-element tuple holding the highest validation accuracy reached
        in any epoch (DEAP expects fitness values as a tuple).
    """
    # Local import keeps this block self-contained.
    from tensorflow.keras import backend as K

    units, opt_idx, lr_exp, epochs = ind
    opt_name = ['adam','rmsprop'][opt_idx]
    lr = 10**(-lr_exp)

    # Many candidate models are built during the search; drop the graph/state
    # of the previous candidate so memory does not grow with every evaluation.
    K.clear_session()

    model = create_model(units, opt_name, lr)
    # Validate on the last 20 % of the TRAINING samples rather than on the
    # test split: selecting hyperparameters on (X_test, y_test) would make the
    # final test accuracy optimistically biased. Keras takes the validation
    # slice from the end of the arrays before shuffling, which keeps the
    # chronological order of this time series.
    hist = model.fit(X_train, y_train, epochs=epochs,
                     batch_size=16, verbose=0,
                     validation_split=0.2)

    # Fitness = best validation accuracy observed during training
    fitness = max(hist.history['val_accuracy'])
    return (fitness,)

# 5. GA SETUP (DEAP)
# Single-objective maximisation; an individual is a plain list of four genes.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# (gene name, lower bound, upper bound) in chromosome order. The same bounds
# drive both random initialisation and mutation.
gene_ranges = [
    ("units", 50, 200),
    ("opt_idx", 0, 1),
    ("lr_exp", 2, 5),     # lr = 1e-2 → 1e-5
    ("epochs", 10, 50),
]

toolbox = base.Toolbox()
for gene, lower, upper in gene_ranges:
    # One random-integer generator per gene, registered under the gene's name
    toolbox.register(gene, random.randint, lower, upper)

# An individual is produced by calling each gene generator once, in order
gene_makers = tuple(getattr(toolbox, gene) for gene, _, _ in gene_ranges)
toolbox.register("individual", tools.initCycle, creator.Individual,
                 gene_makers, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Genetic operators
toolbox.register("evaluate", eval_fitness)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt,
                 low=[lower for _, lower, _ in gene_ranges],
                 up=[upper for _, _, upper in gene_ranges],
                 indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)

# 6. RUN THE EVOLUTIONARY ALGORITHM to search for the best hyperparameters
pop = toolbox.population(n=20)
NGEN = 15

# Tournament selection gives no guarantee that the best individual survives,
# so keep a copy of the best one ever evaluated.
hof = tools.HallOfFame(1)

for gen in range(NGEN):
    print(f"-- Generation {gen+1} --")
    # varAnd works on clones and invalidates the fitness of every individual
    # it changes through crossover or mutation.
    offs = algorithms.varAnd(pop, toolbox, cxpb=0.5, mutpb=0.2)
    # Each evaluation trains a network, so only (re)train individuals whose
    # fitness is missing or was invalidated; untouched clones keep theirs.
    invalid = [ind for ind in offs if not ind.fitness.valid]
    for ind, fit in zip(invalid, map(toolbox.evaluate, invalid)):
        ind.fitness.values = fit
    hof.update(offs)
    pop = toolbox.select(offs, k=len(pop))
    print("Best fitness:", max(ind.fitness.values[0] for ind in pop))

# 7. PICK THE BEST INDIVIDUAL (best over all generations, not just the last)
best = hof[0]
units, opt_idx, lr_exp, epochs = best
opt_name = ['adam','rmsprop'][opt_idx]
lr = 10**(-lr_exp)
print("Best hyperparameters:", units, opt_name, lr, epochs)

# 8. FINAL TRAINING and EVALUATION
# Rebuild the network with the selected hyperparameters and train it; the
# held-out split is passed as validation data so its metrics are reported
# after every epoch.
model = create_model(units, opt_name, lr)
hist = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=16,
    verbose=1,
)
loss, acc = model.evaluate(X_test, y_test, verbose=0)

print("Final Test Loss:", loss, "Test Accuracy:", acc)

# Full classification report, so the result can be compared with the
# predictions made on new data later on.
labels = ['Turun', 'Stagnan', 'Naik']      # class display names, index = class id
y_true = y_test.argmax(axis=1)             # one-hot -> true class id
y_pred_prob = model.predict(X_test)        # per-class probabilities
y_pred = y_pred_prob.argmax(axis=1)        # most probable class id
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=labels))
print("Accuracy Score:", accuracy_score(y_true, y_pred))

# 9. SAVE MODEL & SCALER FOR STAGE 5
import joblib

model.save("model_usdidr_final.h5")
joblib.dump(scaler, "scaler_usdidr.pkl")
print("Model dan scaler berhasil disimpan.")

-- Generation 1 --


  super().__init__(**kwargs)


Best fitness: 0.5111940503120422
-- Generation 2 --


# Prediksi Data Baru

In [None]:
import joblib
from tensorflow.keras.models import load_model

# Restore the trained network and the fitted scaler from the files in the
# working directory.
# NOTE(review): joblib.load unpickles the file, so only load a scaler file
# that comes from a trusted source.
model_path = "model_usdidr_final.h5"
scaler_path = "scaler_usdidr.pkl"
model = load_model(model_path)
scaler = joblib.load(scaler_path)

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, accuracy_score

# 1. Load the new data
# --------------------
# Read the new historical data, parsing the 'Date' column as a datetime index
new_df = pd.read_csv('USDJPY_processed_data.csv', parse_dates=['Date'], index_col='Date')

# 2. Add the technical features
# --------------------
# MA5 and MA10 = moving averages of Close
# ROC5 = rate of change of Close over 5 rows
new_df['MA5'] = new_df['Close'].rolling(5).mean()
new_df['MA10'] = new_df['Close'].rolling(10).mean()
new_df['ROC5'] = new_df['Close'].pct_change(5)

# Drop the rows left with NaN by rolling / pct_change
new_df.dropna(inplace=True)

# 3. Select the same features, in the same order, as during training
# --------------------
features = ['Open', 'High', 'Low', 'Close', 'MA5', 'MA10', 'ROC5']
new_data = new_df[features].values

# 4. Normalise with the scaler from training
# --------------------
# `scaler` must already be loaded in the session (e.g. via joblib.load).
# NOTE(review): the loaded file is named scaler_usdidr.pkl, so it was
# presumably fitted on USD/IDR prices; confirm that applying it to USDJPY
# values is intended.
new_data_scaled = scaler.transform(new_data)

# 5. Build the model input with a sliding window
# --------------------
window_size = 20
if len(new_data_scaled) <= window_size:
    # Without this check an empty array would reach model.predict and fail
    # with an unrelated shape error.
    raise ValueError(
        f"Data baru terlalu sedikit: {len(new_data_scaled)} baris, "
        f"butuh lebih dari {window_size} baris."
    )
# Window i covers rows i .. i+window_size-1; its prediction is attached to
# row i+window_size below, so the window ending on the very last row is not built.
X_new = np.array([
    new_data_scaled[i:i+window_size]
    for i in range(len(new_data_scaled) - window_size)
])

# 6. Predict the trend class
# --------------------
y_pred = model.predict(X_new)
y_class = np.argmax(y_pred, axis=1)  # most probable class (0 = down, 1 = flat, 2 = up)

# 7. Attach the predictions to the DataFrame
# --------------------
# The first `window_size` rows have no prediction, so they are dropped.
# `.copy()` makes the result an independent frame; assigning a column to a
# bare slice triggers pandas' SettingWithCopyWarning.
new_df = new_df.iloc[window_size:].copy()
new_df['Predicted_Trend'] = y_class

# 8. Evaluate when ground-truth labels are available (column 'True_Trend')
# --------------------
has_truth = 'True_Trend' in new_df.columns
if not has_truth:
    print("[WARNING] Tidak ada kolom 'True_Trend'. Evaluasi klasifikasi dilewati.")
else:
    y_true = new_df['True_Trend'].values
    trend_names = ['Turun', 'Stagnan', 'Naik']
    print("\n[INFO] Classification Report (data baru):")
    print(classification_report(y_true, y_class, target_names=trend_names))
    print("Accuracy Score:", accuracy_score(y_true, y_class))

    # Flag every row whose prediction differs from the true label (1 = wrong)
    mismatches = new_df['True_Trend'] != new_df['Predicted_Trend']
    new_df['Wrong_Pred'] = mismatches.astype(int)
    print("\n[INFO] Jumlah prediksi salah:", new_df['Wrong_Pred'].sum())

# 9. Visualise the predicted trend
# --------------------
# Closing price as a line, with one marker per row coloured by the predicted
# class id through the 'coolwarm' colormap.
plt.figure(figsize=(15,6))
plt.plot(new_df.index, new_df['Close'], label='Close Price', color='black')
plt.scatter(new_df.index, new_df['Close'], c=new_df['Predicted_Trend'],
            cmap='coolwarm', label='Predicted Trend', marker='o', s=20)
# The plotted frame is read from USDJPY_processed_data.csv, so the title names
# that pair (it previously said USD/IDR, the pair the model was trained on).
plt.title("Prediksi Tren USD/JPY vs Harga Penutupan")
plt.xlabel("Tanggal")
plt.ylabel("Harga Penutupan")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Closing price as a line, overlaid with one scatter series per predicted class
colors = ['red', 'gray', 'green']                      # indexed by class id
labels_map = {0: 'Turun', 1: 'Stagnan', 2: 'Naik'}     # class id -> legend text

plt.figure(figsize=(14,6))
plt.plot(new_df.index, new_df['Close'], label='Harga USDJPY')

# One scatter call per class so each class gets its own colour and legend entry
for label, trend_name in labels_map.items():
    mask = new_df['Predicted_Trend'].eq(label)
    picked = new_df.loc[mask, 'Close']
    plt.scatter(picked.index, picked, label=trend_name,
                alpha=0.6, color=colors[label])

plt.legend()
plt.title("Prediksi Tren USDJPY")
plt.grid(True)
plt.show()

In [None]:
from sklearn.metrics import classification_report

# y_true holds the actual labels (0, 1, 2); y_class holds the model's predictions.
# NOTE(review): both must already exist in the session and have equal length.
trend_names = ['Turun', 'Stagnan', 'Naik']
report_text = classification_report(y_true, y_class, target_names=trend_names)
print(report_text)

In [None]:
import matplotlib.pyplot as plt

# Same chart built through the object-oriented matplotlib interface:
# closing price line plus markers coloured by predicted class id.
fig, ax = plt.subplots(figsize=(12,5))
ax.plot(new_df.index, new_df['Close'], label='Close Price')
ax.scatter(
    new_df.index,
    new_df['Close'],
    c=new_df['Predicted_Trend'],
    cmap='coolwarm',
    label='Predicted Trend',
    marker='o',
)
ax.set_title("USDJPY - Predicted Trend")
ax.legend()
ax.grid()
plt.show()

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report

# Build the actual labels (y_true) from the closing prices
threshold_pct = 0.002
close_prices = new_df['Close'].values

# Relative change from each row to the next one; the last row has no
# successor, so there is one label fewer than there are rows.
pct_change = np.diff(close_prices) / close_prices[:-1]
labels = np.select(
    [pct_change > threshold_pct, pct_change < -threshold_pct],
    [2, 0],      # 2 = up, 0 = down
    default=1,   # 1 = flat
)

# The computed labels ARE the ground truth. Previously they were built but
# never assigned, so the report below used whatever `y_true` happened to be
# left in the session (or failed with a NameError).
y_true = labels

# Align lengths: drop the prediction(s) that have no label
y_class = y_class[:len(y_true)]

# Confirm the lengths
print(f'y_true: {len(y_true)}, y_class: {len(y_class)}')

# Evaluate performance. `labels=[0, 1, 2]` pins the class list so that the
# three target names always match, even if a class is absent from the data.
report = classification_report(
    y_true,
    y_class,
    labels=[0, 1, 2],
    target_names=['Turun', 'Stagnan', 'Naik'],
    output_dict=True,
)

# Save to CSV
report_df = pd.DataFrame(report).transpose()
report_df.to_csv('laporan_klasifikasi_usdjpy.csv')
print(report_df)
